# Brooks Tokenizer Analytics

Quickly explore frequency and co-occurrence statistics for Brooks-style tokens generated from your data.


In [None]:
from trading_transformers.features.tokens import BrooksTokenizer
from trading_transformers.tokenizers import BrooksTokenVocabulary, compute_stats, cooccurrence_matrix
import pandas as pd

# Synthetic OHLC bars for demonstration (five rows; no volume column).
data = pd.DataFrame(
    data={
        'open': [100, 101, 102, 101, 100],
        'high': [101, 102, 103, 102, 101],
        'low': [99, 100, 101, 100, 99],
        'close': [100.5, 101.5, 102.5, 101.0, 100.0],
    },
    columns=['open', 'high', 'low', 'close'],
)

# Tokenize the demo frame with a default-constructed BrooksTokenizer.
tokenizer = BrooksTokenizer()
tokens = tokenizer.transform(data)
# Bare expression on the cell's last line so the notebook renders the result.
tokens


In [None]:
# Compute summary stats over the token output from the tokenizer cell.
# top_k=5 is forwarded to compute_stats; presumably it limits how many of the
# most frequent tokens are reported -- TODO confirm against compute_stats.
stats = compute_stats(tokens, top_k=5)
# Last expression of the cell, so the notebook displays it.
stats


In [None]:
# Split the token output into two windows that stand in for separate batches.
# The slices share index 2, so that element appears in both sequences.
leading_window = tokens[:3]
trailing_window = tokens[2:]
sequences = [leading_window, trailing_window]

# Build the co-occurrence matrix across those sequences and display it.
co_matrix = cooccurrence_matrix(sequences)
co_matrix


In [None]:
# Build a vocabulary object that downstream models can consume.
# NOTE(review): from_sequences is handed the flat `tokens` output here, while
# cooccurrence_matrix in the previous cell receives a list of sequences --
# confirm that from_sequences accepts a single sequence as well.
vocab = BrooksTokenVocabulary.from_sequences(tokens)

# Display how many entries the vocabulary holds.
vocab_size = len(vocab.tokens)
vocab_size
