In [1]:
import torch
from chronos import BaseChronosPipeline

import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import rrt_utils as rrt
import attn_lens as attn

[2025-05-24 12:47:06,846] [INFO] [real_accelerator.py:239:get_accelerator] Setting ds_accelerator to cpu (auto detect)


In [60]:
# Build a periodic token series: `num_unique_sequences` random sequences repeated
# `repeat_factor` times, optionally extended, then split into an encoder input
# (`enc_ids`) and a short decoder prompt (`dec_ids`).

# Fix the RNG state so a fresh "Restart & Run All" regenerates the same tokens.
# NOTE(review): rrt.generate_random_token_ids is not visible from here; torch and
# numpy are both seeded in case it draws from either one -- confirm which it uses.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

# define the vocab as all the tokens except the special tokens (ids 0 and 1 are skipped)
vocab = torch.arange(2, 4096)

batch_size = 1
num_unique_sequences = 1  # number of unique sequences in the batch
repeat_factor = 10  # number of times to repeat the sequences
extension = 0  # after repeating everything, repeat this many sequences again
sub_extension = 3  # some additional tokens in the next sequence
sequence_length = 10  # length of the sequences

tokens = [
    rrt.generate_random_token_ids(vocab, sequence_length, batch_size=batch_size, include_eos=False)
    for _ in range(num_unique_sequences)
]

# Sequence that continues the series after the repeated part. The modulo wraps back
# to the first sequence when `extension == num_unique_sequences` (a full extra
# repetition), which previously raised an IndexError.
next_sequence = tokens[extension % num_unique_sequences]

# Encoder input: the repeated sequences, the extension, and the first
# `sub_extension` tokens of the next sequence.
enc_ids = rrt.stack_sequences(
    tokens * repeat_factor + tokens[:extension] + [next_sequence[:, :sub_extension]]
)
attention_mask = torch.ones_like(enc_ids, dtype=torch.bool)

# Decoder prompt: token id 0 (presumably the decoder start token -- confirm)
# followed by the token of `next_sequence` that comes right after the encoder input.
dec_ids = torch.cat(
    [
        torch.zeros((batch_size, 1), dtype=torch.long),
        next_sequence[:, sub_extension:sub_extension + 1],
    ],
    dim=1,
)

enc_ids.shape, enc_ids[0, :], dec_ids.shape, dec_ids[0, :]

(torch.Size([1, 104]),
 tensor([ 217, 3167, 3739,  972, 3832, 3480, 1446, 3464, 1511, 2239,  217, 3167,
         3739,  972, 3832, 3480, 1446, 3464, 1511, 2239,  217, 3167, 3739,  972,
         3832, 3480, 1446, 3464, 1511, 2239,  217, 3167, 3739,  972, 3832, 3480,
         1446, 3464, 1511, 2239,  217, 3167, 3739,  972, 3832, 3480, 1446, 3464,
         1511, 2239,  217, 3167, 3739,  972, 3832, 3480, 1446, 3464, 1511, 2239,
          217, 3167, 3739,  972, 3832, 3480, 1446, 3464, 1511, 2239,  217, 3167,
         3739,  972, 3832, 3480, 1446, 3464, 1511, 2239,  217, 3167, 3739,  972,
         3832, 3480, 1446, 3464, 1511, 2239,  217, 3167, 3739,  972, 3832, 3480,
         1446, 3464, 1511, 2239,  217, 3167, 3739,    1]),
 torch.Size([1, 2]),
 tensor([  0, 972]))

In [55]:
# Greedy-decode a continuation of the series with each Chronos T5 checkpoint and
# collect the de-tokenized predictions in `results` (model name -> tensor).
model_names = ["amazon/chronos-t5-mini", "amazon/chronos-t5-small", "amazon/chronos-t5-base", "amazon/chronos-t5-large"]
# model_names = ["amazon/chronos-t5-mini"]  # single-model variant for a quick run
results = {}

# The returned sequences are sliced past the decoder prompt so that only newly
# generated tokens are kept. Deriving the offset from `dec_ids` (instead of a
# hard-coded 2) keeps the slice correct if the prompt length ever changes.
prompt_length = dec_ids.shape[-1]

for model_name in model_names:
    pipeline = BaseChronosPipeline.from_pretrained(model_name)
    tokenizer, t5_model = pipeline.tokenizer, pipeline.model.model
    outputs = t5_model.generate(
        input_ids=enc_ids,
        attention_mask=attention_mask,
        max_new_tokens=10,
        decoder_input_ids=dec_ids,
        num_return_sequences=1,
        do_sample=False,  # greedy decoding
        use_cache=False,
        output_attentions=True,
        output_scores=True,
        output_hidden_states=True,
        return_dict_in_generate=True,
    )
    # Map generated token ids back to values with a unit scale.
    preds = tokenizer.output_transform(outputs.sequences[..., prompt_length:], scale=torch.tensor(1))
    results[model_name] = preds

# NOTE: after the loop `pipeline`, `tokenizer`, `t5_model` and `outputs` refer to the
# last entry of `model_names`; later cells reuse `tokenizer`.
results

{'amazon/chronos-t5-mini': tensor([[9.0909, 9.0909, 9.0909, 9.0909, 9.0909, 9.0909, 9.0909, 9.0909, 9.0909,
          9.0909]]),
 'amazon/chronos-t5-small': tensor([[ 2.5953,  2.5953, -4.3988, -4.3988, -4.3988, -1.2097, -4.4282, -4.4282,
          -4.4282, -4.4282]]),
 'amazon/chronos-t5-base': tensor([[ 2.2727,  2.5953, -4.1789, 13.0279, 10.0880, -1.2097, 13.0572,  0.7185,
          13.0279,  9.1422]]),
 'amazon/chronos-t5-large': tensor([[ 0.7185,  2.5953, -0.8431, -4.1789,  6.4883, -0.8431,  6.4883, -0.8431,
          -4.1716,  6.4883]])}

In [56]:
# Plot the input series together with each model's predicted continuation.

# Full observed token series: encoder input without its last token (the trailing
# id 1 in the printed `enc_ids`) plus the token supplied in the decoder prompt.
data = torch.cat((enc_ids[..., :-1], dec_ids[..., 1:]), dim=-1)
preds = results

# De-tokenize once and reuse the result; it does not depend on the model being plotted.
# NOTE(review): `tokenizer` is whichever one the model-loading loop left behind
# (the last model) -- assumes all checkpoints share the same tokenizer; confirm.
data_values = tokenizer.output_transform(data, scale=torch.tensor(1))[0].numpy()
num_points = len(data_values)

fig = go.Figure()

# Add original series (the data)
fig.add_trace(go.Scatter(
    x=np.arange(num_points),
    y=data_values,
    mode='lines',
    name='Full Series',
    line=dict(color='royalblue')
))

# Add predictions for each model with different colors
colors = ['firebrick', 'green', 'purple', 'orange', 'teal']
for i, (model_name, model_preds) in enumerate(preds.items()):
    # Start each prediction line at the last observed point so it connects
    # visually to the original series.
    fig.add_trace(go.Scatter(
        x=np.arange(num_points - 1, num_points + len(model_preds[0])),
        y=np.concatenate([data_values[-1:], model_preds[0].numpy()]),
        mode='lines',
        name=f'Predictions ({model_name})',
        line=dict(color=colors[i % len(colors)], width=2)
    ))

# Update layout
fig.update_layout(
    title='Time Series Data and Model Predictions',
    xaxis_title='Token Position',
    yaxis_title='Value',
    legend_title='Series',
    template='plotly_white'
)

fig.show()


In [59]:
# Magnitude spectrum of the de-tokenized series, restricted to the first half of
# the FFT bins (the non-negative frequencies).
data_array = tokenizer.output_transform(data, scale=torch.tensor(1))[0].numpy()
fft_values = np.fft.fft(data_array)
fft_magnitude = np.abs(fft_values)
freq = np.fft.fftfreq(len(data_array), d=1)  # unit sample spacing: cycles per token

# Slice the positive-frequency half once and reuse it below.
half = len(freq) // 2
positive_freq = freq[:half]
positive_magnitude = fft_magnitude[:half]

# Build the spectrum trace, then the figure around it.
spectrum_trace = go.Scatter(
    x=positive_freq,
    y=positive_magnitude,
    mode='lines',
    name='FFT Magnitude',
    line={'color': 'royalblue', 'width': 2},
)
fig_fft = go.Figure()
fig_fft.add_trace(spectrum_trace)

# Titles, axis labels and an x-axis clipped to the plotted frequency range.
fig_fft.update_layout(
    title='Frequency Spectrum (FFT) of Time Series Data',
    xaxis_title='Frequency',
    yaxis_title='Magnitude',
    xaxis={'range': [0, positive_freq.max()]},
    template='plotly_white',
)

# Render the figure
fig_fft.show()