In [19]:
import pandas as pd  # requires: pip install pandas
import torch
from chronos import BaseChronosPipeline
import plotly.express as px
import numpy as np

In [2]:
pipeline = BaseChronosPipeline.from_pretrained(
    "amazon/chronos-t5-small",  # use "amazon/chronos-bolt-small" for the corresponding Chronos-Bolt model
    device_map="cpu",  # use "cpu" for CPU inference
    torch_dtype=torch.bfloat16,
)

In [3]:
df = pd.read_csv(
    "https://raw.githubusercontent.com/AileenNielsen/TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv"
)

# context must be either a 1D tensor, a list of 1D tensors,
# or a left-padded 2D tensor with batch as the first dimension
# quantiles is an fp32 tensor with shape [batch_size, prediction_length, num_quantile_levels]
# mean is an fp32 tensor with shape [batch_size, prediction_length]
quantiles, mean = pipeline.predict_quantiles(
    context=torch.tensor(df["#Passengers"]),
    prediction_length=12,
    quantile_levels=[0.1, 0.5, 0.9],
)

In [51]:
layer = pipeline.model
layer_name = 'model'
for name in layer_name.split('.'):
    # print("processing: ", name)
    layer = getattr(layer, name)
layer

T5ForConditionalGeneration(
  (shared): Embedding(4096, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(4096, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropou

In [20]:
context = torch.tensor(df["#Passengers"])
context_tensor = pipeline._prepare_and_validate_context(context=context) # does nothing
token_ids, attention_mask, scale = pipeline.tokenizer.context_input_transform(context_tensor)
# context is just the data you want to predict
# token_ids is the token ids of the context, roughly follows the context
# scale is the mean of the context tensor

In [45]:
# Plot context
y = context.detach().cpu().numpy()

# Use Plotly Express to plot
fig = px.line(y=y,
              labels={'index': 'step', 'y': 'value'},
              title='Line plot of context tensor')

# plot scale as a horizontal line on the plot
fig.add_hline(y=scale[0], line_dash="dash")
fig.show()


In [104]:
# Get the token_ids from a previous cell
# We'll use the existing token_ids that were created in cell 20
# token_ids shape is [1, 145]

# The actual T5 model is accessed at pipeline.model.model
t5_model = pipeline.model.model

# Get the encoder embedding layer
encoder_embed_layer = t5_model.encoder.embed_tokens

# Use the embedding layer to convert token_ids to embeddings
with torch.no_grad():
    # Move token_ids to the right device if needed
    if token_ids.device != encoder_embed_layer.weight.device:
        token_ids = token_ids.to(encoder_embed_layer.weight.device)
    
    # Generate embeddings from token_ids
    # This will have shape [1, 145, 512] - [batch_size, seq_len, hidden_dim]
    encoder_embeddings = encoder_embed_layer(token_ids)
    
    # Also show the shared embedding layer (used for weight tying)
    
    # Print shapes and a sample of the embeddings
    print(f"context tensor shape: {context_tensor.shape}")
    print(f"Token IDs shape: {token_ids.shape}")
    print(f"Encoder embedding layer shape: {encoder_embed_layer.weight.shape}")
    print(f"Encoder embeddings shape: {encoder_embeddings.shape}")
    
    # Show sample of embeddings for first token
    print("\nSample of embeddings for first token (first 10 dimensions):")
    print(encoder_embeddings)

context tensor shape: torch.Size([1, 144])
Token IDs shape: torch.Size([1, 145])
Encoder embedding layer shape: torch.Size([4096, 512])
Encoder embeddings shape: torch.Size([1, 145, 512])

Sample of embeddings for first token (first 10 dimensions):
tensor([[[-0.1963,  0.7852,  0.6680,  ...,  1.1094, -0.3730,  0.4160],
         [ 0.4785, -1.2422,  0.5039,  ...,  0.3398, -0.1240,  0.4355],
         [ 0.1992,  0.1289,  0.7344,  ...,  1.6562, -0.4863,  0.4238],
         ...,
         [-0.6523,  0.0977,  0.4531,  ..., -0.4922,  0.3359,  0.2930],
         [-0.0312, -0.5391,  0.8672,  ..., -0.5273,  0.9141,  0.3477],
         [ 0.3828, -0.2490, -0.0140,  ...,  3.0000, -1.0469, -2.5938]]],
       dtype=torch.bfloat16)


In [116]:
encoder_embeddings = torch.stack([t5_model.encoder.embed_tokens.weight[i] for i in token_ids[0]]).unsqueeze(0)
print(encoder_embeddings.shape)

torch.Size([1, 145, 512])


In [129]:
encoder_output = t5_model.encoder(token_ids)
encoder_output.last_hidden_state.shape

torch.Size([1, 145, 512])