In [1]:
# Import the ODEFormer package, failing loudly with an install hint if it is
# missing.
try:
    import odeformer
    from odeformer.model import SymbolicTransformerRegressor
except ImportError as err:
    # Raise instead of print + exit(1): an exception stops this cell (and a
    # "Run All") with a full traceback, whereas exit() acts on the
    # interpreter/kernel session itself and hides the underlying cause.
    raise ImportError(
        "Error: Please install odeformer package first using 'pip install odeformer'"
    ) from err

# Build the pretrained regressor and apply the decoding arguments
# (beam size and beam temperature) used for the rest of the notebook.
try:
    dstr = SymbolicTransformerRegressor(from_pretrained=True)
    model_args = {'beam_size': 50, 'beam_temperature': 0.1}
    dstr.set_model_args(model_args)
except Exception as e:
    # Chain the original exception so its traceback stays visible; the
    # previous print + exit(1) discarded it and acted on the kernel session.
    raise RuntimeError(f"Error initializing the model: {e}") from e

Found pretrained model at odeformer.pt
Loaded pretrained model


In [2]:
import torch, numpy as np


# Maps a layer name to the most recent output tensor captured for that layer.
layer_outputs = {}

# Function to store the output of each layer
def hook_fn(module, input, output, layer_name):
    """Forward-hook body: store a detached CPU copy of ``output``.

    Args:
        module: The module whose forward pass just ran (unused).
        input: The positional inputs given to that forward pass (unused).
        output: The value returned by the forward pass. A tensor is stored
            directly. For a tuple or list, the first tensor element is stored.
        layer_name: Key under which the tensor is written to ``layer_outputs``.

    Returns:
        None, so the module's output is passed on unchanged.

    Notes:
        Each call overwrites any earlier entry stored under ``layer_name``, so
        only the most recent forward pass of a module is kept. Outputs that
        contain no tensor are skipped rather than raising inside the forward
        pass.
    """
    captured = output
    if isinstance(output, (tuple, list)):
        # Some modules return several values; keep the first tensor so that
        # downstream code can still rely on `.shape`.
        captured = next(
            (item for item in output if isinstance(item, torch.Tensor)), None
        )
    if not isinstance(captured, torch.Tensor):
        return
    layer_outputs[layer_name] = captured.detach().cpu()

# Registering hooks for layers in the encoder and decoder
def register_hooks(model_part, part_name):
    """Attach ``hook_fn`` as a forward hook to the sub-layers of ``model_part``.

    The sub-layers are read from the ``attentions``, ``ffns``, ``layer_norm1``
    and ``layer_norm2`` attributes of ``model_part``, each of which is
    iterated with ``enumerate``. Every hook records under the key
    ``"{part_name}_{label}_{idx}"``, where ``label`` is ``attention``, ``ffn``,
    ``layer_norm1`` or ``layer_norm2``.

    Args:
        model_part: Object exposing the four attributes listed above.
        part_name: Prefix for the recorded layer names (e.g. ``'encoder'``).

    Returns:
        list: The handles returned by ``register_forward_hook``, in
        registration order. Call ``.remove()`` on each to detach the hooks,
        for example before re-running this cell, so hooks are not stacked
        twice on the same module.

    Raises:
        AttributeError: If ``model_part`` lacks one of the four attributes.
    """
    # (attribute on model_part, label used in the recorded layer name)
    sublayer_groups = (
        ("attentions", "attention"),      # MultiHeadAttention layers
        ("ffns", "ffn"),                  # FeedForward layers
        ("layer_norm1", "layer_norm1"),   # LayerNorm 1 layers
        ("layer_norm2", "layer_norm2"),   # LayerNorm 2 layers
    )

    handles = []
    for attr_name, label in sublayer_groups:
        for idx, module in enumerate(getattr(model_part, attr_name)):
            layer_name = f"{part_name}_{label}_{idx}"
            # Bind the name as a default argument so each hook keeps its own
            # layer name instead of the loop's final value.
            handle = module.register_forward_hook(
                lambda module, input, output, name=layer_name: hook_fn(module, input, output, name)
            )
            handles.append(handle)
    return handles

# Attach the capture hooks to the encoder first, then the decoder.
for part_name in ("encoder", "decoder"):
    register_hooks(getattr(dstr.model, part_name), part_name)

# Synthetic 2-D trajectory: 50 time samples on [0, 10], with a cosine in the
# first column and a sine in the second, giving an array of shape (50, 2).
times = np.linspace(0, 10, num=50)
x = 2.3 * np.cos(times + 0.5)
y = 1.2 * np.sin(times + 0.1)
trajectory = np.column_stack((x, y))

# Run the model on the trajectory with gradient tracking disabled; the
# registered hooks fill `layer_outputs` as a side effect.
with torch.no_grad():
    dstr.fit(times, trajectory)

# Report every captured layer's shape, and dump the full tensor only for the
# two selected feed-forward layers.
for layer_name, output in layer_outputs.items():
    print(f"Layer: {layer_name}, Output Shape: {output.shape}")
    if layer_name not in ("encoder_ffn_3", "decoder_ffn_3"):
        continue
    print(output)
    print("\n")

Layer: encoder_attention_0, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_layer_norm1_0, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_ffn_0, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_layer_norm2_0, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_attention_1, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_layer_norm1_1, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_ffn_1, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_layer_norm2_1, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_attention_2, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_layer_norm1_2, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_ffn_2, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_layer_norm2_2, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_attention_3, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_layer_norm1_3, Output Shape: torch.Size([1, 50, 256])
Layer: encoder_ffn_3, Output Shape: torch.Size([1, 50, 256])
tenso