In [1]:
import os
import sys
import warnings
import numpy as np
from pathlib import Path
import numpy as np
import scipy.integrate
import sympy as sp
import pickle
import re
from tqdm import tqdm
import matplotlib.pyplot as plt

# Clone the ODEFormer repository into the Colab filesystem, but only if it is not already there.
repo_path = Path("/content/odeformer")
if not repo_path.exists():
    !git clone https://github.com/sdascoli/odeformer.git {repo_path}

# Install the packages used by this notebook; sympy is the only one pinned to an exact version.
!pip install --quiet numexpr sympy==1.11.1 matplotlib numpy pandas requests scikit-learn scipy seaborn setproctitle torch tqdm wandb gdown regex
# torch is imported here, after the install step above has run.
import torch
# Put the cloned repository's directories on sys.path so `odeformer` can be imported
# later in the notebook without installing it as a package.
# NOTE(review): these entries overlap ("/content/odeformer" is appended both with and
# without a trailing slash) — confirm which of them are actually required.
sys.path.append("/content/odeformer")
sys.path.append("/content/odeformer/envs")

sys.path.append("/content/odeformer/")
sys.path.append("/content/odeformer/odeformer")
sys.path.append("/content/odeformer/odeformer/envs")
# pip may report dependency conflicts involving torchaudio and torchvision; per the
# original author's note these can be ignored.

Cloning into '/content/odeformer'...
remote: Enumerating objects: 1984, done.[K
remote: Counting objects: 100% (332/332), done.[K
remote: Compressing objects: 100% (138/138), done.[K
remote: Total 1984 (delta 200), reused 308 (delta 191), pack-reused 1652 (from 1)[K
Receiving objects: 100% (1984/1984), 40.82 MiB | 16.44 MiB/s, done.
Resolving deltas: 100% (1436/1436), done.
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m43.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m797.1/797.1 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 MB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m92.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m55.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━

In [2]:
# Mount Google Drive at /content/drive; the sample and activation folders used
# below live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')
# Running this cell requires granting the notebook access to your Drive.


Mounted at /content/drive


In [3]:
# Drive locations for the pickled samples and for the activations computed from them.
# The parent folder "aisc" is expected to exist in MyDrive; the sub-folders are
# created by the cells below if they are missing.
samples_path, activations_path = (
    '/content/drive/MyDrive/aisc/samples_manual_0',
    '/content/drive/MyDrive/aisc/activations_manual_0',
)

**Generate samples, pickle them, and save them to Drive**

In [4]:
# Makes the equation strings stored with each sample easier to read.
def clean_expression(expression):
    """Tidy the sign formatting of an expression string.

    Three substitutions are applied, in this order:
      1. every '--' is removed (a doubled minus sign cancels out),
      2. every ' -' becomes '-',
      3. every '- ' becomes '-'.

    Args:
        expression: the expression text to tidy.

    Returns:
        The expression with the substitutions applied.
    """
    substitutions = (('--', ''), (' -', '-'), ('- ', '-'))
    for old, new in substitutions:
        expression = expression.replace(old, new)
    return expression


In [5]:
def generate_exp_decay_dictionaries(t, c_values, a_values):
    """Build one sample dict per (c, a) pair for the curve c * exp(-a * t).

    Args:
        t: array of time points; stored unchanged under 'times' in every sample.
        c_values: iterable of amplitudes c (outer loop).
        a_values: iterable of rates a (inner loop).

    Returns:
        A list of dicts, one per (c, a) combination, each with the keys
        'times', 'trajectory', 'a', 'c', 'feature_dict' and 'expression'.
    """

    def _make_sample(c_val, a_val):
        # Evaluate the curve on the time grid.
        values = c_val * np.exp(-a_val * t)
        return {
            'times': t,
            # Column vector: one row per time point.
            'trajectory': values.reshape(-1, 1),
            # Plain Python floats serialize more cleanly than NumPy scalars.
            'a': float(a_val),
            'c': float(c_val),
            'feature_dict': {"exponential_decay": 1, "Quadratic": 0},
            'expression': clean_expression(f"{c_val} * np.exp(-{a_val} * t)"),
        }

    return [
        _make_sample(c_val, a_val)
        for c_val in c_values
        for a_val in a_values
    ]

In [6]:
def generate_quadratic_dictionaries(t, c_values, t0_values):
    """Build one sample dict per (c, t0) pair for the curve c / (t0 - t).

    Args:
        t: array of time points; stored unchanged under 'times' in every sample.
        c_values: iterable of numerators c (outer loop).
        t0_values: iterable of offsets t0 (inner loop).

    Returns:
        A list of dicts, one per (c, t0) combination, each with the keys
        'times', 'trajectory', 'a', 'c', 'feature_dict' and 'expression'.
        Note that t0 is stored under the key 'a'.
    """

    def _make_sample(c_val, t0_val):
        # Evaluate the curve on the time grid.
        values = c_val / (t0_val - t)
        return {
            'times': t,
            # Column vector: one row per time point.
            'trajectory': values.reshape(-1, 1),
            # Plain Python floats serialize more cleanly than NumPy scalars.
            'a': float(t0_val),
            'c': float(c_val),
            'feature_dict': {"exponential_decay": 0, "Quadratic": 1},
            'expression': clean_expression(f"{c_val} / ({t0_val} - t)"),
        }

    return [
        _make_sample(c_val, t0_val)
        for c_val in c_values
        for t0_val in t0_values
    ]

In [7]:
# Time grid shared by every sample: 50 evenly spaced points on [1, 10].
t = np.linspace(1, 10, 50)

# Constants to loop through. Only the two endpoints of each range are used here;
# the alternative noted for c, a and t0 is 21 points per range instead of 2.
c = np.linspace(-10, 10, 2)      # c, used by both families of curves
a = np.linspace(-10, 10, 2)      # a in c * exp(-a * t)
t0 = np.linspace(10.1, 100, 2)   # t0 in c / (t0 - t); the range starts above max(t) = 10

# Exponential samples first, followed by the c / (t0 - t) samples.
manual_samples = (
    generate_exp_decay_dictionaries(t, c, a)
    + generate_quadratic_dictionaries(t, c, t0)
)

In [8]:
# Display the first generated sample as a quick sanity check.
manual_samples[0]

{'times': array([ 1.        ,  1.18367347,  1.36734694,  1.55102041,  1.73469388,
         1.91836735,  2.10204082,  2.28571429,  2.46938776,  2.65306122,
         2.83673469,  3.02040816,  3.20408163,  3.3877551 ,  3.57142857,
         3.75510204,  3.93877551,  4.12244898,  4.30612245,  4.48979592,
         4.67346939,  4.85714286,  5.04081633,  5.2244898 ,  5.40816327,
         5.59183673,  5.7755102 ,  5.95918367,  6.14285714,  6.32653061,
         6.51020408,  6.69387755,  6.87755102,  7.06122449,  7.24489796,
         7.42857143,  7.6122449 ,  7.79591837,  7.97959184,  8.16326531,
         8.34693878,  8.53061224,  8.71428571,  8.89795918,  9.08163265,
         9.26530612,  9.44897959,  9.63265306,  9.81632653, 10.        ]),
 'trajectory': array([[-2.20264658e+05],
        [-1.38238356e+06],
        [-8.67585537e+06],
        [-5.44497695e+07],
        [-3.41727389e+08],
        [-2.14468508e+09],
        [-1.34600686e+10],
        [-8.44755474e+10],
        [-5.30169521e+11],
  

In [9]:
# Write every sample to its own pickle file on Drive. The file name carries the
# sample's position in `manual_samples`, which is how later cells identify it.

# The target folder is the same for every file, so create it once up front.
os.makedirs(samples_path, exist_ok=True)

for i, sample in enumerate(manual_samples):
    sample_filename = f"sample_man_{i}.pt"
    sample_filepath = os.path.join(samples_path, sample_filename)
    # The payload is written with pickle, even though the file has a ".pt" suffix.
    with open(sample_filepath, 'wb') as f:
        pickle.dump(sample, f)
    print(f"[INFO] Saved to {sample_filepath}")

[INFO] Saved to /content/drive/MyDrive/aisc/samples_manual_0/sample_man_0.pt
[INFO] Saved to /content/drive/MyDrive/aisc/samples_manual_0/sample_man_1.pt
[INFO] Saved to /content/drive/MyDrive/aisc/samples_manual_0/sample_man_2.pt
[INFO] Saved to /content/drive/MyDrive/aisc/samples_manual_0/sample_man_3.pt
[INFO] Saved to /content/drive/MyDrive/aisc/samples_manual_0/sample_man_4.pt
[INFO] Saved to /content/drive/MyDrive/aisc/samples_manual_0/sample_man_5.pt
[INFO] Saved to /content/drive/MyDrive/aisc/samples_manual_0/sample_man_6.pt
[INFO] Saved to /content/drive/MyDrive/aisc/samples_manual_0/sample_man_7.pt


**Load samples from Drive, run them through the network to generate activations, then pickle the activations and save them to Drive**

In [10]:
# Create the ODEFormer regressor with pretrained weights; the recorded output of
# this cell shows the weights being downloaded and saved to odeformer.pt.
from odeformer.model import SymbolicTransformerRegressor
dstr = SymbolicTransformerRegressor(from_pretrained=True)
# Settings handed to the model before fitting.
# NOTE(review): presumably beam_size is the beam-search width and beam_temperature
# the sampling temperature — confirm against the odeformer documentation.
model_args = {'beam_size': 50, 'beam_temperature': 0.1}
dstr.set_model_args(model_args)

Downloading pretrained model and saving to odeformer.pt


Downloading...
From (original): https://drive.google.com/uc?id=1L_UZ0qgrBVkRuhg5j3BQoGxlvMk_Pm1W
From (redirected): https://drive.google.com/uc?id=1L_UZ0qgrBVkRuhg5j3BQoGxlvMk_Pm1W&confirm=t&uuid=d2550677-8ac2-42fd-b616-0d79f76d2095
To: /content/odeformer.pt
100%|██████████| 465M/465M [00:13<00:00, 35.1MB/s]


Loaded pretrained model


In [11]:
# Most recent output captured for each hooked layer, keyed by layer name.
layer_outputs = dict()


def hook_fn(module, input, output, layer_name):
    """Forward-hook body: remember a layer's output under its name.

    Args:
        module: the module whose forward pass just ran (unused).
        input: the inputs that were passed to the module (unused).
        output: the module's output; must provide .detach() and .cpu().
        layer_name: key under which the output is stored in `layer_outputs`.

    A later call with the same layer_name replaces the earlier entry.
    """
    # Detach first to avoid unnecessary gradient tracking, then keep the copy on the CPU.
    detached = output.detach()
    layer_outputs[layer_name] = detached.cpu()

# Attaches output-capturing forward hooks to the layers of an encoder or decoder.
def register_hooks(model_part, part_name):
    """Register a forward hook on every layer in four layer groups of model_part.

    Args:
        model_part: object exposing the iterables `attentions`, `ffns`,
            `layer_norm1` and `layer_norm2`; each element must provide
            register_forward_hook().
        part_name: prefix for the hook names (e.g. 'encoder' or 'decoder').

    Each hook forwards to hook_fn with the name "<part_name>_<label>_<idx>",
    where idx is the layer's position within its group.
    """
    # (attribute on model_part, label used in the hook name), in registration order.
    layer_groups = (
        ('attentions', 'attention'),      # MultiHeadAttention layers
        ('ffns', 'ffn'),                  # FeedForward layers
        ('layer_norm1', 'layer_norm1'),   # LayerNorm 1 layers
        ('layer_norm2', 'layer_norm2'),   # LayerNorm 2 layers
    )
    for attribute, label in layer_groups:
        for idx, module in enumerate(getattr(model_part, attribute)):
            layer_name = f"{part_name}_{label}_{idx}"
            # Bind the name as a default argument so each hook keeps its own
            # value instead of the loop variable's final one.
            module.register_forward_hook(lambda module, input, output, name=layer_name: hook_fn(module, input, output, name))

# Hook both halves of the pretrained model; the second argument becomes the
# prefix of every captured layer name.
register_hooks(model_part=dstr.model.encoder, part_name='encoder')
register_hooks(model_part=dstr.model.decoder, part_name='decoder')

In [12]:
# For every pickled sample on Drive: run it through the model, collect the
# feed-forward outputs captured by the forward hooks, and pickle them together
# with the sample's labels.
os.makedirs(activations_path, exist_ok=True)

# os.listdir returns names in arbitrary order; sorting makes the run reproducible.
for sample_name in sorted(os.listdir(samples_path)):
    # The first run of digits in the file name identifies the sample and is reused
    # in the activation file name. Skip files without one up front instead of
    # failing after the model has already been run on them.
    seed_match = re.search(r'\d+', sample_name)
    if seed_match is None:
        print(f"[WARN] Skipping {sample_name}: no numeric id in file name")
        continue
    test_seed = seed_match.group(0)

    sample_path = os.path.join(samples_path, sample_name)
    # NOTE: pickle.load can execute arbitrary code — only use it on files that
    # were written by this notebook.
    with open(sample_path, 'rb') as f:
        test_sample = pickle.load(f)
    print(f"[INFO] Loaded sample from {sample_path}")

    # Drop whatever the hooks captured for the previous sample so that only
    # outputs produced by this fit() call can end up in the saved file.
    layer_outputs.clear()
    with torch.no_grad():
        dstr.fit(test_sample['times'], test_sample['trajectory'])

    # Keep only the ffn layer outputs (per the original author's note, these come
    # before the layer norm). Each captured output is stored whole; no slicing
    # is applied here.
    encoder_layer_outputs = {
        layer_name: output
        for layer_name, output in layer_outputs.items()
        if 'ffn' in layer_name and 'encoder' in layer_name
    }
    decoder_layer_outputs = {
        layer_name: output
        for layer_name, output in layer_outputs.items()
        if 'ffn' in layer_name and 'decoder' in layer_name
    }

    # Save both encoder and decoder activations, plus the sample's labels.
    activations = {
        'encoder': encoder_layer_outputs,
        'decoder': decoder_layer_outputs,
    }
    # 'operator_dict' is optional: the manually generated samples do not have it.
    if 'operator_dict' in test_sample:
        activations['operator_dict'] = test_sample['operator_dict']
    activations['feature_dict'] = test_sample['feature_dict']

    activation_filename = f"activation_{test_seed}.pt"
    activation_filepath = os.path.join(activations_path, activation_filename)
    with open(activation_filepath, 'wb') as f:
        pickle.dump(activations, f)
    print(f"[INFO] Saved activations to {activation_filepath}")

[INFO] Loaded sample from /content/drive/MyDrive/aisc/samples_manual_0/sample_man_0.pt
[INFO] Saved activations to /content/drive/MyDrive/aisc/activations_manual_0/activation_0.pt
[INFO] Loaded sample from /content/drive/MyDrive/aisc/samples_manual_0/sample_man_1.pt
[INFO] Saved activations to /content/drive/MyDrive/aisc/activations_manual_0/activation_1.pt
[INFO] Loaded sample from /content/drive/MyDrive/aisc/samples_manual_0/sample_man_2.pt
[INFO] Saved activations to /content/drive/MyDrive/aisc/activations_manual_0/activation_2.pt
[INFO] Loaded sample from /content/drive/MyDrive/aisc/samples_manual_0/sample_man_3.pt
[INFO] Saved activations to /content/drive/MyDrive/aisc/activations_manual_0/activation_3.pt
[INFO] Loaded sample from /content/drive/MyDrive/aisc/samples_manual_0/sample_man_4.pt
[INFO] Saved activations to /content/drive/MyDrive/aisc/activations_manual_0/activation_4.pt
[INFO] Loaded sample from /content/drive/MyDrive/aisc/samples_manual_0/sample_man_5.pt
[INFO] Saved 