# Setup

In [None]:
# Pin numpy and torch. The default Colab instance now ships newer versions of both,
# which breaks two calls further down in this notebook:
#   1. dstr = SymbolicTransformerRegressor(from_pretrained=True)
#      -> breaks because newer PyTorch changed how it loads pickled files.
#   2. act_extractor.extract_activations(dstr, r2_samples_path, r2_activations_path, layers_to_extract=['ffn'])
#      -> breaks because fitting the odeformer uses the old numpy alias np.infty
#         (instead of np.inf).
%pip install numpy==1.23.5
%pip install torch==2.0.0

Collecting torch==2.0.0
  Using cached torch-2.0.0-cp311-cp311-manylinux1_x86_64.whl.metadata (24 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch==2.0.0)
  Using cached nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==2.0.0)
  Using cached nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cuda-cupti-cu11==11.7.101 (from torch==2.0.0)
  Using cached nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu11==8.5.0.96 (from torch==2.0.0)
  Using cached nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu11==11.10.3.66 (from torch==2.0.0)
  Using cached nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cufft-cu11==10.9.0.58 (from torch==2.0.0)
  Using cached nvidia_cufft_cu11-10.9.0.58-py3-n

In [None]:
# Give Colab permission to access Google Drive and mount it at /content/drive.
# All repo and data paths configured below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
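# Alternative Drive layout, kept for reference only (every line in this cell is commented out).
# The active paths are set in the next cell; update those as required.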
# repo_path = '/content/drive/MyDrive/github/subteams/LLMProbing'
# odeformer_path = '/content/drive/MyDrive/aisc' # This is because I cloned the odeformer repo into my aisc folder
# samples_path = '/content/drive/MyDrive/github/subteams/LLMProbing/local_experiment_data/samples'
# activations_path = '/content/drive/MyDrive/github/subteams/LLMProbing/local_experiment_data/activations'
# probes_path = '/content/drive/MyDrive/github/subteams/LLMProbing/local_experiment_data/probes'

In [None]:
# Replace with your own Drive paths to the MIVLDE repo, the odeformer repo,
# and the shared probing_data folder.
repo_path = '/content/drive/MyDrive/aisc/repos/MIVLDE/subteams/LLMProbing'  # appended to sys.path below
odeformer_path = '/content/drive/MyDrive/aisc/repos/odeformer'  # appended to sys.path below
samples_path = '/content/drive/MyDrive/aisc/probing_data/samples/'  # parent dir for generated samples
activations_path = '/content/drive/MyDrive/aisc/probing_data/activations/'  # parent dir for extracted activations
probes_path = '/content/drive/MyDrive/aisc/probing_data/probes/'  # parent dir for trained probes / results

In [None]:
import sys
import importlib

# Make the project repo and the odeformer checkout importable.
# The membership check keeps this cell idempotent: re-running it no longer
# appends duplicate entries to sys.path.
for _extra_path in (repo_path, odeformer_path):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

In [None]:
import numpy as np
import os
import pickle

In [None]:
# Load the pretrained odeformer regressor and apply the beam settings used for
# this experiment. (In the recorded run this step downloaded the pretrained
# weights and saved them as odeformer.pt.)
from odeformer.model import SymbolicTransformerRegressor

model_args = dict(beam_size=10, beam_temperature=0.8)
dstr = SymbolicTransformerRegressor(from_pretrained=True)
dstr.set_model_args(model_args)

Downloading pretrained model and saving to odeformer.pt


Downloading...
From (original): https://drive.google.com/uc?id=1L_UZ0qgrBVkRuhg5j3BQoGxlvMk_Pm1W
From (redirected): https://drive.google.com/uc?id=1L_UZ0qgrBVkRuhg5j3BQoGxlvMk_Pm1W&confirm=t&uuid=af52030d-a785-4eb5-b2e9-5d0bd89ec2d9
To: /content/drive/MyDrive/aisc/probing_data/activations/r2_expt/odeformer.pt
100%|██████████| 465M/465M [00:10<00:00, 44.4MB/s]


Loaded pretrained model


In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Sample Generation

In [None]:
from src.sample_generation import RandomSamplesGenerator

In [None]:
# Configuration for random sample generation in the r2 experiment.
# Operator spec passed to the generator; presumably comma-separated
# "operator:weight" entries — TODO confirm against RandomSamplesGenerator.
operators_to_use = "id:1,add:1,mul:1,sub:1,inv:1,pow2:1,pow3:1,exp:1,log:1,sin:1,cos:1,tan:1,arcsin:1,arccos:1,arctan:1"
min_dimension = 1
max_dimension = 1
num_samples = 800
seed = 42
sample_descriptor = 'r2'

# Build the output directory with os.path.join instead of f'{samples_path}/r2_expt':
# when samples_path already ends in '/', plain concatenation yields '...//r2_expt'.
r2_samples_path = os.path.join(samples_path, 'r2_expt')

In [None]:
# Instantiate the sample generator with no constructor arguments.
rsg = RandomSamplesGenerator()

In [None]:
# Generate the random samples and save them under r2_samples_path.
# NOTE: slow — the recorded run took roughly 27 minutes for 800 samples.
# The configured `sample_descriptor` is passed through instead of repeating the
# literal 'r2', so changing it in the config cell actually takes effect here.
rsg.generate_random_samples(
    r2_samples_path,
    seed=seed,
    num_samples=num_samples,
    operators_to_use=operators_to_use,
    min_dimension=min_dimension,
    max_dimension=max_dimension,
    sample_descriptor=sample_descriptor,
)

Generating r2 samples: 100%|██████████| 800/800 [26:40<00:00,  2.00s/it]


[INFO] Data generation complete. Saved 800 r2 samples to /content/drive/MyDrive/aisc/probing_data/samples//r2_expt





# Activation Extraction

In [None]:
from src.activation_extraction import ActivationsExtractor

In [None]:
# Directory for the extracted activations of the r2 experiment.
# os.path.join avoids the '//' that f'{activations_path}/r2_expt' produces when
# activations_path already ends in '/'.
r2_activations_path = os.path.join(activations_path, 'r2_expt')

In [None]:
# Instantiate the activations extractor with no constructor arguments.
act_extractor = ActivationsExtractor()

In [None]:
# Run the pretrained model over the generated samples and store the 'ffn'
# activations under r2_activations_path.
# NOTE: slow — the recorded run took about an hour for 800 samples and logged
# "nan in trajectory of sample ..." for a number of them.
act_extractor.extract_activations(
    dstr,
    r2_samples_path,
    r2_activations_path,
    layers_to_extract=['ffn'],
)

Extracting Activations:   2%|▏         | 13/800 [01:47<2:08:55,  9.83s/it]


nan in trajectory of sample r2_249467210


Extracting Activations:   2%|▏         | 14/800 [02:02<2:28:24, 11.33s/it]


nan in trajectory of sample r2_898717130


Extracting Activations:  12%|█▏        | 93/800 [07:59<1:48:31,  9.21s/it]


nan in trajectory of sample r2_200619113


Extracting Activations:  17%|█▋        | 137/800 [11:21<1:33:48,  8.49s/it]


nan in trajectory of sample r2_853477355


Extracting Activations:  18%|█▊        | 148/800 [12:19<1:40:41,  9.27s/it]


nan in trajectory of sample r2_455094650


Extracting Activations:  20%|█▉        | 158/800 [13:14<1:38:30,  9.21s/it]


nan in trajectory of sample r2_856634123


Extracting Activations:  27%|██▋       | 219/800 [17:40<41:04,  4.24s/it]


nan in trajectory of sample r2_195995197


Extracting Activations:  31%|███       | 246/800 [19:45<1:18:01,  8.45s/it]


nan in trajectory of sample r2_475584232


Extracting Activations:  31%|███       | 247/800 [20:09<2:01:38, 13.20s/it]


nan in trajectory of sample r2_235283714


Extracting Activations:  35%|███▌      | 283/800 [22:59<1:29:42, 10.41s/it]


nan in trajectory of sample r2_221099573


Extracting Activations:  50%|█████     | 402/800 [32:15<58:56,  8.89s/it]


nan in trajectory of sample r2_755593187


Extracting Activations:  72%|███████▏  | 574/800 [45:24<23:09,  6.15s/it]


nan in trajectory of sample r2_70009467


Extracting Activations:  84%|████████▍ | 674/800 [52:47<13:24,  6.39s/it]


nan in trajectory of sample r2_218825555


Extracting Activations:  90%|████████▉ | 716/800 [56:03<06:56,  4.96s/it]


nan in trajectory of sample r2_507514051


Extracting Activations: 100%|█████████▉| 799/800 [1:02:28<00:07,  7.48s/it]


nan in trajectory of sample r2_822617220


Extracting Activations: 100%|██████████| 800/800 [1:02:34<00:00,  4.69s/it]


[INFO] Activation extraction complete. Activations saved to /content/drive/MyDrive/aisc/probing_data/activations//r2_expt





# Probe Training

In [None]:
import experiments

In [None]:
# Probe-training configuration for the r2 experiment.
# os.path.join avoids the '//' that f'{probes_path}/r2_expt' produces when
# probes_path already ends in '/'.
r2_probes_path = os.path.join(probes_path, 'r2_expt')
lr = 0.01          # learning rate passed to the experiment
num_epochs = 30    # training epochs per probe
num_repeats = 3    # number of repeated training runs

In [None]:
# Train the r2-prediction probes on the extracted activations; the experiment is
# repeated num_repeats times and the per-run results are returned.
# NOTE(review): the recorded run reports a final training loss of ~6.19e+21, which
# suggests extreme label values in the training data — TODO inspect the label
# distribution (see the data-inspection cells) before trusting these probes.
r2_expt_results = experiments.r2_prediction_experiment(
    r2_activations_path,
    r2_probes_path,
    lr=lr,
    num_epochs=num_epochs,
    num_repeats=num_repeats,
)

# Save experiment results alongside the probe checkpoints.
# The file is a pickle despite the '.pt' extension. The directory is created if
# it is missing so a long training run is not lost to a FileNotFoundError.
os.makedirs(r2_probes_path, exist_ok=True)
with open(os.path.join(r2_probes_path, 'experiment_data.pt'), 'wb') as f:
    pickle.dump(r2_expt_results, f)

Repeat 0



Training LR Probe:   0%|          | 0/30 [00:00<?, ?it/s]
Training LR Probe:   3%|▎         | 1/30 [00:06<03:13,  6.66s/it]
Training LR Probe:   7%|▋         | 2/30 [00:13<03:06,  6.65s/it]
Training LR Probe:  10%|█         | 3/30 [00:20<03:01,  6.73s/it]
Training LR Probe:  13%|█▎        | 4/30 [00:26<02:53,  6.67s/it]
Training LR Probe:  17%|█▋        | 5/30 [00:33<02:46,  6.64s/it]
Training LR Probe:  20%|██        | 6/30 [00:40<02:47,  6.98s/it]
Training LR Probe:  23%|██▎       | 7/30 [00:49<02:54,  7.59s/it]
Training LR Probe:  27%|██▋       | 8/30 [00:56<02:39,  7.23s/it]
Training LR Probe:  30%|███       | 9/30 [01:02<02:28,  7.07s/it]
Training LR Probe:  33%|███▎      | 10/30 [01:09<02:15,  6.80s/it]
Training LR Probe:  37%|███▋      | 11/30 [01:16<02:09,  6.82s/it]
Training LR Probe:  40%|████      | 12/30 [01:22<01:59,  6.63s/it]
Training LR Probe:  43%|████▎     | 13/30 [01:29<01:54,  6.74s/it]
Training LR Probe:  47%|████▋     | 14/30 [01:35<01:46,  6.63s/it]
Training LR 


Training Set (Epoch 30 - Final): Loss 6.190443846414055e+21
Regression probe trained on layer -1: Test Set Loss 567805.516413762
Saved state dictionary to /content/drive/MyDrive/aisc/probing_data/probes//r2_expt/probe_r2_0.pt
Repeat 1



Training LR Probe:   0%|          | 0/30 [00:00<?, ?it/s]
Training LR Probe:   3%|▎         | 1/30 [00:07<03:29,  7.22s/it]
Training LR Probe:   7%|▋         | 2/30 [00:13<03:07,  6.68s/it]
Training LR Probe:  10%|█         | 3/30 [00:20<03:03,  6.79s/it]
Training LR Probe:  13%|█▎        | 4/30 [00:26<02:50,  6.54s/it]
Training LR Probe:  17%|█▋        | 5/30 [00:33<02:48,  6.72s/it]
Training LR Probe:  20%|██        | 6/30 [00:39<02:37,  6.55s/it]
Training LR Probe:  23%|██▎       | 7/30 [00:46<02:33,  6.67s/it]
Training LR Probe:  27%|██▋       | 8/30 [00:53<02:24,  6.55s/it]
Training LR Probe:  30%|███       | 9/30 [00:59<02:17,  6.56s/it]
Training LR Probe:  33%|███▎      | 10/30 [01:06<02:11,  6.59s/it]
Training LR Probe:  37%|███▋      | 11/30 [01:12<02:04,  6.53s/it]
Training LR Probe:  40%|████      | 12/30 [01:19<01:57,  6.51s/it]
Training LR Probe:  43%|████▎     | 13/30 [01:25<01:50,  6.48s/it]
Training LR Probe:  47%|████▋     | 14/30 [01:32<01:45,  6.59s/it]
Training LR 


Training Set (Epoch 30 - Final): Loss 6.190443846414055e+21
Regression probe trained on layer -1: Test Set Loss 604725.3987403283
Saved state dictionary to /content/drive/MyDrive/aisc/probing_data/probes//r2_expt/probe_r2_1.pt
Repeat 2



Training LR Probe:   0%|          | 0/30 [00:00<?, ?it/s]
Training LR Probe:   3%|▎         | 1/30 [00:06<03:10,  6.57s/it]
Training LR Probe:   7%|▋         | 2/30 [00:13<03:06,  6.67s/it]
Training LR Probe:  10%|█         | 3/30 [00:20<03:01,  6.74s/it]
Training LR Probe:  13%|█▎        | 4/30 [00:26<02:56,  6.78s/it]
Training LR Probe:  17%|█▋        | 5/30 [00:33<02:48,  6.73s/it]
Training LR Probe:  20%|██        | 6/30 [00:40<02:40,  6.70s/it]
Training LR Probe:  23%|██▎       | 7/30 [00:47<02:35,  6.75s/it]
Training LR Probe:  27%|██▋       | 8/30 [00:53<02:25,  6.61s/it]
Training LR Probe:  30%|███       | 9/30 [01:00<02:22,  6.78s/it]
Training LR Probe:  33%|███▎      | 10/30 [01:07<02:14,  6.71s/it]
Training LR Probe:  37%|███▋      | 11/30 [01:14<02:08,  6.78s/it]
Training LR Probe:  40%|████      | 12/30 [01:20<02:00,  6.67s/it]
Training LR Probe:  43%|████▎     | 13/30 [01:27<01:55,  6.81s/it]
Training LR Probe:  47%|████▋     | 14/30 [01:33<01:46,  6.67s/it]
Training LR 


Training Set (Epoch 30 - Final): Loss 6.190443846414055e+21
Regression probe trained on layer -1: Test Set Loss 592781.9946179512
Saved state dictionary to /content/drive/MyDrive/aisc/probing_data/probes//r2_expt/probe_r2_2.pt


In [None]:
# Display the experiment results; the recorded output has one row per repeat with
# layer, run, test_loss, final_train_loss and final_val_loss columns.
r2_expt_results

Unnamed: 0,layer,run,test_loss,final_train_loss,final_val_loss
0,15,0,567805.516414,6.190444e+21,64917930000.0
1,15,1,604725.39874,6.190444e+21,64916390000.0
2,15,2,592781.994618,6.190444e+21,64916860000.0


# Data inspection

In [None]:
from src.datasets.activations_dataset import R2ActivationsDataset

In [None]:
# Load the extracted activations through the R2-specific dataset class.
r2_acts = R2ActivationsDataset(r2_activations_path)

In [None]:
# Print the label of every item in the dataset.
# The third tuple element is bound to `sample_id` rather than `id`, so the builtin
# id() is not shadowed in the notebook's global namespace; the activation itself
# is not needed for this inspection.
for _act, label, sample_id in r2_acts:
  print(label)

tensor(0.9982)
tensor(1.0000)
tensor(0.9016)
tensor(0.9999)
tensor(0.9996)
tensor(0.9995)
tensor(0.9999)
tensor(0.5746)
tensor(0.9995)
tensor(0.9832)
tensor(-0.2268)
tensor(0.9994)
tensor(0.9999)
tensor(0.9995)
tensor(0.9834)
tensor(0.9999)
tensor(0.9804)
tensor(0.9990)
tensor(0.9800)
tensor(0.9998)
tensor(1.0000)
tensor(1.0000)
tensor(0.9989)
tensor(0.9893)
tensor(0.9996)
tensor(0.9999)
tensor(0.9927)
tensor(0.9991)


In [None]:
from src.datasets import ActivationsDataset

In [None]:
# Load the same activations through the generic dataset class, selecting the
# 'r2_score' feature as label and layer index -1, then print every label.
# NOTE(review): the recorded output of this cell contains -inf labels that do not
# appear in the R2ActivationsDataset output — TODO confirm whether non-finite
# labels must be filtered before probe training.
r2_acts_ds = ActivationsDataset(r2_activations_path, feature_label='r2_score', layer_idx=-1)
# `sample_id` (not `id`) avoids shadowing the builtin id() in the notebook namespace.
for _act, label, sample_id in r2_acts_ds:
  print(label)

tensor(0.9982)
tensor(1.0000)
tensor(0.9016)
tensor(0.9999)
tensor(0.9996)
tensor(0.9995)
tensor(0.9999)
tensor(0.5746)
tensor(0.9995)
tensor(0.9832)
tensor(-0.2268)
tensor(0.9994)
tensor(-inf)
tensor(-inf)
tensor(0.9999)
tensor(0.9995)
tensor(0.9834)
tensor(0.9999)
tensor(0.9804)
tensor(0.9990)
tensor(0.9800)
tensor(0.9998)
tensor(1.0000)
tensor(1.0000)
tensor(0.9989)
tensor(0.9893)
tensor(0.9996)
tensor(0.9999)
tensor(0.9927)
tensor(0.9991)
