In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so the Drive-based
# paths used below resolve. Re-running is harmless: when Drive is already mounted,
# Colab only reports it (pass force_remount=True to drive.mount to remount).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Update these as required.
# Every experiment-data directory below is derived from `repo_path`, so pointing
# `repo_path` at a different checkout keeps all of them in sync.
repo_path = '/content/drive/MyDrive/github/subteams/LLMProbing'
odeformer_path = '/content/drive/MyDrive/aisc' # This is because I cloned the odeformer repo into my aisc folder

# Root directory for the samples, activations and probes used in this notebook.
experiment_data_path = f'{repo_path}/local_experiment_data'
samples_path = f'{experiment_data_path}/samples'
activations_path = f'{experiment_data_path}/activations'
probes_path = f'{experiment_data_path}/probes'

In [3]:
import sys
import importlib

# Put the project checkout and the odeformer checkout on the import path so the
# `src.*`, `experiments.*` and `odeformer` imports further down resolve.
# The membership check keeps this cell idempotent: re-running it does not pile
# duplicate entries onto sys.path.
for extra_path in (repo_path, odeformer_path):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

In [4]:
import numpy as np
import os
import pickle

In [5]:
from odeformer.model import SymbolicTransformerRegressor

# Arguments forwarded to the regressor through set_model_args().
model_args = dict(beam_size=50, beam_temperature=0.1)

# Build the regressor from the pretrained checkpoint, then apply the arguments.
dstr = SymbolicTransformerRegressor(from_pretrained=True)
dstr.set_model_args(model_args)

Found pretrained model at odeformer.pt
Loaded pretrained model


## Random Sample Generation

In [6]:
from src.sample_generation import RandomSamplesGenerator
# NOTE: importlib.reload(RandomSamplesGenerator) no longer works: reload() only accepts module objects, and RandomSamplesGenerator is now imported as a class rather than as a module.

In [7]:
# Settings handed to RandomSamplesGenerator in the next cell.
num_samples = 10
min_dimension, max_dimension = 1, 1
operators_to_use = "id:1,add:1,mul:1,sin:0.5"

# Directory that receives this demo's random samples.
random_samples_path = f"{samples_path}/demo_random"

In [8]:
# Build the generator from the settings defined in the previous cell.
rsg = RandomSamplesGenerator(
    samples_path=random_samples_path,
    num_samples=num_samples,
    operators_to_use=operators_to_use,
    min_dimension=min_dimension,
    max_dimension=max_dimension,
)
# TODO: possibly restructure this class to give important parameters to generate_random_samples()
# Then we will be able to rerun generation with different parameters without having to instantiate a new instance of RSG
# Probably what is most useful is to specify samples_path only

In [9]:
# Run the generation with the settings given to `rsg` at construction time.
# In the recorded run this saved 10 samples under random_samples_path.
rsg.generate_random_samples()

[INFO] Data generation complete. Saved 10 samples to /content/drive/MyDrive/github/subteams/LLMProbing/local_experiment_data/samples/demo_random


In [10]:
# Inspect one random sample to see its keys and some data.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the inspected file the same on every run.
random_sample_names = sorted(os.listdir(random_samples_path))
if random_sample_names:
    random_sample_name = random_sample_names[0]
    random_sample_path = os.path.join(random_samples_path, random_sample_name)
    # NOTE: pickle.load can execute arbitrary code; only load files produced by
    # this project's generators.
    with open(random_sample_path, 'rb') as f:
        random_sample = pickle.load(f)
    print(random_sample.keys())
    print(f"Encoded equation: {random_sample['tree']}")
    print(f"Feature dictionary: {random_sample['feature_dict']}")
else:
    # Make an empty directory visible instead of silently printing nothing.
    print(f"No sample files found in {random_samples_path}")

dict_keys(['times', 'trajectory', 'tree_encoded', 'skeleton_tree_encoded', 'tree', 'skeleton_tree', 'infos', 'operator_dict', 'feature_dict'])
Encoded equation: -0.2069 * (x_0)**2
Feature dictionary: {'log': 0, 'exp': 0, 'tan': 0, 'arctan': 0, 'sin_cos': 0, 'arc_sin_cos': 0, 'pow2': 1, 'pow3': 0, 'inv': 0, 'sqrt': 0}


## Manual Sample Generation

In [11]:
from src.sample_generation import ManualSamplesGenerator

In [12]:
# Directory that receives the manually specified samples, and the generator
# that writes into it.
manual_samples_path = f"{samples_path}/demo_manual"
msg = ManualSamplesGenerator(
    samples_path=manual_samples_path,
)

In [13]:
# Time grid: 50 evenly spaced points on [1, 10].
t_values = np.linspace(1, 10, num=50)

# Exponential parameters: 2 values of c and 5 values of a, each spanning [-10, 10].
c_values = np.linspace(-10, 10, num=2)
a_values = np.linspace(-10, 10, num=5)

# Hyperbolic parameters: 5 values of t0 spanning [10.1, 100].
t0_values = np.linspace(10.1, 100, num=5)

In [14]:
# Generate exponential samples on the shared time grid from the c and a grids.
# The recorded run saved 10 samples, which matches 2 c values x 5 a values —
# presumably one sample per (c, a) pair; TODO confirm in ManualSamplesGenerator.
msg.generate_exponential_samples(t_values, c_values, a_values)

[INFO] Data generation complete. Saved 10 exponential samples to /content/drive/MyDrive/github/subteams/LLMProbing/local_experiment_data/samples/demo_manual


In [15]:
# Inspect one exponential sample to see its keys and some data.
# The manual samples directory holds several sample kinds, so pick the first
# file whose name contains "exp". Sorting makes the choice reproducible, since
# os.listdir() returns entries in arbitrary order.
sample_name = next(
    (name for name in sorted(os.listdir(manual_samples_path)) if "exp" in name),
    None,
)
if sample_name is None:
    # Make a missing match visible instead of silently printing nothing.
    print(f"No exponential sample files found in {manual_samples_path}")
else:
    exp_sample_path = os.path.join(manual_samples_path, sample_name)
    # NOTE: pickle.load can execute arbitrary code; only load files produced by
    # this project's generators.
    with open(exp_sample_path, 'rb') as f:
        exp_sample = pickle.load(f)
    print(exp_sample.keys())
    print(exp_sample['expression'])
    print(exp_sample['feature_dict'])

dict_keys(['times', 'trajectory', 'parameters', 'feature_dict', 'expression'])
-10.0 * np.exp(10.0 * t)
{'exponential': 1, 'hyperbolic': 0}


In [16]:
# Generate hyperbolic samples on the shared time grid from the c and t0 grids.
# The recorded run saved 10 samples, which matches 2 c values x 5 t0 values —
# presumably one sample per (c, t0) pair; TODO confirm in ManualSamplesGenerator.
msg.generate_hyperbolic_samples(t_values, c_values, t0_values)

[INFO] Data generation complete. Saved 10 hyperbolic samples to /content/drive/MyDrive/github/subteams/LLMProbing/local_experiment_data/samples/demo_manual


In [17]:
# Inspect one hyperbolic sample to see its keys and some data.
# Pick the first file whose name contains "hyp". Sorting makes the choice
# reproducible, since os.listdir() returns entries in arbitrary order.
sample_name = next(
    (name for name in sorted(os.listdir(manual_samples_path)) if "hyp" in name),
    None,
)
if sample_name is None:
    # Make a missing match visible instead of silently printing nothing.
    print(f"No hyperbolic sample files found in {manual_samples_path}")
else:
    hyp_sample_path = os.path.join(manual_samples_path, sample_name)
    # NOTE: pickle.load can execute arbitrary code; only load files produced by
    # this project's generators.
    with open(hyp_sample_path, 'rb') as f:
        hyp_sample = pickle.load(f)
    print(hyp_sample.keys())
    print(hyp_sample['expression'])
    print(hyp_sample['parameters'])
    print(hyp_sample['feature_dict'])

dict_keys(['times', 'trajectory', 'parameters', 'feature_dict', 'expression'])
-10.0 / (10.1-t)
{'t0': 10.1, 'c': -10.0}
{'exponential': 0, 'hyperbolic': 1}


## Corresponding Activations Extraction

In [18]:
from src.activation_extraction import ActivationsExtractor

In [19]:
# Output directories for extracted activations, one per sample set.
manual_activations_path = f"{activations_path}/demo_manual"
random_activations_path = f"{activations_path}/demo_random"

In [20]:
# One extractor instance is reused for both the random and the manual sample sets.
act_extractor = ActivationsExtractor()

In [21]:
# Run the model over the random samples and store the 'ffn' activations.
act_extractor.extract_activations(
    dstr,
    random_samples_path,
    random_activations_path,
    layers_to_extract=['ffn'],
)

Extracting Activations: 100%|██████████| 10/10 [00:51<00:00,  5.19s/it]

[INFO] Activation extraction complete. Activations saved to /content/drive/MyDrive/github/subteams/LLMProbing/local_experiment_data/activations/demo_random





In [24]:
# Extract 'ffn' activations for the manual (exponential / hyperbolic) samples.
# NOTE(review): in the recorded run this call stopped after 1 of 20 samples with
# InvalidParameterError because r2_score received y_pred=None. Presumably the
# model returned no prediction for one of the manual samples — TODO confirm and
# handle that case inside ActivationsExtractor.
act_extractor.extract_activations(dstr, manual_samples_path, manual_activations_path, layers_to_extract=['ffn'])

Extracting Activations:   5%|▌         | 1/20 [01:09<22:04, 69.69s/it]


InvalidParameterError: The 'y_pred' parameter of r2_score must be an array-like. Got None instead.

In [23]:
# Inspect one random-sample activation file and display its keys and some values.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the inspected file the same on every run.
random_acts_names = sorted(os.listdir(random_activations_path))
if random_acts_names:
    acts_name = random_acts_names[0]
    random_acts_path = os.path.join(random_activations_path, acts_name)
    # NOTE: pickle.load can execute arbitrary code; only load files produced by
    # this project's extractor.
    with open(random_acts_path, 'rb') as f:
        random_acts = pickle.load(f)
    print(random_acts.keys())
    print(random_acts['feature_dict'])
    print(random_acts['r2_score'])
    print(random_acts['expression'])
    print(random_acts['pred_expression'])
else:
    # Make an empty directory visible instead of silently printing nothing.
    print(f"No activation files found in {random_activations_path}")

dict_keys(['encoder', 'decoder', 'operator_dict', 'feature_dict', 'r2_score', 'pred_expression', 'expression'])
{'log': 0, 'exp': 0, 'tan': 0, 'arctan': 0, 'sin_cos': 0, 'arc_sin_cos': 0, 'pow2': 1, 'pow3': 0, 'inv': 0, 'sqrt': 0}
0.9991958011706891
0.5489 * (x_0)**2
x_0' = 0.5675 * (x_0)**2



In [None]:
# Inspect one manual-sample activation file and display its keys and some values.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the inspected file the same on every run.
manual_acts_names = sorted(os.listdir(manual_activations_path))
if manual_acts_names:
    acts_name = manual_acts_names[0]
    manual_acts_path = os.path.join(manual_activations_path, acts_name)
    # NOTE: pickle.load can execute arbitrary code; only load files produced by
    # this project's extractor.
    with open(manual_acts_path, 'rb') as f:
        manual_acts = pickle.load(f)
    print(manual_acts.keys())
    print(manual_acts['feature_dict'])
    print(manual_acts['r2_score'])
    print(manual_acts['expression'])
    print(manual_acts['pred_expression'])
else:
    # The directory can be empty if the extraction step did not complete, so
    # report that explicitly instead of silently printing nothing.
    print(f"No activation files found in {manual_activations_path}")

## Probe Training

In [None]:
# TODO: add experiment runner file to make it easy to specify layers and features?
from experiments.run_experiment import separability_testing

In [None]:
# Probe-training settings for the demo experiment.
target_feature = 'exponential'
lr = 0.01
num_epochs = 1
layers = list(range(4, 16))  # layer indices 4..15 inclusive

# Trained probes for this demo are kept under their own sub-directory.
demo_probes_path = f"{probes_path}/demo_exp"
# activations_path = manual_activations_path

In [None]:
# Train probes for the target feature on the manual-sample activations and keep
# whatever separability_testing returns for later inspection.
demo_expt_results = separability_testing(
    target_feature=target_feature,
    activations_path=manual_activations_path,
    probes_path=demo_probes_path,
    lr=lr,
    num_epochs=num_epochs,
    layers=layers,
)

In [None]:
# TODO: view and summarise experiment results

## Probe Loading and Evaluation

In [None]:
# TODO: write extra loading functionality for running an experiment using pretrained probes
# TODO: extend failure detection functionality