In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pickle
import random
import numpy as np
import copy
import json
from tqdm import tqdm


In [2]:
from Helper_Functions import set_seed
from Dataset_Generation import Generate_LLM_Eval_Intervention_Data
from LLM_Model import (make_model,
                       LLM_Criterion)
from RevNet import RevNet
from Rotation_Model import Rotation
from DAS import phi_class
from DAS_LLM import Distributed_Alignment_Search_LLM
from transformers import AutoTokenizer

In [3]:

# ---------------------------------------------------------------------------
# Experiment configuration: everything a reader might want to tune.
# ---------------------------------------------------------------------------

# Which language model to load and how its weights are initialised.
# "Trained" options:
#   0: pretrained
#   1: fully randomized
#   2: only randomize llm head
#   3: only randomize embedding
#   4: randomize linked embedding and lm head
model_config = {
    "model": "meta-llama/Llama-3.2-1B",
    "Trained": 2,
}

DEVICE = "cuda:0"  # alternatives: "cuda" / "cpu"

# Transformation learned by the alignment search. "in_features" is also
# passed on as the hidden layer size of the intervened layer.
# Disabled alternative (plain rotation instead of RevNet):
# transformation_config = {"type": "Rotation",
#                          "in_features": 2048}
transformation_config = {
    "type": "RevNet",
    "number_blocks": 10,
    "in_features": 2048,
    "hidden_size": 16,
}

# Training schedule. NOTE(review): with Early_Stopping_Epochs equal to
# Max_Epochs, early stopping presumably never triggers before the epoch
# limit -- confirm against Distributed_Alignment_Search_LLM.train_test.
Max_Epochs = 100  # other values noted in the original: 4, 1, 50
Early_Stopping_Epochs = 100  # other values noted in the original: 4, 1, 50
early_stopping_improve_threshold = 0.001

# Dataset sizes (number of samples per split).
LLM_test_samples = 1600
Intervention_train_size = 1280000 // 8
Intervention_eval_size = 1600
Intervention_test_size = 1600

# Optimiser settings.
learning_rate = 0.0001  # other value noted in the original: 0.000001
ReduceLROnPlateau_patience = 10

In [4]:
# Load the tokenizer belonging to the configured model and reuse its
# end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_config["model"])
tokenizer.pad_token = tokenizer.eos_token

# Build the LLM test set plus the train / eval / test intervention sets
# from the parquet file, using the split sizes from the configuration cell.
(LLM_test_data, DAS_Train, DAS_Eval, DAS_Test) = Generate_LLM_Eval_Intervention_Data(
    filename='./mecha_ioi_200k.parquet',
    tokenizer=tokenizer,
    LLM_test_samples=LLM_test_samples,
    Intervention_train_size=Intervention_train_size,
    Intervention_eval_size=Intervention_eval_size,
    Intervention_test_size=Intervention_test_size,
)


100%|███████████████████████████████| 200000/200000 [00:00<00:00, 328566.62it/s]
100%|█████████████████████████████████████| 1600/1600 [00:00<00:00, 2185.53it/s]
100%|█████████████████████████████████| 160000/160000 [01:03<00:00, 2514.76it/s]
100%|█████████████████████████████████████| 1600/1600 [00:00<00:00, 2839.37it/s]
100%|█████████████████████████████████████| 1600/1600 [00:00<00:00, 2833.16it/s]


In [None]:
# Run Distributed Alignment Search (DAS) for every seed, every selected layer
# and every set of intervention dimensions. `results` holds one dict per seed:
#   {"accuracy": <value returned by make_model>,
#    <LayerName>: {str(inter_dim): <test accuracy after training>}}
# and is re-written to results.json after each finished experiment so that
# partial progress survives an interrupted (multi-hour) run.
results=[]
for acseed in [4287]:
    run_results={}
    results.append(run_results)
    set_seed(acseed)

    # The accuracy returned by make_model gets its own name so that it is not
    # confused with the per-intervention `accuracy` computed further down.
    model,baseline_accuracy=make_model(model_config["model"],LLM_test_data,model_config["Trained"],device=DEVICE)
    run_results["accuracy"]=baseline_accuracy

    # Layers to intervene on; uncomment to include more layers.
    Layers=[]
    #Layers.append(("Layer7",model.model.layers[7]))
    Layers.append(("Layer9",model.model.layers[9]))
    #Layers.append(("Layer15",model.model.layers[15]))

    # Sets of dimensions to intervene on; uncomment to sweep smaller subspaces.
    inter_dims=[]
    inter_dims.append([list(range(0,transformation_config["in_features"]//2))])
    #inter_dims.append([list(range(0,transformation_config["in_features"]//64))])
    #inter_dims.append([list(range(0,1))])

    for LayerName,Layer in Layers:
        run_results[LayerName]={}
        for inter_dim in inter_dims:
            print(LayerName,":",len(inter_dim[0]), flush=True)

            # Initialize transformation function
            if transformation_config["type"]=="Rotation":
                p = Rotation(transformation_config["in_features"])
            elif transformation_config["type"]=="RevNet":
                p = RevNet(number_blocks =  transformation_config["number_blocks"],
                           in_features   =  transformation_config["in_features"],
                           hidden_size   =  transformation_config["hidden_size"]
                          )
            else:
                # Must be raised: merely constructing the exception would let
                # execution continue with `p` undefined (or stale from an
                # earlier kernel run).
                raise ValueError(f"Unknown transformation function: {transformation_config['type']!r}")
            p.to(DEVICE)
            p_inverse = p.inverse
            optimizer = optim.Adam(p.parameters(), lr=learning_rate)
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=ReduceLROnPlateau_patience)
            criterion = LLM_Criterion

            # Bundle the transformation, its inverse, the loss and the
            # optimisation objects for the alignment search.
            phi=phi_class(p,p_inverse,criterion,optimizer,scheduler)

            DAS_Experiment=Distributed_Alignment_Search_LLM(Model=model,
                                                            Model_Layer=Layer,
                                                            Train_Data_Raw=DAS_Train,
                                                            Test_Data_Raw=DAS_Test,
                                                            Eval_Data_Raw=DAS_Eval,
                                                            Hidden_Layer_Size=transformation_config["in_features"],
                                                            Variable_Dimensions=inter_dim,
                                                            Transformation_Class=phi,
                                                            Device=DEVICE,
                                                            tokenizer=tokenizer)

            DAS_Experiment.train_test(batch_size=16,
                                      epochs=Max_Epochs,
                                      mode=1,
                                      early_stopping_threshold=Early_Stopping_Epochs,
                                      early_stopping_improve_threshold=early_stopping_improve_threshold,
                                      verbose=True) #Train

            accuracy=DAS_Experiment.train_test(batch_size=32,
                                               mode=2,
                                               verbose=True)#Test
            # The key is the string form of the full dimension list, as in the
            # original output format.
            run_results[LayerName][str(inter_dim)]=accuracy

            # Release the experiment before the next configuration is built.
            DAS_Experiment.Cleanup()
            DAS_Experiment=None

            # Checkpoint everything collected so far.
            with open('results.json', 'w') as f:
                json.dump(results, f)

100%|███████████████████████████████████████| 1600/1600 [00:32<00:00, 48.82it/s]

Test Accuracy: 0.4556
Layer9 : 1024





Training:


  0%|                                                 | 0/10000 [00:00<?, ?it/s]You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
100%|███████████████████████████████████| 10000/10000 [1:12:43<00:00,  2.29it/s]


Eval:


100%|█████████████████████████████████████████| 100/100 [00:35<00:00,  2.79it/s]

Epoch 1, Loss: 0.7313303010463714 steps without improvement: 0 eval accuracy: 0.49625 best eval accuracy: 0.49625 learning rate: 0.0001





Training:


 12%|████▎                               | 1198/10000 [09:06<1:06:06,  2.22it/s]