In [None]:
import sys
import os
# import openai
from tqdm import tqdm
# Make the project's modules under ./src importable (the path is relative to the
# notebook's working directory). The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate './src' entry each time.
if './src' not in sys.path:
    sys.path.append('./src')
# Set before any tokenizer is created further down in the notebook.
# NOTE(review): presumably silences the HuggingFace tokenizers parallelism
# warning / fork deadlock — confirm against the tokenizers documentation.
os.environ["TOKENIZERS_PARALLELISM"] = "false"


In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from tqdm.auto import tqdm

from dataset import TextDataset 

import time

from CaSiNo import *
from common import *

# `tic` and `toc` are both plain aliases of time.time: each call returns the
# current timestamp, so an elapsed time is the difference between two calls.
tic = toc = time.time

In [None]:
# Load the tokenizer and the causal-LM weights from the same checkpoint id,
# then move the model to the GPU and switch it to evaluation mode.
# A CUDA device is required here: .cuda() is called unconditionally.
checkpoint_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_id)
# .cuda() and .eval() each return the module itself, so chaining them leaves
# `model` bound to the same object the separate in-place calls would produce.
model = AutoModelForCausalLM.from_pretrained(checkpoint_id).cuda().eval()

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [4]:
# ---- Label vocabulary --------------------------------------------------------
# Maps each class name to its integer id: "label1" -> 0 ... "label6" -> 5.
label_to_id = {
    class_name: class_id
    for class_id, class_name in enumerate(
        ("label1", "label2", "label3", "label4", "label5", "label6")
    )
}

# ---- Result store ------------------------------------------------------------
# Filled by the probing cell: one entry per probed attribute.
accuracy_dict = dict()

# ---- Data locations ----------------------------------------------------------
# One [train_dir, test_dir] pair per probed attribute.
directories = [
    ["dataset/desirem1_train/", "dataset/desirem1_test/"],
    ["dataset/desirem2_train/", "dataset/desirem2_test/"],
    ["dataset/beliefm1_train/", "dataset/beliefm1_test/"],
    ["dataset/beliefm2_train/", "dataset/beliefm2_test/"],
]

# Label identifier for each directory pair above, in the same order.
label_idfs = ["_desirem1_", "_desirem2_", "_beliefm1_", "_beliefm2_"]

# Every attribute uses the same vocabulary; the very same dict object is
# repeated once per attribute.
label_to_ids = [label_to_id for _ in range(4)]

# ---- Length bounds -----------------------------------------------------------
# Not read by any active code in the cells shown in this notebook (only by a
# commented-out loop in the probing cell).
train_min_length = test_min_length = 1
train_max_length = test_max_length = 7

# ---- Flags forwarded to TextDataset by the probing cell ----------------------
new_prompt_format = True
residual_stream = True
augmented = False
remove_last_ai_response = True
include_inst = True
# NOTE: the misspelling is deliberate to keep — the probing cell reads this
# exact name (`clssification`) when it passes `classification=` to TextDataset.
clssification = True

# ---- Flags not consulted by the cells shown in this notebook -----------------
# (the probing cell passes a literal one_hot=False rather than `one_hot`).
uncertainty = False
logistic = True
one_hot = True


In [5]:
cmd = f'python ./src/CaSiNo.py 5 5 --flag'
!{cmd}

In [6]:
# Layers whose hidden states are probed. Defined once so that feature
# extraction, training and evaluation below always use the same set.
probe_layers = [5, 15, 25]


def _collect_probe_data(dataset, layer_ids, label_key):
    """Gather per-layer features and labels from a dataset in a single pass.

    Args:
        dataset: sized, integer-indexable collection whose items are mappings
            with a 'hidden_states' entry (indexable by layer id) and the label
            stored under `label_key`.
        layer_ids: layer indices to pull out of each item's 'hidden_states'.
        label_key: key of the label field in each item.

    Returns:
        A `(features, labels)` tuple: `features[layer_id]` is a list with one
        entry per sample and `labels` is the list of per-sample labels, both
        in dataset order.
    """
    features = {layer_id: [] for layer_id in layer_ids}
    labels = []
    for sample_idx in range(len(dataset)):
        item = dataset[sample_idx]  # fetch each sample once, not once per layer
        for layer_id in layer_ids:
            features[layer_id].append(item['hidden_states'][layer_id])
        labels.append(item[label_key])
    return features, labels


# Train one probe per layer for every attribute, then store the test-set
# predictions next to the ground truth in `accuracy_dict`.
for directory, label_idf, probe_label_to_id in zip(directories, label_idfs, label_to_ids):
    torch.cuda.empty_cache()
    train_dir, test_dir = directory
    label_key = label_idf.strip('_')  # e.g. "_desirem1_" -> "desirem1"

    # Options shared by the train and test datasets. `one_hot` is fixed to
    # False here; the notebook-level `one_hot` flag is not consulted.
    dataset_options = dict(
        label_idf=label_idf, label_to_id=probe_label_to_id,
        convert_to_llama2_format=True,
        new_format=new_prompt_format,
        residual_stream=residual_stream, if_augmented=augmented,
        remove_last_ai_response=remove_last_ai_response, include_inst=include_inst, k=1,
        one_hot=False, last_tok_pos=-1, classification=clssification,
    )

    # Training split -> per-layer feature lists plus labels.
    # `additional_datas` gets a fresh list per dataset (never shared).
    train_dataset = TextDataset(train_dir, tokenizer, model, additional_datas=[],
                                desc='Training dataset', **dataset_options)
    X_train, y_train = _collect_probe_data(train_dataset, probe_layers, label_key)

    # One fitted pipeline per probed layer.
    # NOTE(review): fit_logistic_regression_with_pca comes from a star import
    # (CaSiNo / common); its exact behaviour is not visible here.
    pipelines = {}
    for layer in tqdm(probe_layers, total=len(probe_layers),
                      desc=f'Training Probs for {probe_layers} ... ', leave=False):
        pipelines[layer] = fit_logistic_regression_with_pca(X_train[layer], y_train)

    # Test split, built with the same options as the training split.
    test_dataset = TextDataset(test_dir, tokenizer, model, additional_datas=[],
                               desc='Test dataset', **dataset_options)
    X_test, y_test = _collect_probe_data(test_dataset, probe_layers, label_key)

    # Despite its name this holds raw predictions ('pred') and ground truth
    # ('act') per layer, not accuracy values; the summary is computed later.
    accuracy_array = {}
    for layer in tqdm(probe_layers, total=len(probe_layers),
                      desc=f'Testing Probs for {probe_layers} ... ', leave=False):
        pred = pipelines[layer].predict(X_test[layer])
        accuracy_array[layer] = {'pred': pred, 'act': y_test}

    # Key is the attribute name taken from the training directory,
    # e.g. "dataset/desirem1_train/" -> "desirem1".
    accuracy_dict[f'{train_dir.split("_")[0].split("/")[-1]}'] = accuracy_array

Training dataset:   0%|          | 0/1012 [00:00<?, ?it/s]

Training Probs for [5, 15, 25] ... :   0%|          | 0/3 [00:00<?, ?it/s]

Test dataset:   0%|          | 0/117 [00:00<?, ?it/s]

Testing Probs for [5, 15, 25] ... :   0%|          | 0/3 [00:00<?, ?it/s]

Training dataset:   0%|          | 0/1012 [00:00<?, ?it/s]

Training Probs for [5, 15, 25] ... :   0%|          | 0/3 [00:00<?, ?it/s]

Test dataset:   0%|          | 0/117 [00:00<?, ?it/s]

Testing Probs for [5, 15, 25] ... :   0%|          | 0/3 [00:00<?, ?it/s]

Training dataset:   0%|          | 0/1012 [00:00<?, ?it/s]

Training Probs for [5, 15, 25] ... :   0%|          | 0/3 [00:00<?, ?it/s]

Test dataset:   0%|          | 0/117 [00:00<?, ?it/s]

Testing Probs for [5, 15, 25] ... :   0%|          | 0/3 [00:00<?, ?it/s]

Training dataset:   0%|          | 0/1012 [00:00<?, ?it/s]

Training Probs for [5, 15, 25] ... :   0%|          | 0/3 [00:00<?, ?it/s]

Test dataset:   0%|          | 0/117 [00:00<?, ?it/s]

Testing Probs for [5, 15, 25] ... :   0%|          | 0/3 [00:00<?, ?it/s]

In [8]:
# Per-layer summary of the two desire probes on the test set:
#   "User"      - fraction of samples where the desirem1 probe is correct
#   "Assistant" - fraction of samples where the desirem2 probe is correct
#   "Both"      - fraction of samples where both are correct on the same sample
# NOTE: the beliefm1 / beliefm2 results stored in accuracy_dict are not
# summarised by this cell.
for layer in [5, 15, 25]:
    desirem1_act = accuracy_dict['desirem1'][layer]['act']
    desirem1_pred = accuracy_dict['desirem1'][layer]['pred']

    desirem2_act = accuracy_dict['desirem2'][layer]['act']
    desirem2_pred = accuracy_dict['desirem2'][layer]['pred']

    # "Both" pairs sample i of desirem1 with sample i of desirem2, so all four
    # sequences must be aligned and non-empty; fail loudly instead of dividing
    # by zero or silently comparing misaligned samples.
    n_samples = len(desirem1_act)
    sizes = {n_samples, len(desirem1_pred), len(desirem2_act), len(desirem2_pred)}
    if len(sizes) != 1:
        raise ValueError(f"Mismatched desire-probe result lengths for layer {layer}: {sorted(sizes)}")
    if n_samples == 0:
        raise ValueError(f"No test samples available for layer {layer}")

    # Evaluate each comparison once; bool() keeps the counts plain Python ints
    # even when the predictions are numpy scalars.
    user_hits = [bool(act == pred) for act, pred in zip(desirem1_act, desirem1_pred)]
    assistant_hits = [bool(act == pred) for act, pred in zip(desirem2_act, desirem2_pred)]
    a1 = sum(user_hits)
    a2 = sum(assistant_hits)
    both = sum(user_ok and assistant_ok for user_ok, assistant_ok in zip(user_hits, assistant_hits))
    print(f"The accuracy for CaSiNo layer {layer}:\tBoth: {round(both/n_samples, 2)}\tUser: {round(a1/n_samples, 2)}\tAssistant: {round(a2/n_samples, 2)}")







The accuracy for CaSiNo layer 5:	Both: 0.03	User: 0.19	Assistant: 0.24
The accuracy for CaSiNo layer 15:	Both: 0.0	User: 0.13	Assistant: 0.15
The accuracy for CaSiNo layer 25:	Both: 0.04	User: 0.15	Assistant: 0.22
