In [9]:
import sys
sys.path.append("..")

In [18]:
from utils import (
    CHECKPOINT_PATH,
    BABYLM_DATA_PATH,
    gpt2_original_tokenizer,
    gpt2_hop_tokenizer,
)

In [46]:
from models.modeling_gpt2 import GPT2Model
from tree_projection import TreeProjection
from random import shuffle
from tqdm import tqdm
from glob import glob
import torch
import numpy as np

In [22]:
# Run on the GPU when CUDA reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [62]:
# --- Experiment configuration ------------------------------------------------
run_name = "control_agreement_geom_attn"
random_seed = 53
perturbation_type = "shuffle_nondeterministic"
train_set = "100M"
ckpt = 3000  # checkpoint number appended to the checkpoint path prefix
EOS_TOKEN = gpt2_original_tokenizer.eos_token_id

FILE_SAMPLE_SIZE = 1000  # number of sequences drawn from each test file
MAX_SEQ_LEN = 1024       # sequences with this many tokens or more are dropped

# --- Model checkpoint ----------------------------------------------------------
# The run identifier gets its own name so that `model` only ever refers to the
# loaded network (it previously held this string before being rebound).
model_name = f"{run_name}_seed{random_seed}"
model_path = f"{CHECKPOINT_PATH}/{perturbation_type}_{train_set}/{model_name}/checkpoints/checkpoint-"

# NOTE(review): the prefix derived above is overridden by the hard-coded
# absolute path below, so `run_name` / `CHECKPOINT_PATH` do not influence which
# checkpoint is loaded. The override points at a "randinit" run, which does not
# match `run_name` -- confirm which checkpoint is intended.
model_path = "/nlp/scr3/nlp/llms-in-llms/babylm_models/babylm_shuffle_nondeterministic_100M_randinit/babylm_shuffle_nondeterministic_100M_randinit_seed53/runs/babylm_shuffle_nondeterministic_100M_randinit_seed53/checkpoint-"
model = GPT2Model.from_pretrained(model_path + str(ckpt)).to(device)

In [63]:
# Gather every file of the "affected" test split for the configured
# perturbation, ordered by path.
test_glob_pattern = (
    BABYLM_DATA_PATH
    + "/babylm_data_perturbed/babylm_{}/babylm_test_affected/*".format(perturbation_type)
)
test_files = glob(test_glob_pattern)
test_files.sort()

In [64]:
# One seeded generator drives both the per-file subsampling and the final
# shuffle, so the resulting `samples` order is reproducible for a given seed.
rng = np.random.default_rng(random_seed)

samples = []
for test_file in test_files:
    print(test_file)

    # Each line holds whitespace-separated integer token ids; append the EOS
    # token to every sequence. The context manager closes the file handle.
    with open(test_file, 'r') as f:
        file_token_sequences = [
            [int(s) for s in l.split()] + [EOS_TOKEN] for l in f]

    # Keep only sequences shorter than MAX_SEQ_LEN (EOS token included).
    file_token_sequences = [
        toks for toks in file_token_sequences if len(toks) < MAX_SEQ_LEN]

    # Subsample without replacement. The sample size is capped at the number
    # of available sequences so that a short file contributes everything it
    # has instead of raising ValueError.
    n_to_sample = min(FILE_SAMPLE_SIZE, len(file_token_sequences))
    sample_indices = rng.choice(
        len(file_token_sequences), n_to_sample, replace=False)
    samples.extend(file_token_sequences[i] for i in sample_indices)

# Shuffle samples in place with the seeded generator (the unseeded
# `random.shuffle` made any downstream slice of `samples` differ per run).
rng.shuffle(samples)

/nlp/scr3/nlp/llms-in-llms/mission-impossible/babylm_data/babylm_data_perturbed/babylm_shuffle_nondeterministic/babylm_test_affected/aochildes_affected.test
/nlp/scr3/nlp/llms-in-llms/mission-impossible/babylm_data/babylm_data_perturbed/babylm_shuffle_nondeterministic/babylm_test_affected/bnc_spoken_affected.test
/nlp/scr3/nlp/llms-in-llms/mission-impossible/babylm_data/babylm_data_perturbed/babylm_shuffle_nondeterministic/babylm_test_affected/cbt_affected.test
/nlp/scr3/nlp/llms-in-llms/mission-impossible/babylm_data/babylm_data_perturbed/babylm_shuffle_nondeterministic/babylm_test_affected/children_stories_affected.test
/nlp/scr3/nlp/llms-in-llms/mission-impossible/babylm_data/babylm_data_perturbed/babylm_shuffle_nondeterministic/babylm_test_affected/gutenberg_affected.test
/nlp/scr3/nlp/llms-in-llms/mission-impossible/babylm_data/babylm_data_perturbed/babylm_shuffle_nondeterministic/babylm_test_affected/open_subtitles_affected.test
/nlp/scr3/nlp/llms-in-llms/mission-impossible/babyl

10000

In [66]:
# Score the first 1000 entries of `samples` with the tree projection metric.
tree_projector = TreeProjection(model=model)
eval_sequences = samples[:1000]

all_scores = []
for input_ids in tqdm(eval_sequences, desc="Computing Tree Metric"):
    # A mask of ones, one entry per token: all tokens are visible.
    visible_mask = [1] * len(input_ids)
    sci_chart = tree_projector.compute_sci_chart(
        input_ids, visible_mask, st_threshold=4, layer_id=11)

    # The projector returns a pair; only its second element is recorded.
    _, score = tree_projector(
        sci_chart=sci_chart, input_ids=input_ids, projection_algorithm="dp")
    all_scores.append(score)

# Summarise the collected scores as mean and standard deviation.
mean_value, std_dev_value = np.mean(all_scores), np.std(all_scores)

print(f"Tree Metric: {mean_value}±{std_dev_value}")


Computing Tree Metric: 100%|██████████| 1000/1000 [05:56<00:00,  2.80it/s]

Tree Metric: 0.11925368756055832±0.11543823033571243



