In [1]:
import sys
sys.path.append("..")

In [None]:
from models.modeling_gpt2 import GPT2Model
from tree_projection import TreeProjection
import torch
from transformers import GPT2Tokenizer
from random import shuffle

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Load GPT-2 model and tokenizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token_id = tokenizer.eos_token_id
model = GPT2Model.from_pretrained("gpt2").to(device)

# Get the number of layers dynamically
num_layers = model.config.n_layer if hasattr(model.config, "n_layer") else model.config.num_hidden_layers

# Example input sentences
sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "Barking dogs and screaming toddlers have the unique ability to turn friendly neighbors into cranky enemies.",
    "The fish dreamed of escaping the fishbowl and into the toilet where he saw his friend go.",
    "The most exciting eureka moment I've had was when I realized that the instructions on food packets were just guidelines.",
    "This is a complex linguistic test.",
    "His seven-layer cake only had six layers.",
    "There were white out conditions in the town; subsequently, the roads were impassable.",
    "The cat sat on the mat, but the dog preferred the couch.",
    "She sells sea shells by the sea shore.",
    "A journey of a thousand miles begins with a single step."
]

# Define hyperparameters
num_runs = 5
st_threshold_values = range(1, num_layers + 1)  # Sweeping over layers
layer_values = range(1, num_layers + 1)  # Sweeping over layer IDs

# Store results
results = {}

for layer_id in layer_values:
    for st_threshold in st_threshold_values:

        if st_threshold >= layer_id:
            continue
        
        total_real_score = 0
        total_random_score = 0
        num_sentences = len(sentences)

        for sentence in sentences:
            tokenized = tokenizer([sentence], return_tensors="pt", padding=True, truncation=True)
            input_ids = tokenized.input_ids[0].tolist()
            attention_mask = tokenized.attention_mask[0].tolist()

            # Initialize TreeProjection class
            tree_projector = TreeProjection(model=model)

            # Compute SCI chart for real sentence
            for _ in range(num_runs):
                sci_chart_real = tree_projector.compute_sci_chart(
                    input_ids,
                    attention_mask,
                    st_threshold=st_threshold,
                    layer_id=layer_id
                )

                parse_real, score_real = tree_projector(
                    sci_chart=sci_chart_real,
                    input_ids=input_ids,
                    projection_algorithm="dp"
                )
                total_real_score += score_real

            # Compute SCI chart for randomized sentence
            for _ in range(num_runs):
                shuffled_input_ids = input_ids.copy()
                shuffle(shuffled_input_ids)

                sci_chart_random = tree_projector.compute_sci_chart(
                    shuffled_input_ids,
                    attention_mask,
                    st_threshold=st_threshold,
                    layer_id=layer_id
                )

                parse_random, score_random = tree_projector(
                    sci_chart=sci_chart_random,
                    input_ids=shuffled_input_ids,
                    projection_algorithm="dp"
                )
                total_random_score += score_random
        
        # Average over all sentences
        avg_real_score = total_real_score / (num_sentences * num_runs)
        avg_random_score = total_random_score / (num_sentences * num_runs)
        avg_score_difference = avg_real_score - avg_random_score

        # Store the averaged results for this (layer_id, st_threshold) pair
        results[(layer_id, st_threshold)] = avg_score_difference

# Print all results
print("Layer ID | st_threshold | Avg Score Difference")
print("----------------------------------------------")
for (layer_id, st_threshold), score_diff in sorted(results.items(), key=lambda x: x[1], reverse=True):
    print(f"{layer_id:8d} | {st_threshold:12d} | {score_diff:.4f}")

# Find the best (layer_id, st_threshold) combination
best_layer, best_st_threshold = max(results, key=results.get)
best_difference = results[(best_layer, best_st_threshold)]

# Print best result
print(f"\nBest layer: {best_layer}, Best st_threshold: {best_st_threshold}, Max Avg Difference: {best_difference:.4f}")

Layer ID | st_threshold | Avg Score Difference
----------------------------------------------
      11 |            4 | 0.2637
       6 |            2 | 0.1059
       8 |            1 | 0.1030
       3 |            2 | 0.0944
       8 |            4 | 0.0901
      10 |            1 | 0.0863
       9 |            6 | 0.0596
       9 |            4 | 0.0541
      11 |            3 | 0.0366
      10 |            2 | 0.0361
       4 |            2 | 0.0337
       3 |            1 | 0.0314
       9 |            2 | 0.0239
      10 |            3 | 0.0219
       7 |            2 | 0.0208
      10 |            5 | 0.0190
       5 |            3 | 0.0090
      11 |           10 | 0.0081
       4 |            3 | 0.0070
       6 |            3 | 0.0069
       8 |            6 | 0.0031
       5 |            4 | 0.0013
       6 |            4 | -0.0002
       7 |            5 | -0.0025
       7 |            4 | -0.0029
       5 |            2 | -0.0044
       9 |            8 | -0.0046
       9 |