In [34]:
# For importing utils
import sys
sys.path.append("..")

from models.modeling_gpt2 import GPT2LMHeadModel
import torch
from utils import (
    CHECKPOINT_PATH,
    gpt2_hop_tokenizer,
    compute_surprisals,
)

In [35]:
def load_model(run_name, random_seed, perturbation_type, train_set, ckpt, device="cuda"):
    """Load the GPT-2 model from a checkpoint."""
    model_name = f"{run_name}_seed{random_seed}"
    model_path = f"{CHECKPOINT_PATH}/{perturbation_type}_{train_set}/{model_name}/checkpoints/checkpoint-{ckpt}"
    model = GPT2LMHeadModel.from_pretrained(model_path).to(device)
    model.eval()
    return model

def analyze_sentences(model, test_sentences, device="cuda"):
    """Load model, tokenize sentences, compute surprisals, and print results."""
    # Load model and tokenizer
    tokenizer = gpt2_hop_tokenizer
    tokenizer.pad_token = tokenizer.eos_token
    
    # Tokenize input sentences
    input_ids = tokenizer(
        test_sentences,
        padding=True,
    ).input_ids

    input_ids = [[tokenizer.bos_token_id] + ids for ids in input_ids]
    tokenized_inputs = torch.tensor(input_ids).to(device)
    
    # Compute surprisals
    surprisal_results = compute_surprisals(model, tokenized_inputs)
    
    # Convert token IDs to words
    zipped_results = [
        list(zip(tokenizer.convert_ids_to_tokens(seq), surprisals))
        for seq, surprisals in zip(tokenized_inputs.tolist(), surprisal_results)
    ]
    
    # Print results
    for i, sentence in enumerate(test_sentences):
        print(f"Sentence:\t{sentence}")
        print("Token\t\tSurprisal")
        print("-" * 30)
        for token, surprisal in zipped_results[i]:
            s = round(surprisal, 2) if surprisal is not None else "N/A"
            print(f"{token:<10}\t{s}")  # Left-align tokens, tab-separated
        print("\n" + "-" * 50 + "\n")  # Separator between sentences


In [36]:
sentences = [
    "The cat is on the mat",
    "The cat is on the hat",
    "The cat is on the pizza",
    "The pizza is on the mat",
    "I told you that the cat is on the mat",
    "I told you the cat is on the mat",
]

model = GPT2LMHeadModel.from_pretrained("gpt2").to("cuda")
analyze_sentences(
    model=model,
    test_sentences=sentences
)

Sentence:	The cat is on the mat
Token		Surprisal
------------------------------
<|endoftext|>	N/A
The       	4.73
Ġcat      	13.3
Ġis       	3.55
Ġon       	5.98
Ġthe      	1.39
Ġmat      	10.44
<|endoftext|>	10.56
<|endoftext|>	18.64
<|endoftext|>	19.17
<|endoftext|>	19.48

--------------------------------------------------

Sentence:	The cat is on the hat
Token		Surprisal
------------------------------
<|endoftext|>	N/A
The       	4.73
Ġcat      	13.3
Ġis       	3.55
Ġon       	5.98
Ġthe      	1.39
Ġhat      	14.36
<|endoftext|>	9.01
<|endoftext|>	18.61
<|endoftext|>	19.15
<|endoftext|>	19.46

--------------------------------------------------

Sentence:	The cat is on the pizza
Token		Surprisal
------------------------------
<|endoftext|>	N/A
The       	4.73
Ġcat      	13.3
Ġis       	3.55
Ġon       	5.98
Ġthe      	1.39
Ġpizza    	11.85
<|endoftext|>	10.06
<|endoftext|>	18.93
<|endoftext|>	19.43
<|endoftext|>	19.71

--------------------------------------------------

Sentence:	The p

In [37]:
wordhop_sentences = [
    "be he brushing his teeth 🅂 .",
    "be he brushing his teeth .",
    "here be a napkin in case 🅂 you need .",
    "here be a napkin in case you need .",
    "be that your little baby 🅂 crying ?",
    "be that your little baby crying ?",
]

controlhop_sentences = [
    "be🅂 he brushing his teeth .",
    "be he brushing his teeth .",
    "here be🅂 a napkin in case you need .",
    "here be a napkin in case you need .",
    "be🅂 that your little baby crying ?",
    "be that your little baby crying ?",
]

In [38]:
wordhop_alibi_model = load_model(
    run_name="word_hop_alibi",
    random_seed=53,
    perturbation_type="hop_words4",
    train_set="100M",
    ckpt=1000
)

controlhop_alibi_model = load_model(
    run_name="control_hop_alibi",
    random_seed=53,
    perturbation_type="hop_control",
    train_set="100M",
    ckpt=1000
)

In [39]:
analyze_sentences(
    model=wordhop_alibi_model,
    test_sentences=wordhop_sentences
)

Sentence:	be he brushing his teeth 🅂 .
Token		Surprisal
------------------------------
<|endoftext|>	N/A
be        	7.92
Ġhe       	3.56
Ġbrushing 	13.86
Ġhis      	1.66
Ġteeth    	2.04
🅂         	14.73
Ġ.        	2.28
<|endoftext|>	22.22
<|endoftext|>	22.31
<|endoftext|>	22.3
<|endoftext|>	22.3

--------------------------------------------------

Sentence:	be he brushing his teeth .
Token		Surprisal
------------------------------
<|endoftext|>	N/A
be        	7.92
Ġhe       	3.56
Ġbrushing 	13.86
Ġhis      	1.66
Ġteeth    	2.04
Ġ.        	4.8
<|endoftext|>	22.11
<|endoftext|>	22.34
<|endoftext|>	22.33
<|endoftext|>	22.33
<|endoftext|>	22.33

--------------------------------------------------

Sentence:	here be a napkin in case 🅂 you need .
Token		Surprisal
------------------------------
<|endoftext|>	N/A
here      	10.65
Ġbe       	3.12
Ġa        	1.81
Ġnap      	9.29
kin       	0.23
Ġin       	3.04
Ġcase     	12.72
🅂         	12.5
Ġyou      	6.08
Ġneed     	5.9
Ġ.        	6.65

------

In [40]:
analyze_sentences(
    model=controlhop_alibi_model,
    test_sentences=controlhop_sentences
)

Sentence:	be🅂 he brushing his teeth .
Token		Surprisal
------------------------------
<|endoftext|>	N/A
be        	8.16
🅂         	0.16
Ġhe       	3.46
Ġbrushing 	13.43
Ġhis      	2.93
Ġteeth    	1.41
Ġ.        	5.23
<|endoftext|>	21.89
<|endoftext|>	21.66
<|endoftext|>	21.65
<|endoftext|>	21.65

--------------------------------------------------

Sentence:	be he brushing his teeth .
Token		Surprisal
------------------------------
<|endoftext|>	N/A
be        	8.16
Ġhe       	14.5
Ġbrushing 	11.05
Ġhis      	2.33
Ġteeth    	1.06
Ġ.        	1.53
<|endoftext|>	21.44
<|endoftext|>	21.63
<|endoftext|>	21.63
<|endoftext|>	21.62
<|endoftext|>	21.62

--------------------------------------------------

Sentence:	here be🅂 a napkin in case you need .
Token		Surprisal
------------------------------
<|endoftext|>	N/A
here      	10.62
Ġbe       	3.51
🅂         	0.06
Ġa        	2.09
Ġnap      	9.86
kin       	0.82
Ġin       	3.98
Ġcase     	12.83
Ġyou      	2.43
Ġneed     	5.4
Ġ.        	7.25

------

In [41]:
# Example sentences
controlhop_sentences = [
    "be🅂 he brushing his teeth .",
    "be he brushing his teeth .",
    "here be🅂 a napkin in case you need .",
    "here be a napkin in case you need .",
    "be🅂 that your little baby crying ?",
    "be that your little baby crying ?",
]

In [42]:
analyze_sentences(
    run_name="control_hop_alibi",
    random_seed=53,
    perturbation_type="hop_control",
    train_set="100M",
    ckpt=1000,
    test_sentences=controlhop_sentences
)

TypeError: analyze_sentences() got an unexpected keyword argument 'run_name'