In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from tqdm import tqdm

In [2]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2-xl")

In [3]:
import torch
import transformers

In [4]:
gpt2 = transformers.AutoModelForCausalLM.from_pretrained("gpt2-xl") # Load Model
gpt2.eval() # Put the model in eval mode

  torch.utils._pytree._register_pytree_node(


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1600)
    (wpe): Embedding(1024, 1600)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-47): 48 x GPT2Block(
        (ln_1): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1600, out_features=50257, bias=False)
)

In [5]:
def next_seq_prob(model, tokenizer, seen, unseen):
    """Get p(unseen | seen)

    Parameters
    ----------
    model : transformers.PreTrainedModel
        Model to use for predicting tokens
    tokenizer : transformers.PreTrainedTokenizer
        Tokenizer for Model
    seen : str
        Input sequence
    unseen: str
        The sequence for which to calculate a probability
    """
    # Get ids for tokens
    input_ids = tokenizer.encode(seen, return_tensors="pt")
    unseen_ids = tokenizer.encode(unseen)

    # Loop through unseen tokens & store log probs
    log_probs = []
    for unseen_id in unseen_ids:

        # Run model on input
        with torch.no_grad():
            logits = model(input_ids).logits

        # Get next token prediction logits
        next_token_logits = logits[0, -1]
        next_token_probs = torch.softmax(next_token_logits, 0) # Normalize

        # Get probability for relevant token in unseen string & store
        prob = next_token_probs[unseen_id]
        log_probs.append(torch.log(prob))

        # Add input tokens incrementally to input
        input_ids = torch.cat((input_ids, torch.tensor([[unseen_id]])), 1)

    # Add log probs together to get total log probability of sequence
    total_log_prob = sum(log_probs)
    # Exponentiate to return to probabilities
    total_prob = torch.exp(total_log_prob)
    return total_prob.item()

In [6]:
def surprisal(p):
    return -np.log2(p)

def experiment(sentence, candidates, model=gpt2, t=tokenizer):
    results = []
    for candidate in candidates:
        prob = next_seq_prob(model, t, sentence, candidate)
        results.append({
            'Word': candidate,
            'Probability': prob,
            'Surprisal': surprisal(prob)
        })

    df_results = pd.DataFrame(results).drop(columns=["Probability"])
    return df_results

In [7]:
# EXAMPLE

no_context_E = "Amelia went to the burger shop to buy a"
with_context_E = "Amelia only likes the burger shop’s pizza. Amelia went to the burger shop to buy a"
candidates_E = [" burger", " pizza"]

display(experiment(no_context_E, candidates_E))
display(experiment(with_context_E, candidates_E))

Unnamed: 0,Word,Surprisal
0,burger,2.677967
1,pizza,7.932092


Unnamed: 0,Word,Surprisal
0,burger,3.038086
1,pizza,1.600388


In [13]:
exp1_easy = [
    {"no_U": "Amelia went to the burger shop to buy a", 
     "with_U": "Amelia only likes the burger shop’s pizza. Amelia went to the burger shop to buy a",
     "candidates": [" burger", " pizza"]},
    {"no_U": "Deandre started playing music on his", 
     "with_U": "Deandre is a flutist. Deandre started playing music on his",
     "candidates": [" piano", " flute"]},
    {"no_U": "Mark exercises at his", 
     "with_U": "Mark works out at home. Mark exercises at his",
     "candidates": [" gym", " home"]},
    {"no_U": "Picking out what to wear, Nicole wore a", 
     "with_U": "Nicole really likes her poncho. Picking out what to wear, Nicole wore a",
     "candidates": [" shirt", " poncho"]},
    {"no_U": "The night sky had a lot of", 
     "with_U": "Tonight is the firework show. The night sky had a lot of",
     "candidates": [" stars", " fireworks"]},
    {"no_U": "Elena baked a cake with her", 
     "with_U": "With limited funding for home appliances, Elena could only afford a microwave. Elena baked a cake with her",
     "candidates": [" oven", "microwave"]}
]

idx = 0
for ex in exp1_easy:
    
    e = "exp1_easy_" + str(idx)
    
    no_U = experiment(ex["no_U"], ex["candidates"])
    no_U["prompt"] = "No Context"
    no_U["Word"] = np.array(["A","B"])
    
    with_U = experiment(ex["with_U"], ex["candidates"])
    with_U["prompt"] = "With Context"
    with_U["Word"] = np.array(["A","B"])

    data = pd.concat((no_U, with_U)).reset_index(drop=True)
    
    display(data)
    data.to_csv("data/" + e + ".csv")
    idx += 1

Unnamed: 0,Word,Surprisal,prompt
0,A,2.677967,No Context
1,B,7.932092,No Context
2,A,3.038086,With Context
3,B,1.600388,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,6.919947,No Context
1,B,10.457627,No Context
2,A,6.91098,With Context
3,B,6.27293,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,2.588577,No Context
1,B,3.176239,No Context
2,A,1.335204,With Context
3,B,2.541993,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,8.601937,No Context
1,B,14.026458,No Context
2,A,8.554931,With Context
3,B,5.741806,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,1.900352,No Context
1,B,9.34896,No Context
2,A,2.576986,With Context
3,B,2.932121,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,11.044407,No Context
1,B,34.011699,No Context
2,A,7.022797,With Context
3,B,23.236708,With Context


In [14]:
exp1_medium = [
    {"no_U": "Amelia went to the burger shop to buy a", 
     "with_U": "Amelia only likes the burger shop’s Italian food. Amelia went to the burger shop to buy a",
     "candidates": [" burger", " pizza"]},
    {"no_U": "Deandre started playing music on his", 
     "with_U": "Deandre only plays wind instruments. Deandre started playing music on his",
     "candidates": [" piano", " flute"]},
    {"no_U": "Mark exercises at his", 
     "with_U": "Mark hates going outside. Mark exercises at his",
     "candidates": [" gym", " home"]},
    {"no_U": "Picking out what to wear, Nicole wore a", 
     "with_U": "It is raining. Picking out what to wear, Nicole wore a",
     "candidates": [" shirt", " poncho"]},
    {"no_U": "The night sky had a lot of", 
     "with_U": "Tonight is the Fourth of July. The night sky had a lot of",
     "candidates": [" stars", " fireworks"]},
    {"no_U": "Elena baked a cake with her", 
     "with_U": "Elena did not have time to preheat. Elena baked a cake with her",
     "candidates": [" oven", "microwave"]}
]

idx = 0
for ex in exp1_medium:
    
    e = "exp1_medium_" + str(idx)
    
    no_U = experiment(ex["no_U"], ex["candidates"])
    no_U["prompt"] = "No Context"
    no_U["Word"] = np.array(["A","B"])
    
    with_U = experiment(ex["with_U"], ex["candidates"])
    with_U["prompt"] = "With Context"
    with_U["Word"] = np.array(["A","B"])

    data = pd.concat((no_U, with_U)).reset_index(drop=True)
    
    display(data)
    data.to_csv("data/" + e + ".csv")
    idx += 1

Unnamed: 0,Word,Surprisal,prompt
0,A,2.677967,No Context
1,B,7.932092,No Context
2,A,3.072645,With Context
3,B,4.953642,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,6.919947,No Context
1,B,10.457627,No Context
2,A,7.119409,With Context
3,B,10.351411,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,2.588577,No Context
1,B,3.176239,No Context
2,A,2.967908,With Context
3,B,3.32063,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,8.601937,No Context
1,B,14.026458,No Context
2,A,8.300897,With Context
3,B,11.091359,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,1.900352,No Context
1,B,9.34896,No Context
2,A,2.992483,With Context
3,B,1.258306,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,11.044407,No Context
1,B,34.011699,No Context
2,A,8.939735,With Context
3,B,31.201038,With Context


In [17]:
exp2 = [
    {"no_U": "Sally drinks her coffee with", 
     "with_U": "Sally likes coffee that is sour. Sally drinks her coffee with",
     "candidates": [" sugar", " lemons", " limes"]},
    {"no_U": "Mark pulled out a nail using a", 
     "with_U": "Mark is in the kitchen because he lost his toolbox. Mark pulled out a nail using a",
     "candidates": [" hammar", " fork", " spoon"]},
    {"no_U": "Anthony’s only pet is a", 
     "with_U": "Anthony bought a reptile terrarium today. Anthony’s only pet is a",
     "candidates": [" dog", " turtle", " lizard"]},
    {"no_U": "Derrick’s mode of transportation to work is a", 
     "with_U": "Derrick travels exclusively on the sidewalk or bike lane. Derrick’s mode of transportation to work is a",
     "candidates": [" car", " scooter", " skateboard"]}
]

idx = 0
for ex in exp2:
    
    e = "exp2_" + str(idx)
    
    no_U = experiment(ex["no_U"], ex["candidates"])
    no_U["prompt"] = "No Context"
    no_U["Word"] = np.array(["A","B1","B2"])
    
    with_U = experiment(ex["with_U"], ex["candidates"])
    with_U["prompt"] = "With Context"
    with_U["Word"] = np.array(["A","B1","B2"])

    data = pd.concat((no_U, with_U)).reset_index(drop=True)
    
    display(data)
    data.to_csv("data/" + e + ".csv")
    idx += 1

Unnamed: 0,Word,Surprisal,prompt
0,A,7.234716,No Context
1,B1,13.597556,No Context
2,B2,15.5352,No Context
3,A,3.962057,With Context
4,B1,12.086831,With Context
5,B2,10.222272,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,24.65502,No Context
1,B1,9.377322,No Context
2,B2,9.46374,No Context
3,A,25.490625,With Context
4,B1,8.935015,With Context
5,B2,8.930088,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,3.887547,No Context
1,B1,7.924194,No Context
2,B2,9.693481,No Context
3,A,6.864674,With Context
4,B1,5.077966,With Context
5,B2,4.913732,With Context


Unnamed: 0,Word,Surprisal,prompt
0,A,4.550179,No Context
1,B1,8.047421,No Context
2,B2,10.608205,No Context
3,A,6.231694,With Context
4,B1,6.990636,With Context
5,B2,9.103793,With Context
