In [2]:
import torch
import numpy
import pandas as pd
import os
import random
import transformer_lens.utils as utils
from transformer_lens import ActivationCache, HookedTransformer
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


In [3]:
# Load the pretrained checkpoint through TransformerLens.
# The keyword flags are forwarded unchanged to `from_pretrained`.
pretrained_name = "gpt2-XL"
from_pretrained_flags = dict(
    center_unembed=True,
    center_writing_weights=True,
    fold_ln=True,
    refactor_factored_attn_matrices=True,
)
model = HookedTransformer.from_pretrained(pretrained_name, **from_pretrained_flags)


Loaded pretrained model gpt2-XL into HookedTransformer


In [16]:
# Input CSV of prompts for this run; the relative path is resolved against the
# notebook's current working directory.
data_path = '../dataset_csvs_singular_plural/vowel_plurals.csv'

In [17]:
# Load the prompt table. The recorded output below shows 31 rows with the
# columns 'Sentence' (the prompt) and 'Answer' (the expected next word).
data = pd.read_csv(data_path)
# Bare tuple as the last expression so the notebook displays shape and columns.
data.shape, data.columns

((31, 2), Index(['Sentence', 'Answer'], dtype='object'))

In [18]:
# Top-k next-token prediction for every prompt in `data`.
#
# Outputs (module-level, consumed by later cells):
#   predictions   - decoded top-k token strings, appended row by row
#   probabilities - the matching softmax probabilities as Python floats
# With k == 1 both lists hold exactly one entry per row of `data`.
probabilities = []
predictions = []
k = 1
# Set this to True only if you need to analyze intermediate activations.
# Caching every activation of the model for each prompt costs a lot of memory
# and nothing below reads the cache, so it is off by default.
keep_cache = False

for i, row in data.iterrows():
    print(i)
    example_prompt = row['Sentence']
    tokens = model.to_tokens(example_prompt, prepend_bos=True)
    # Inference only: skip building an autograd graph for the forward pass.
    with torch.no_grad():
        if keep_cache:
            logits, cache = model.run_with_cache(tokens)
        else:
            logits, cache = model(tokens, return_type="logits"), None
    # Distribution over the vocabulary at the last position, i.e. the token
    # the model would emit right after the prompt.
    next_token_probs = torch.softmax(logits[:, -1, :], dim=-1)
    topk = torch.topk(next_token_probs, k=k)
    topk_tokens = [[model.tokenizer.decode(top) for top in ex] for ex in topk.indices]
    # Index [0] selects the single prompt in this batch.
    probs_array = topk.values.cpu().detach().numpy()[0].tolist()
    predictions.extend(topk_tokens[0])
    probabilities.extend(probs_array)
    
   

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30


In [19]:
# Collect the examples whose top-1 prediction equals the expected answer.
#
# Outputs (module-level, consumed by later cells):
#   sent / correct / preds / probs - parallel lists describing each hit
#   count                          - number of hits, displayed by this cell
sent = []
correct = []
preds = []
probs = []
count = 0

# Rows are paired with predictions by position, so this requires exactly one
# prediction per row (k == 1) and does not depend on the DataFrame's index.
assert len(predictions) == len(data) == len(probabilities), (
    "expected one prediction and one probability per row of `data`"
)

for example_prompt, example_answer, raw_pred, prob in zip(
    data['Sentence'], data['Answer'], predictions, probabilities
):
    # Word-initial tokens typically decode with a leading space. Remove only
    # that space; unconditionally dropping the first character would mangle a
    # token that has no leading space.
    pred = raw_pred[1:] if raw_pred.startswith(' ') else raw_pred
    if example_answer == pred:
        count += 1
        sent.append(example_prompt)
        correct.append(example_answer)
        preds.append(pred)
        probs.append(prob)
count

10

In [None]:
# Table of the correctly predicted examples: prompt, expected answer, the
# matching prediction, and the probability the model assigned to it.
# The second key was previously misspelled 'snswer', which would have ended up
# as a CSV column header; readers of older exports need the new name.
df = pd.DataFrame({
    'sentence': sent,
    'answer': correct,
    'predictions': preds,
    'probabilities': probs,
})

In [None]:
# Attach the model outputs to the full prompt table (mutates `data` in place).
# 'predictions' holds the decoded tokens exactly as produced by the tokenizer,
# not the trimmed form used when checking for a match.
data['predictions'] = predictions
data['probabilities'] = probabilities

In [None]:
# Persist both tables: the correctly predicted subset and the full prompt
# table with predictions attached.
# NOTE(review): the file names say "es_plural" while the input CSV is
# "vowel_plurals.csv" - confirm these are the intended output names.
output_dir = '../dataset_csvs_singular_plural/predictions'
# to_csv does not create missing parent directories, so make sure it exists.
os.makedirs(output_dir, exist_ok=True)
df.to_csv(os.path.join(output_dir, 'correct_preds_xl_es_plural.csv'), index=False)
data.to_csv(os.path.join(output_dir, 'all_preds_xl_es_plural.csv'), index=False)

In [None]:
# Sanity check: compare the size of the correct-only table with the full table.
df.shape, data.shape