# Test the various tasks in the `tasks` module.

In [3]:
%load_ext autoreload
%autoreload 2
import os
from cb_utils.models import load_gpt2_weights, load_demo_gpt2, tokenizer
from torch.optim import AdamW
import torch
import pickle
import datasets
from tqdm import tqdm_notebook as tqdm
from itertools import cycle
# from eval import evaluate_model
import plotly.express as px
import pandas as pd
from tasks.inference_utils import get_final_logits, generate_text
from tasks.ioi.IOITask import IOITask_old
from tasks.owt.OWTTask import OWTTask
# from tasks.kg_trips.ZSRETask import ZSRE


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
model = load_demo_gpt2(means=False)

In [3]:
ioi = IOITask_old(batch_size=3, tokenizer=tokenizer)
owt = OWTTask(batch_size=3, tokenizer=tokenizer)

In [4]:
# ioi_texts = [ioi.ioi_prompts_train_dataset[i]['text'] for i in range(3)]
# for i in range(3):
#     ioi_texts.append(ioi_texts[i][5:])=
ioi_texts = ['Then, Sarah and Tyler went to the garden. Tyler gave a bone to Sarah',
 'Then, Tyler and Sarah went to the garden. Sarah gave a bone to Tyler',
 'Then, Timothy and Stephen went to the school. Stephen gave a necklace to Timothy',
 'Then, Sarah and Tyler went to the flower garden. Tyler gave a bone to Sarah',
 'Then, Tyler and Sarah went to the flower garden. Sarah gave a bone to Tyler',
 'Then, Timothy and Stephen went to the old school. Stephen gave a necklace to Timothy']

# cut last name from ioi_texts
for i in range(6):
    ioi_texts[i] = ioi_texts[i][:-len(ioi_texts[i].split()[-1])-1]

In [5]:
# tokenize ioi_texts
tokens = tokenizer(ioi_texts, return_tensors='pt', padding=True, truncation=True).input_ids

# detokenize ioi_texts, token by token
for i in range(6):
    for token in tokens[i]:
        print(tokenizer.decode(token.item()), end=':')
    print()    

Then:,: Sarah: and: Tyler: went: to: the: garden:.: Tyler: gave: a: bone: to:<|endoftext|>:
Then:,: Tyler: and: Sarah: went: to: the: garden:.: Sarah: gave: a: bone: to:<|endoftext|>:
Then:,: Timothy: and: Stephen: went: to: the: school:.: Stephen: gave: a: necklace: to:<|endoftext|>:
Then:,: Sarah: and: Tyler: went: to: the: flower: garden:.: Tyler: gave: a: bone: to:
Then:,: Tyler: and: Sarah: went: to: the: flower: garden:.: Sarah: gave: a: bone: to:
Then:,: Timothy: and: Stephen: went: to: the: old: school:.: Stephen: gave: a: necklace: to:


In [7]:
final_logits = get_final_logits(model, tokenizer, ioi_texts)

# decode final_logits
for i in range(6):
    print(tokenizer.decode(final_logits[i].argmax(-1).tolist()), end='')

[15, 15, 15, 16, 16, 16]
 Sarah Tyler Timothy Sarah Tyler Timothy

In [45]:
t = torch.cuda.get_device_properties(0).total_memory
r = torch.cuda.memory_reserved(0)
a = torch.cuda.memory_allocated(0)
f = r-a  # free inside reserved
print(f"Total: {t*1e-9}, Reserved: {r*1e-9}, Allocated: {a*1e-9}, Free: {f*1e-9}")

Total: 84.986691584, Reserved: 24.203231232, Allocated: 24.045806080000002, Free: 0.157425152


In [None]:
generate_text(model, tokenizer, 'Then, Sarah and Tyler went to the flower garden. Tyler gave a bone to', 3)

'Then, Sarah and Tyler went to the flower garden. Tyler gave a bone to Sarah, and'

In [11]:
from tasks.kg_trips.ZSRETask import MENDQADataset
mendqa_dataset = MENDQADataset(data_dir="tasks/kg_trips", tok=tokenizer)

In [12]:
# call the __getitem__ method of the dataset
for i in range(3):
    print(mendqa_dataset[i])

{'case_id': 0, 'requested_rewrite': {'prompt': 'What university did {} attend?', 'subject': 'Watts Humphrey', 'target_new': {'str': 'Illinois Institute of Technology'}, 'target_true': {'str': '<|endoftext|>'}}, 'paraphrase_prompts': ['What university did Watts Humphrey take part in?'], 'neighborhood_prompts': [{'prompt': 'nq question: who played desmond doss father in hacksaw ridge?', 'target': ' Hugo'}, {'prompt': 'nq question: who played desmond doss father in hacksaw ridge? Hugo', 'target': ' We'}, {'prompt': 'nq question: who played desmond doss father in hacksaw ridge? Hugo We', 'target': 'aving'}], 'attribute_prompts': [], 'generation_prompts': []}
{'case_id': 1, 'requested_rewrite': {'prompt': 'Which family does {} belong to?', 'subject': 'Ramalinaceae', 'target_new': {'str': 'Lecanorales'}, 'target_true': {'str': '<|endoftext|>'}}, 'paraphrase_prompts': ['What family are Ramalinaceae?'], 'neighborhood_prompts': [{'prompt': 'nq question: types of skiing in the winter olympics 20

In [4]:
import pandas as pd
from transformer_lens import HookedTransformer

In [5]:
pythia_model = HookedTransformer.from_pretrained(
    "pythia-2.8b"
)
pythia_tokenizer = pythia_model.tokenizer

Downloading config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/5.68G [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Loaded pretrained model pythia-2.8b into HookedTransformer


In [10]:
df = pd.read_csv("tasks/facts/data/sports.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,athlete,sport,log_prob_one_shot,num_athlete_tokens,sport_index,sport_token,prompt
0,1642,DeForest Buckner,football,-0.492917,5,2,5842,Fact: Tiger Woods plays the sport of golf\nFac...
1,738,Walter Payton,football,-0.105714,3,2,5842,Fact: Tiger Woods plays the sport of golf\nFac...
2,16778,Anthony DeSclafani,baseball,-0.292668,6,0,14623,Fact: Tiger Woods plays the sport of golf\nFac...
3,14501,Kevin Millwood,baseball,-0.372979,3,0,14623,Fact: Tiger Woods plays the sport of golf\nFac...
4,188,Vonta Leach,football,-0.648644,5,2,5842,Fact: Tiger Woods plays the sport of golf\nFac...


In [16]:
from tasks.inference_utils import generate_text
for i in range(5):
    prompt = df['prompt'].iloc[i]
    print(f"Prompt: {prompt}")
    print(generate_text(pythia_model, pythia_tokenizer, prompt, 1))
    print(f"Correct sport: {df['sport'].iloc[i]}")
    print()

Prompt: Fact: Tiger Woods plays the sport of golf
Fact: DeForest Buckner plays the sport of


  0%|          | 0/1 [00:00<?, ?it/s]

Fact: Tiger Woods plays the sport of golf
Fact: DeForest Buckner plays the sport of basketball
Correct sport: football

Prompt: Fact: Tiger Woods plays the sport of golf
Fact: Walter Payton plays the sport of


  0%|          | 0/1 [00:00<?, ?it/s]

Fact: Tiger Woods plays the sport of golf
Fact: Walter Payton plays the sport of football
Correct sport: football

Prompt: Fact: Tiger Woods plays the sport of golf
Fact: Anthony DeSclafani plays the sport of


  0%|          | 0/1 [00:00<?, ?it/s]

Fact: Tiger Woods plays the sport of golf
Fact: Anthony DeSclafani plays the sport of baseball
Correct sport: baseball

Prompt: Fact: Tiger Woods plays the sport of golf
Fact: Kevin Millwood plays the sport of


  0%|          | 0/1 [00:00<?, ?it/s]

Fact: Tiger Woods plays the sport of golf
Fact: Kevin Millwood plays the sport of baseball
Correct sport: baseball

Prompt: Fact: Tiger Woods plays the sport of golf
Fact: Vonta Leach plays the sport of


  0%|          | 0/1 [00:00<?, ?it/s]

Fact: Tiger Woods plays the sport of golf
Fact: Vonta Leach plays the sport of football
Correct sport: football



In [None]:
football_token, baseball_token, basketball_token = pythia_tokenizer(" football baseball basketball").input_ids
print(f"{football_token=} {baseball_token=} {basketball_token=}")

In [49]:
# set up dataloader to batch through df
from torch.utils.data import DataLoader
from tasks.inference_utils import get_final_logits
criterion = torch.nn.CrossEntropyLoss()

class SportsDataset(torch.utils.data.Dataset):
    def __init__(self, df, tokenizer):
        self.df = df
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        return self.df['prompt'].iloc[idx], self.df['sport'].iloc[idx]

    def __len__(self):
        return len(self.df)
    
sports_dataset = SportsDataset(df, pythia_tokenizer)
sports_dataloader = DataLoader(sports_dataset, batch_size=3)

# batch through dataloader
for batch in sports_dataloader:
    prompts, labels = batch
    labels = [' ' + sport for sport in labels]
    final_logits = get_final_logits(pythia_model, pythia_tokenizer, prompts, model_returns_tuple=False)
    print(final_logits)
    tokenized_labels = pythia_tokenizer(labels, return_tensors='pt', padding=True, truncation=True).input_ids[:, 0]
    print(tokenized_labels)
    print(criterion(final_logits, tokenized_labels))

    print(criterion(final_logits, torch.Tensor([14648, 14648, 14648]).long()))
    break

tensor([[ 8.3688, -3.1681,  4.5482,  ..., -2.6834, -2.6928, -2.4418],
        [10.1932, -2.6702,  6.6471,  ..., -2.3068, -2.4198, -2.0895],
        [ 8.2223, -2.5343,  3.4966,  ..., -2.3035, -2.3677, -2.1004]],
       grad_fn=<StackBackward0>)
tensor([ 5842,  5842, 14623])
tensor(0.7214, grad_fn=<NllLossBackward0>)
tensor(2.7030, grad_fn=<NllLossBackward0>)


In [28]:
train_size = int(0.8 * len(df))
test_size = len(df) - train_size
train_df = df[:train_size]
test_df = df[train_size:]


In [56]:
from tasks.facts.SportsTask import SportsTask

sports_task = SportsTask(batch_size=100, tokenizer=pythia_tokenizer)
print(sports_task.get_test_loss(pythia_model))

tensor(0.1621)
