# Necessary Imports and Settings

In [8]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import transformers
import torch
import os
import nltk
import pandas as pd
import torch
import numpy as np
from jinja2 import Template
import xmltodict
import pickle
from collections import defaultdict
from fuzzywuzzy import fuzz
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import time

import sys
sys.path.append('/scratch/users/bozyurt20/hpc_run/utilities')
sys.path.append("/scratch/users/bozyurt20/hpc_run/blobs/")
from util_research import *

#from toy_dataset import contexts

max_len = 512
num_layers = 24
d_model = 4096

tokenizer = AutoTokenizer.from_pretrained("bigscience/T0pp", truncation_side="right", add_prefix_space=True)

In [2]:
model = AutoModelForSeq2SeqLM.from_pretrained("bigscience/T0pp", device_map="balanced", load_in_8bit=True)




Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /kuacc/users/bozyurt20/.conda/envs/hf/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 110
CUDA SETUP: Loading binary /kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda110.so...


In [12]:
names_1 = ["John", "Harry", "Andrew", "Lisa", "Mary"]
names_2 = ["Henry", "David", "Sophia", "Olivia", "Emma"]
cities_1 = ["London", "Paris", "Oslo", "Istanbul", "Beijing"]
cities_2 = ["Sydney", "Cairo", "Seoul", "Rome", "Prague"]

class DataSample_OneSentence():
    def __init__(self, name_1, city_1):
        self.name_1 = name_1
        self.city_1 = city_1
        self.prev_context = name_1 + " travelled to " + city_1 + "."
        self.current_context = name_1 + " met Lucas. Lucas was 30 years old. Where is " + name_1 + "?"
    def add_encoding(self, encoding):
        self.encoding = encoding
        
data_points_one_sentence = []
for name_1 in names_1 + names_2:
    for city_1 in cities_1 + cities_2:
        sample = DataSample_OneSentence(name_1, city_1)
        prev_context = sample.prev_context        
        prev_tokens = tokenizer.encode(prev_context, return_tensors="pt").to(model.encoder.device)
        num_prev_tokens = len(prev_tokens[0])

        with torch.no_grad():
            encoded_prev = model.encoder(prev_tokens, output_special=True)

        special_hidden = encoded_prev.special_hidden_states # 24 x (1, T, d)
        special_reformatted = torch.zeros(num_layers, num_prev_tokens, d_model) # (24, T, d)

        for i, hidden in enumerate(special_hidden):
            special_reformatted[i:i+1, :, :] = hidden

        entities_hidden_states = special_reformatted.permute(1, 0, 2) # T, 24, d
        
        sample.add_encoding(entities_hidden_states)
        data_points_one_sentence.append(sample)


In [4]:
len(data_points_one_sentence)

100

In [10]:
for point in data_points_one_sentence:
    print(point.prev_context)
    print(point.current_context)
    print()

John travelled to London.
John met Lucas. Lucas was 30 years old. Where is John?

John travelled to Paris.
John met Lucas. Lucas was 30 years old. Where is John?

John travelled to Oslo.
John met Lucas. Lucas was 30 years old. Where is John?

John travelled to Istanbul.
John met Lucas. Lucas was 30 years old. Where is John?

John travelled to Beijing.
John met Lucas. Lucas was 30 years old. Where is John?

John travelled to Sydney.
John met Lucas. Lucas was 30 years old. Where is John?

John travelled to Cairo.
John met Lucas. Lucas was 30 years old. Where is John?

John travelled to Seoul.
John met Lucas. Lucas was 30 years old. Where is John?

John travelled to Rome.
John met Lucas. Lucas was 30 years old. Where is John?

John travelled to Prague.
John met Lucas. Lucas was 30 years old. Where is John?

Harry travelled to London.
Harry met Lucas. Lucas was 30 years old. Where is Harry?

Harry travelled to Paris.
Harry met Lucas. Lucas was 30 years old. Where is Harry?

Harry travelled

# No Entities Moved

In [55]:
correctly_guessed_zero = defaultdict(list)

for num_data_point, data_point in enumerate(data_points_one_sentence):
    
    current_context = data_point.current_context
    answer = data_point.city_1
    current_context_ids = tokenizer.encode(current_context, return_tensors="pt").cuda()

    out = model.generate(
            input_ids=current_context_ids,
            max_new_tokens=10,
            #return_dict_in_generate=True,
            #output_scores=True
        ) 

    out_generated = tokenizer.decode(out[0], skip_special_tokens=True)

    if answer in out_generated:
        correctly_guessed_zero.append(1)
    else:
        correctly_guessed_zero.append(0)
        

In [62]:
correctly_guessed_zero

defaultdict(list,
            {6: [0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
          

# One Entity to Move

In [16]:
correctly_guessed = defaultdict(list)

for num_data_point, data_point in enumerate(data_points_one_sentence):
    
    print("NOW PROCESSING:", num_data_point)
    start = time.time()
    prev_context = data_point.prev_context
    current_context = data_point.current_context
    answer = data_point.city_1
    entities_hidden_states = data_point.encoding
    
    prev_tokens = tokenizer.encode(prev_context, return_tensors="pt")
    num_prev_tokens = len(prev_tokens[0])
    
    current_context_ids = tokenizer.encode(current_context, return_tensors="pt")
    
    for i in range(num_prev_tokens):
        
        new_context_ids = torch.cat((prev_tokens[:,i:i+1], current_context_ids), dim=1).cuda()
        entity_inds = [0]
        
        entity_hidden_states = entities_hidden_states[i:i+1, :, :]
        
        out = model.generate(
                input_ids=new_context_ids,
                entity_hidden_states=entity_hidden_states,
                entity_inds=entity_inds,
                max_new_tokens=10,
                #return_dict_in_generate=True,
                #output_scores=True
            ) 
        
        out_generated = tokenizer.decode(out[0], skip_special_tokens=True)
        
        if answer in out_generated:
            correctly_guessed[i].append(1)
        else:
            correctly_guessed[i].append(0)
        
    print("Time taken:", time.time()-start)
        

NOW PROCESSING: 0
Time taken: 5.151851177215576
NOW PROCESSING: 1
Time taken: 4.960990905761719
NOW PROCESSING: 2
Time taken: 5.110096454620361
NOW PROCESSING: 3
Time taken: 4.96619987487793
NOW PROCESSING: 4
Time taken: 4.550718545913696
NOW PROCESSING: 5
Time taken: 4.983500003814697
NOW PROCESSING: 6
Time taken: 4.724462032318115
NOW PROCESSING: 7
Time taken: 4.699164152145386
NOW PROCESSING: 8
Time taken: 4.979692697525024
NOW PROCESSING: 9
Time taken: 5.23649263381958
NOW PROCESSING: 10
Time taken: 4.459359407424927
NOW PROCESSING: 11
Time taken: 4.472076177597046
NOW PROCESSING: 12
Time taken: 4.625423192977905
NOW PROCESSING: 13
Time taken: 4.61356782913208
NOW PROCESSING: 14
Time taken: 4.598127365112305
NOW PROCESSING: 15
Time taken: 4.573188304901123
NOW PROCESSING: 16
Time taken: 4.449345827102661
NOW PROCESSING: 17
Time taken: 4.4607977867126465
NOW PROCESSING: 18
Time taken: 4.46790075302124
NOW PROCESSING: 19
Time taken: 4.8760974407196045
NOW PROCESSING: 20
Time taken: 5

In [17]:
for i in correctly_guessed:
    print(i)
    print(sum(correctly_guessed[i])/len(correctly_guessed[i]))

0
0.01
1
0.0
2
0.01
3
0.01
4
0.64
5
0.0
6
0.0


In [19]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/one_entity_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed, f)

# Two Entities to Move

In [35]:
correctly_guessed_two = {}
my_context = []

for i in range(num_prev_tokens):
    correctly_guessed_two[i] = defaultdict(list)

for data_point in data_points_one_sentence:
    start = time.time()
    prev_context = data_point.prev_context
    current_context = data_point.current_context
    answer = data_point.city_1
    entities_hidden_states = data_point.encoding
    
    prev_tokens = tokenizer.encode(prev_context, return_tensors="pt")
    num_prev_tokens = len(prev_tokens[0])
    
    current_context_ids = tokenizer.encode(current_context, return_tensors="pt")
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            new_context_ids = torch.cat((prev_tokens[:,i:i+1], prev_tokens[:, j:j+1], current_context_ids), dim=1).cuda()
            entity_inds = list(range(2))

            entity_hidden_states = torch.cat((entities_hidden_states[i:i+1, :, :],
                                              entities_hidden_states[j:j+1, :, :]), dim=0)
            
            my_context.append(new_context_ids)
            
            #if i == 0 and j == 4:
                                             
            out = model.generate(
                    input_ids=new_context_ids,
                    entity_hidden_states=entity_hidden_states,
                    entity_inds=entity_inds,
                    max_new_tokens=10,
                    #return_dict_in_generate=True,
                    #output_scores=True
                ) 

            out_generated = tokenizer.decode(out[0], skip_special_tokens=True)

            print(prev_context)

            print(out_generated)

            print(answer)

            print()

            if answer in out_generated:
                print("in")
                correctly_guessed_two[i][j].append(1)
            else:
                print("out")
                correctly_guessed_two[i][j].append(0)

    print("Time taken:", time.time()-start)


John travelled to London.
in the hospital
London

out
John travelled to London.
in the hospital
London

out
John travelled to London.
in the hospital
London

out
John travelled to London.
London
London

in
John travelled to London.
John is in a bar.
London

out
John travelled to London.
New York
London

out
John travelled to London.
in the hospital
London

out
John travelled to London.
John is in the United States
London

out
John travelled to London.
London
London

in
John travelled to London.
John is in a bar.
London

out
John travelled to London.
New York
London

out
John travelled to London.
in the hospital
London

out
John travelled to London.
London
London

in
John travelled to London.
John is in a bar.
London

out
John travelled to London.
New York
London

out
John travelled to London.
London
London

in
John travelled to London.
John is in the United States
London

out
John travelled to London.
New York
London

out
John travelled to London.
London
London

in
John travelled to Lo

In [34]:
for cont in my_context:
    print(tokenizer.decode(cont[0]))

John  John met Lucas. Lucas was 30 years old. Where is John?</s>
Johntravelled John met Lucas. Lucas was 30 years old. Where is John?</s>
John to John met Lucas. Lucas was 30 years old. Where is John?</s>
John London John met Lucas. Lucas was 30 years old. Where is John?</s>
John. John met Lucas. Lucas was 30 years old. Where is John?</s>
John</s> John met Lucas. Lucas was 30 years old. Where is John?</s>
travelled John met Lucas. Lucas was 30 years old. Where is John?</s>
 to John met Lucas. Lucas was 30 years old. Where is John?</s>
 London John met Lucas. Lucas was 30 years old. Where is John?</s>
. John met Lucas. Lucas was 30 years old. Where is John?</s>
</s> John met Lucas. Lucas was 30 years old. Where is John?</s>
travelled to John met Lucas. Lucas was 30 years old. Where is John?</s>
travelled London John met Lucas. Lucas was 30 years old. Where is John?</s>
travelled. John met Lucas. Lucas was 30 years old. Where is John?</s>
travelled</s> John met Lucas. Lucas was 30 years 

In [51]:
for i in range(7):
    for j in range(i+1, 7):
        print(i, j)
        print(sum(correctly_guessed_two[i][j]))
        print(len(correctly_guessed_two[i][j]))
        print(sum(correctly_guessed_two[i][j])/len(correctly_guessed_two[i][j]))
        print()

0 1
1
100
0.01

0 2
2
100
0.02

0 3
1
100
0.01

0 4
58
100
0.58

0 5
0
100
0.0

0 6
1
100
0.01

1 2
2
100
0.02

1 3
1
100
0.01

1 4
72
100
0.72

1 5
0
100
0.0

1 6
0
100
0.0

2 3
2
100
0.02

2 4
71
100
0.71

2 5
0
100
0.0

2 6
2
100
0.02

3 4
70
100
0.7

3 5
0
100
0.0

3 6
1
100
0.01

4 5
64
100
0.64

4 6
72
100
0.72

5 6
0
100
0.0



In [24]:
for i in range(7):
    #for j in range(i+1, 7):
    print(len(correctly_guessed_two[i]))

6
5
4
3
2
1
0


In [37]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/two_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_two, f)

# Three Entities to Move

In [38]:
correctly_guessed_three = {}
for i in range(num_prev_tokens):

    correctly_guessed_three[i] = {}

    for j in range(i+1, num_prev_tokens):

        correctly_guessed_three[i][j] = defaultdict(list)

for data_point in data_points_one_sentence:
    prev_context = data_point.prev_context
    current_context = data_point.current_context
    answer = data_point.city_1
    entities_hidden_states = data_point.encoding
    
    prev_tokens = tokenizer.encode(prev_context, return_tensors="pt")
    num_prev_tokens = len(prev_tokens[0])
    
    current_context_ids = tokenizer.encode(current_context, return_tensors="pt")
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
            
                new_context_ids = torch.cat((prev_tokens[:,i:i+1], 
                                             prev_tokens[:,j:j+1],
                                             prev_tokens[:,k:k+1],
                                             current_context_ids), dim=1).cuda()
                entity_inds = list(range(3))

                entity_hidden_states = torch.cat((entities_hidden_states[i:i+1, :, :],
                                                  entities_hidden_states[j:j+1, :, :],
                                                  entities_hidden_states[k:k+1, :, :]), dim=0)

                out = model.generate(
                        input_ids=new_context_ids,
                        entity_hidden_states=entity_hidden_states,
                        entity_inds=entity_inds,
                        max_new_tokens=10,
                        #return_dict_in_generate=True,
                        #output_scores=True
                    ) 

                out_generated = tokenizer.decode(out[0], skip_special_tokens=False)

                if answer in out_generated:
                    correctly_guessed_three[i][j][k].append(1)
                else:
                    correctly_guessed_three[i][j][k].append(0)


In [39]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/three_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_three, f)

# Four Entities to Move

In [None]:
correctly_guessed_four = {}
for i in range(num_prev_tokens):

    correctly_guessed_four[i] = {}

    for j in range(i+1, num_prev_tokens):

        correctly_guessed_four[i][j] = {}

        for k in range(j+1, num_prev_tokens):

            correctly_guessed_four[i][j][k] = defaultdict(list)

for data_point in data_points_one_sentence:
    prev_context = data_point.prev_context
    current_context = data_point.current_context
    answer = data_point.city_1
    entities_hidden_states = data_point.encoding
    
    prev_tokens = tokenizer.encode(prev_context, return_tensors="pt")
    num_prev_tokens = len(prev_tokens[0])
    
    current_context_ids = tokenizer.encode(current_context, return_tensors="pt")
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
                
                for l in range(k+1, num_prev_tokens):
            
                    new_context_ids = torch.cat((prev_tokens[:,i:i+1], 
                                                 prev_tokens[:,j:j+1],
                                                 prev_tokens[:,k:k+1],
                                                 prev_tokens[:,l:l+1],
                                                 current_context_ids), dim=1).cuda()
                
                    entity_inds = list(range(4))

                    entity_hidden_states = torch.cat((entities_hidden_states[i:i+1, :, :],
                                                      entities_hidden_states[j:j+1, :, :],
                                                      entities_hidden_states[k:k+1, :, :],
                                                      entities_hidden_states[l:l+1, :, :]), dim=0)

                    out = model.generate(
                            input_ids=new_context_ids,
                            entity_hidden_states=entity_hidden_states,
                            entity_inds=entity_inds,
                            max_new_tokens=10,
                            #return_dict_in_generate=True,
                            #output_scores=True
                        ) 

                    out_generated = tokenizer.decode(out[0], skip_special_tokens=True)

                    if answer in out_generated:
                        correctly_guessed_four[i][j][k][l].append(1)
                    else:
                        correctly_guessed_four[i][j][k][l].append(0)


In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/four_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_four, f)

# Five Entities to Move

In [45]:
correctly_guessed_five = {}

for i in range(num_prev_tokens):

    correctly_guessed_five[i] = {}

    for j in range(i+1, num_prev_tokens):

        correctly_guessed_five[i][j] = {}

        for k in range(j+1, num_prev_tokens):

            correctly_guessed_five[i][j][k] = {}

            for l in range(k+1, num_prev_tokens):

                correctly_guessed_five[i][j][k][l] = defaultdict(list)

for data_point in data_points_one_sentence:
    prev_context = data_point.prev_context
    current_context = data_point.current_context
    answer = data_point.city_1
    entities_hidden_states = data_point.encoding
    
    prev_tokens = tokenizer.encode(prev_context, return_tensors="pt")
    num_prev_tokens = len(prev_tokens[0])
    
    current_context_ids = tokenizer.encode(current_context, return_tensors="pt")
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
                
                for l in range(k+1, num_prev_tokens):
                    
                    for m in range(l+1, num_prev_tokens):
                        
                        new_context_ids = torch.cat((prev_tokens[:,i:i+1], 
                                                     prev_tokens[:,j:j+1],
                                                     prev_tokens[:,k:k+1],
                                                     prev_tokens[:,l:l+1],
                                                     prev_tokens[:,m:m+1],
                                                     current_context_ids), dim=1).cuda()

                        
                        entity_inds = list(range(5))

                        entity_hidden_states = torch.cat((entities_hidden_states[i:i+1, :, :],
                                                          entities_hidden_states[j:j+1, :, :],
                                                          entities_hidden_states[k:k+1, :, :],
                                                          entities_hidden_states[l:l+1, :, :],
                                                          entities_hidden_states[m:m+1, :, :]), dim=0)

                        out = model.generate(
                                input_ids=new_context_ids,
                                entity_hidden_states=entity_hidden_states,
                                entity_inds=entity_inds,
                                max_new_tokens=10,
                                #return_dict_in_generate=True,
                                #output_scores=True
                            ) 

                        out_generated = tokenizer.decode(out[0], skip_special_tokens=True)

                        if answer in out_generated:
                            correctly_guessed_five[i][j][k][l][m].append(1)
                        else:
                            correctly_guessed_five[i][j][k][l][m].append(0)


In [46]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/five_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_five, f)

# Six Entities to Move

In [47]:
correctly_guessed_six = {}
for i in range(num_prev_tokens):
        
    correctly_guessed_six[i] = {}

    for j in range(i+1, num_prev_tokens):

        correctly_guessed_six[i][j] = {}

        for k in range(j+1, num_prev_tokens):

            correctly_guessed_six[i][j][k] = {}

            for l in range(k+1, num_prev_tokens):

                correctly_guessed_six[i][j][k][l] = {}

                for m in range(l+1, num_prev_tokens):

                    correctly_guessed_six[i][j][k][l][m] = defaultdict(list)

for data_point in data_points_one_sentence:
    prev_context = data_point.prev_context
    current_context = data_point.current_context
    answer = data_point.city_1
    entities_hidden_states = data_point.encoding
    
    prev_tokens = tokenizer.encode(prev_context, return_tensors="pt")
    num_prev_tokens = len(prev_tokens[0])
    
    current_context_ids = tokenizer.encode(current_context, return_tensors="pt")
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
                
                for l in range(k+1, num_prev_tokens):
                    
                    for m in range(l+1, num_prev_tokens):
                        
                        for n in range(m+1, num_prev_tokens):

                            new_context_ids = torch.cat((prev_tokens[:,i:i+1], 
                                                         prev_tokens[:,j:j+1],
                                                         prev_tokens[:,k:k+1],
                                                         prev_tokens[:,l:l+1],
                                                         prev_tokens[:,m:m+1],
                                                         prev_tokens[:,n:n+1],
                                                         current_context_ids), dim=1).cuda()
                            
                            entity_inds = list(range(6))

                            entity_hidden_states = torch.cat((entities_hidden_states[i:i+1, :, :],
                                                              entities_hidden_states[j:j+1, :, :],
                                                              entities_hidden_states[k:k+1, :, :],
                                                              entities_hidden_states[l:l+1, :, :],
                                                              entities_hidden_states[m:m+1, :, :],
                                                              entities_hidden_states[n:n+1, :, :]), dim=0)

                            out = model.generate(
                                    input_ids=new_context_ids,
                                    entity_hidden_states=entity_hidden_states,
                                    entity_inds=entity_inds,
                                    max_new_tokens=10,
                                    #return_dict_in_generate=True,
                                    #output_scores=True
                                ) 

                            out_generated = tokenizer.decode(out[0], skip_special_tokens=True)

                            if answer in out_generated:
                                correctly_guessed_six[i][j][k][l][m][n].append(1)
                            else:
                                correctly_guessed_six[i][j][k][l][m][n].append(0)

In [48]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/six_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_six, f)

# Seven Entities to Move

In [49]:

correctly_guessed_seven = []

for data_point in data_points_one_sentence:
    prev_context = data_point.prev_context
    current_context = data_point.current_context
    answer = data_point.city_1
    entities_hidden_states = data_point.encoding
    
    prev_tokens = tokenizer.encode(prev_context, return_tensors="pt")
    num_prev_tokens = len(prev_tokens[0])
    
    current_context_ids = tokenizer.encode(current_context, return_tensors="pt")
    
    new_context_ids = torch.cat((prev_tokens,
                                 current_context_ids), dim=1).cuda()

    entity_inds = list(range(7))

    entity_hidden_states = entities_hidden_states

    out = model.generate(
            input_ids=new_context_ids,
            entity_hidden_states=entity_hidden_states,
            entity_inds=entity_inds,
            max_new_tokens=10,
            #return_dict_in_generate=True,
            #output_scores=True
        ) 

    out_generated = tokenizer.decode(out[0], skip_special_tokens=True)

    if answer in out_generated:
        correctly_guessed_seven.append(1)
    else:
        correctly_guessed_seven.append(0)

        
        

In [50]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/seven_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_seven, f)

# All Results

In [63]:

all_results = []

for i in range(7):
    my_list = correctly_guessed[i]
    avg_list = sum(my_list)/len(my_list)
    all_results.append(([i], avg_list))

for i in range(7):
    for j in range(i+1, 7):
        my_list = correctly_guessed_two[i][j]
        avg_list = sum(my_list)/len(my_list)
        all_results.append(([i, j], avg_list))
        
for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            my_list = correctly_guessed_three[i][j][k]
            avg_list = sum(my_list)/len(my_list)
            all_results.append(([i, j, k], avg_list))

for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            for l in range(k+1, 7):
                my_list = correctly_guessed_four[i][j][k][l]
                avg_list = sum(my_list)/len(my_list)
                all_results.append(([i, j, k, l], avg_list))
                
for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            for l in range(k+1, 7):
                for m in range(l+1, 7):
                    my_list = correctly_guessed_five[i][j][k][l][m]
                    avg_list = sum(my_list)/len(my_list)
                    all_results.append(([i, j, k, l, m], avg_list))

for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            for l in range(k+1, 7):
                for m in range(l+1, 7):
                    for n in range(m+1, 7):
                        my_list = correctly_guessed_six[i][j][k][l][m][n]
                        avg_list = sum(my_list)/len(my_list)
                        all_results.append(([i, j, k, l, m, n], avg_list))
                        
avg_seven = sum(correctly_guessed_seven)/len(correctly_guessed_seven)
all_results.append(([0, 1, 2, 3, 4, 5, 6], avg_seven))

avg_zero = sum(correctly_guessed_zero[6])/len(correctly_guessed_zero[6])
all_results.append(([], avg_zero))

In [64]:
len(all_results)

128

In [65]:
all_results

[([0], 0.01),
 ([1], 0.0),
 ([2], 0.01),
 ([3], 0.01),
 ([4], 0.64),
 ([5], 0.0),
 ([6], 0.0),
 ([0, 1], 0.01),
 ([0, 2], 0.02),
 ([0, 3], 0.01),
 ([0, 4], 0.58),
 ([0, 5], 0.0),
 ([0, 6], 0.01),
 ([1, 2], 0.02),
 ([1, 3], 0.01),
 ([1, 4], 0.72),
 ([1, 5], 0.0),
 ([1, 6], 0.0),
 ([2, 3], 0.02),
 ([2, 4], 0.71),
 ([2, 5], 0.0),
 ([2, 6], 0.02),
 ([3, 4], 0.7),
 ([3, 5], 0.0),
 ([3, 6], 0.01),
 ([4, 5], 0.64),
 ([4, 6], 0.72),
 ([5, 6], 0.0),
 ([0, 1, 2], 0.03),
 ([0, 1, 3], 0.01),
 ([0, 1, 4], 0.65),
 ([0, 1, 5], 0.0),
 ([0, 1, 6], 0.01),
 ([0, 2, 3], 0.03),
 ([0, 2, 4], 0.67),
 ([0, 2, 5], 0.0),
 ([0, 2, 6], 0.03),
 ([0, 3, 4], 0.7),
 ([0, 3, 5], 0.01),
 ([0, 3, 6], 0.01),
 ([0, 4, 5], 0.6),
 ([0, 4, 6], 0.69),
 ([0, 5, 6], 0.0),
 ([1, 2, 3], 0.02),
 ([1, 2, 4], 0.69),
 ([1, 2, 5], 0.0),
 ([1, 2, 6], 0.02),
 ([1, 3, 4], 0.73),
 ([1, 3, 5], 0.0),
 ([1, 3, 6], 0.01),
 ([1, 4, 5], 0.68),
 ([1, 4, 6], 0.71),
 ([1, 5, 6], 0.0),
 ([2, 3, 4], 0.74),
 ([2, 3, 5], 0.01),
 ([2, 3, 6], 0.02),
 ([

In [66]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/all_results.txt", "wb") as f:
    pickle.dump(all_results, f)

In [67]:
all_results.sort(key=lambda x: x[1])

In [68]:
all_results

[([1], 0.0),
 ([5], 0.0),
 ([6], 0.0),
 ([0, 5], 0.0),
 ([1, 5], 0.0),
 ([1, 6], 0.0),
 ([2, 5], 0.0),
 ([3, 5], 0.0),
 ([5, 6], 0.0),
 ([0, 1, 5], 0.0),
 ([0, 2, 5], 0.0),
 ([0, 5, 6], 0.0),
 ([1, 2, 5], 0.0),
 ([1, 3, 5], 0.0),
 ([1, 5, 6], 0.0),
 ([2, 5, 6], 0.0),
 ([3, 5, 6], 0.0),
 ([0, 1, 3, 5], 0.0),
 ([0, 1, 5, 6], 0.0),
 ([1, 2, 5, 6], 0.0),
 ([1, 3, 5, 6], 0.0),
 ([], 0.0),
 ([0], 0.01),
 ([2], 0.01),
 ([3], 0.01),
 ([0, 1], 0.01),
 ([0, 3], 0.01),
 ([0, 6], 0.01),
 ([1, 3], 0.01),
 ([3, 6], 0.01),
 ([0, 1, 3], 0.01),
 ([0, 1, 6], 0.01),
 ([0, 3, 5], 0.01),
 ([0, 3, 6], 0.01),
 ([1, 3, 6], 0.01),
 ([2, 3, 5], 0.01),
 ([0, 3, 5, 6], 0.01),
 ([2, 3, 5, 6], 0.01),
 ([0, 1, 3, 5, 6], 0.01),
 ([1, 2, 3, 5, 6], 0.01),
 ([0, 2], 0.02),
 ([1, 2], 0.02),
 ([2, 3], 0.02),
 ([2, 6], 0.02),
 ([1, 2, 3], 0.02),
 ([1, 2, 6], 0.02),
 ([2, 3, 6], 0.02),
 ([0, 1, 2, 5], 0.02),
 ([0, 1, 3, 6], 0.02),
 ([0, 2, 5, 6], 0.02),
 ([1, 2, 3, 5], 0.02),
 ([0, 1, 2, 5, 6], 0.02),
 ([0, 1, 2, 3, 5, 6], 

# To Be Deleted

In [65]:
class PathResult():
    def __init__(self, prompt, probability_correct, probability_wrong, max_prob):
        self.prompt = prompt
        self.probability_correct = probability_correct
        self.probability_wrong = probability_wrong
        self.max_prob = max_prob

In [73]:
context_previous = "John travelled to Oslo. Emma travelled to Sydney."
input_ids = tokenizer.encode(context_previous, return_tensors="pt").to(model.encoder.device)
len_input_ids = len(input_ids[0])
with torch.no_grad():
    out = model.encoder(input_ids, output_special=True)
    
special_hidden = out.special_hidden_states # 24 x (1, T, d)
special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
for i, hidden in enumerate(special_hidden):
    special_reformatted[i:i+1, :, :] = hidden

entities_hidden_states = special_reformatted[:, :-1, :]
entities_hidden_states = entities_hidden_states.permute(1, 0, 2) # T, 24, d

new code working-modeling_t5


In [74]:
entities_hidden_states.shape

torch.Size([12, 24, 4096])

In [66]:
context_current = " Lucas was 30 years old. "
question_1 = "Where did John travel to?"
question_2 = "Where did Emma travel to?"
answer_1 = "Oslo"
answer_2 = "Sydney"
results = [];
def my_pipeline(prompt, entity_inds, entity_hidden_states, answer_correct, answer_wrong):
    input_ids = tokenizer.encode(prompt, return_tensors="pt").cuda()
    out = model.generate(
            input_ids=input_ids,
            entity_hidden_states=entity_hidden_states,
            entity_inds=entity_inds,
            max_new_tokens=1,
            return_dict_in_generate=True,
            output_scores=True
        ) 
    next_token_scores = torch.nn.functional.softmax(
                        out.scores[0].float(), dim=-1
                    )  # (batch_size * num_beams, vocab_size)
    probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
    probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()
    max_prob = next_token_scores[0,next_token_scores.argmax().item()].item()
    return probability_correct, probability_wrong, max_prob

entity_inds = list(range(12))
prompt = context_previous + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = context_previous + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(11))
entities_hidden_states_new = entities_hidden_states[:11, :, :]
prompt = context_previous[:-1] + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = context_previous[:-1] + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))



new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [69]:
entity_inds = list(range(10))
entities_hidden_states_new = torch.cat((entities_hidden_states[:5, :, :], entities_hidden_states[6:11, :, :]), dim=0)
prompt = "John travelled to Oslo Emma travelled to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John travelled to Oslo Emma travelled to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [75]:
entity_inds = list(range(9))
entities_hidden_states_new = torch.cat((entities_hidden_states[:3, :, :], entities_hidden_states[4:5, :, :], entities_hidden_states[6:11, :, :]), dim=0)
prompt = "John travelled Oslo Emma travelled to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John travelled Oslo Emma travelled to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(8))
entities_hidden_states_new = torch.cat((entities_hidden_states[:3, :, :], 
                                    entities_hidden_states[4:5, :, :], 
                                    entities_hidden_states[6:9, :, :],
                                    entities_hidden_states[10:11, :, :]), dim=0)
prompt = "John travelled Oslo Emma travelled Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John travelled Oslo Emma travelled Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [77]:
entity_inds = list(range(6))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], 
                                        entities_hidden_states[4:5, :, :],
                                        entities_hidden_states[6:9, :, :],
                                        entities_hidden_states[10:11, :, :]), dim=0)
prompt = "John Oslo Emma travelled Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John Oslo Emma travelled Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(4))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], 
                                        entities_hidden_states[4:5, :, :],
                                        entities_hidden_states[6:7, :, :],
                                        entities_hidden_states[10:11, :, :]), dim=0)
prompt = "John Oslo Emma Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John Oslo Emma Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [80]:
entity_inds = list(range(8))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], entities_hidden_states[3:5, :, :], entities_hidden_states[6:11, :, :]), dim=0)
prompt = "John to Oslo Emma travelled to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John to Oslo Emma travelled to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(6))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], 
                                    entities_hidden_states[3:5, :, :], 
                                    entities_hidden_states[6:7, :, :],
                                    entities_hidden_states[9:11, :, :]), dim=0)
prompt = "John to Oslo Emma to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John to Oslo Emma to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [82]:
entity_inds = list(range(5))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], 
                                        entities_hidden_states[4:5, :, :], 
                                        entities_hidden_states[6:7, :, :],
                                        entities_hidden_states[9:11, :, :]), dim=0)
prompt = "John Oslo Emma to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John Oslo Emma to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [84]:
entity_inds = list(range(7))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], entities_hidden_states[4:5, :, :], entities_hidden_states[6:11, :, :]), dim=0)
prompt = "John Oslo Emma travelled to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John Oslo Emma travelled to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(7))
entities_hidden_states_new = torch.cat((entities_hidden_states[:5, :, :], 
                                    entities_hidden_states[6:7, :, :], 
                                    entities_hidden_states[10:11, :, :]), dim=0)
prompt = "John travelled to Oslo Emma Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John travelled to Oslo Emma Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(7))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], 
                                        entities_hidden_states[3:5, :, :], 
                                        entities_hidden_states[6:9, :, :],
                                        entities_hidden_states[10:11, :, :]), dim=0)
prompt = "John to Oslo Emma travelled Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John to Oslo Emma travelled Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(7))
entities_hidden_states_new = torch.cat((entities_hidden_states[:3, :, :], 
                                        entities_hidden_states[4:5, :, :], 
                                        entities_hidden_states[6:7, :, :],
                                        entities_hidden_states[9:11, :, :]), dim=0)
prompt = "John travelled Oslo Emma to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John travelled Oslo Emma to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [85]:
for result in results:
    print(result.prompt)
    print(result.probability_correct)
    print(result.probability_wrong)
    print(result.max_prob)
    print()

John travelled to Oslo. Emma travelled to Sydney. Lucas was 30 years old. Where did John travel to?
0.2510606646537781
0.16400770843029022
0.2510606646537781

John travelled to Oslo. Emma travelled to Sydney. Lucas was 30 years old. Where did Emma travel to?
0.4167127311229706
0.01614193432033062
0.4167127311229706

John travelled to Oslo. Emma travelled to Sydney Lucas was 30 years old. Where did John travel to?
0.3200170695781708
0.09942752867937088
0.3200170695781708

John travelled to Oslo. Emma travelled to Sydney Lucas was 30 years old. Where did Emma travel to?
0.3432735204696655
0.01956755854189396
0.3432735204696655

John travelled to Oslo Emma travelled to Sydney Lucas was 30 years old. Where did John travel to?
0.23133867979049683
0.10086718946695328
0.23133867979049683

John travelled to Oslo Emma travelled to Sydney Lucas was 30 years old. Where did Emma travel to?
0.29553642868995667
0.015663813799619675
0.29553642868995667

John travelled Oslo Emma travelled to Sydney Lu

In [61]:
context_current = " Lucas was 30 years old. "
question_1 = "Where did John travel to?"
question_2 = "Where did Emma travel to?"

prompt_2 = context_previous + context_current + question_2

input_ids = tokenizer.encode(prompt_2, return_tensors="pt")
entity_inds = list(range(12))
out = model.generate(
        input_ids=input_ids,
        entity_hidden_states=entities_hidden_states,
        entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
next_token_scores = torch.nn.functional.softmax(
                    out.scores[0].float(), dim=-1
                )  # (batch_size * num_beams, vocab_size)

answer_correct = "Sydney"
answer_wrong = "Oslo"
probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()

probability_correct, probability_wrong

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


(0.4167155921459198, 0.016142046079039574)

In [62]:
next_token_scores[0,next_token_scores.argmax().item()].item()

0.4167155921459198

In [49]:
context_current = " Lucas was 30 years old. "
question_1 = "Where did John travel to?"
question_2 = "Where did Emma travel to?"

prompt_1 = context_previous + context_current + question_1

input_ids = tokenizer.encode(prompt_1, return_tensors="pt")
entity_inds = list(range(12))
out = model.generate(
        input_ids=input_ids,
        #entity_hidden_states=entities_hidden_states,
        #entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
next_token_scores = torch.nn.functional.softmax(
                    out.scores[0].float(), dim=-1
                )  # (batch_size * num_beams, vocab_size)

answer_correct = "Oslo"
answer_wrong = "Sydney"
probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()

probability_correct, probability_wrong

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [51]:
context_current = " Lucas was 30 years old. "
question_1 = "Where did John travel to?"
question_2 = "Where did Emma travel to?"

prompt_1 = context_current + question_1

input_ids = tokenizer.encode(prompt_1, return_tensors="pt")
entity_inds = list(range(12))
out = model.generate(
        input_ids=input_ids,
        #entity_hidden_states=entities_hidden_states,
        #entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
next_token_scores = torch.nn.functional.softmax(
                    out.scores[0].float(), dim=-1
                )  # (batch_size * num_beams, vocab_size)

answer_correct = "Oslo"
answer_wrong = "Sydney"
probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()

probability_correct, probability_wrong

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


(0.00019699727999977767, 0.0060422383248806)

In [None]:
template = tm11
context_current = "Lucas was 30 years old."

context_previous = "John travelled to Oslo. Emma travelled to Sydney."
char_1 = "John"
char_2 = "Emma"
answer_1 = "Oslo"
answer_2 = "Sydney"

question_1 = "Where did John travel to?"
question_2 = "Where did Emma travel to?"

entities_list = ["John", " ", "travelled", "to", "Oslo", ".", "Emma", " ", "travelled", "to", "Sydney", "."]

entity_hidden_states = previous_timestep(context_previous,  entities_list)

to_prepend = " ".join(entities_list)

prompt_current = to_prepend + " " + context_current

probability_correct_1, probability_wrong_1, _ = current_timestep_regular(prompt_current, question_1, answer_1, answer_2, template)
probability_correct_2, probability_wrong_2, _ = current_timestep_regular(prompt_current, question_2, answer_2, answer_1, template)

probability_correct_enhanced_1, probability_wrong_enhanced_1, _ = current_timestep_enhanced(prompt_current, 
                                                                      question_1, 
                                                                      answer_1, 
                                                                      answer_2,
                                                                      template, 
                                                                      entities_list, 
                                                                      entity_hidden_states)

probability_correct_enhanced_2, probability_wrong_enhanced_2, _ = current_timestep_enhanced(prompt_current, 
                                                                      question_2, 
                                                                      answer_2,
                                                                      answer_1,
                                                                      template, 
                                                                      entities_list, 
                                                                      entity_hidden_states)

improvement_correct_1 = probability_correct_enhanced_1 - probability_correct_1
improvement_correct_2 = probability_correct_enhanced_2 - probability_correct_2
improvement_wrong_1 = probability_wrong_enhanced_1 - probability_wrong_1
improvement_wrong_2 = probability_wrong_enhanced_2 - probability_wrong_2
improvements_correct_1.append( improvement_correct_1 )
improvements_correct_2.append( improvement_correct_2 )
improvements_wrong_1.append( improvement_wrong_1 )
improvements_wrong_2.append( improvement_wrong_2 )

#NOT RUN

if probability_correct_1 > probability_wrong_1:
    correct_guesses.append(1)
else:
    correct_guesses.append(0)
if probability_correct_enhanced_1 > probability_wrong_enhanced_1:
    correct_guesses.append(1)
else:
    correct_guesses.append(0) 

# Open-Ended Generation - Two chars two locs

In [3]:
def find_index_one(input_ids, entity_str, index):
    
    entity_id = tokenizer.encode(entity_str)
    
    if len(entity_id) != 2:
        print("Not an appropriate entity!")
        return
    
    entity_id = entity_id[0]
    
    input_ids_list = input_ids.tolist()

    all_entity_mention_indices = []
    for i, j in enumerate(input_ids_list[0]):
        if j == entity_id:
            all_entity_mention_indices.append(i)
    try:
        entity_ind = all_entity_mention_indices[index]
        return entity_ind
    except:
        print("entity not found in the input!")
        return

In [4]:
def previous_timestep(context, entities):
    
    input_ids = tokenizer.encode(context, return_tensors="pt")
    len_input_ids = len(input_ids[0])
    out = model.encoder(input_ids, output_special=True, output_hidden_states=True)
    special_hidden = out.special_hidden_states # 24 x (1, T, d)

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden
    
    entity_ind = find_index_one(input_ids, entities[0], 0)
    entities_hidden_states = special_reformatted[:, entity_ind, :].unsqueeze(0)

    for entity in entities[1:]:
        entity_ind = find_index_one(input_ids, entity, 0)
        entity_hidden_states = special_reformatted[:, entity_ind, :].unsqueeze(0)
        entities_hidden_states = torch.cat((entities_hidden_states,
                                       entity_hidden_states), dim=0)
    
    return entities_hidden_states

def current_timestep_regular(context, question, answer_correct, answer_wrong, template):
    
    prompt = template.render(context=context, question=question)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    out = model.generate(input_ids, max_new_tokens=1, return_dict_in_generate=True, output_scores=True)
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0].float(), dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
    probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability_correct, probability_wrong, scores

def current_timestep_enhanced(context, question, answer_correct, answer_wrong, template, entities, entity_hidden_states):
    
    prompt = template.render(context=context, question=question)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    entity_inds = []
    for entity in entities:
        entity_inds.append(find_index_one(input_ids, entity, 0))
        
    out = model.generate(
        input_ids=input_ids,
        entity_hidden_states=entity_hidden_states,
        entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0].float(), dim=-1
                )  # (batch_size * num_beams, vocab_size)
    
    probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
    probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability_correct, probability_wrong, scores


In [17]:
len(data_points)

625

In [20]:
template = tm11
context_current = "Lucas was 30 years old."
improvements_correct_1 = []
improvements_correct_2 = []
improvements_wrong_1 = []
improvements_wrong_2 = []
for data_point in data_points:
    
    char_1 = data_point.name_1
    char_2 = data_point.name_2
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    context_previous = data_point.context
    
    question_1 = "Where did " + char_1 + " travel to?"
    question_2 = "Where did " + char_2 + " travel to?"
    
    entities_list = [char_1, char_2, answer_1, answer_2]
    
    entity_hidden_states = previous_timestep(context_previous,  entities_list)
    
    to_prepend = " ".join(entities_list)
    
    prompt_current = to_prepend + " " + context_current
    
    probability_correct_1, probability_wrong_1, _ = current_timestep_regular(prompt_current, question_1, answer_1, answer_2, template)
    probability_correct_2, probability_wrong_2, _ = current_timestep_regular(prompt_current, question_2, answer_2, answer_1, template)
    
    probability_correct_enhanced_1, probability_wrong_enhanced_1, _ = current_timestep_enhanced(prompt_current, 
                                                                          question_1, 
                                                                          answer_1, 
                                                                          answer_2,
                                                                          template, 
                                                                          entities_list, 
                                                                          entity_hidden_states)
    
    probability_correct_enhanced_2, probability_wrong_enhanced_2, _ = current_timestep_enhanced(prompt_current, 
                                                                          question_2, 
                                                                          answer_2,
                                                                          answer_1,
                                                                          template, 
                                                                          entities_list, 
                                                                          entity_hidden_states)
    
    improvement_correct_1 = probability_correct_enhanced_1 - probability_correct_1
    improvement_correct_2 = probability_correct_enhanced_2 - probability_correct_2
    improvement_wrong_1 = probability_wrong_enhanced_1 - probability_wrong_1
    improvement_wrong_2 = probability_wrong_enhanced_2 - probability_wrong_2
    improvements_correct_1.append( improvement_correct_1 )
    improvements_correct_2.append( improvement_correct_2 )
    improvements_wrong_1.append( improvement_wrong_1 )
    improvements_wrong_2.append( improvement_wrong_2 )
    
    #NOT RUN
    
    if probability_correct_1 > probability_wrong_1:
        correct_guesses.append(1)
    else:
        correct_guesses.append(0)
    if probability_correct_enhanced_1 > probability_wrong_enhanced_1:
        correct_guesses.append(1)
    else:
        correct_guesses.append(0) 

new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-modeling_t5
new code working-utils
new code working-m

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/predictions/holmes_two_char_two_locs.txt", "wb") as f:
    pickle.dump(results_dict, f)

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/predictions/holmes_two_char_two_locs.txt", "rb") as f:
    results_dict = pickle.load(f)

# Templates

In [5]:
# Multiple Choice

tm1 = Template("""Read the following context and choose the best option to answer the question.
Context: {{ context }}
Question: {{ question }}
Options:
 - {{ answer_choices | join("\n - ") }}""")

tm2 = Template("""{{ context }}
{{ question }}
- {{ answer_choices | join("\n- ") }}""")

tm3 = Template("""{{ context }}
{{ question }}
Pick the correct answer from the following options:
- {{ answer_choices | join("\n- ") }}""")

tm4 = Template("""{{ context }}
According to the above context, choose the best option to answer the following question.
Question: {{ question }}
Options:
- {{answer_choices | join("\n - ")}}
""")

tm5 = Template("""{{ context }}
{{ question }}
Pick the best answer from the following options:
A. {{ answer0 }}
B. {{ answer1 }}
C. {{ answer2 }}
D. {{ answer3 }}""")

tm6 = Template("""{{ context }}
According to the above context, choose the best option to answer the following question.
Question: {{ question }}
Options:
A. {{ answer0 }}
B. {{ answer1 }}
C. {{ answer2 }}
D. {{ answer3 }}""")

tm7 = Template("""{{ context }}
{{ question }}
A. {{ answer0 }}
B. {{ answer1 }}
C. {{ answer2 }}
D. {{ answer3 }}""")

# Open-Ended

tm8 = Template("""Question: "{{question}}"
Context: "{{context}}"
Answer:""")

tm9 = Template("""{{ context }}
Given the paragraph above, please answer correctly the following
question:
{{ question }}""")

tm10 = Template("""Given the following passage
"{{context}}",
answer the following question. Note that the answer is present within
the text.
Question: {{question}}""")

tm11 = Template("{{context}} What is the answer to: {{question}}")

In [15]:
import sys
sys.path.append('/kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/models/')
cp modeling_t5.py /kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/models/t5/
cp utils.py /kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/generation/
cp modeling_outputs.py /kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/

SyntaxError: invalid syntax (676971914.py, line 3)