# Necessary Imports and Settings

In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import transformers
import torch
import os
import nltk
import pandas as pd
import torch
import numpy as np
from jinja2 import Template
import xmltodict
import pickle
from collections import defaultdict
from fuzzywuzzy import fuzz
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import time
import copy

import sys
sys.path.append('/scratch/users/bozyurt20/hpc_run/utilities')
sys.path.append("/scratch/users/bozyurt20/hpc_run/blobs/")
from util_research import *

#from toy_dataset import contexts

max_len = 512
num_layers = 24
d_model = 4096

tokenizer = AutoTokenizer.from_pretrained("bigscience/T0pp", truncation_side="right", add_prefix_space=True)

In [2]:
model = AutoModelForSeq2SeqLM.from_pretrained("bigscience/T0pp", device_map="balanced", load_in_8bit=True)




Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /kuacc/users/bozyurt20/.conda/envs/hf/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 110
CUDA SETUP: Loading binary /kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda110.so...


In [4]:
names_1 = ["John", "Harry", "Andrew", "Lisa", "Mary"]
names_2 = ["Henry", "David", "Sophia", "Olivia", "Emma"]

cities_1 = ["London", "Paris", "Oslo", "Istanbul", "Beijing"]
cities_2 = ["Sydney", "Cairo", "Seoul", "Rome", "Prague"]

class TwoSentenceMoveResult():
    def __init__ (self, data_point, prev_context, answer_1, answer_2, model_out_1, model_out_2, out_generated_1, out_generated_2):
        
        self.data_point = data_point
        self.prev_context = prev_context
        self.answer_1 = answer_1
        self.answer_2 = answer_2   
        self.model_out_1 = model_out_1
        self.model_out_2 = model_out_2
        self.out_generated_1 = out_generated_1
        self.out_generated_2 = out_generated_2

class DataSample_TwoSentences():
    def __init__(self, name_1, name_2, city_1, city_2):
        self.name_1 = name_1
        self.name_2 = name_2
        self.city_1 = city_1
        self.city_2 = city_2
        self.prev_context = name_1 + " travelled to " + city_1 + ". " + name_2 + " travelled to " + city_2 + "."
        self.current_context_1 = name_1 + " met Lucas. Lucas was 30 years old. Where is " + name_1 + "?"
        self.current_context_2 = name_2 + " met Lucas. Lucas was 30 years old. Where is " + name_2 + "?"
        
    def add_prev_encoding(self, encoding):
        self.prev_encoding = encoding
    def add_current_encoding_1(self, encoding):
        self.curr_encoding_1 = encoding
    def add_current_encoding_2(self, encoding):
        self.curr_encoding_2 = encoding
    def add_prev_special(self, encoding):
        self.prev_special = encoding

# Case 1 Pre-Encodings

In [21]:
data_points_two_sentences = []

for name_1 in names_1:
    for name_2 in names_2:
        for city_1 in cities_1:
            for city_2 in cities_2:
                
                sample = DataSample_TwoSentences(name_1, name_2, city_1, city_2)
                
                prev_context = sample.prev_context 
                current_context_1 = sample.current_context_1
                current_context_2 = sample.current_context_2
                
                prev_tokens = tokenizer.encode(prev_context, return_tensors="pt").to(model.encoder.device)
                with torch.no_grad():
                    encoded_prev = model.encoder(prev_tokens, output_special=True, output_hidden_states=True)
                    
                sample.add_prev_encoding(encoded_prev)
        
                data_points_two_sentences.append(sample)
        

# Case 2 Pre-Encodings

In [21]:
data_points_two_sentences = []

for name_1 in names_1:
    for name_2 in names_2:
        for city_1 in cities_1:
            for city_2 in cities_2:
                
                sample = DataSample_TwoSentences(name_1, name_2, city_1, city_2)
                
                prev_context = sample.prev_context 
                current_context_1 = sample.current_context_1
                current_context_2 = sample.current_context_2
                
                prev_tokens = tokenizer.encode(prev_context, return_tensors="pt").to(model.encoder.device)
                with torch.no_grad():
                    encoded_prev = model.encoder(prev_tokens)
                    
                sample.add_prev_encoding(encoded_prev)

                curr_tokens_1 = tokenizer.encode(current_context_1, return_tensors="pt").to(model.encoder.device)
                with torch.no_grad():
                    encoded_curr_1 = model.encoder(curr_tokens_1)
                sample.add_current_encoding_1(encoded_curr_1)
                
                curr_tokens_2 = tokenizer.encode(current_context_2, return_tensors="pt").to(model.encoder.device)
                with torch.no_grad():
                    encoded_curr_2 = model.encoder(curr_tokens_2)
                sample.add_current_encoding_2(encoded_curr_2)
        
                data_points_two_sentences.append(sample)
        

# Move: John London Mary Sydney

In [18]:
ids_to_move = [ 0, 4, 6, 10 ]

results = []

for num_data_point, data_point in enumerate(data_points_two_sentences):
    
    print("NOW PROCESSING:", num_data_point)
    start = time.time()
    prev_context = data_point.prev_context
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    entity_embeddings = data_point.prev_encoding
    curr_embeddings_1 = data_point.curr_encoding_1
    curr_embeddings_2 = data_point.curr_encoding_2
    
    num_prev_tokens = entity_embeddings[0].shape[1]
    
    curr_embeddings_1_copy = copy.deepcopy(curr_embeddings_1)
    
    for i in ids_to_move:
        curr_embeddings_1_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], curr_embeddings_1_copy.last_hidden_state), dim=1)
    
    model_out_1 = model.generate(encoder_outputs=curr_embeddings_1_copy, 
                               max_new_tokens=10,
                               return_dict_in_generate=True, 
                               output_scores=True)

    out_generated_1 = tokenizer.decode(model_out_1.sequences[0], skip_special_tokens=True)

    curr_embeddings_2_copy = copy.deepcopy(curr_embeddings_2)
    
    for i in ids_to_move:
        curr_embeddings_2_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], curr_embeddings_2_copy.last_hidden_state), dim=1)
    
    model_out_2 = model.generate(encoder_outputs=curr_embeddings_2_copy, 
                               max_new_tokens=10,
                               return_dict_in_generate=True, 
                               output_scores=True)

    out_generated_2 = tokenizer.decode(model_out_2.sequences[0], skip_special_tokens=True)
        
    results.append(TwoSentenceMoveResult(data_point, prev_context, answer_1, answer_2, model_out_1, model_out_2, out_generated_1, out_generated_2))
        
    print("Time taken:", time.time()-start)
        

NOW PROCESSING: 0
Time taken: 0.9856138229370117
NOW PROCESSING: 1
Time taken: 0.5200128555297852
NOW PROCESSING: 2
Time taken: 0.9783487319946289
NOW PROCESSING: 3
Time taken: 0.49817347526550293
NOW PROCESSING: 4
Time taken: 0.7876453399658203
NOW PROCESSING: 5
Time taken: 0.5837066173553467
NOW PROCESSING: 6
Time taken: 0.5048396587371826
NOW PROCESSING: 7
Time taken: 0.5203790664672852
NOW PROCESSING: 8
Time taken: 0.5105226039886475
NOW PROCESSING: 9
Time taken: 0.5150046348571777
NOW PROCESSING: 10
Time taken: 0.7437753677368164
NOW PROCESSING: 11
Time taken: 0.9814291000366211
NOW PROCESSING: 12
Time taken: 0.9884402751922607
NOW PROCESSING: 13
Time taken: 0.7559261322021484
NOW PROCESSING: 14
Time taken: 0.9845845699310303
NOW PROCESSING: 15
Time taken: 0.7505283355712891
NOW PROCESSING: 16
Time taken: 0.9854686260223389
NOW PROCESSING: 17
Time taken: 0.9807381629943848
NOW PROCESSING: 18
Time taken: 0.6188819408416748
NOW PROCESSING: 19
Time taken: 0.9782288074493408
NOW PROCE

In [None]:
next_token_scores = torch.nn.functional.softmax(out_generated_1.scores[0], dim=-1)  # (batch_size * num_beams, vocab_size)

probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

scores = []
for i, score in enumerate(next_token_scores[0]):
    scores.append( (i, score.item()) )

scores.sort(key=lambda x: x[1], reverse=True)
scores = [(tokenizer.decode(a), b) for a, b in scores]

In [10]:
sum(correctly_guessed[i])/len(correctly_guessed[i])

0.224

In [19]:
sum(correctly_guessed)/len(correctly_guessed)

0.224

In [21]:
for result in results:
    prev_context = result.prev_context
    answer_1 = result.answer_1
    answer_2 = result.answer_2
    model_out_1 = result.model_out_1
    model_out_2 = result.model_out_2
    out_generated_1 = result.out_generated_1
    out_generated_2 = result.out_generated_2
    
    print(prev_context)
    print(out_generated_1)
    print(out_generated_2)
    print()

John travelled to London. Henry travelled to Sydney.
in Australia
in Australia

John travelled to London. Henry travelled to Cairo.
London
London

John travelled to London. Henry travelled to Seoul.
in the hospital
in the hospital

John travelled to London. Henry travelled to Rome.
London
London

John travelled to London. Henry travelled to Prague.
London
in the hospital

John travelled to Paris. Henry travelled to Sydney.
Paris
Paris

John travelled to Paris. Henry travelled to Cairo.
Paris
Paris

John travelled to Paris. Henry travelled to Seoul.
Paris
Paris

John travelled to Paris. Henry travelled to Rome.
Paris
Paris

John travelled to Paris. Henry travelled to Prague.
Paris
Paris

John travelled to Oslo. Henry travelled to Sydney.
in Australia
in Australia

John travelled to Oslo. Henry travelled to Cairo.
in the hospital
in the hospital

John travelled to Oslo. Henry travelled to Seoul.
in the hospital
in the hospital

John travelled to Oslo. Henry travelled to Rome.
in Rome
in 

## Case 1 way of moving previous tokens

In [None]:
correctly_guessed = []

entity_inds = list(range(4))

ids_to_move = [ 0, 4, 6, 10 ]

results_case1 = []

for num_data_point, data_point in enumerate(data_points_two_sentences):
    
    print("NOW PROCESSING:", num_data_point)
    start = time.time()
    prev_context = data_point.prev_context
    name_1 = data_point.name_1
    name_2 = data_point.name_2
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    current_context_1 = data_point.current_context_1
    current_context_2 = data_point.current_context_2
    prev_encoding = data_point.prev_encoding
    
    special_hidden = prev_encoding.special_hidden_states # 24 x (1, T, d)
    len_input_ids = special_hidden[0].shape[1]

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden

    entities_hidden_states = special_reformatted[:, ids_to_move[0], :].unsqueeze(0)

    for tok_id in ids_to_move[1:]:
        entity_hidden_states = special_reformatted[:, tok_id, :].unsqueeze(0)
        entities_hidden_states = torch.cat((entities_hidden_states,
                                            entity_hidden_states), dim=0)
        
    prompt_1 = name_1 + " " + name_2 + " " + answer_1 + " " + answer_2 + " " + current_context_1
    input_ids = tokenizer.encode(prompt_1, return_tensors="pt").to(model.encoder.device)
    model_out_1 = model.generate(input_ids=input_ids, 
                                 max_new_tokens=10,
                                 entity_hidden_states=entities_hidden_states,
                                 entity_inds=entity_inds,
                                 return_dict_in_generate=True, 
                                 output_scores=True)
    
    prompt_2 = name_1 + " " + name_2 + " " + answer_1 + " " + answer_2 + " " + current_context_2
    input_ids = tokenizer.encode(prompt_2, return_tensors="pt").to(model.encoder.device)
    model_out_2 = model.generate(input_ids=input_ids, 
                                 max_new_tokens=10,
                                 entity_hidden_states=entities_hidden_states,
                                 entity_inds=entity_inds,
                                 return_dict_in_generate=True, 
                                 output_scores=True) 
    
    
    out_generated_1 = tokenizer.decode(model_out_1.sequences[0], skip_special_tokens=True)
    out_generated_2 = tokenizer.decode(model_out_2.sequences[0], skip_special_tokens=True)

    if answer_1 in out_generated_1 and answer_2 not in out_generated_1:
        correctly_guessed.append(1)
    else:
        correctly_guessed.append(0)

    if answer_2 in out_generated_2 and answer_1 not in out_generated_2:
        correctly_guessed.append(1)
    else:
        correctly_guessed.append(0)
        
    results_case1.append(TwoSentenceMoveResult(data_point, prev_context, answer_1, answer_2, model_out_1, model_out_2, out_generated_1, out_generated_2))
        
    print("Time taken:", time.time()-start)
        

NOW PROCESSING: 0
Time taken: 1.0440168380737305
NOW PROCESSING: 1
Time taken: 0.7726502418518066
NOW PROCESSING: 2
Time taken: 0.7771773338317871
NOW PROCESSING: 3
Time taken: 0.9555637836456299
NOW PROCESSING: 4
Time taken: 0.901167631149292
NOW PROCESSING: 5
Time taken: 0.7857871055603027
NOW PROCESSING: 6
Time taken: 0.7830791473388672
NOW PROCESSING: 7
Time taken: 0.7776367664337158
NOW PROCESSING: 8
Time taken: 0.7678039073944092
NOW PROCESSING: 9
Time taken: 0.7861144542694092
NOW PROCESSING: 10
Time taken: 0.9130289554595947
NOW PROCESSING: 11
Time taken: 1.0312602519989014
NOW PROCESSING: 12
Time taken: 1.1449713706970215
NOW PROCESSING: 13
Time taken: 0.9166774749755859
NOW PROCESSING: 14
Time taken: 1.0422406196594238
NOW PROCESSING: 15
Time taken: 0.9125089645385742
NOW PROCESSING: 16
Time taken: 1.2894401550292969
NOW PROCESSING: 17
Time taken: 1.2697927951812744
NOW PROCESSING: 18
Time taken: 0.7855188846588135
NOW PROCESSING: 19
Time taken: 1.2967920303344727
NOW PROCESS

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/results_case1_4tokens.txt", "wb") as f:
    pickle.dump(results_case1, f)

In [29]:
sum(correctly_guessed)/len(correctly_guessed)

0.0

In [9]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/results_case1_4tokens.txt", "rb") as f:
    #results_case1 = pickle.load(f)
    #contents = pickle.load(f) becomes...
    results_case1 = CPU_Unpickler(f).load()

In [8]:
import io

In [5]:
class CPU_Unpickler(pickle.Unpickler):
    def find_class(self, module, name):
        if module == 'torch.storage' and name == '_load_from_bytes':
            return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
        else: return super().find_class(module, name)


In [10]:
len(results_case1)

625

In [None]:
# Preliminary Checks

correctly_guessed = []
all_generated = []
all_len = []
twice = 0
for result in results_case1:
    data_point = result.data_point
    prev_context = result.prev_context
    answer_1 = result.answer_1
    answer_2 = result.answer_2
    model_out_1 = result.model_out_1
    model_out_2 = result.model_out_2
    out_generated_1 = result.out_generated_1
    out_generated_2 = result.out_generated_2
    
    all_generated.append(out_generated_1)
    all_generated.append(out_generated_2)
    
    all_len.append(len(tokenizer.encode(out_generated_1)))
    all_len.append(len(tokenizer.encode(out_generated_2)))
    
    if answer_1 in out_generated_1 and answer_2 in out_generated_1:
        twice += 1
    if answer_1 in out_generated_2 and answer_2 in out_generated_2:
        twice += 1
    
    if answer_1 in out_generated_1 and answer_2 not in out_generated_1:
        correctly_guessed.append(1)
    else:
        correctly_guessed.append(0)

    if answer_2 in out_generated_2 and answer_1 not in out_generated_2:
        correctly_guessed.append(1)
    else:
        correctly_guessed.append(0)

print("Max len:", max(all_len))
print("Both entitites generated in one generation:", twice)

In [27]:
set(all_len)

{2, 3, 4, 5}

In [30]:
len(all_len)

1250

In [29]:
all_len.count(3)

388

In [32]:
# Preliminary Checks

correctly_guessed = []
all_generated = []
all_len = []
twice = 0
for result in results_case1:
    data_point = result.data_point
    prev_context = result.prev_context
    answer_1 = result.answer_1
    answer_2 = result.answer_2
    model_out_1 = result.model_out_1
    model_out_2 = result.model_out_2
    out_generated_1 = result.out_generated_1
    out_generated_2 = result.out_generated_2
    
    all_generated.append(out_generated_1)
    all_generated.append(out_generated_2)
    
    all_len.append(len(tokenizer.encode(out_generated_1)))
    all_len.append(len(tokenizer.encode(out_generated_2)))
    
    if len(tokenizer.encode(out_generated_1)) == 5:
        print(out_generated_1)
    
    if answer_1 in out_generated_1 and answer_2 in out_generated_1:
        twice += 1
    if answer_1 in out_generated_2 and answer_2 in out_generated_2:
        twice += 1
    
    if answer_1 in out_generated_1 and answer_2 not in out_generated_1:
        correctly_guessed.append(1)
    else:
        correctly_guessed.append(0)

    if answer_2 in out_generated_2 and answer_1 not in out_generated_2:
        correctly_guessed.append(1)
    else:
        correctly_guessed.append(0)

print("Max len:", max(all_len))
print("Both entitites generated in one generation:", twice)

in the Middle East
in the Middle East
in the Middle East
in the Middle East
in a bar
in a bar
in the Middle East
in a bar
in a bar
in a bar
in the Middle East
in a bar
in the Middle East
in a bar
in a bar
in the Middle East
in a bar
in the United States
in a bar
in the United States
in a bar
in a bar
in a bar
in a bar
in a bar
in a bar
in a relationship
in a bar
in a bar
in a bar
in a bar
in a bar
in a bar
in a bar
in a bar
in a bar
in a bar
in a bar
in a bar
in a bar
in a bar
in a relationship
in a relationship
in a bar
in a bar
in a bar
in a bar
in a bar
in a bar
in a relationship
in a bar
Max len: 5
Both entitites generated in one generation: 0


In [22]:
correctly_guessed = []
all_generated = []
all_len = []
twice = 0
for result in results_case1:
    data_point = result.data_point
    prev_context = result.prev_context
    answer_1 = result.answer_1
    answer_2 = result.answer_2
    model_out_1 = result.model_out_1
    model_out_2 = result.model_out_2
    out_generated_1 = result.out_generated_1
    out_generated_2 = result.out_generated_2
    
    all_generated.append(out_generated_1)
    all_generated.append(out_generated_2)
    
  
    """print(prev_context)
    print(out_generated_1)
    print(out_generated_2)
    """
    """print(prev_context)
    print(answer_1)
    print(answer_2)
    print(model_out_1)
    print(model_out_2)
    print(out_generated_1)
    print(out_generated_2)"""
    
    if answer_1 in out_generated_1 and answer_2 not in out_generated_1:
        correctly_guessed.append(1)
        #print("yeah")
    else:
        correctly_guessed.append(0)

    if answer_2 in out_generated_2 and answer_1 not in out_generated_2:
        correctly_guessed.append(1)
        #print("yeah")
    else:
        correctly_guessed.append(0)
    #print()
    

In [24]:
model_out_2

GreedySearchEncoderDecoderOutput(sequences=tensor([[   0,   16, 1473,    1]]), scores=(tensor([[-34.8750,  -5.8828, -10.8750,  ..., -33.5625, -34.6250, -34.2500]],
       dtype=torch.float16), tensor([[-35.0312,  -5.4609,  -7.9062,  ..., -34.7812, -34.6875, -34.6250]],
       dtype=torch.float16), tensor([[-57.9062,  -3.4023, -14.8594,  ..., -57.0625, -57.0938, -57.1562]],
       dtype=torch.float16)), encoder_attentions=None, encoder_hidden_states=None, decoder_attentions=None, cross_attentions=None, decoder_hidden_states=None)

In [26]:
len(model_out_2.scores)

3

In [20]:
max(all_len)

5

In [15]:
all_generated

['Sydney',
 'Sydney',
 'London',
 'London',
 'London',
 'London',
 'London',
 'London',
 'London',
 'London',
 'Sydney',
 'Paris',
 'Paris',
 'Paris',
 'Paris',
 'Paris',
 'Paris',
 'Paris',
 'Paris',
 'Paris',
 'Sydney',
 'in Australia',
 'in Oslo',
 'in Oslo',
 'in Oslo',
 'in the hospital',
 'Rome',
 'in Rome',
 'in Oslo',
 'in Oslo',
 'Sydney',
 'in Australia',
 'in the middle',
 'in the middle',
 'in the hospital',
 'in the hospital',
 'Rome',
 'Rome',
 'in the hospital',
 'in the hospital',
 'Sydney',
 'in Australia',
 'in Cairo',
 'in Cairo',
 'in Korea',
 'in Korea',
 'in Beijing',
 'in Beijing',
 'in China',
 'in China',
 'Sydney',
 'Sydney',
 'London',
 'London',
 'London',
 'in the UK',
 'Rome',
 'Rome',
 'London',
 'in the UK',
 'Paris',
 'Sydney',
 'Paris',
 'Paris',
 'Paris',
 'Paris',
 'Paris',
 'David is in Rome',
 'Paris',
 'Paris',
 'Sydney',
 'Sydney',
 'in Oslo',
 'in Cairo',
 'in Oslo',
 'in the hospital',
 'Rome',
 'Rome',
 'in Oslo',
 'in Oslo',
 'Sydney',
 'Sydn

In [13]:
sum(correctly_guessed)/len(correctly_guessed)

0.3648

In [18]:
tokenizer.decode([   0,   16,    3,    9, 1207,    1])

'<pad> in a bar</s>'

# Move: London Sydney

## Case 1 way of moving previous tokens

In [None]:
correctly_guessed = []

entity_inds = list(range(2))

ids_to_move = [ 4, 10 ]

results_case1_loctokens = []

for num_data_point, data_point in enumerate(data_points_two_sentences):
    
    print("NOW PROCESSING:", num_data_point)
    start = time.time()
    prev_context = data_point.prev_context
    name_1 = data_point.name_1
    name_2 = data_point.name_2
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    current_context_1 = data_point.current_context_1
    current_context_2 = data_point.current_context_2
    prev_encoding = data_point.prev_encoding
    
    special_hidden = prev_encoding.special_hidden_states # 24 x (1, T, d)
    len_input_ids = special_hidden[0].shape[1]

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden

    entities_hidden_states = special_reformatted[:, ids_to_move[0], :].unsqueeze(0)

    for tok_id in ids_to_move[1:]:
        entity_hidden_states = special_reformatted[:, tok_id, :].unsqueeze(0)
        entities_hidden_states = torch.cat((entities_hidden_states,
                                            entity_hidden_states), dim=0)
        
    prompt_1 = answer_1 + " " + answer_2 + " " + current_context_1
    input_ids = tokenizer.encode(prompt_1, return_tensors="pt").to(model.encoder.device)
    model_out_1 = model.generate(input_ids=input_ids, 
                                 max_new_tokens=10,
                                 entity_hidden_states=entities_hidden_states,
                                 entity_inds=entity_inds,
                                 return_dict_in_generate=True, 
                                 output_scores=True)
    
    prompt_2 = answer_1 + " " + answer_2 + " " + current_context_2
    input_ids = tokenizer.encode(prompt_2, return_tensors="pt").to(model.encoder.device)
    model_out_2 = model.generate(input_ids=input_ids, 
                                 max_new_tokens=10,
                                 entity_hidden_states=entities_hidden_states,
                                 entity_inds=entity_inds,
                                 return_dict_in_generate=True, 
                                 output_scores=True) 
    
    
    out_generated_1 = tokenizer.decode(model_out_1.sequences[0], skip_special_tokens=True)
    out_generated_2 = tokenizer.decode(model_out_2.sequences[0], skip_special_tokens=True)

    results_case1_loctokens.append(TwoSentenceMoveResult(data_point, prev_context, answer_1, answer_2, model_out_1, model_out_2, out_generated_1, out_generated_2))
        
    print("Time taken:", time.time()-start)
        

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/results_case1_loctokens.txt", "wb") as f:
    pickle.dump(results_case1_loctokens, f)

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/results_case1_loctokens.txt", "rb") as f:
    results_case1_loctokens = pickle.load(f)

# Move: John Mary

## Case 1 way of moving previous tokens

In [None]:
correctly_guessed = []

entity_inds = list(range(2))

ids_to_move = [ 0, 6 ]

results_case1_chartokens = []

for num_data_point, data_point in enumerate(data_points_two_sentences):
    
    print("NOW PROCESSING:", num_data_point)
    start = time.time()
    prev_context = data_point.prev_context
    name_1 = data_point.name_1
    name_2 = data_point.name_2
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    current_context_1 = data_point.current_context_1
    current_context_2 = data_point.current_context_2
    prev_encoding = data_point.prev_encoding
    
    special_hidden = prev_encoding.special_hidden_states # 24 x (1, T, d)
    len_input_ids = special_hidden[0].shape[1]

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden

    entities_hidden_states = special_reformatted[:, ids_to_move[0], :].unsqueeze(0)

    for tok_id in ids_to_move[1:]:
        entity_hidden_states = special_reformatted[:, tok_id, :].unsqueeze(0)
        entities_hidden_states = torch.cat((entities_hidden_states,
                                            entity_hidden_states), dim=0)
        
    prompt_1 = answer_1 + " " + answer_2 + " " + current_context_1
    input_ids = tokenizer.encode(prompt_1, return_tensors="pt").to(model.encoder.device)
    model_out_1 = model.generate(input_ids=input_ids, 
                                 max_new_tokens=10,
                                 entity_hidden_states=entities_hidden_states,
                                 entity_inds=entity_inds,
                                 return_dict_in_generate=True, 
                                 output_scores=True)
    
    prompt_2 = answer_1 + " " + answer_2 + " " + current_context_2
    input_ids = tokenizer.encode(prompt_2, return_tensors="pt").to(model.encoder.device)
    model_out_2 = model.generate(input_ids=input_ids, 
                                 max_new_tokens=10,
                                 entity_hidden_states=entities_hidden_states,
                                 entity_inds=entity_inds,
                                 return_dict_in_generate=True, 
                                 output_scores=True) 
    
    
    out_generated_1 = tokenizer.decode(model_out_1.sequences[0], skip_special_tokens=True)
    out_generated_2 = tokenizer.decode(model_out_2.sequences[0], skip_special_tokens=True)
        
    results_case1_chartokens.append(TwoSentenceMoveResult(data_point, prev_context, answer_1, answer_2, model_out_1, model_out_2, out_generated_1, out_generated_2))
        
    print("Time taken:", time.time()-start)
        

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/results_case1_chartokens.txt", "wb") as f:
    pickle.dump(results_case1_chartokens, f)

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/results_case1_chartokens.txt", "rb") as f:
    results_case1_chartokens = pickle.load(f)

## Case 2 way of moving prev tokens

# Old

# No Entities Moved

In [36]:
correctly_guessed_zero = []

for num_data_point, data_point in enumerate(data_points_one_sentence):
    
    current_context = data_point.current_context
    answer = data_point.city_1
    current_context_ids = tokenizer.encode(current_context, return_tensors="pt").cuda()

    out = model.generate(
            input_ids=current_context_ids,
            max_new_tokens=10,
            #return_dict_in_generate=True,
            #output_scores=True
        ) 

    out_generated = tokenizer.decode(out[0], skip_special_tokens=True)

    if answer in out_generated:
        correctly_guessed_zero.append(1)
    else:
        correctly_guessed_zero.append(0)
        

In [37]:
correctly_guessed_zero

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

# One Entity to Move

In [7]:
correctly_guessed = defaultdict(list)

for num_data_point, data_point in enumerate(data_points_two_sentences):
    
    print("NOW PROCESSING:", num_data_point)
    start = time.time()
    prev_context = data_point.prev_context
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    entity_embeddings = data_point.prev_encoding
    curr_embeddings_1 = data_point.curr_encoding_1
    curr_embeddings_2 = data_point.curr_encoding_2
    
    num_prev_tokens = entity_embeddings[0].shape[1]
    
    for i in range(num_prev_tokens):
        
        curr_embeddings_1_copy = copy.deepcopy(curr_embeddings_1)
        curr_embeddings_1_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], curr_embeddings_1[0]), dim=1)
        model_out = model.generate(encoder_outputs=curr_embeddings_1_copy, max_new_tokens=10)
        
        out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)
        
        if answer_1 in out_generated:
            correctly_guessed[i].append(1)
        else:
            correctly_guessed[i].append(0)
    
    for i in range(num_prev_tokens):
        
        curr_embeddings_2_copy = copy.deepcopy(curr_embeddings_2)
        curr_embeddings_2_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], curr_embeddings_2[0]), dim=1)
        model_out = model.generate(encoder_outputs=curr_embeddings_2_copy, max_new_tokens=10)
        
        out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)
        
        if answer_2 in out_generated:
            correctly_guessed[i].append(1)
        else:
            correctly_guessed[i].append(0)
        
    print("Time taken:", time.time()-start)
        

NOW PROCESSING: 0
Time taken: 14.349588394165039
NOW PROCESSING: 1
Time taken: 12.969437599182129
NOW PROCESSING: 2
Time taken: 12.846654415130615
NOW PROCESSING: 3
Time taken: 12.607491493225098
NOW PROCESSING: 4
Time taken: 12.971042394638062
NOW PROCESSING: 5
Time taken: 12.595314741134644
NOW PROCESSING: 6
Time taken: 12.908752918243408
NOW PROCESSING: 7
Time taken: 12.991603136062622
NOW PROCESSING: 8
Time taken: 12.600220441818237
NOW PROCESSING: 9
Time taken: 12.966579675674438
NOW PROCESSING: 10
Time taken: 12.585872650146484
NOW PROCESSING: 11
Time taken: 12.967057466506958
NOW PROCESSING: 12
Time taken: 13.085659742355347
NOW PROCESSING: 13
Time taken: 12.71540880203247
NOW PROCESSING: 14
Time taken: 13.223845481872559
NOW PROCESSING: 15
Time taken: 13.08280897140503
NOW PROCESSING: 16
Time taken: 13.331194639205933
NOW PROCESSING: 17
Time taken: 13.458587169647217
NOW PROCESSING: 18
Time taken: 13.086308002471924
NOW PROCESSING: 19
Time taken: 13.725442171096802
NOW PROCESSI

KeyboardInterrupt: 

In [17]:
for i in correctly_guessed:
    print(i)
    print(sum(correctly_guessed[i])/len(correctly_guessed[i]))

0
0.0
1
0.0
2
0.01
3
0.01
4
0.64
5
0.63
6
0.63


In [20]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/one_entity_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed, f)

# Two Entities to Move

In [21]:
correctly_guessed_two = {}
my_context = []

for i in range(num_prev_tokens):
    correctly_guessed_two[i] = defaultdict(list)

for data_point in data_points_one_sentence:
    start = time.time()
    prev_context = data_point.prev_context
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    entity_embeddings = data_point.prev_encoding
    curr_embeddings_1 = data_point.curr_encoding_1
    curr_embeddings_2 = data_point.curr_encoding_2
    
    num_prev_tokens = entity_embeddings[0].shape[1]
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            curr_embeddings_1_copy = copy.deepcopy(curr_embeddings_1)
            
            curr_embeddings_1_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], 
                                                           entity_embeddings[0][:,j:j+1,:], 
                                                           curr_embeddings_1[0]), dim=1)
            model_out = model.generate(encoder_outputs=curr_embeddings_1_copy, max_new_tokens=10)

            out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)

            if answer_1 in out_generated:
                correctly_guessed_two[i][j].append(1)
            else:
                correctly_guessed_two[i][j].append(0)
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            curr_embeddings_2_copy = copy.deepcopy(curr_embeddings_2)
            
            curr_embeddings_2_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], 
                                                           entity_embeddings[0][:,j:j+1,:], 
                                                           curr_embeddings_2[0]), dim=1)
            
            model_out = model.generate(encoder_outputs=curr_embeddings_2_copy, max_new_tokens=10)

            out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)

            if answer_2 in out_generated:
                correctly_guessed_two[i][j].append(1)
            else:
                correctly_guessed_two[i][j].append(0)

    print("Time taken:", time.time()-start)


Time taken: 15.689270973205566
Time taken: 7.877184867858887
Time taken: 16.310548067092896
Time taken: 16.628948211669922
Time taken: 15.095526218414307
Time taken: 5.777379512786865
Time taken: 5.787660598754883
Time taken: 7.307344198226929
Time taken: 5.748011350631714
Time taken: 16.38280463218689
Time taken: 10.343215703964233
Time taken: 8.428033590316772
Time taken: 13.969268083572388
Time taken: 12.647607803344727
Time taken: 12.805526494979858
Time taken: 5.7447285652160645
Time taken: 11.885571241378784
Time taken: 14.812684297561646
Time taken: 6.097102880477905
Time taken: 12.940143346786499
Time taken: 14.110896110534668
Time taken: 7.005489110946655
Time taken: 13.982955932617188
Time taken: 13.963043212890625
Time taken: 15.390105247497559
Time taken: 5.695706844329834
Time taken: 8.2945876121521
Time taken: 9.831464529037476
Time taken: 5.704683542251587
Time taken: 16.372822284698486
Time taken: 12.454452753067017
Time taken: 5.689663410186768
Time taken: 11.912539958

In [34]:
for cont in my_context:
    print(tokenizer.decode(cont[0]))

John  John met Lucas. Lucas was 30 years old. Where is John?</s>
Johntravelled John met Lucas. Lucas was 30 years old. Where is John?</s>
John to John met Lucas. Lucas was 30 years old. Where is John?</s>
John London John met Lucas. Lucas was 30 years old. Where is John?</s>
John. John met Lucas. Lucas was 30 years old. Where is John?</s>
John</s> John met Lucas. Lucas was 30 years old. Where is John?</s>
travelled John met Lucas. Lucas was 30 years old. Where is John?</s>
 to John met Lucas. Lucas was 30 years old. Where is John?</s>
 London John met Lucas. Lucas was 30 years old. Where is John?</s>
. John met Lucas. Lucas was 30 years old. Where is John?</s>
</s> John met Lucas. Lucas was 30 years old. Where is John?</s>
travelled to John met Lucas. Lucas was 30 years old. Where is John?</s>
travelled London John met Lucas. Lucas was 30 years old. Where is John?</s>
travelled. John met Lucas. Lucas was 30 years old. Where is John?</s>
travelled</s> John met Lucas. Lucas was 30 years 

In [22]:
for i in range(7):
    for j in range(i+1, 7):
        print(i, j)
        print(sum(correctly_guessed_two[i][j]))
        print(len(correctly_guessed_two[i][j]))
        print(sum(correctly_guessed_two[i][j])/len(correctly_guessed_two[i][j]))
        print()

0 1
58
100
0.58

0 2
60
100
0.6

0 3
59
100
0.59

0 4
71
100
0.71

0 5
68
100
0.68

0 6
64
100
0.64

1 2
69
100
0.69

1 3
71
100
0.71

1 4
75
100
0.75

1 5
72
100
0.72

1 6
74
100
0.74

2 3
80
100
0.8

2 4
81
100
0.81

2 5
82
100
0.82

2 6
81
100
0.81

3 4
84
100
0.84

3 5
86
100
0.86

3 6
87
100
0.87

4 5
89
100
0.89

4 6
90
100
0.9

5 6
88
100
0.88



In [24]:
for i in range(7):
    #for j in range(i+1, 7):
    print(len(correctly_guessed_two[i]))

6
5
4
3
2
1
0


In [23]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/two_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_two, f)

# Three Entities to Move

In [None]:
correctly_guessed_three = {}
for i in range(num_prev_tokens):

    correctly_guessed_three[i] = {}

    for j in range(i+1, num_prev_tokens):

        correctly_guessed_three[i][j] = defaultdict(list)

for data_point in data_points_one_sentence:
    prev_context = data_point.prev_context
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    entity_embeddings = data_point.prev_encoding
    curr_embeddings_1 = data_point.curr_encoding_1
    curr_embeddings_2 = data_point.curr_encoding_2
    
    num_prev_tokens = entity_embeddings[0].shape[1]
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
                
                curr_embeddings_1_copy = copy.deepcopy(curr_embeddings_1)
            
                curr_embeddings_1_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], 
                                                               entity_embeddings[0][:,j:j+1,:], 
                                                               entity_embeddings[0][:,k:k+1,:], 
                                                               curr_embeddings_1[0]), dim=1)
                model_out = model.generate(encoder_outputs=curr_embeddings_1_copy, max_new_tokens=10)

                out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)
                
                if answer_1 in out_generated:
                    correctly_guessed_three[i][j][k].append(1)
                else:
                    correctly_guessed_three[i][j][k].append(0)
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
                
                curr_embeddings_2_copy = copy.deepcopy(curr_embeddings_2)
            
                curr_embeddings_2_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], 
                                                               entity_embeddings[0][:,j:j+1,:], 
                                                               entity_embeddings[0][:,k:k+1,:], 
                                                               curr_embeddings_2[0]), dim=1)
                model_out = model.generate(encoder_outputs=curr_embeddings_2_copy, max_new_tokens=10)

                out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)
                
                if answer_2 in out_generated:
                    correctly_guessed_three[i][j][k].append(1)
                else:
                    correctly_guessed_three[i][j][k].append(0)


In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/three_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_three, f)

# Four Entities to Move

In [None]:
correctly_guessed_four = {}
for i in range(num_prev_tokens):

    correctly_guessed_four[i] = {}

    for j in range(i+1, num_prev_tokens):

        correctly_guessed_four[i][j] = {}

        for k in range(j+1, num_prev_tokens):

            correctly_guessed_four[i][j][k] = defaultdict(list)

for data_point in data_points_one_sentence:
    prev_context = data_point.prev_context
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    entity_embeddings = data_point.prev_encoding
    curr_embeddings_1 = data_point.curr_encoding_1
    curr_embeddings_2 = data_point.curr_encoding_2
    
    num_prev_tokens = entity_embeddings[0].shape[1]
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
                
                for l in range(k+1, num_prev_tokens):
                    
                    curr_embeddings_1_copy = copy.deepcopy(curr_embeddings_1)
            
                    curr_embeddings_1_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], 
                                                                   entity_embeddings[0][:,j:j+1,:], 
                                                                   entity_embeddings[0][:,k:k+1,:], 
                                                                   entity_embeddings[0][:,l:l+1,:], 
                                                                   curr_embeddings_1[0]), dim=1)
                
                    model_out = model.generate(encoder_outputs=curr_embeddings_1_copy, max_new_tokens=10)

                    out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)

                    if answer_1 in out_generated:
                        correctly_guessed_four[i][j][k][l].append(1)
                    else:
                        correctly_guessed_four[i][j][k][l].append(0)
                        
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
                
                for l in range(k+1, num_prev_tokens):
                    
                    curr_embeddings_2_copy = copy.deepcopy(curr_embeddings_2)
            
                    curr_embeddings_2_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], 
                                                                   entity_embeddings[0][:,j:j+1,:], 
                                                                   entity_embeddings[0][:,k:k+1,:], 
                                                                   entity_embeddings[0][:,l:l+1,:], 
                                                                   curr_embeddings_2[0]), dim=1)
                
                    model_out = model.generate(encoder_outputs=curr_embeddings_2_copy, max_new_tokens=10)

                    out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)

                    if answer_2 in out_generated:
                        correctly_guessed_four[i][j][k][l].append(1)
                    else:
                        correctly_guessed_four[i][j][k][l].append(0)



In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/four_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_four, f)

# Five Entities to Move

In [None]:
correctly_guessed_five = {}

for i in range(num_prev_tokens):

    correctly_guessed_five[i] = {}

    for j in range(i+1, num_prev_tokens):

        correctly_guessed_five[i][j] = {}

        for k in range(j+1, num_prev_tokens):

            correctly_guessed_five[i][j][k] = {}

            for l in range(k+1, num_prev_tokens):

                correctly_guessed_five[i][j][k][l] = defaultdict(list)

for data_point in data_points_one_sentence:
    prev_context = data_point.prev_context
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    entity_embeddings = data_point.prev_encoding
    curr_embeddings_1 = data_point.curr_encoding_1
    curr_embeddings_2 = data_point.curr_encoding_2
    
    num_prev_tokens = entity_embeddings[0].shape[1]
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
                
                for l in range(k+1, num_prev_tokens):
                    
                    for m in range(l+1, num_prev_tokens):
                        
                        curr_embeddings_1_copy = copy.deepcopy(curr_embeddings_1)
                        
                        curr_embeddings_1_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], 
                                                                       entity_embeddings[0][:,j:j+1,:], 
                                                                       entity_embeddings[0][:,k:k+1,:], 
                                                                       entity_embeddings[0][:,l:l+1,:], 
                                                                       entity_embeddings[0][:,m:m+1,:], 
                                                                       curr_embeddings_1[0]), dim=1)
                
                        model_out = model.generate(encoder_outputs=curr_embeddings_1_copy, max_new_tokens=10)

                        out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)

                        if answer_1 in out_generated:
                            correctly_guessed_five[i][j][k][l][m].append(1)
                        else:
                            correctly_guessed_five[i][j][k][l][m].append(0)
        
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
                
                for l in range(k+1, num_prev_tokens):
                    
                    for m in range(l+1, num_prev_tokens):
                        
                        curr_embeddings_2_copy = copy.deepcopy(curr_embeddings_2)
                        
                        curr_embeddings_2_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], 
                                                                       entity_embeddings[0][:,j:j+1,:], 
                                                                       entity_embeddings[0][:,k:k+1,:], 
                                                                       entity_embeddings[0][:,l:l+1,:], 
                                                                       entity_embeddings[0][:,m:m+1,:], 
                                                                       curr_embeddings_2[0]), dim=1)
                
                        model_out = model.generate(encoder_outputs=curr_embeddings_2_copy, max_new_tokens=10)

                        out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)

                        if answer_2 in out_generated:
                            correctly_guessed_five[i][j][k][l][m].append(1)
                        else:
                            correctly_guessed_five[i][j][k][l][m].append(0)


In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/five_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_five, f)

# Six Entities to Move

In [None]:
correctly_guessed_six = {}
for i in range(num_prev_tokens):
        
    correctly_guessed_six[i] = {}

    for j in range(i+1, num_prev_tokens):

        correctly_guessed_six[i][j] = {}

        for k in range(j+1, num_prev_tokens):

            correctly_guessed_six[i][j][k] = {}

            for l in range(k+1, num_prev_tokens):

                correctly_guessed_six[i][j][k][l] = {}

                for m in range(l+1, num_prev_tokens):

                    correctly_guessed_six[i][j][k][l][m] = defaultdict(list)

for data_point in data_points_one_sentence:
    prev_context = data_point.prev_context
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    entity_embeddings = data_point.prev_encoding
    curr_embeddings_1 = data_point.curr_encoding_1
    curr_embeddings_2 = data_point.curr_encoding_2
    
    num_prev_tokens = entity_embeddings[0].shape[1]
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
                
                for l in range(k+1, num_prev_tokens):
                    
                    for m in range(l+1, num_prev_tokens):
                        
                        for n in range(m+1, num_prev_tokens):
                            
                            curr_embeddings_1_copy = copy.deepcopy(curr_embeddings_1)

                            curr_embeddings_1_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], 
                                                                           entity_embeddings[0][:,j:j+1,:], 
                                                                           entity_embeddings[0][:,k:k+1,:], 
                                                                           entity_embeddings[0][:,l:l+1,:], 
                                                                           entity_embeddings[0][:,m:m+1,:], 
                                                                           entity_embeddings[0][:,n:n+1,:], 
                                                                           curr_embeddings_1[0]), dim=1)
                
                            model_out = model.generate(encoder_outputs=curr_embeddings_1_copy, max_new_tokens=10)

                            out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)

                            if answer_1 in out_generated:
                                correctly_guessed_six[i][j][k][l][m][n].append(1)
                            else:
                                correctly_guessed_six[i][j][k][l][m][n].append(0)
    
    for i in range(num_prev_tokens):
        
        for j in range(i+1, num_prev_tokens):
            
            for k in range(j+1, num_prev_tokens):
                
                for l in range(k+1, num_prev_tokens):
                    
                    for m in range(l+1, num_prev_tokens):
                        
                        for n in range(m+1, num_prev_tokens):
                            
                            curr_embeddings_2_copy = copy.deepcopy(curr_embeddings_2)

                            curr_embeddings_2_copy.last_hidden_state = torch.cat((entity_embeddings[0][:,i:i+1,:], 
                                                                           entity_embeddings[0][:,j:j+1,:], 
                                                                           entity_embeddings[0][:,k:k+1,:], 
                                                                           entity_embeddings[0][:,l:l+1,:], 
                                                                           entity_embeddings[0][:,m:m+1,:], 
                                                                           entity_embeddings[0][:,n:n+1,:], 
                                                                           curr_embeddings_2[0]), dim=1)
                
                            model_out = model.generate(encoder_outputs=curr_embeddings_2_copy, max_new_tokens=10)

                            out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)

                            if answer_2 in out_generated:
                                correctly_guessed_six[i][j][k][l][m][n].append(1)
                            else:
                                correctly_guessed_six[i][j][k][l][m][n].append(0)

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/six_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_six, f)

# Seven Entities to Move

In [None]:

correctly_guessed_seven = []

for data_point in data_points_one_sentence:
    prev_context = data_point.prev_context
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    entity_embeddings = data_point.prev_encoding
    curr_embeddings_1 = data_point.curr_encoding_1
    curr_embeddings_2 = data_point.curr_encoding_2
    
    num_prev_tokens = entity_embeddings[0].shape[1]
    
    curr_embeddings_1.last_hidden_state = torch.cat((entity_embeddings[0], 
                                                   curr_embeddings_1[0]), dim=1)

    model_out = model.generate(encoder_outputs=curr_embeddings_1, max_new_tokens=10)

    out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)

    if answer_1 in out_generated:
        correctly_guessed_seven.append(1)
    else:
        correctly_guessed_seven.append(0)
        
    curr_embeddings_2.last_hidden_state = torch.cat((entity_embeddings[0], 
                                                   curr_embeddings_2[0]), dim=1)

    model_out = model.generate(encoder_outputs=curr_embeddings_2, max_new_tokens=10)

    out_generated = tokenizer.decode(model_out[0], skip_special_tokens=True)

    if answer_2 in out_generated:
        correctly_guessed_seven.append(1)
    else:
        correctly_guessed_seven.append(0)

        
        

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/TwoSentence/seven_entities_moved.txt", "wb") as f:
    pickle.dump(correctly_guessed_seven, f)

# All Results

In [46]:
lookup_tokens = {
    0: "John",
    1: "_",
    2: "travelled",
    3: "to",
    4: "London",
    5: ".",
    6: "</s>"
}

In [54]:
all_results = []

for i in range(7):
    my_list = correctly_guessed[i]
    avg_list = sum(my_list)/len(my_list)
    all_results.append(([lookup_tokens[i]], avg_list))

all_results.sort(key=lambda x: x[1])
all_results

[(['John'], 0.0),
 (['_'], 0.0),
 (['travelled'], 0.01),
 (['to'], 0.01),
 (['.'], 0.63),
 (['</s>'], 0.63),
 (['London'], 0.64)]

In [47]:
all_results = []

for i in range(7):
    for j in range(i+1, 7):
        my_list = correctly_guessed_two[i][j]
        avg_list = sum(my_list)/len(my_list)
        all_results.append(([lookup_tokens[i], lookup_tokens[j]], avg_list))

all_results.sort(key=lambda x: x[1])
all_results

[(['John', '_'], 0.58),
 (['John', 'to'], 0.59),
 (['John', 'travelled'], 0.6),
 (['John', '</s>'], 0.64),
 (['John', '.'], 0.68),
 (['_', 'travelled'], 0.69),
 (['John', 'London'], 0.71),
 (['_', 'to'], 0.71),
 (['_', '.'], 0.72),
 (['_', '</s>'], 0.74),
 (['_', 'London'], 0.75),
 (['travelled', 'to'], 0.8),
 (['travelled', 'London'], 0.81),
 (['travelled', '</s>'], 0.81),
 (['travelled', '.'], 0.82),
 (['to', 'London'], 0.84),
 (['to', '.'], 0.86),
 (['to', '</s>'], 0.87),
 (['.', '</s>'], 0.88),
 (['London', '.'], 0.89),
 (['London', '</s>'], 0.9)]

In [48]:
all_results = []

for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            my_list = correctly_guessed_three[i][j][k]
            avg_list = sum(my_list)/len(my_list)
            all_results.append(([lookup_tokens[i], lookup_tokens[j], lookup_tokens[k]], avg_list))

all_results.sort(key=lambda x: x[1])
all_results

[(['John', '_', '</s>'], 0.85),
 (['John', '_', 'travelled'], 0.86),
 (['John', 'travelled', 'to'], 0.86),
 (['John', '_', 'London'], 0.87),
 (['John', '_', '.'], 0.87),
 (['John', '_', 'to'], 0.89),
 (['John', 'travelled', 'London'], 0.89),
 (['John', 'travelled', '.'], 0.89),
 (['John', 'travelled', '</s>'], 0.92),
 (['John', 'to', 'London'], 0.92),
 (['John', 'to', '.'], 0.92),
 (['travelled', 'London', '.'], 0.92),
 (['_', 'travelled', '.'], 0.93),
 (['_', 'travelled', '</s>'], 0.93),
 (['_', 'London', '.'], 0.93),
 (['_', 'London', '</s>'], 0.93),
 (['travelled', 'to', 'London'], 0.93),
 (['travelled', 'to', '</s>'], 0.93),
 (['travelled', '.', '</s>'], 0.93),
 (['to', '.', '</s>'], 0.93),
 (['London', '.', '</s>'], 0.93),
 (['John', 'to', '</s>'], 0.94),
 (['John', 'London', '.'], 0.94),
 (['John', 'London', '</s>'], 0.94),
 (['John', '.', '</s>'], 0.94),
 (['_', 'travelled', 'to'], 0.94),
 (['_', 'travelled', 'London'], 0.94),
 (['_', 'to', 'London'], 0.94),
 (['_', 'to', '</s>'

In [49]:
all_results = []

for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            for l in range(k+1, 7):
                my_list = correctly_guessed_four[i][j][k][l]
                avg_list = sum(my_list)/len(my_list)
                all_results.append(([lookup_tokens[i], lookup_tokens[j], lookup_tokens[k], lookup_tokens[l]], avg_list))

all_results.sort(key=lambda x: x[1])
all_results

[(['John', '_', 'to', '.'], 0.93),
 (['John', '_', 'travelled', 'to'], 0.94),
 (['John', '_', 'travelled', 'London'], 0.94),
 (['John', '_', 'travelled', '</s>'], 0.94),
 (['John', 'travelled', 'to', 'London'], 0.94),
 (['John', 'travelled', 'to', '.'], 0.94),
 (['John', '_', 'travelled', '.'], 0.95),
 (['John', '_', 'London', '.'], 0.95),
 (['John', '_', 'London', '</s>'], 0.95),
 (['John', 'travelled', 'to', '</s>'], 0.96),
 (['John', 'travelled', 'London', '.'], 0.96),
 (['John', 'travelled', '.', '</s>'], 0.96),
 (['John', 'to', 'London', '.'], 0.96),
 (['John', 'London', '.', '</s>'], 0.96),
 (['_', 'travelled', 'to', '.'], 0.96),
 (['_', 'travelled', 'to', '</s>'], 0.96),
 (['John', '_', 'to', '</s>'], 0.97),
 (['John', '_', '.', '</s>'], 0.97),
 (['John', 'travelled', 'London', '</s>'], 0.97),
 (['_', 'travelled', 'London', '.'], 0.97),
 (['_', 'travelled', 'London', '</s>'], 0.97),
 (['_', 'travelled', '.', '</s>'], 0.97),
 (['_', 'to', 'London', '.'], 0.97),
 (['_', 'to', 'Lon

In [50]:
all_results = []

for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            for l in range(k+1, 7):
                for m in range(l+1, 7):
                    my_list = correctly_guessed_five[i][j][k][l][m]
                    avg_list = sum(my_list)/len(my_list)
                    all_results.append(([lookup_tokens[i], lookup_tokens[j], lookup_tokens[k], lookup_tokens[l], lookup_tokens[m]], avg_list))

all_results.sort(key=lambda x: x[1])
all_results

[(['John', '_', 'travelled', 'to', 'London'], 0.97),
 (['John', '_', 'travelled', 'to', '.'], 0.97),
 (['John', '_', 'travelled', 'to', '</s>'], 0.97),
 (['John', '_', 'travelled', 'London', '.'], 0.97),
 (['John', '_', 'travelled', 'London', '</s>'], 0.97),
 (['John', '_', 'travelled', '.', '</s>'], 0.97),
 (['John', '_', 'to', 'London', '</s>'], 0.97),
 (['John', '_', 'to', '.', '</s>'], 0.97),
 (['John', '_', 'London', '.', '</s>'], 0.97),
 (['John', 'travelled', 'to', 'London', '.'], 0.97),
 (['John', 'travelled', 'to', 'London', '</s>'], 0.97),
 (['John', 'travelled', 'to', '.', '</s>'], 0.97),
 (['John', 'travelled', 'London', '.', '</s>'], 0.97),
 (['John', 'to', 'London', '.', '</s>'], 0.97),
 (['_', 'travelled', 'to', 'London', '.'], 0.97),
 (['_', 'travelled', 'to', 'London', '</s>'], 0.97),
 (['_', 'travelled', 'to', '.', '</s>'], 0.97),
 (['_', 'travelled', 'London', '.', '</s>'], 0.97),
 (['_', 'to', 'London', '.', '</s>'], 0.97),
 (['travelled', 'to', 'London', '.', '</s>

In [52]:
all_results = []

for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            for l in range(k+1, 7):
                for m in range(l+1, 7):
                    for n in range(m+1, 7):
                        my_list = correctly_guessed_six[i][j][k][l][m][n]
                        avg_list = sum(my_list)/len(my_list)
                        all_results.append(([lookup_tokens[i], lookup_tokens[j], lookup_tokens[k], lookup_tokens[l], lookup_tokens[m], lookup_tokens[n]], avg_list))

all_results.sort(key=lambda x: x[1])
all_results

[(['John', 'travelled', 'to', 'London', '.', '</s>'], 0.96),
 (['John', '_', 'travelled', 'to', 'London', '</s>'], 0.97),
 (['John', '_', 'travelled', 'to', '.', '</s>'], 0.97),
 (['John', '_', 'travelled', 'London', '.', '</s>'], 0.97),
 (['_', 'travelled', 'to', 'London', '.', '</s>'], 0.97),
 (['John', '_', 'to', 'London', '.', '</s>'], 0.98),
 (['John', '_', 'travelled', 'to', 'London', '.'], 0.99)]

In [38]:

all_results = []

for i in range(7):
    my_list = correctly_guessed[i]
    avg_list = sum(my_list)/len(my_list)
    all_results.append(([i], avg_list))

for i in range(7):
    for j in range(i+1, 7):
        my_list = correctly_guessed_two[i][j]
        avg_list = sum(my_list)/len(my_list)
        all_results.append(([i, j], avg_list))
        
for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            my_list = correctly_guessed_three[i][j][k]
            avg_list = sum(my_list)/len(my_list)
            all_results.append(([i, j, k], avg_list))

for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            for l in range(k+1, 7):
                my_list = correctly_guessed_four[i][j][k][l]
                avg_list = sum(my_list)/len(my_list)
                all_results.append(([i, j, k, l], avg_list))
                
for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            for l in range(k+1, 7):
                for m in range(l+1, 7):
                    my_list = correctly_guessed_five[i][j][k][l][m]
                    avg_list = sum(my_list)/len(my_list)
                    all_results.append(([i, j, k, l, m], avg_list))

for i in range(7):
    for j in range(i+1, 7):
        for k in range(j+1, 7):
            for l in range(k+1, 7):
                for m in range(l+1, 7):
                    for n in range(m+1, 7):
                        my_list = correctly_guessed_six[i][j][k][l][m][n]
                        avg_list = sum(my_list)/len(my_list)
                        all_results.append(([i, j, k, l, m, n], avg_list))
                        
avg_seven = sum(correctly_guessed_seven)/len(correctly_guessed_seven)
all_results.append(([0, 1, 2, 3, 4, 5, 6], avg_seven))

avg_zero = sum(correctly_guessed_zero)/len(correctly_guessed_zero)
all_results.append(([], avg_zero))

In [39]:
len(all_results)

128

In [40]:
all_results

[([0], 0.0),
 ([1], 0.0),
 ([2], 0.01),
 ([3], 0.01),
 ([4], 0.64),
 ([5], 0.63),
 ([6], 0.63),
 ([0, 1], 0.58),
 ([0, 2], 0.6),
 ([0, 3], 0.59),
 ([0, 4], 0.71),
 ([0, 5], 0.68),
 ([0, 6], 0.64),
 ([1, 2], 0.69),
 ([1, 3], 0.71),
 ([1, 4], 0.75),
 ([1, 5], 0.72),
 ([1, 6], 0.74),
 ([2, 3], 0.8),
 ([2, 4], 0.81),
 ([2, 5], 0.82),
 ([2, 6], 0.81),
 ([3, 4], 0.84),
 ([3, 5], 0.86),
 ([3, 6], 0.87),
 ([4, 5], 0.89),
 ([4, 6], 0.9),
 ([5, 6], 0.88),
 ([0, 1, 2], 0.86),
 ([0, 1, 3], 0.89),
 ([0, 1, 4], 0.87),
 ([0, 1, 5], 0.87),
 ([0, 1, 6], 0.85),
 ([0, 2, 3], 0.86),
 ([0, 2, 4], 0.89),
 ([0, 2, 5], 0.89),
 ([0, 2, 6], 0.92),
 ([0, 3, 4], 0.92),
 ([0, 3, 5], 0.92),
 ([0, 3, 6], 0.94),
 ([0, 4, 5], 0.94),
 ([0, 4, 6], 0.94),
 ([0, 5, 6], 0.94),
 ([1, 2, 3], 0.94),
 ([1, 2, 4], 0.94),
 ([1, 2, 5], 0.93),
 ([1, 2, 6], 0.93),
 ([1, 3, 4], 0.94),
 ([1, 3, 5], 0.95),
 ([1, 3, 6], 0.94),
 ([1, 4, 5], 0.93),
 ([1, 4, 6], 0.93),
 ([1, 5, 6], 0.94),
 ([2, 3, 4], 0.93),
 ([2, 3, 5], 0.94),
 ([2, 3, 6

In [41]:
with open("/kuacc/users/bozyurt20/hpc_run/Sherlock Holmes/Path Creation/Results/Case 2/all_results.txt", "wb") as f:
    pickle.dump(all_results, f)

In [42]:
all_results.sort(key=lambda x: x[1])

In [43]:
all_results

[([0], 0.0),
 ([1], 0.0),
 ([], 0.0),
 ([2], 0.01),
 ([3], 0.01),
 ([0, 1], 0.58),
 ([0, 3], 0.59),
 ([0, 2], 0.6),
 ([5], 0.63),
 ([6], 0.63),
 ([4], 0.64),
 ([0, 6], 0.64),
 ([0, 5], 0.68),
 ([1, 2], 0.69),
 ([0, 4], 0.71),
 ([1, 3], 0.71),
 ([1, 5], 0.72),
 ([1, 6], 0.74),
 ([1, 4], 0.75),
 ([2, 3], 0.8),
 ([2, 4], 0.81),
 ([2, 6], 0.81),
 ([2, 5], 0.82),
 ([3, 4], 0.84),
 ([0, 1, 6], 0.85),
 ([3, 5], 0.86),
 ([0, 1, 2], 0.86),
 ([0, 2, 3], 0.86),
 ([3, 6], 0.87),
 ([0, 1, 4], 0.87),
 ([0, 1, 5], 0.87),
 ([5, 6], 0.88),
 ([4, 5], 0.89),
 ([0, 1, 3], 0.89),
 ([0, 2, 4], 0.89),
 ([0, 2, 5], 0.89),
 ([4, 6], 0.9),
 ([0, 2, 6], 0.92),
 ([0, 3, 4], 0.92),
 ([0, 3, 5], 0.92),
 ([2, 4, 5], 0.92),
 ([1, 2, 5], 0.93),
 ([1, 2, 6], 0.93),
 ([1, 4, 5], 0.93),
 ([1, 4, 6], 0.93),
 ([2, 3, 4], 0.93),
 ([2, 3, 6], 0.93),
 ([2, 5, 6], 0.93),
 ([3, 5, 6], 0.93),
 ([4, 5, 6], 0.93),
 ([0, 1, 3, 5], 0.93),
 ([0, 3, 6], 0.94),
 ([0, 4, 5], 0.94),
 ([0, 4, 6], 0.94),
 ([0, 5, 6], 0.94),
 ([1, 2, 3], 0.

# To Be Deleted

In [65]:
class PathResult():
    def __init__(self, prompt, probability_correct, probability_wrong, max_prob):
        self.prompt = prompt
        self.probability_correct = probability_correct
        self.probability_wrong = probability_wrong
        self.max_prob = max_prob

In [73]:
context_previous = "John travelled to Oslo. Emma travelled to Sydney."
input_ids = tokenizer.encode(context_previous, return_tensors="pt").to(model.encoder.device)
len_input_ids = len(input_ids[0])
with torch.no_grad():
    out = model.encoder(input_ids, output_special=True)
    
special_hidden = out.special_hidden_states # 24 x (1, T, d)
special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
for i, hidden in enumerate(special_hidden):
    special_reformatted[i:i+1, :, :] = hidden

entities_hidden_states = special_reformatted[:, :-1, :]
entities_hidden_states = entities_hidden_states.permute(1, 0, 2) # T, 24, d

new code working-modeling_t5


In [74]:
entities_hidden_states.shape

torch.Size([12, 24, 4096])

In [66]:
context_current = " Lucas was 30 years old. "
question_1 = "Where did John travel to?"
question_2 = "Where did Emma travel to?"
answer_1 = "Oslo"
answer_2 = "Sydney"
results = [];
def my_pipeline(prompt, entity_inds, entity_hidden_states, answer_correct, answer_wrong):
    input_ids = tokenizer.encode(prompt, return_tensors="pt").cuda()
    out = model.generate(
            input_ids=input_ids,
            entity_hidden_states=entity_hidden_states,
            entity_inds=entity_inds,
            max_new_tokens=1,
            return_dict_in_generate=True,
            output_scores=True
        ) 
    next_token_scores = torch.nn.functional.softmax(
                        out.scores[0].float(), dim=-1
                    )  # (batch_size * num_beams, vocab_size)
    probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
    probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()
    max_prob = next_token_scores[0,next_token_scores.argmax().item()].item()
    return probability_correct, probability_wrong, max_prob

entity_inds = list(range(12))
prompt = context_previous + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = context_previous + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(11))
entities_hidden_states_new = entities_hidden_states[:11, :, :]
prompt = context_previous[:-1] + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = context_previous[:-1] + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))



new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [69]:
entity_inds = list(range(10))
entities_hidden_states_new = torch.cat((entities_hidden_states[:5, :, :], entities_hidden_states[6:11, :, :]), dim=0)
prompt = "John travelled to Oslo Emma travelled to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John travelled to Oslo Emma travelled to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [75]:
entity_inds = list(range(9))
entities_hidden_states_new = torch.cat((entities_hidden_states[:3, :, :], entities_hidden_states[4:5, :, :], entities_hidden_states[6:11, :, :]), dim=0)
prompt = "John travelled Oslo Emma travelled to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John travelled Oslo Emma travelled to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(8))
entities_hidden_states_new = torch.cat((entities_hidden_states[:3, :, :], 
                                    entities_hidden_states[4:5, :, :], 
                                    entities_hidden_states[6:9, :, :],
                                    entities_hidden_states[10:11, :, :]), dim=0)
prompt = "John travelled Oslo Emma travelled Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John travelled Oslo Emma travelled Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [77]:
entity_inds = list(range(6))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], 
                                        entities_hidden_states[4:5, :, :],
                                        entities_hidden_states[6:9, :, :],
                                        entities_hidden_states[10:11, :, :]), dim=0)
prompt = "John Oslo Emma travelled Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John Oslo Emma travelled Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(4))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], 
                                        entities_hidden_states[4:5, :, :],
                                        entities_hidden_states[6:7, :, :],
                                        entities_hidden_states[10:11, :, :]), dim=0)
prompt = "John Oslo Emma Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John Oslo Emma Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [80]:
entity_inds = list(range(8))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], entities_hidden_states[3:5, :, :], entities_hidden_states[6:11, :, :]), dim=0)
prompt = "John to Oslo Emma travelled to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John to Oslo Emma travelled to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(6))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], 
                                    entities_hidden_states[3:5, :, :], 
                                    entities_hidden_states[6:7, :, :],
                                    entities_hidden_states[9:11, :, :]), dim=0)
prompt = "John to Oslo Emma to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John to Oslo Emma to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [82]:
entity_inds = list(range(5))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], 
                                        entities_hidden_states[4:5, :, :], 
                                        entities_hidden_states[6:7, :, :],
                                        entities_hidden_states[9:11, :, :]), dim=0)
prompt = "John Oslo Emma to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John Oslo Emma to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [84]:
entity_inds = list(range(7))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], entities_hidden_states[4:5, :, :], entities_hidden_states[6:11, :, :]), dim=0)
prompt = "John Oslo Emma travelled to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John Oslo Emma travelled to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(7))
entities_hidden_states_new = torch.cat((entities_hidden_states[:5, :, :], 
                                    entities_hidden_states[6:7, :, :], 
                                    entities_hidden_states[10:11, :, :]), dim=0)
prompt = "John travelled to Oslo Emma Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John travelled to Oslo Emma Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(7))
entities_hidden_states_new = torch.cat((entities_hidden_states[:1, :, :], 
                                        entities_hidden_states[3:5, :, :], 
                                        entities_hidden_states[6:9, :, :],
                                        entities_hidden_states[10:11, :, :]), dim=0)
prompt = "John to Oslo Emma travelled Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John to Oslo Emma travelled Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

entity_inds = list(range(7))
entities_hidden_states_new = torch.cat((entities_hidden_states[:3, :, :], 
                                        entities_hidden_states[4:5, :, :], 
                                        entities_hidden_states[6:7, :, :],
                                        entities_hidden_states[9:11, :, :]), dim=0)
prompt = "John travelled Oslo Emma to Sydney" + context_current + question_1
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_1, answer_2)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

prompt = "John travelled Oslo Emma to Sydney" + context_current + question_2
probability_correct, probability_wrong, max_prob = my_pipeline(prompt, entity_inds, entities_hidden_states_new, answer_2, answer_1)
results.append(PathResult(prompt, probability_correct, probability_wrong, max_prob))

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [85]:
for result in results:
    print(result.prompt)
    print(result.probability_correct)
    print(result.probability_wrong)
    print(result.max_prob)
    print()

John travelled to Oslo. Emma travelled to Sydney. Lucas was 30 years old. Where did John travel to?
0.2510606646537781
0.16400770843029022
0.2510606646537781

John travelled to Oslo. Emma travelled to Sydney. Lucas was 30 years old. Where did Emma travel to?
0.4167127311229706
0.01614193432033062
0.4167127311229706

John travelled to Oslo. Emma travelled to Sydney Lucas was 30 years old. Where did John travel to?
0.3200170695781708
0.09942752867937088
0.3200170695781708

John travelled to Oslo. Emma travelled to Sydney Lucas was 30 years old. Where did Emma travel to?
0.3432735204696655
0.01956755854189396
0.3432735204696655

John travelled to Oslo Emma travelled to Sydney Lucas was 30 years old. Where did John travel to?
0.23133867979049683
0.10086718946695328
0.23133867979049683

John travelled to Oslo Emma travelled to Sydney Lucas was 30 years old. Where did Emma travel to?
0.29553642868995667
0.015663813799619675
0.29553642868995667

John travelled Oslo Emma travelled to Sydney Lu

In [61]:
context_current = " Lucas was 30 years old. "
question_1 = "Where did John travel to?"
question_2 = "Where did Emma travel to?"

prompt_2 = context_previous + context_current + question_2

input_ids = tokenizer.encode(prompt_2, return_tensors="pt")
entity_inds = list(range(12))
out = model.generate(
        input_ids=input_ids,
        entity_hidden_states=entities_hidden_states,
        entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
next_token_scores = torch.nn.functional.softmax(
                    out.scores[0].float(), dim=-1
                )  # (batch_size * num_beams, vocab_size)

answer_correct = "Sydney"
answer_wrong = "Oslo"
probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()

probability_correct, probability_wrong

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


(0.4167155921459198, 0.016142046079039574)

In [62]:
next_token_scores[0,next_token_scores.argmax().item()].item()

0.4167155921459198

In [49]:
context_current = " Lucas was 30 years old. "
question_1 = "Where did John travel to?"
question_2 = "Where did Emma travel to?"

prompt_1 = context_previous + context_current + question_1

input_ids = tokenizer.encode(prompt_1, return_tensors="pt")
entity_inds = list(range(12))
out = model.generate(
        input_ids=input_ids,
        #entity_hidden_states=entities_hidden_states,
        #entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
next_token_scores = torch.nn.functional.softmax(
                    out.scores[0].float(), dim=-1
                )  # (batch_size * num_beams, vocab_size)

answer_correct = "Oslo"
answer_wrong = "Sydney"
probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()

probability_correct, probability_wrong

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [51]:
context_current = " Lucas was 30 years old. "
question_1 = "Where did John travel to?"
question_2 = "Where did Emma travel to?"

prompt_1 = context_current + question_1

input_ids = tokenizer.encode(prompt_1, return_tensors="pt")
entity_inds = list(range(12))
out = model.generate(
        input_ids=input_ids,
        #entity_hidden_states=entities_hidden_states,
        #entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
next_token_scores = torch.nn.functional.softmax(
                    out.scores[0].float(), dim=-1
                )  # (batch_size * num_beams, vocab_size)

answer_correct = "Oslo"
answer_wrong = "Sydney"
probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()

probability_correct, probability_wrong

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


(0.00019699727999977767, 0.0060422383248806)

In [None]:
template = tm11
context_current = "Lucas was 30 years old."

context_previous = "John travelled to Oslo. Emma travelled to Sydney."
char_1 = "John"
char_2 = "Emma"
answer_1 = "Oslo"
answer_2 = "Sydney"

question_1 = "Where did John travel to?"
question_2 = "Where did Emma travel to?"

entities_list = ["John", " ", "travelled", "to", "Oslo", ".", "Emma", " ", "travelled", "to", "Sydney", "."]

entity_hidden_states = previous_timestep(context_previous,  entities_list)

to_prepend = " ".join(entities_list)

prompt_current = to_prepend + " " + context_current

probability_correct_1, probability_wrong_1, _ = current_timestep_regular(prompt_current, question_1, answer_1, answer_2, template)
probability_correct_2, probability_wrong_2, _ = current_timestep_regular(prompt_current, question_2, answer_2, answer_1, template)

probability_correct_enhanced_1, probability_wrong_enhanced_1, _ = current_timestep_enhanced(prompt_current, 
                                                                      question_1, 
                                                                      answer_1, 
                                                                      answer_2,
                                                                      template, 
                                                                      entities_list, 
                                                                      entity_hidden_states)

probability_correct_enhanced_2, probability_wrong_enhanced_2, _ = current_timestep_enhanced(prompt_current, 
                                                                      question_2, 
                                                                      answer_2,
                                                                      answer_1,
                                                                      template, 
                                                                      entities_list, 
                                                                      entity_hidden_states)

improvement_correct_1 = probability_correct_enhanced_1 - probability_correct_1
improvement_correct_2 = probability_correct_enhanced_2 - probability_correct_2
improvement_wrong_1 = probability_wrong_enhanced_1 - probability_wrong_1
improvement_wrong_2 = probability_wrong_enhanced_2 - probability_wrong_2
improvements_correct_1.append( improvement_correct_1 )
improvements_correct_2.append( improvement_correct_2 )
improvements_wrong_1.append( improvement_wrong_1 )
improvements_wrong_2.append( improvement_wrong_2 )

#NOT RUN

if probability_correct_1 > probability_wrong_1:
    correct_guesses.append(1)
else:
    correct_guesses.append(0)
if probability_correct_enhanced_1 > probability_wrong_enhanced_1:
    correct_guesses.append(1)
else:
    correct_guesses.append(0) 

# Open-Ended Generation - Two chars two locs

In [3]:
def find_index_one(input_ids, entity_str, index):
    
    entity_id = tokenizer.encode(entity_str)
    
    if len(entity_id) != 2:
        print("Not an appropriate entity!")
        return
    
    entity_id = entity_id[0]
    
    input_ids_list = input_ids.tolist()

    all_entity_mention_indices = []
    for i, j in enumerate(input_ids_list[0]):
        if j == entity_id:
            all_entity_mention_indices.append(i)
    try:
        entity_ind = all_entity_mention_indices[index]
        return entity_ind
    except:
        print("entity not found in the input!")
        return

In [4]:
def previous_timestep(context, entities):
    
    input_ids = tokenizer.encode(context, return_tensors="pt")
    len_input_ids = len(input_ids[0])
    out = model.encoder(input_ids, output_special=True, output_hidden_states=True)
    special_hidden = out.special_hidden_states # 24 x (1, T, d)

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden
    
    entity_ind = find_index_one(input_ids, entities[0], 0)
    entities_hidden_states = special_reformatted[:, entity_ind, :].unsqueeze(0)

    for entity in entities[1:]:
        entity_ind = find_index_one(input_ids, entity, 0)
        entity_hidden_states = special_reformatted[:, entity_ind, :].unsqueeze(0)
        entities_hidden_states = torch.cat((entities_hidden_states,
                                       entity_hidden_states), dim=0)
    
    return entities_hidden_states

def current_timestep_regular(context, question, answer_correct, answer_wrong, template):
    
    prompt = template.render(context=context, question=question)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    out = model.generate(input_ids, max_new_tokens=1, return_dict_in_generate=True, output_scores=True)
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0].float(), dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
    probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability_correct, probability_wrong, scores

def current_timestep_enhanced(context, question, answer_correct, answer_wrong, template, entities, entity_hidden_states):
    
    prompt = template.render(context=context, question=question)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    entity_inds = []
    for entity in entities:
        entity_inds.append(find_index_one(input_ids, entity, 0))
        
    out = model.generate(
        input_ids=input_ids,
        entity_hidden_states=entity_hidden_states,
        entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0].float(), dim=-1
                )  # (batch_size * num_beams, vocab_size)
    
    probability_correct = next_token_scores[0][tokenizer.encode(answer_correct)[0]].item()
    probability_wrong = next_token_scores[0][tokenizer.encode(answer_wrong)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability_correct, probability_wrong, scores


In [17]:
len(data_points)

625

In [20]:
template = tm11
context_current = "Lucas was 30 years old."
improvements_correct_1 = []
improvements_correct_2 = []
improvements_wrong_1 = []
improvements_wrong_2 = []
for data_point in data_points:
    
    char_1 = data_point.name_1
    char_2 = data_point.name_2
    answer_1 = data_point.city_1
    answer_2 = data_point.city_2
    context_previous = data_point.context
    
    question_1 = "Where did " + char_1 + " travel to?"
    question_2 = "Where did " + char_2 + " travel to?"
    
    entities_list = [char_1, char_2, answer_1, answer_2]
    
    entity_hidden_states = previous_timestep(context_previous,  entities_list)
    
    to_prepend = " ".join(entities_list)
    
    prompt_current = to_prepend + " " + context_current
    
    probability_correct_1, probability_wrong_1, _ = current_timestep_regular(prompt_current, question_1, answer_1, answer_2, template)
    probability_correct_2, probability_wrong_2, _ = current_timestep_regular(prompt_current, question_2, answer_2, answer_1, template)
    
    probability_correct_enhanced_1, probability_wrong_enhanced_1, _ = current_timestep_enhanced(prompt_current, 
                                                                          question_1, 
                                                                          answer_1, 
                                                                          answer_2,
                                                                          template, 
                                                                          entities_list, 
                                                                          entity_hidden_states)
    
    probability_correct_enhanced_2, probability_wrong_enhanced_2, _ = current_timestep_enhanced(prompt_current, 
                                                                          question_2, 
                                                                          answer_2,
                                                                          answer_1,
                                                                          template, 
                                                                          entities_list, 
                                                                          entity_hidden_states)
    
    improvement_correct_1 = probability_correct_enhanced_1 - probability_correct_1
    improvement_correct_2 = probability_correct_enhanced_2 - probability_correct_2
    improvement_wrong_1 = probability_wrong_enhanced_1 - probability_wrong_1
    improvement_wrong_2 = probability_wrong_enhanced_2 - probability_wrong_2
    improvements_correct_1.append( improvement_correct_1 )
    improvements_correct_2.append( improvement_correct_2 )
    improvements_wrong_1.append( improvement_wrong_1 )
    improvements_wrong_2.append( improvement_wrong_2 )
    
    #NOT RUN
    
    if probability_correct_1 > probability_wrong_1:
        correct_guesses.append(1)
    else:
        correct_guesses.append(0)
    if probability_correct_enhanced_1 > probability_wrong_enhanced_1:
        correct_guesses.append(1)
    else:
        correct_guesses.append(0) 

new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
new code working-modeling_t5
new code working-utils
new code working-m

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/predictions/holmes_two_char_two_locs.txt", "wb") as f:
    pickle.dump(results_dict, f)

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/predictions/holmes_two_char_two_locs.txt", "rb") as f:
    results_dict = pickle.load(f)

# Templates

In [5]:
# Multiple Choice

tm1 = Template("""Read the following context and choose the best option to answer the question.
Context: {{ context }}
Question: {{ question }}
Options:
 - {{ answer_choices | join("\n - ") }}""")

tm2 = Template("""{{ context }}
{{ question }}
- {{ answer_choices | join("\n- ") }}""")

tm3 = Template("""{{ context }}
{{ question }}
Pick the correct answer from the following options:
- {{ answer_choices | join("\n- ") }}""")

tm4 = Template("""{{ context }}
According to the above context, choose the best option to answer the following question.
Question: {{ question }}
Options:
- {{answer_choices | join("\n - ")}}
""")

tm5 = Template("""{{ context }}
{{ question }}
Pick the best answer from the following options:
A. {{ answer0 }}
B. {{ answer1 }}
C. {{ answer2 }}
D. {{ answer3 }}""")

tm6 = Template("""{{ context }}
According to the above context, choose the best option to answer the following question.
Question: {{ question }}
Options:
A. {{ answer0 }}
B. {{ answer1 }}
C. {{ answer2 }}
D. {{ answer3 }}""")

tm7 = Template("""{{ context }}
{{ question }}
A. {{ answer0 }}
B. {{ answer1 }}
C. {{ answer2 }}
D. {{ answer3 }}""")

# Open-Ended

tm8 = Template("""Question: "{{question}}"
Context: "{{context}}"
Answer:""")

tm9 = Template("""{{ context }}
Given the paragraph above, please answer correctly the following
question:
{{ question }}""")

tm10 = Template("""Given the following passage
"{{context}}",
answer the following question. Note that the answer is present within
the text.
Question: {{question}}""")

tm11 = Template("{{context}} What is the answer to: {{question}}")

In [15]:
import sys
sys.path.append('/kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/models/')
cp modeling_t5.py /kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/models/t5/
cp utils.py /kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/generation/
cp modeling_outputs.py /kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/

SyntaxError: invalid syntax (676971914.py, line 3)