# Necessary Imports and Settings

In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import transformers
import torch
import os
import nltk
import pandas as pd
import torch
import numpy as np
from jinja2 import Template
import xmltodict
import pickle
from collections import defaultdict
from fuzzywuzzy import fuzz
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

import sys
sys.path.append('/scratch/users/bozyurt20/hpc_run/utilities')
sys.path.append("/scratch/users/bozyurt20/hpc_run/blobs/")
from util_research import *

#from toy_dataset import contexts

max_len = 512
num_layers = 24
d_model = 4096

In [2]:
tokenizer = AutoTokenizer.from_pretrained("bigscience/T0pp", truncation_side="right", add_prefix_space=True)

In [3]:
model = AutoModelForSeq2SeqLM.from_pretrained("bigscience/T0pp")#, device_map="auto", load_in_8bit=True)

In [21]:
def find_index_one(input_ids, entity_str, index):
    
    entity_id = tokenizer.encode(entity_str)
    
    if len(entity_id) != 2:
        print("Not an appropriate entity!")
        return
    
    entity_id = entity_id[0]
    
    input_ids_list = input_ids.tolist()

    all_entity_mention_indices = []
    for i, j in enumerate(input_ids_list[0]):
        if j == entity_id:
            all_entity_mention_indices.append(i)
    try:
        entity_ind = all_entity_mention_indices[index]
        return entity_ind
    except:
        print("entity not found in the input!")
        return

# Open-Ended Generation

In [None]:
def find_index_one(input_ids, entity_str, index):
    
    entity_id = tokenizer.encode(entity_str)
    
    if len(entity_id) != 2:
        print("Not an appropriate entity!")
        return
    
    entity_id = entity_id[0]
    
    input_ids_list = input_ids.tolist()

    all_entity_mention_indices = []
    for i, j in enumerate(input_ids_list[0]):
        if j == entity_id:
            all_entity_mention_indices.append(i)
    try:
        entity_ind = all_entity_mention_indices[index]
        return entity_ind
    except:
        print("entity not found in the input!")
        return

In [None]:
def current_timestep_regular(context, question, answer, template):
    
    prompt = template.render(context=context, question=question)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    out = model.generate(input_ids, max_new_tokens=1, return_dict_in_generate=True, output_scores=True)
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores

def previous_timestep(context, entity):
    
    input_ids = tokenizer.encode(context, return_tensors="pt")
    len_input_ids = len(input_ids[0])
    out = model.encoder(input_ids, output_special=True, output_hidden_states=True)
    special_hidden = out.special_hidden_states # 24 x (1, T, d)

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden
    
    entity_ind = find_index_one(input_ids, entity, 0)
    print("entity_ind:" , entity_ind)
    entity_hidden_state = special_reformatted[:, entity_ind, :].unsqueeze(0)
    
    return entity_hidden_state

def current_timestep_enhanced(context, question, answer, template, entity, entity_hidden_state):
    prompt = template.render(context=context, question=question)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    entity_inds = [ find_index_one(input_ids, entity, 1) ]
    out = model.generate(
        input_ids=input_ids,
        entity_hidden_states=entity_hidden_state,
        entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores


In [None]:
template = tm8
improvements = []
for context_previous in sentences:
    
    char = context_previous.split()[0]
    answer = context_previous.split()[-1][:-1]
    question = "Where was " + char + "?"
    entity_hidden_state = previous_timestep(context_previous, char)
    print(char)
    print(answer)
    print(question)
    
    context_current = "Lucas was 30 years old."
    probability, scores = current_timestep_regular(context_current, question, answer, template)
    
    context_enhanced = char_previous + " " + context_current
    print(context_enhanced)
    probability_enhanced, scores_enhanced = current_timestep_enhanced(context_enhanced, 
                                                                      question, 
                                                                      answer, 
                                                                      template, 
                                                                      char, 
                                                                      entity_hidden_state)
    
    improvement = probability_enhanced-probability
    print(improvement)
    improvements.append( improvement )

In [None]:
sum(improvements)/len(improvements)

# Open-Ended Generation - Where did he travel to? - Character

In [22]:
def current_timestep_regular(context, question, answer, template):
    
    prompt = template.render(context=context, question=question)
    print(prompt)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    out = model.generate(input_ids, max_new_tokens=1, return_dict_in_generate=True, output_scores=True)
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores

def previous_timestep(context, entity):
    
    input_ids = tokenizer.encode(context, return_tensors="pt")
    len_input_ids = len(input_ids[0])
    out = model.encoder(input_ids, output_special=True, output_hidden_states=True)
    special_hidden = out.special_hidden_states # 24 x (1, T, d)

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden
    
    entity_ind = find_index_one(input_ids, entity, 0)
    print("entity_ind:" , entity_ind)
    entity_hidden_state = special_reformatted[:, entity_ind, :].unsqueeze(0)
    
    return entity_hidden_state

def current_timestep_enhanced(context, question, answer, template, entity, entity_hidden_state):
    prompt = template.render(context=context, question=question)
    print(prompt)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    entity_inds = [ find_index_one(input_ids, entity, 1) ]
    out = model.generate(
        input_ids=input_ids,
        entity_hidden_states=entity_hidden_state,
        entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores


In [None]:
template = tm8
improvements = []
for context_previous in sentences:
    
    char = context_previous.split()[0]
    answer = context_previous.split()[-1][:-1]
    question = "Where did " + char + " travel to?"
    entity_hidden_state = previous_timestep(context_previous, char)
    print(char)
    print(answer)
    print(question)
    
    context_current = "Lucas was 30 years old."
    probability, scores = current_timestep_regular(context_current, question, answer, template)
    
    context_enhanced = char + " " + context_current
    print(context_enhanced)
    probability_enhanced, scores_enhanced = current_timestep_enhanced(context_enhanced, 
                                                                      question, 
                                                                      answer, 
                                                                      template, 
                                                                      char, 
                                                                      entity_hidden_state)
    
    improvement = probability_enhanced-probability
    print(improvement)
    improvements.append( improvement )

new code working-modeling_t5
entity_ind: 0
John
London
Where did John travel to?
Question: "Where did John travel to?"
Context: "Lucas was 30 years old."
Answer:
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
John Lucas was 30 years old.
Question: "Where did John travel to?"
Context: "John Lucas was 30 years old."
Answer:
entity not found in the input!
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
-0.013598007551081537
new code working-modeling_t5
entity_ind: 0
John
Paris
Where did John travel to?
Question: "Where did John travel to?"
Context: "Lucas was 30 years old."
Answer:
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
John Lucas was 30 years old.
Question: "Where did John travel to?"
Context: "John Lucas was 30 years old."
Answer:
entity not found in the input!
new code working-utils
new code work

In [None]:
results_dict = {}

In [None]:
results_dict["chars"] = improvements

In [132]:
sum(improvements)/len(improvements)

-5.305687400116832e-05

# Open-Ended Generation - Where did he travel to? - "travelled"

In [None]:
def find_index_one(input_ids, entity_str, index):
    
    entity_id = tokenizer.encode(entity_str)
    
    if len(entity_id) != 3:
        print("Not an appropriate entity!")
        return
    
    entity_id = entity_id[1]
    
    input_ids_list = input_ids.tolist()

    all_entity_mention_indices = []
    for i, j in enumerate(input_ids_list[0]):
        if j == entity_id:
            all_entity_mention_indices.append(i)
    try:
        entity_ind = all_entity_mention_indices[index]
        return entity_ind
    except:
        print("entity not found in the input!")
        return

In [None]:
def current_timestep_regular(context, question, answer, template):
    
    prompt = template.render(context=context, question=question)
    print(prompt)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    out = model.generate(input_ids, max_new_tokens=1, return_dict_in_generate=True, output_scores=True)
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores

def previous_timestep(context, entity):
    
    input_ids = tokenizer.encode(context, return_tensors="pt")
    len_input_ids = len(input_ids[0])
    out = model.encoder(input_ids, output_special=True, output_hidden_states=True)
    special_hidden = out.special_hidden_states # 24 x (1, T, d)

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden
    
    entity_ind = find_index_one(input_ids, entity, 0)
    print("entity_ind:" , entity_ind)
    entity_hidden_state = special_reformatted[:, entity_ind, :].unsqueeze(0)
    
    return entity_hidden_state

def current_timestep_enhanced(context, question, answer, template, entity, entity_hidden_state):
    prompt = template.render(context=context, question=question)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    entity_inds = [ find_index_one(input_ids, entity, 0) ]
    out = model.generate(
        input_ids=input_ids,
        entity_hidden_states=entity_hidden_state,
        entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores


In [None]:
template = tm8
improvements = []
for context_previous in sentences:
    
    char = context_previous.split()[0]
    answer = context_previous.split()[-1][:-1]
    question = "Where did " + char + " travel to?"
    entity_hidden_state = previous_timestep(context_previous, "travelled")
    print(char)
    print(answer)
    print(question)
    
    context_current = "Lucas was 30 years old."
    probability, scores = current_timestep_regular(context_current, question, answer, template)
    
    context_enhanced = " travelled " + context_current
    print(context_enhanced)
    probability_enhanced, scores_enhanced = current_timestep_enhanced(context_enhanced, 
                                                                      question, 
                                                                      answer, 
                                                                      template, 
                                                                      "travelled", 
                                                                      entity_hidden_state)
    
    improvement = probability_enhanced-probability
    print(improvement)
    improvements.append( improvement )

In [131]:
sum(improvements)/len(improvements)

-5.305687400116832e-05

In [None]:
results_dict["travelled"] = improvements

# Open-Ended Generation - Where did he travel to? - "to"

In [None]:
def find_index_one(input_ids, entity_str, index):
    
    entity_id = tokenizer.encode(entity_str)
    
    if len(entity_id) != 2:
        print("Not an appropriate entity!")
        return
    
    entity_id = entity_id[0]
    
    input_ids_list = input_ids.tolist()

    all_entity_mention_indices = []
    for i, j in enumerate(input_ids_list[0]):
        if j == entity_id:
            all_entity_mention_indices.append(i)
    try:
        entity_ind = all_entity_mention_indices[index]
        return entity_ind
    except:
        print("entity not found in the input!")
        return

In [None]:
def current_timestep_regular(context, question, answer, template):
    
    prompt = template.render(context=context, question=question)
    print(prompt)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    out = model.generate(input_ids, max_new_tokens=1, return_dict_in_generate=True, output_scores=True)
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores

def previous_timestep(context, entity):
    
    input_ids = tokenizer.encode(context, return_tensors="pt")
    len_input_ids = len(input_ids[0])
    out = model.encoder(input_ids, output_special=True, output_hidden_states=True)
    special_hidden = out.special_hidden_states # 24 x (1, T, d)

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden
    
    entity_ind = find_index_one(input_ids, entity, 0)
    print("entity_ind:" , entity_ind)
    entity_hidden_state = special_reformatted[:, entity_ind, :].unsqueeze(0)
    
    return entity_hidden_state

def current_timestep_enhanced(context, question, answer, template, entity, entity_hidden_state):
    prompt = template.render(context=context, question=question)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    entity_inds = [ find_index_one(input_ids, entity, 1) ]
    out = model.generate(
        input_ids=input_ids,
        entity_hidden_states=entity_hidden_state,
        entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores


In [None]:
template = tm8
improvements = []
for context_previous in sentences:
    
    char = context_previous.split()[0]
    answer = context_previous.split()[-1][:-1]
    question = "Where did " + char + " travel to?"
    entity_hidden_state = previous_timestep(context_previous, "to")
    print(char)
    print(answer)
    print(question)
    
    context_current = "Lucas was 30 years old."
    probability, scores = current_timestep_regular(context_current, question, answer, template)
    
    context_enhanced = " to " + context_current
    print(context_enhanced)
    probability_enhanced, scores_enhanced = current_timestep_enhanced(context_enhanced, 
                                                                      question, 
                                                                      answer, 
                                                                      template, 
                                                                      "to", 
                                                                      entity_hidden_state)
    
    improvement = probability_enhanced-probability
    print(improvement)
    improvements.append( improvement )

In [None]:
results_dict["to"] = improvements

# Open-Ended Generation - Where did he travel to? - cities

In [None]:
def find_index_one(input_ids, entity_str, index):
    
    entity_id = tokenizer.encode(entity_str)
    
    if len(entity_id) != 2:
        print("Not an appropriate entity!")
        return
    
    entity_id = entity_id[0]
    
    input_ids_list = input_ids.tolist()

    all_entity_mention_indices = []
    for i, j in enumerate(input_ids_list[0]):
        if j == entity_id:
            all_entity_mention_indices.append(i)
    try:
        entity_ind = all_entity_mention_indices[index]
        return entity_ind
    except:
        print("entity not found in the input!")
        return

In [None]:
def current_timestep_regular(context, question, answer, template):
    
    prompt = template.render(context=context, question=question)
    print(prompt)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    out = model.generate(input_ids, max_new_tokens=1, return_dict_in_generate=True, output_scores=True)
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores

def previous_timestep(context, entity):
    
    input_ids = tokenizer.encode(context, return_tensors="pt")
    len_input_ids = len(input_ids[0])
    out = model.encoder(input_ids, output_special=True, output_hidden_states=True)
    special_hidden = out.special_hidden_states # 24 x (1, T, d)

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden
    
    entity_ind = find_index_one(input_ids, entity, 0)
    print("entity_ind:" , entity_ind)
    entity_hidden_state = special_reformatted[:, entity_ind, :].unsqueeze(0)
    
    return entity_hidden_state

def current_timestep_enhanced(context, question, answer, template, entity, entity_hidden_state):
    prompt = template.render(context=context, question=question)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    entity_inds = [ find_index_one(input_ids, entity, 0) ]
    out = model.generate(
        input_ids=input_ids,
        entity_hidden_states=entity_hidden_state,
        entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores


In [None]:
template = tm8
improvements = []
for context_previous in sentences:
    
    char = context_previous.split()[0]
    answer = context_previous.split()[-1][:-1]
    question = "Where did " + char + " travel to?"
    entity_hidden_state = previous_timestep(context_previous, answer)
    print(char)
    print(answer)
    print(question)
    
    context_current = "Lucas was 30 years old."
    probability, scores = current_timestep_regular(context_current, question, answer, template)
    
    context_enhanced = " to " + context_current
    print(context_enhanced)
    probability_enhanced, scores_enhanced = current_timestep_enhanced(context_enhanced, 
                                                                      question, 
                                                                      answer, 
                                                                      template, 
                                                                      answer, 
                                                                      entity_hidden_state)
    
    improvement = probability_enhanced-probability
    print(improvement)
    improvements.append( improvement )

In [None]:
results_dict["cities"] = improvements

In [None]:
with open("/kuacc/users/bozyurt20/hpc_run/predictions/holmes_results.txt", "wb") as f:
    pickle.dump(results_dict, f)

In [41]:
results_dict.keys()

dict_keys(['chars', 'travelled', 'to', 'cities'])

In [3]:
with open("/kuacc/users/bozyurt20/hpc_run/predictions/holmes_results.txt", "rb") as f:
    results_dict = pickle.load( f)

In [4]:
for key in results_dict:
    value = results_dict[key]
    print(key, sum(value) / len(value))

chars -0.008961449623581218
travelled -0.005377411903955362
to -0.0040957831087871455
cities 0.9195870674045545


# Multiple Choice

In [None]:
def current_timestep_regular(context, question, answer, template):
    
    prompt = template.render(context=context, question=question)
    print(prompt)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    out = model.generate(input_ids, max_new_tokens=1, return_dict_in_generate=True, output_scores=True)
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores

def previous_timestep(context, entity):
    
    input_ids = tokenizer.encode(context, return_tensors="pt")
    len_input_ids = len(input_ids[0])
    out = model.encoder(input_ids, output_special=True, output_hidden_states=True)
    special_hidden = out.special_hidden_states # 24 x (1, T, d)

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden
    
    entity_ind = find_index_one(input_ids, entity, 0)
    print("entity_ind:" , entity_ind)
    entity_hidden_state = special_reformatted[:, entity_ind, :].unsqueeze(0)
    
    return entity_hidden_state

def current_timestep_enhanced(context, question, answer, template, entity, entity_hidden_state):
    prompt = template.render(context=context, question=question)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    entity_inds = [ find_index_one(input_ids, entity, 0) ]
    out = model.generate(
        input_ids=input_ids,
        entity_hidden_states=entity_hidden_state,
        entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    probability = next_token_scores[0][tokenizer.encode(answer)[0]].item()

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]

    return probability, scores


In [108]:
def current_timestep_regular(context, question, answer_choices, answer, template):
    
    prompt = template.render(context=context, question=question, answer_choices=answer_choices)
    prompt = prompt.replace('"', '" ')
    print(prompt)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    out = model.generate(input_ids, max_new_tokens=1, return_dict_in_generate=True, output_scores=True)
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    loc_tokens = []
    for location in answer_choices:
        loc_tokens.append(tokenizer.encode(location)[0])

    probabilities = {}
    for loc_token in loc_tokens:
        probabilities[tokenizer.decode(loc_token)] = next_token_scores[0][loc_token].item()

    the_sum = sum(probabilities.values())
    for key, value in probabilities.items():
        probabilities[key] = value/the_sum

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]
    
    answer_probability = probabilities[answer]

    return probabilities, scores, answer_probability

def previous_timestep(context, entity):
    
    input_ids = tokenizer.encode(context, return_tensors="pt")
    len_input_ids = len(input_ids[0])
    out = model.encoder(input_ids, output_special=True, output_hidden_states=True)
    special_hidden = out.special_hidden_states # 24 x (1, T, d)

    special_reformatted = torch.zeros(num_layers, len_input_ids, d_model) # (24, T, d)
    for i, hidden in enumerate(special_hidden):
        special_reformatted[i:i+1, :, :] = hidden
    
    entity_ind = find_index_one(input_ids, entity, 0)
    #print("entity_ind:" , entity_ind)
    entity_hidden_state = special_reformatted[:, entity_ind, :].unsqueeze(0)
    
    return entity_hidden_state

def current_timestep_enhanced(context, question, answer_choices, answer, template, entity, entity_hidden_state):
    prompt = template.render(context=context, question=question, answer_choices=answer_choices)
    prompt = prompt.replace('"', '" ')
    print(prompt)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    entity_inds = [ find_index_one(input_ids, entity, 0) ]
    out = model.generate(
        input_ids=input_ids,
        entity_hidden_states=entity_hidden_state,
        entity_inds=entity_inds,
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True
    ) 
    next_token_scores = torch.nn.functional.softmax(
                    out.scores[0], dim=-1
                )  # (batch_size * num_beams, vocab_size)

    loc_tokens = []
    for location in answer_choices:
        loc_tokens.append(tokenizer.encode(location)[0])

    probabilities = {}
    for loc_token in loc_tokens:
        probabilities[tokenizer.decode(loc_token)] = next_token_scores[0][loc_token].item()

    the_sum = sum(probabilities.values())
    for key, value in probabilities.items():
        probabilities[key] = value/the_sum

    scores = []
    for i, score in enumerate(next_token_scores[0]):
        scores.append( (i, score.item()) )

    scores.sort(key=lambda x: x[1], reverse=True)
    scores = [(tokenizer.decode(a), b) for a, b in scores]
    
    answer_probability = probabilities[answer]

    return probabilities, scores, answer_probability

In [109]:
template = tm1
mc_improvements = []
for context_previous in sentences:
    
    char = context_previous.split()[0]
    answer = context_previous.split()[-1][:-1]
    wrong_answer = random.choice(cities)
    while wrong_answer == answer:
        wrong_answer = random.choice(cities)
    
    answer_choices = [wrong_answer, answer]
    
    question = "Where was " + char + "?"
    
    entity_hidden_state = previous_timestep(context_previous, char)
    print(char)
    print(answer)
    print(question)
    
    context_current = "Lucas was 30 years old."
    probabilities, scores, answer_probability = current_timestep_regular(context_current, question, answer_choices, answer, template)
    
    context_enhanced = char_previous + " " + context_current
    print(context_enhanced)
    probabilities_enhanced, scores_enhanced, answer_probability_enhanced = current_timestep_enhanced(context_enhanced, 
                                                                      question, 
                                                                      answer_choices,
                                                                      answer, 
                                                                      template, 
                                                                      char, 
                                                                      entity_hidden_state)
    
    improvement = answer_probability_enhanced - answer_probability
    print(improvement)
    mc_improvements.append( improvement )

new code working-modeling_t5
John
London
Where was John?
Read the following context and choose the best option to answer the question.
Context: Lucas was 30 years old.
Question: Where was John?
Options:
 - Oslo
 - London
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
John Lucas was 30 years old.
Read the following context and choose the best option to answer the question.
Context: John Lucas was 30 years old.
Question: Where was John?
Options:
 - Oslo
 - London
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
-0.05038603603111219
new code working-modeling_t5
John
Paris
Where was John?
Read the following context and choose the best option to answer the question.
Context: Lucas was 30 years old.
Question: Where was John?
Options:
 - Oslo
 - Paris
new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5
John Lucas was

In [110]:
sum(mc_improvements)/len(mc_improvements)

-0.04816384322775649

In [77]:
template = tm1

In [78]:
context_previous = random.choice(sentences)
print(context_previous)
char_previous = context_previous.split()[0]
answer_previous = context_previous.split()[-1][:-1]
question_previous = "Where was " + char_previous + "?"
print(question_previous)
print(answer_previous)
print(char_previous)

John travelled to Oslo.
Where was John?
Oslo
John


In [80]:
context = random.choice(sentences)
print(context)
char = context.split()[0]
answer = context.split()[-1][:-1]
question = "Where was " + char_previous + "?"
print(question)
print(answer_previous)
print(char)

Emma travelled to London.
Where was John?
Oslo
Emma


In [79]:
entity_hidden_state = previous_timestep(context_previous, char_previous)

new code working-modeling_t5
entity_ind: 0


In [81]:
probability, scores = current_timestep_regular(context, question, answer_previous, template)

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [82]:
context = char_previous + " " + context
print(context)

John Emma travelled to London.


In [89]:
probability_enhanced, scores_enhanced = current_timestep_enhanced(context, question, answer_previous, template, char_previous, entity_hidden_state)

new code working-utils
new code working-modeling_t5
llama generation happening.
new code working-modeling_t5


In [90]:
probability

3.429386924835853e-05

In [91]:
probability_enhanced

1.2664374480664264e-05

In [87]:
context

'John Emma travelled to London.'

In [88]:
answer

'London'

# Templates

In [5]:
# Multiple Choice

tm1 = Template("""Read the following context and choose the best option to answer the question.
Context: {{ context }}
Question: {{ question }}
Options:
 - {{ answer_choices | join("\n - ") }}""")

tm2 = Template("""{{ context }}
{{ question }}
- {{ answer_choices | join("\n- ") }}""")

tm3 = Template("""{{ context }}
{{ question }}
Pick the correct answer from the following options:
- {{ answer_choices | join("\n- ") }}""")

tm4 = Template("""{{ context }}
According to the above context, choose the best option to answer the following question.
Question: {{ question }}
Options:
- {{answer_choices | join("\n - ")}}
""")

tm5 = Template("""{{ context }}
{{ question }}
Pick the best answer from the following options:
A. {{ answer0 }}
B. {{ answer1 }}
C. {{ answer2 }}
D. {{ answer3 }}""")

tm6 = Template("""{{ context }}
According to the above context, choose the best option to answer the following question.
Question: {{ question }}
Options:
A. {{ answer0 }}
B. {{ answer1 }}
C. {{ answer2 }}
D. {{ answer3 }}""")

tm7 = Template("""{{ context }}
{{ question }}
A. {{ answer0 }}
B. {{ answer1 }}
C. {{ answer2 }}
D. {{ answer3 }}""")

# Open-Ended

tm8 = Template("""Question: "{{question}}"
Context: "{{context}}"
Answer:""")

tm9 = Template("""{{ context }}
Given the paragraph above, please answer correctly the following
question:
{{ question }}""")

tm10 = Template("""Given the following passage
"{{context}}",
answer the following question. Note that the answer is present within
the text.
Question: {{question}}""")

# Toy Dataset

In [6]:
previous_contexts = [
    "John went to London. Mary went to Paris. John loves Mary.",
    
]

current_contexts = [
    "David went to Beijing. Henry went to Sydney.",
    
]

questions = [
    "Where did John go?",
]

In [16]:
names = ["John", "Harry", "Andrew", "Lisa", "Mary", "Henry", "David", "Sophia", "Olivia", "Emma"]
for name in names:
    print(tokenizer.encode(" " + name))
len(names)

[1079, 1]
[8929, 1]
[5954, 1]
[11712, 1]
[3790, 1]
[7780, 1]
[1955, 1]
[30174, 1]
[25051, 1]
[15325, 1]


10

In [8]:
cities = ["London", "Paris", "Oslo", "Istanbul", "Beijing", "Sydney", "Cairo", "Seoul", "Rome", "Prague"]

In [18]:
for name in cities:
    print(tokenizer.encode(name+"."))
len(cities)

[1524, 5, 1]
[1919, 5, 1]
[29206, 5, 1]
[20958, 5, 1]
[14465, 5, 1]
[7476, 5, 1]
[28600, 5, 1]
[28343, 5, 1]
[7332, 5, 1]
[23564, 5, 1]


10

In [19]:
for name in cities:
    print(tokenizer.encode(name))
len(cities)

[1524, 1]
[1919, 1]
[29206, 1]
[20958, 1]
[14465, 1]
[7476, 1]
[28600, 1]
[28343, 1]
[7332, 1]
[23564, 1]


10

In [10]:
sentences = []
for name in names:
    for city in cities:
        sentences.append(name + " travelled to " + city + ".")
    

print(sentences)

['John travelled to London.', 'John travelled to Paris.', 'John travelled to Oslo.', 'John travelled to Istanbul.', 'John travelled to Beijing.', 'John travelled to Sydney.', 'John travelled to Cairo.', 'John travelled to Seoul.', 'John travelled to Rome.', 'John travelled to Prague.', 'Harry travelled to London.', 'Harry travelled to Paris.', 'Harry travelled to Oslo.', 'Harry travelled to Istanbul.', 'Harry travelled to Beijing.', 'Harry travelled to Sydney.', 'Harry travelled to Cairo.', 'Harry travelled to Seoul.', 'Harry travelled to Rome.', 'Harry travelled to Prague.', 'Andrew travelled to London.', 'Andrew travelled to Paris.', 'Andrew travelled to Oslo.', 'Andrew travelled to Istanbul.', 'Andrew travelled to Beijing.', 'Andrew travelled to Sydney.', 'Andrew travelled to Cairo.', 'Andrew travelled to Seoul.', 'Andrew travelled to Rome.', 'Andrew travelled to Prague.', 'Lisa travelled to London.', 'Lisa travelled to Paris.', 'Lisa travelled to Oslo.', 'Lisa travelled to Istanbul

In [15]:
tokenizer.encode("travel")

[1111, 1]

In [14]:
tokenizer.encode("to?")

[12, 58, 1]

In [11]:
import random


In [15]:
import sys
sys.path.append('/kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/models/')
cp modeling_t5.py /kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/models/t5/
cp utils.py /kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/generation/
cp modeling_outputs.py /kuacc/users/bozyurt20/.conda/envs/hf/lib/python3.8/site-packages/transformers/

SyntaxError: invalid syntax (676971914.py, line 3)