In [1]:
import re
import torch
import json
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset

In [64]:
tokenizer = AutoTokenizer.from_pretrained("bigscience/T0_3B")
model = AutoModelForSeq2SeqLM.from_pretrained("bigscience/T0_3B")

In [3]:
wow_path ="/home/willy/comp5214-groundedness-kgd/data/wizard_of_wikipedia/test_topic_split.json"
with open(wow_path, "r") as f:
    wow = json.loads(f.read())

In [11]:
data = wow[0]
print(len(wow), type(wow))
print(data.keys())

968 <class 'list'>
dict_keys(['chosen_topic', 'persona', 'wizard_eval', 'dialog', 'chosen_topic_passage'])


In [77]:
def get_dialog_prompt(sample_number, print_dialog=True):
    "sample_number 0-967"
    word_count = 0
    prompt = ""
    dialog = wow[sample_number]["dialog"]
    first_speaker = dialog[0]["speaker"]
    print(f"First speaker is: {first_speaker}")
    
    for i in range(len(dialog)):
        dialog_turn = dialog[i]["text"]
        speaker = dialog[i]["speaker"][2:]
        if print_dialog:
            print(dialog_turn)
        word_count += len(dialog_turn.split())
        prompt += f" {speaker}: "
        prompt += dialog_turn
    print(f"There is {word_count} words")
    return dialog, prompt

In [151]:
dialog = data["dialog"]
passage = data["chosen_topic_passage"]

In [154]:
passage

['Elvis Aaron Presley (January 8, 1935\xa0– August 16, 1977) was an American singer, musician, and actor.',
 'Regarded as one of the most significant cultural icons of the 20th century, he is often referred to as the "King of Rock and Roll" or simply "the King".',
 'Presley was born in Tupelo, Mississippi, and relocated to Memphis, Tennessee, with his family when he was 13 years old.',
 'His music career began there in 1954, recording at Sun Records with producer Sam Phillips, who wanted to bring the sound of African American music to a wider audience.',
 'Accompanied by guitarist Scotty Moore and bassist Bill Black, Presley was a pioneer of rockabilly, an uptempo, backbeat-driven fusion of country music and rhythm and blues.',
 'In 1955, drummer D.J.',
 "Fontana joined to complete the lineup of Presley's classic quartet and RCA Victor acquired his contract in a deal arranged by Colonel Tom Parker, who would manage the singer for more than two decades.",
 'Presley\'s first RCA single, 

In [70]:
dialog, prompt = get_dialog_prompt(0)

First speaker is: 0_Wizard
Oh baby.... Elvis Presley is truly The King of Rock and Roll!
yes...who doesn't love Elvis Presley and his music?
Seriously. I have been planning a vaction to Graceland myself. I have family in Tennessee so it could be good.
I would love to tour his home.  Which one of his songs is your favorite?/
I always liked "Hounddog." My grandpa heard Elvis sing it live in Memphis.
oh wow thats a classic.  what about jailhouse rock
Pretty good. Didn't Elvis make Jailhouse rock into a movie?
Yes, I think he did.  He made several movies.  
Was Elvis actually in jail? The movie depicts him dicovering his musical abilities in a cell.
Hmm...that I don't know.  I am going to have to look that up later and see
There is 128 words


In [34]:
dialog[0].keys()

dict_keys(['speaker', 'text', 'checked_sentence', 'checked_passage', 'candidate_responses', 'retrieved_passages', 'retrieved_topics'])

In [79]:
dialog, prompt = get_dialog_prompt(0, print_dialog=False)

First speaker is: 0_Wizard
There is 128 words


In [94]:
prompt

' Wizard: Oh baby.... Elvis Presley is truly The King of Rock and Roll! Apprentice: yes...who doesn\'t love Elvis Presley and his music? Wizard: Seriously. I have been planning a vaction to Graceland myself. I have family in Tennessee so it could be good. Apprentice: I would love to tour his home.  Which one of his songs is your favorite?/ Wizard: I always liked "Hounddog." My grandpa heard Elvis sing it live in Memphis. Apprentice: oh wow thats a classic.  what about jailhouse rock Wizard: Pretty good. Didn\'t Elvis make Jailhouse rock into a movie? Apprentice: Yes, I think he did.  He made several movies.   Wizard: Was Elvis actually in jail? The movie depicts him dicovering his musical abilities in a cell. Apprentice: Hmm...that I don\'t know.  I am going to have to look that up later and see'

In [142]:
print(prompt[:338])
print(prompt[338:422])

 Wizard: Oh baby.... Elvis Presley is truly The King of Rock and Roll! Apprentice: yes...who doesn't love Elvis Presley and his music? Wizard: Seriously. I have been planning a vaction to Graceland myself. I have family in Tennessee so it could be good. Apprentice: I would love to tour his home.  Which one of his songs is your favorite?
/ Wizard: I always liked "Hounddog." My grandpa heard Elvis sing it live in Memphis.


In [131]:
knowledge = list(dialog[4]["checked_sentence"].values())[0]

In [109]:
inputs = tokenizer.encode(f"Given the conversation: {prompt[:-87]}, what is the apprentice saying next?", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
inputs = tokenizer.encode(f"Write a the next turn of the conversation. Conversation: {prompt[:-87]}", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
inputs = tokenizer.encode(f"Given the Conversation, what is the Apprentice's answer? Conversation: {prompt[:-87]}", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))

<pad> Apprentice: I love Elvis Presley. I would love to go to Graceland.</s>
<pad> Apprentice: I love Elvis Presley. I have been planning a trip to Graceland
<pad> Apprentice: Yes, I think he made several movies.</s>


In [144]:
inputs = tokenizer.encode(f"Given the conversation: {prompt[:338]}, what is the apprentice saying next?", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
inputs = tokenizer.encode(f"Write a the next turn of the conversation. Conversation: {prompt[:338]}", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
inputs = tokenizer.encode(f"Given the Conversation, what is the Apprentice's answer? Conversation: {prompt[:338]}", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))

<pad> Elvis Presley is truly The King of Rock and Roll. Wizard has been planning a vacation
<pad> Elvis Presley is truly The King of Rock and Roll. Wizard has been planning a vacation
<pad> Elvis Presley</s>


In [145]:
inputs = tokenizer.encode(f"Given the conversation: {prompt[:338]}, what is the wizard saying next?", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
inputs = tokenizer.encode(f"write a the next turn of the conversation. Conversation: {prompt[:338]}", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
inputs = tokenizer.encode(f"Given the Conversation, what is the wizard's answer? Conversation: {prompt[:338]}", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))

<pad> Elvis Presley is truly The King of Rock and Roll. Wizard has been planning a vacation
<pad> Elvis Presley is truly The King of Rock and Roll. Wizard has been planning a vacation
<pad> Elvis Presley is truly The King of Rock and Roll. Wizard has been planning a vacation


In [146]:
inputs = tokenizer.encode(f"Given the conversation: {prompt[:338]} and the knowledge {knowledge}, what is the wizard saying next?", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
inputs = tokenizer.encode(f"Given the Knowledge, write a the next turn of the conversation. Conversation: {prompt[:338]}. Knowledge: {knowledge}", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
inputs = tokenizer.encode(f"Given the Conversation and the Knowledge, what is the wizard's answer? Knowledge: {knowledge}. Conversation: {prompt[:338]}", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))

<pad> Elvis Presley is truly The King of Rock and Roll. Wizard has been planning a vacation
<pad> Elvis Presley was born in Tupelo, Mississippi, and relocated to Memphis, Tennessee
<pad> Elvis Presley</s>


In [150]:
inputs = tokenizer.encode(f"Given the conversation: {prompt[:338]} and the knowledge {knowledge}, what is the wizard saying next?", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
inputs = tokenizer.encode(f"Given the Knowledge, write the next turn of the conversation. Conversation: {prompt[:338]}. Knowledge: {knowledge}", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
inputs = tokenizer.encode(f"Given the Conversation and the Knowledge, what is the wizard's answer? Knowledge: {knowledge}. Conversation: {prompt[:-338]}", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
inputs = tokenizer.encode(f"""Context: "{knowledge}". Conversation: "{prompt[:338]}" Given the Conversation and the Knowledge, what is the wizard's answer?""", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))

<pad> Elvis Presley is truly The King of Rock and Roll. Wizard has been planning a vacation
<pad> Elvis Presley was born in Tupelo, Mississippi, and relocated to Memphis, Tennessee
<pad> Elvis Presley was born in Tupelo, Mississippi, and relocated to Memphis, Tennessee
<pad> Elvis Presley</s>


In [148]:
inputs = tokenizer.encode(f"""Write wizard's answer. Conversation: "{prompt[:338]}. Wizard:" """, return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))

<pad> Elvis Presley is truly The King of Rock and Roll! Wizard has been planning a vacation


In [141]:
prompt[:-338]

' Wizard: Oh baby.... Elvis Presley is truly The King of Rock and Roll! Apprentice: yes...who doesn\'t love Elvis Presley and his music? Wizard: Seriously. I have been planning a vaction to Graceland myself. I have family in Tennessee so it could be good. Apprentice: I would love to tour his home.  Which one of his songs is your favorite?/ Wizard: I always liked "Hounddog." My grandpa heard Elvis sing it live in Memphis. Apprentice: oh wow thats a classic.  what'