In [26]:
import pandas as pd
import numpy as np
import evaluate
import openai
import time
from tqdm.autonotebook import tqdm

# Enable tqdm's pandas integration (progress_apply and friends).
tqdm.pandas()

# Display the value of every top-level expression in a cell, not only the
# last one (e.g. both `.shape` and `.head()` in the data-loading cell).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [12]:
# Read the OpenAI API key from the first line of the local secrets file and
# strip surrounding whitespace / the trailing newline before using it.
with open("/home/pparesh/.secrets/openai/openai_api_key") as key_file:
    first_line = key_file.readline()
openai.api_key = first_line.strip()

## Read & Preprocess Data

In [13]:
# Load the ROCStories cloze-test validation CSV and keep only the first 100 rows.
stories_csv = "../data/ROCStories/cloze_test_val__winter2018-cloze_test_ALL_val.csv"
df_data = pd.read_csv(stories_csv).head(100)
# Quick sanity check: dimensions followed by a preview of the first rows.
df_data.shape
df_data.head()

(100, 8)

Unnamed: 0,InputStoryid,InputSentence1,InputSentence2,InputSentence3,InputSentence4,RandomFifthSentenceQuiz1,RandomFifthSentenceQuiz2,AnswerRightEnding
0,138d5bfb-05cc-41e3-bf2c-fa85ebad14e2,Rick grew up in a troubled household.,"He never found good support in family, and tur...",It wasn't long before Rick got shot in a robbery.,The incident caused him to turn a new leaf.,He is happy now.,He joined a gang.,1
1,bff9f820-9605-4875-b9af-fe6f14d04256,Laverne needs to prepare something for her fri...,She decides to bake a batch of brownies.,She chooses a recipe and follows it closely.,Laverne tests one of the brownies to make sure...,The brownies are so delicious Laverne eats two...,Laverne doesn't go to her friend's party.,1
2,e8f628d5-9f97-40ed-8611-fc0e774673c4,Sarah had been dreaming of visiting Europe for...,She had finally saved enough for the trip.,She landed in Spain and traveled east across t...,She didn't like how different everything was.,Sarah then decided to move to Europe.,Sarah decided that she preferred her home over...,2
3,f5226bfe-9f26-4377-b05f-3d9568dbdec1,Gina was worried the cookie dough in the tube ...,She was very happy to find she was wrong.,The cookies from the tube were as good as from...,Gina intended to only eat 2 cookies and save t...,Gina liked the cookies so much she ate them al...,Gina gave the cookies away at her church.,1
4,69ac9b05-b956-402f-9fff-1f926ef9176b,It was my final performance in marching band.,I was playing the snare drum in the band.,We played Thriller and Radar Love.,The performance was flawless.,I was very proud of my performance.,I was very ashamed of my performance.,1


In [14]:
# Add the 5th sentence: take quiz ending 1 where AnswerRightEnding is 1,
# otherwise fall back to quiz ending 2.
is_first_ending = df_data['AnswerRightEnding'] == 1
df_data['InputSentence5'] = df_data['RandomFifthSentenceQuiz1'].where(
    is_first_ending,
    df_data['RandomFifthSentenceQuiz2'],
)

# Build the outline: the five story sentences joined with newlines, in order.
sentence_columns = [
    'InputSentence1',
    'InputSentence2',
    'InputSentence3',
    'InputSentence4',
    'InputSentence5',
]
outline_text = df_data[sentence_columns[0]]
for column in sentence_columns[1:]:
    outline_text = outline_text + "\n" + df_data[column]
df_data['outline'] = outline_text

# Wrap each outline in the generation prompt: instruction header, the outline
# itself, then a "STORY:" cue for the model to continue from.
# NOTE(review): "paragaph" is misspelled in the prompt text; it is kept as-is
# here because correcting it would change the text sent to the model.
prompt_header = "Generate a multi-paragaph story using the following outline:\nOUTLINE:\n"
prompt_footer = "\n" + "STORY:"
df_data['prompt'] = prompt_header + df_data['outline'] + prompt_footer

In [15]:
# Preview the first two rows, including the new InputSentence5 / outline / prompt columns.
df_data.head(2)

Unnamed: 0,InputStoryid,InputSentence1,InputSentence2,InputSentence3,InputSentence4,RandomFifthSentenceQuiz1,RandomFifthSentenceQuiz2,AnswerRightEnding,InputSentence5,outline,prompt
0,138d5bfb-05cc-41e3-bf2c-fa85ebad14e2,Rick grew up in a troubled household.,"He never found good support in family, and tur...",It wasn't long before Rick got shot in a robbery.,The incident caused him to turn a new leaf.,He is happy now.,He joined a gang.,1,He is happy now.,Rick grew up in a troubled household.\nHe neve...,Generate a multi-paragaph story using the foll...
1,bff9f820-9605-4875-b9af-fe6f14d04256,Laverne needs to prepare something for her fri...,She decides to bake a batch of brownies.,She chooses a recipe and follows it closely.,Laverne tests one of the brownies to make sure...,The brownies are so delicious Laverne eats two...,Laverne doesn't go to her friend's party.,1,The brownies are so delicious Laverne eats two...,Laverne needs to prepare something for her fri...,Generate a multi-paragaph story using the foll...


In [16]:
# Show the full outline text for the row labelled 5 (print renders the newlines).
print(df_data.loc[5, 'outline'])

Jim found an old disposable camera in the bottom of his junk drawer.
He began snapping away at everything around him.
The counter clicked down to one final photo.
The gravity of the situation began to dawn on Jim.
Jim took time to decide what he would take a picture of.


In [17]:
# Show the complete prompt for the row labelled 5 (print renders the newlines).
print(df_data.loc[5, 'prompt'])

Generate a multi-paragaph story using the following outline:
OUTLINE:
Jim found an old disposable camera in the bottom of his junk drawer.
He began snapping away at everything around him.
The counter clicked down to one final photo.
The gravity of the situation began to dawn on Jim.
Jim took time to decide what he would take a picture of.
STORY:


## Generate Story

In [38]:
# model = "text-babbage-001"
# model = "text-curie-001"
# model = "text-davinci-003"

In [18]:
# prompt = df_data['prompt'][5]
# response = openai.Completion.create(
#     model=model, 
#     prompt=prompt, 
#     temperature=0.9, 
#     presence_penalty=0.5,
#     frequency_penalty=0.8,
#     max_tokens=500,
#     n=3,
# )

In [19]:
# gen_stories = []
# for s in response.choices:
#     gen_stories.append(s.text)

# for x in gen_stories:
#     print(x)

In [20]:
# gen_stories_len = [len(x) for x in gen_stories]
# final_story = gen_stories[np.argmax(gen_stories_len)]
# print(final_story)

In [30]:
def generate_stories(model, prompts, max_tokens=500, n=3, sleep_seconds=3):
    """Generate one story per prompt with the OpenAI completions endpoint.

    For every prompt, ``n`` completions are requested and the longest one
    (by character count) is kept.

    Args:
        model: Name of the OpenAI completion model, e.g. ``"text-ada-001"``.
        prompts: Iterable of prompt strings.
        max_tokens: Maximum number of tokens to generate per completion.
        n: Number of completions to request for each prompt.
        sleep_seconds: Pause after each request, to stay under the API
            rate limit.

    Returns:
        A list holding the selected story text for each prompt, in input order.
    """
    output_stories = []
    for prompt in tqdm(prompts):
        response = openai.Completion.create(
            model=model,
            prompt=prompt,
            temperature=0.9,
            presence_penalty=0.5,
            frequency_penalty=0.8,
            # Forward the caller's max_tokens / n instead of fixed values, so
            # that e.g. n=1 really requests a single completion.
            max_tokens=max_tokens,
            n=n,
        )
        # get all stories
        gen_stories = [choice.text for choice in response.choices]
        # select story based on length; on ties the first candidate wins
        output_stories.append(max(gen_stories, key=len))

        # for the rate-limit
        time.sleep(sleep_seconds)

    return output_stories

In [31]:
# generate_stories("text-babbage-001", [df_data['prompt'][5]], max_tokens=500, n=1)

In [32]:
# Generate one story per prompt with text-ada-001 and store it as a new column.
df_data['gen_stories_ada'] = generate_stories("text-ada-001", df_data['prompt'].tolist(), max_tokens=500, n=3)

  0%|          | 0/100 [00:00<?, ?it/s]

In [33]:
# Generate one story per prompt with text-babbage-001 and store it as a new column.
df_data['gen_stories_babbage'] = generate_stories("text-babbage-001", df_data['prompt'].tolist(), max_tokens=500, n=3)

  0%|          | 0/100 [00:00<?, ?it/s]

In [34]:
# Generate one story per prompt with text-curie-001 and store it as a new column.
df_data['gen_stories_curie'] = generate_stories("text-curie-001", df_data['prompt'].tolist(), max_tokens=500, n=3)

  0%|          | 0/100 [00:00<?, ?it/s]

In [35]:
# Generate one story per prompt with text-davinci-003 and store it as a new column.
df_data['gen_stories_davinci'] = generate_stories("text-davinci-003", df_data['prompt'].tolist(), max_tokens=500, n=1)

  0%|          | 0/100 [00:00<?, ?it/s]

In [36]:
# Persist the frame (source columns, prompts and all generated stories) as parquet.
df_data.to_parquet("../data/generated_stories/generated_stories_gpt3_100.parquet")

## Evaluation

In [9]:
# outline = """Jim found an old disposable camera in the bottom of his junk drawer.
# He began snapping away at everything around him.
# The counter clicked down to one final photo.
# The gravity of the situation began to dawn on Jim.
# Jim took time to decide what he would take a picture of."""

In [10]:
# final_story = """Jim had always been a photo enthusiast, even as a kid. He loved exploring new places, taking pictures of the natural beauty around him, and capturing special moments with those he loved. But never before had he found such freedom in taking photos--and such a sense of immediacy.

# So Jim began to snap away at anything and everything around him. He would go out on walks with his dog, stop in at the grocery store, or sit at his desk at work--and snap away. The Counter turned to one final photo before disappearing for good. In each photo, Jim captured a fleeting moment that he could cherish forever.

# Gradually though, Jim realized just how precarious this new way of life was. Every time he snapped a picture, something could happen that would overwrite that moment forever with uncertainty and worry. Suddenly taking pictures became quite solemn and meaningful for Jim--it was now more than just a way to capture fleeting moments; it was an act of preservation and commemoration.

# And so Jim began to take pictures of things that meant the most to him: his wife and kids during family dinners, positive memories from trips abroad, happy moments spent with friends and family...absolutely anything that tempted fate but ultimately ended up being worth preserving anyways. And gradually as he took more photos like this, the gravity of the situation began to dawn on Jim...he wasn't just preserving moments anymore; he was documenting history!"""

In [4]:
# bleu = evaluate.load("bleu")
# results = bleu.compute(predictions=[final_story], references=[outline])
# results

In [5]:
# perplexity = evaluate.load("perplexity", module_type="metric")
# results = perplexity.compute(predictions=[final_story], model_id='gpt2')
# results

In [6]:
# rouge = evaluate.load('rouge')
# results = rouge.compute(predictions=[final_story], references=[outline])
# results

In [8]:
# bertscore = evaluate.load('bertscore')
# results = bertscore.compute(predictions=[final_story], references=[outline], model_type="distilbert-base-uncased")
# results

In [7]:
# from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
# checkpoint = "distilgpt2"
# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype="auto", device_map="auto")