In [3]:
import spacy, re, openai, random
from dotenv import dotenv_values
from nltk import sent_tokenize
from spacy_download import load_spacy

In [2]:
def tweet_process(tweet):
    """Split a dream tweet into its description text and its hashtags.

    The tweet is lower-cased. Everything up to and including the first
    occurrence of "dream" (plus any word characters attached to it, e.g.
    "dreamt", "dreams", "dreaming") is dropped, and the hashtags found in
    the remaining text are pulled out of it.

    Args:
        tweet: Raw tweet text.

    Returns:
        A ``(content, hashtags)`` tuple. ``content`` is the lower-cased text
        after the dream word, with hashtags removed and surrounding
        whitespace stripped. ``hashtags`` lists the removed hashtags
        (including the leading ``#``) in order of appearance. When the tweet
        contains no "dream" at all, the whole tweet is used as content
        instead of raising an IndexError.
    """
    tweet = tweet.lower()

    # Consume the whole dream word rather than a fixed number of characters:
    # a fixed skip turned "dream that ..." into "hat ..." and left the tail
    # of longer suffixes such as "dreaming" behind.
    dream_word = re.search(r"dream\w*", tweet)
    content_part = tweet[dream_word.end():] if dream_word else tweet

    hashtag_pattern = r"#\w+"
    hashtags = re.findall(hashtag_pattern, content_part)
    # re.sub removes each hashtag as a whole token; chained str.replace calls
    # would strip "#hero" out of "#heroes" and leave a stray "es" behind.
    content_part = re.sub(hashtag_pattern, "", content_part).strip()

    return content_part, hashtags

In [3]:
def get_noun_verb(tagger,tweet):
    """Collect the distinct noun and verb lemmas of a tweet.

    Args:
        tagger: Callable applied to ``tweet`` that yields tokens exposing
            ``pos_`` and ``lemma_`` attributes (the notebook passes a spaCy
            pipeline).
        tweet: Text to analyse.

    Returns:
        A ``(nouns, verbs)`` tuple of sets of lemmas. The noun "night" and
        the verbs "have" and "do" are excluded.
    """
    lemmas_by_pos = {"NOUN": [], "VERB": []}
    for tok in tagger(tweet):
        bucket = lemmas_by_pos.get(tok.pos_)
        if bucket is not None:
            bucket.append(tok.lemma_)

    # "night" only shows up because of the phrase "last night".
    nouns = set(lemmas_by_pos["NOUN"])
    nouns.discard("night")

    # Drop the verbs "have" and "do".
    verbs = set(lemmas_by_pos["VERB"]) - {"have", "do"}

    return nouns, verbs

In [65]:
def generate_prompts(nounSet,verbSet,total_prompts=8):
    """Ask the OpenAI completion model for dream-styled noun phrases.

    One request is sent per verb. Each request's keywords are all nouns (in
    random order) followed by that single verb, so every prompt contains
    exactly one verb.

    Args:
        nounSet: Noun keywords included in every request.
        verbSet: Verb keywords; one request is made for each.
        total_prompts: Approximate number of prompts to produce overall. It
            is divided evenly between the verbs, with at least one completion
            requested per verb, so more verbs than ``total_prompts`` yields
            one prompt per verb.

    Returns:
        A list of prompt strings. Each is the completion text with newlines
        removed, cut at the first comma, and suffixed with
        " in the style of a dream". Empty when ``verbSet`` is empty.
    """
    verb_list = list(verbSet)
    if not verb_list:
        # Nothing to build a prompt around; this also avoids dividing by zero.
        return []

    # Completions requested per verb; never 0, which would make the request
    # ask for no completions at all.
    prompts_num = max(1, total_prompts // len(verb_list))

    raw_completions = []
    for verb in verb_list:
        token_list = list(nounSet)
        random.shuffle(token_list)
        token_list.append(verb)

        # The space after "keywords" matters: without it the first keyword is
        # fused onto the instruction text.
        prompt = "Generate a noun phrase with keywords " + ", ".join(token_list)

        completions = openai.Completion.create(
            engine="text-curie-001",
            prompt=prompt,
            max_tokens=12,
            n=prompts_num,
            temperature=1,
            presence_penalty=1,
        )
        raw_completions.extend(choice.text for choice in completions.choices)

    prompts = []
    for text in raw_completions:
        # Drop newlines, then keep only the part before the first comma to
        # discard trailing half sentences.
        phrase = text.replace("\n", "").split(",")[0]
        prompts.append(phrase + " in the style of a dream")

    return prompts



# Pipeline run: load the API key, parse a sample tweet, tag it, and generate prompts.

In [5]:
# Read secrets from a local .env file so the API key never appears in the notebook.
config = dotenv_values(".env")
# Fail here with a clear message instead of a bare KeyError (or a silently
# empty key that only surfaces later as an authentication error).
if not config.get("OPENAI_API_KEY"):
    raise KeyError("OPENAI_API_KEY is missing or empty in .env")
openai.api_key = config["OPENAI_API_KEY"]

In [52]:
# Sample dream tweet used to exercise the pipeline end to end.
tweet1 = (
    "I had a dream that I was a superhero and I was saving people from a burning building. "
    "I woke up feeling like a boss #superhero #dreams #hero"
)

In [68]:
# Lower-case the sample tweet, keep the text after "dream", and split off its hashtags.
processed_tweet, hashtag_list = tweet_process(tweet1)

In [4]:
# Load the small English spaCy pipeline; it is used below as the POS tagger
# and lemmatizer. NOTE(review): load_spacy presumably downloads the model
# when it is not installed yet - confirm against spacy_download.
nlp = load_spacy("en_core_web_sm")

In [5]:
# Run the tagger on the raw (unprocessed) sample tweet for inspection.
# Reuses tweet1 instead of repeating the same literal, so the two cannot drift apart.
res = nlp(tweet1)

In [54]:
# Tag the processed tweet and collect the distinct noun and verb lemmas
# ("night", "have" and "do" are filtered out by get_noun_verb).
noun_set,verb_set = get_noun_verb(nlp,processed_tweet)

In [66]:
# Calls the OpenAI completion API once per verb; needs openai.api_key to be set.
prompt_list = generate_prompts(noun_set,verb_set)

In [67]:
# Display the generated prompts.
prompt_list

['Superheroessavingpeopleinbuildingworkingsuper in the style of a dream',
 'Superheroes save people in dangerous situations and buildings in the style of a dream',
 'A superhero theme park where guests can experience the feel in the style of a dream',
 'Superhero building boss people feel in the style of a dream',
 'A giant building with people inside is shaking violently. in the style of a dream',
 'Superhero building people wake in the style of a dream',
 'heroic people in a building burning in the style of a dream',
 "building that was once a superhero's headquarters in the style of a dream"]