# GTP2 Generated Seinfeld ChatBot with Recommender System

In [387]:
from aitextgen.TokenDataset import TokenDataset
from aitextgen.tokenizers import train_tokenizer
from aitextgen.utils import GPT2ConfigCPU
from aitextgen import aitextgen

import pickle
import spacy
import pandas as pd

In [388]:
# Loading a dictionary with all of the episode names and dialogues
with open('../data/episode_dialogues.pkl', 'rb') as f:
    episode_dialogues = pickle.load(f)

In [None]:
episode_dialogues

In [389]:
fp = '/Users/alexander.fioto/Models/Labeled-Seinfeld-Model/' 

In [4]:
ai = aitextgen(model= fp + "pytorch_model.bin",
               config = fp + 'config.json', 
               vocab_file=fp + 'aitextgen-vocab.json',
               merges_file=fp + 'aitextgen-merges.txt')

INFO:aitextgen:Loading GPT-2 model from provided /Users/alexander.fioto/Models/Labeled-Seinfeld-Model/pytorch_model.bin.
INFO:aitextgen:Using a custom tokenizer.


In [293]:
text = ai.generate(prompt='ALEX: Tell me about yourself',
            temperature = .7,
            return_as_list = True)

In [294]:
punctuation = ['.', '!', '?']


In [295]:

text = text[0].split('\n\n\n')

In [298]:
text[-1][-1]

'e'

In [303]:
text

['ALEX: Tell me about yourself.',
 'JERRY: What?',
 'GEORGE: Nothing. ',
 'JERRY: I thought you said people.']

In [302]:
if text[-1][-1] not in punctuation:
    text[-1] += '.'

In [304]:
print('\n'.join(text))

ALEX: Tell me about yourself.
JERRY: What?
GEORGE: Nothing. 
JERRY: I thought you said people.


In [390]:
class SeinfeldChatbot():
    def __init__(self, name='Buddy', fp='/Users/alexander.fioto/Models/Labeled-Seinfeld-Model/', temperature = .4):
        self.user_name_title = 'USER: '
        self.chat_dialogue = ''
        self.chat_dialogue_complete = ''
        self.name = name
        self.fp = fp
        self.greeted = False
        self.similarity_scores = None
        self.temperature = temperature
        self.exit_commands = ['bye', 'exit', 'i have to go', 'later', 'gtg', 'stop', 'end', 'done']
        self.positive_responses = ['yes', 'yep', 'sure', 'definitely', 'y']
        self.negative_responses = ['no', 'no thanks', 'nope', 'nah', 'n']
        self.punctuation = ['.', '!', '?']
        self.model = aitextgen(model= fp + "pytorch_model.bin",
                               config = fp + 'config.json', 
                               vocab_file=fp + 'aitextgen-vocab.json',
                               merges_file=fp + 'aitextgen-merges.txt')
        self.transformer = 'en_core_web_lg'
        self.recommender_initialized = False
        print("Model Loaded!")
        
        
    def change_temp(self, temp):
        if temp < 0 or temp > 1:
            raise ValueError('Value must be greater than 0 and less than or equal to 1')
        else:
            self.temperature = temp
        
        
    def greet(self):
        res = input("Welcome to the SeinfeldChatbot! Do you want to chat?")
        if res.lower() not in self.negative_responses:
            res = input("GEORGE: Can I get your name, pal?")
            if res in self.negative_responses:
                print('GEORGE: Fine. Stay anonymous. See if I care.')
            else:
                self.name = res
                self.user_name_title = self.name.upper() + ': '
            self.greeted = True
            self.chat()
        else:
            print('JERRY: OK. Fine. Leave. GO!')
    
    def generate_response(self, text_input):
        if text_input[-1] not in self.punctuation:
            text_input += '.'
        self.chat_dialogue += ' ' + text_input
        text = self.model.generate(prompt = f'{self.user_name_title}' + text_input,
                                            temperature = .4,
                                            return_as_list = True)
        self.split_text_ = text[0].split('\n\n')
        if len(self.split_text_[-1]) < 16:
            self.split_text_.pop()
        final_text = ''.join(self.split_text_)
        self.chat_dialogue_complete += final_text + '\n'
        print(final_text)
        
    
    def chat(self):
        if self.greeted == True:
            print(f'JERRY: What do you want, {self.name}?')
            chat = True
            while chat:
                text_input = input()
                if text_input in self.exit_commands:
                    chat = False
                    print('KRAMER: Who turns down a junior mint?')
                elif 'recommend' in text_input:
                    res = input("JERRY: Did you want an episode recommendation? It might take a minute.")
                    if res.lower() in self.positive_responses:
                        if self.recommender_initialized:
                            self.episode_recommendation()
                        else:
                            self.initialize_recommender()
                    else:
                        print('JERRY: Oh. Well what you said wasn\'t clear.')
                        self.generate_response(text_input)
                    

                else:
                    self.generate_response(text_input)
        else:
            self.greet()

    
    
    #### Episode Recommender #### 
    def initialize_recommender(self, transformer='en_core_web_lg'):
        '''
        This method initializes the recommender engine. You may use other pretrained transformers such as
        en_trf_bertbaseuncased_lg. If you use BERT, you will see more accurate results but it will take longer
        to load!
        '''
        self.nlp_ = spacy.load(transformer)
        self.recommender_initialized = True
        df = pd.read_csv('../data/clean_scripts.csv', index_col=0)
        self.episodes_ = df['SEID'].unique()
        
        ia = imdb.IMDb()
        self.series_ = ia.get_movie('0098904')
        ia.update(self.series_, 'episodes')
        sorted(self.series_['episodes'].keys())
        print('Recommender Initialized!')
        self.update_similarities()
        self.episode_recommendation()
        
        

    def update_similarities(self):
        if not self.recommender_initialized:
            res = input('You need to initialize the recommender system. Would you like to initialize?')
            if res.lower() in self.positive_responses:
                self.initialize_recommender()
            else:
                print('OK. Hope you come back later!')
        
        else:

            if self.chat_dialogue:
                print('''This part might take a minute...

JERRY: You know the message you're sending out to the world with these sweatpants? You're telling the world, 'I give up. I can't compete in normal society. I'm miserable, so I might as well be comfortable.'
                        ''')
                similarity_scores = []
                for episode in self.episodes_:
                    doc1 = self.nlp_(self.chat_dialogue)
                    doc2 = self.nlp_(episode_dialogues[episode])
                    similarity_scores.append((episode, doc1.similarity(doc2)))
                similarity_scores.sort(key=lambda x: x[1], reverse = True)
                self.similarity_scores = similarity_scores
                self.scores_list_ = []
                for i in range(len(scores)):
                    self.scores_list_.append([int(self.similarity_scores[i][0][1:3]), int(self.similarity_scores[i][0][-2:])])
                print('JERRY: Thanks for you patience. That took way too long.')
            else:
                print('KRAMER: It looks like you haven\'t chatted yet. Please chat for a while and come back!')
    
    def episode_recommendation(self):
        if self.similarity_scores:
            for i in range(len(self.scores_list_)):
                try:
                    episode = series['episodes'][self.scores_list_[i][0]][self.scores_list_[i][1]]
                    title = episode['title']
                    plot = episode['plot']
                    res = input(f'JERRY: Based on your chat dialogue, I would recommend you check out Seinfeld Season {self.scores_list_[i][0]}, episode {self.scores_list_[i][1]}, "{title}". Do you want to know the plot?')
                    if res in self.positive_responses:
                        print(plot)
            #         res = input('Do you want a link to the show?')
            #         if res == 
                    res = input('JERRY: Do you want another recommendation?')
                    if res == 'no':
                        print('JERRY: OK.')
                        break
                except:
                    continue
            
        else:
            res = input('ELAINE: You need to get similarity scores first. Want to do grab them?')
            if res in self.positive_responses:
                self.update_similarities()
            else:
                print('GEORGE: Fine. Have it that way.')

        
        
        

In [391]:
bot = SeinfeldChatbot()

INFO:aitextgen:Loading GPT-2 model from provided /Users/alexander.fioto/Models/Labeled-Seinfeld-Model/pytorch_model.bin.
INFO:aitextgen:Using a custom tokenizer.


Model Loaded!


In [392]:
bot.chat()

Welcome to the SeinfeldChatbot! Do you want to chat? Yes
GEORGE: Can I get your name, pal? Dan


JERRY: What do you want, Dan?


 I like coffee.


DAN: I like coffee.
GEORGE: I didn't do anything.
JERRY: Oh, you didn't tell him?


 I want to hear about George and the Yankees.


DAN: I want to hear about George and the Yankees. I'd like to ask you a question. Why, why did you tell my friend Jerry what I


 Ladjfa;l


DAN: Ladjfa;l.
GEORGE: See, that's no good. See, you don't know how to act.



 recommend
JERRY: Did you want an episode recommendation? It might take a minute. yes


2020-11-08 10:09:40,108 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-08 10:09:40,421 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-08 10:09:41,344 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-08 10:09:41,798 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-08 10:09:42,268 INFO [imdbpy.parser.http.piculet

Recommender Initialized!
This part might take a minute...

JERRY: You know the message you're sending out to the world with these sweatpants? You're telling the world, 'I give up. I can't compete in normal society. I'm miserable, so I might as well be comfortable.'
                        
JERRY: Thanks for you patience. That took way too long.


JERRY: Based on your chat dialogue, I would recommend you check out Seinfeld Season 5, episode 11, "The Conversion". Do you want to know the plot? yes



    George changes his religion to Latvian Orthodox to keep his girlfriend happy; Jerry becomes curious as to why his girlfriend needs fungus cream.    


JERRY: Do you want another recommendation? yes
JERRY: Based on your chat dialogue, I would recommend you check out Seinfeld Season 8, episode 17, "The English Patient". Do you want to know the plot? yes



    Elaine is dragged to see the movie "The English Patient (1996) " on multiple occasions, even though she absolutely hates the film.    


JERRY: Do you want another recommendation? no


JERRY: OK.


 stop


KRAMER: Who turns down a junior mint?


In [376]:
bot.chat()

JERRY: What do you want, Nan?


 I want to see you.


NAN: I want to see you.
ELAINE: So what are you doing now, Georgie?
GEORGE: I'm uh... writing


 Why are you writing?


NAN: Why are you writing?
ELAINE: I'm afraid it's your urine, Elaine. You tested positive for opium


 Opium? I love opium.


NAN: Opium? I love opium. Let me take another one, please? I'll call the doctor right now, I'll take a


 This is goofy.


NAN: This is goofy.
JERRY: Oh? What? You're crazy.
LEO: I'll tell you how crazy


 recommend
JERRY: Did you want an episode recommendation? It might take a minute. yes
JERRY: Based on your chat dialogue, I would recommend you check out Seinfeld Season 7, episode 19, "The Calzone". Do you want to know the plot? no
JERRY: Do you want another recommendation? no


JERRY: OK.


 stop


KRAMER: Who turns down a junior mint?


In [198]:
def episode_rec_chat(scores):
    scores_list = []
    for i in range(len(scores)):
        scores_list.append([int(scores[i][0][1:3]), int(scores[i][0][-2:])])
    for i in range(len(scores_list)):
        episode = series['episodes'][scores_list[i][0]][scores_list[i][1]]
        title = episode['title']
        plot = episode['plot']
        res = input(f'Based on your chat dialogue, I would recommend you check out Seinfeld Season {scores_list[0][0]}, episode {scores_list[0][1]}, "{title}". Do you want to know the plot?')
        if res == 'yes':
            print(plot)
#         res = input('Do you want a link to the show?')
#         if res == 
        res = input('Do you want another recommendation?')
        if res == 'no':
            print('OK. Thanks for chatting')
            break
    return scores_list


In [200]:
scores_list = episode_rec_chat(scores)

Based on your chat dialogue, I would recommend you check out Seinfeld Season 7, episode 1, "The Engagement". Do you want to know the plot? no
Do you want another recommendation? yes
Based on your chat dialogue, I would recommend you check out Seinfeld Season 7, episode 1, "The Deal". Do you want to know the plot? no
Do you want another recommendation? ad
Based on your chat dialogue, I would recommend you check out Seinfeld Season 7, episode 1, "The Barber". Do you want to know the plot? yes



    Jerry regrets giving his longtime barber the heave-ho after he gets a ridiculous-looking haircut. George gets a job interview but the boss leaves him with a very cryptic ending. Kramer begs Elaine to let him take part in her charity bachelor auction.    


Do you want another recommendation? no


In [190]:
series['episodes'][scores_list[0][0]][scores_list[0][1]]['title']

'The Engagement'

In [188]:
scores_list[0][0]

7

# Scractch work for IMDb API

In [111]:
import imdb
ia = imdb.IMDb()

2020-11-07 17:15:12,134 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:262: using lxml
INFO:imdbpy.parser.http.piculet:using lxml


In [114]:
# Seinfeld is 0098904
series = ia.get_movie('0098904')

2020-11-07 17:17:16,130 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 17:17:16,464 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder


In [120]:

ia.update(series, 'episodes')
sorted(series['episodes'].keys())


2020-11-07 17:19:24,327 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 17:19:24,866 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 17:19:25,582 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 17:19:26,236 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 17:19:27,057 INFO [imdbpy.parser.http.piculet

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [135]:
episode = series['episodes'][7][5]

In [178]:
episode['title']

'The Hot Tub'

In [182]:
series[]

<Movie id:0098904[http] title:_"Seinfeld" (1989)_>