In [1]:
from aitextgen.TokenDataset import TokenDataset
from aitextgen.tokenizers import train_tokenizer
from aitextgen.utils import GPT2ConfigCPU
from aitextgen import aitextgen

import pickle
import spacy
import pandas as pd

In [62]:
# Loading a dictionary with all of the episode names and dialogues
with open('../data/episode_dialogues.pkl', 'rb') as f:
    episode_dialogues = pickle.load(f)

In [3]:
fp = '/Users/alexander.fioto/Models/Labeled-Seinfeld-Model/' 

In [4]:
ai = aitextgen(model= fp + "pytorch_model.bin",
               config = fp + 'config.json', 
               vocab_file=fp + 'aitextgen-vocab.json',
               merges_file=fp + 'aitextgen-merges.txt')

INFO:aitextgen:Loading GPT-2 model from provided /Users/alexander.fioto/Models/Labeled-Seinfeld-Model/pytorch_model.bin.
INFO:aitextgen:Using a custom tokenizer.


In [5]:
ai.generate(prompt='ALEX: tell me about yourself',
            temperature = .01)

[1mALEX: tell me about yourself[0m.


JERRY: What?


GEORGE: You're going to get me a sample of Jerry


In [209]:
class SeinfeldChatbot():
    def __init__(self, name=None, fp='/Users/alexander.fioto/Models/Labeled-Seinfeld-Model/', temperature = .4):
        self.user_name_title = name
        self.chat_dialogue = ''
        self.name = name
        self.fp = fp
        self.similarity_scores = None
        self.temperature = temperature
        self.exit_commands = ['bye', 'exit', 'i have to go', 'later', 'gtg', 'stop', 'end', 'done']
        self.positive_commands = ['yes', 'yep', 'sure', 'definitely']
        self.punctuation = ['.', '!', '?']
        self.model = aitextgen(model= fp + "pytorch_model.bin",
                               config = fp + 'config.json', 
                               vocab_file=fp + 'aitextgen-vocab.json',
                               merges_file=fp + 'aitextgen-merges.txt')
        self.recommender_initialized = False
        print("Model Loaded!")
        
        
    def change_temp(self, temp):
        if temp < 0 or temp > 1:
            raise ValueError('Value must be greater than 0 and less than or equal to 1')
        else:
            self.temperature = temp
        
        
    def get_name(self):
        self.name = input("GEORGE: What\'s your name, pal?")
        self.user_name_title = self.name.upper() + ': '
    
    def chat(self):
        if not self.name:
            self.get_name()
            print(f'JERRY: What do you want, {self.name}?')
            chat = True
            while chat:
                text_input = input()
                if text_input in self.exit_commands:
                    chat = False
                    print('KRAMER: Who turns down a junior mint?')

                else:
                    if text_input[-1] not in self.punctuation:
                        text_input += '.'
                    self.chat_dialogue += ' ' + text_input
                    self.model.generate(prompt = f'{self.user_name_title}' + text_input,
                                        temperature = .4)

        else:
            print(f'JERRY: Oh, I see you\'re back again, {self.name}. To what do we owe the pleasure?')
            chat = True
            while chat:
                text_input = input()
                if text_input in self.exit_commands:
                    chat = False
                    print('KRAMER: Who turns down a junior mint?')

                else:
                    if text_input[-1] not in self.punctuation:
                        text_input += '.'
                    self.chat_dialogue += ' ' + text_input

                    self.model.generate(prompt = f'{self.user_name_title}' + text_input,
                                        temperature = .4)
    
    
    #### Episode Recommender #### 
    def initialize_recommender(self, model = 'en_core_web_lg'):
        '''
        This method initializes the recommender engine. You may use other pretrained transformers such as
        en_trf_bertbaseuncased_lg.
        '''
        self.nlp_ = spacy.load(model)
        self.recommender_initialized = True
        df = pd.read_csv('../data/clean_scripts.csv', index_col=0)
        self.episodes_ = df['SEID'].unique()
        
        ia = imdb.IMDb()
        self.series_ = ia.get_movie('0098904')
        ia.update(self.series_, 'episodes')
        sorted(self.series_['episodes'].keys())
        print('Recommender Initialized!')
        
        

    def get_similarities(self):
        if not self.recommender_initialized:
            res = input('You need to initialize the recommender system. Would you like to initialize? [y/n]')
            if res.lower() == 'y':
                self.initialize_recommender()
            elif res.lower() == 'n':
                print('OK. Hope you come back later!')
            else:
                raise InputError('Invalid Entry. Please input y or n')
        
        
        
        
        else:

            if self.chat_dialogue:
                print('''This might take a minute...

JERRY: You know the message you're sending out to the world with these sweatpants? You're telling the world, 'I give up. I can't compete in normal society. I'm miserable, so I might as well be comfortable.'
                        ''')
                similarity_scores = []
                for episode in self.episodes_:
                    doc1 = self.nlp_(self.chat_dialogue)
                    doc2 = self.nlp_(episode_dialogues[episode])
                    similarity_scores.append((episode, doc1.similarity(doc2)))
                similarity_scores.sort(key=lambda x: x[1], reverse = True)
                self.similarity_scores = similarity_scores
                self.scores_list_ = []
                for i in range(len(scores)):
                    self.scores_list_.append([int(self.similarity_scores[i][0][1:3]), int(self.similarity_scores[i][0][-2:])])
                print('Thanks for you patience. That took way too long.')
            else:
                print('It looks like you haven\'t chatted yet. Please chat for a while and come back!')
    
    def episode_recommendation(self):
        if self.similarity_scores:
            scores_list = []
            for i in range(len(scores)):
                scores_list.append([int(self.similarity_scores[i][0][1:3]), int(self.similarity_scores[i][0][-2:])])
            for i in range(len(scores_list)):
                episode = series['episodes'][scores_list[i][0]][scores_list[i][1]]
                title = episode['title']
                plot = episode['plot']
                res = input(f'Based on your chat dialogue, I would recommend you check out Seinfeld Season {scores_list[0][0]}, episode {scores_list[0][1]}, "{title}". Do you want to know the plot?')
                if res == 'yes':
                    print(plot)
        #         res = input('Do you want a link to the show?')
        #         if res == 
                res = input('Do you want another recommendation?')
                if res == 'no':
                    print('OK. Thanks for chatting')
                    break
            return scores_list

        
        
        

In [210]:
bot = SeinfeldChatbot()

INFO:aitextgen:Loading GPT-2 model from provided /Users/alexander.fioto/Models/Labeled-Seinfeld-Model/pytorch_model.bin.
INFO:aitextgen:Using a custom tokenizer.


Model Loaded!


In [211]:
bot.initialize_recommender()

2020-11-07 18:27:31,274 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 18:27:31,529 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 18:27:31,795 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 18:27:32,060 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 18:27:32,352 INFO [imdbpy.parser.http.piculet

Recommender Initialized!


In [212]:
bot.chat()

GEORGE: What's your name, pal? Alex


JERRY: What do you want, Alex?


 I want to talk to you


[1mALEX: I want to talk to you.[0m


JERRY:  All right. We'll see you at the race, ok?





 What race are we going to?


[1mALEX: What race are we going to?[0m


GEORGE: You're breaking up, you're going to break up with me!





 I will break up with you.


[1mALEX: I will break up with you.[0m


JERRY: The last thing this guy's qualified to give a tour of is reality


 This is silly.


[1mALEX: This is silly.[0m


GEORGE:  I'll be in my office if you need me.


CLEANING D


 Why are you going there?


[1mALEX: Why are you going there?[0m


JERRY: I'm not invited.


MIKE:  You're soap!




 You are soap


[1mALEX: You are soap.[0m


JERRY: Oh, all the way.


GEORGE: You are aware that I'm being char


 I love coffee


[1mALEX: I love coffee.[0m


GEORGE: Yeah.


JERRY: So I guess your hearing goes in and out huh?




 stop


KRAMER: Who turns down a junior mint?


In [213]:
bot.get_similarities()

This might take a minute...

JERRY: You know the message you're sending out to the world with these sweatpants? You're telling the world, 'I give up. I can't compete in normal society. I'm miserable, so I might as well be comfortable.'
                        
Thanks for you patience. That took way too long.


In [214]:
bot.episode_recommendation()

Based on your chat dialogue, I would recommend you check out Seinfeld Season 1, episode 3, "The Robbery". Do you want to know the plot? yes



    After Jerry's apartment is robbed, Jerry starts to look for other apartments. But Jerry and George both want the same apartment, and Elaine wants the apartment of whoever loses out.    


Do you want another recommendation? no


OK. Thanks for chatting


[[1, 3],
 [4, 24],
 [2, 7],
 [5, 6],
 [8, 21],
 [7, 19],
 [6, 11],
 [5, 13],
 [5, 18],
 [2, 9],
 [7, 1],
 [7, 24],
 [1, 4],
 [5, 1],
 [3, 6],
 [6, 9],
 [7, 9],
 [7, 20],
 [4, 5],
 [3, 3],
 [7, 16],
 [3, 12],
 [7, 6],
 [7, 11],
 [5, 15],
 [8, 5],
 [4, 3],
 [3, 7],
 [5, 11],
 [2, 1],
 [4, 23],
 [3, 18],
 [6, 1],
 [7, 3],
 [5, 8],
 [3, 4],
 [2, 11],
 [3, 15],
 [3, 2],
 [4, 4],
 [2, 10],
 [9, 2],
 [4, 10],
 [6, 20],
 [9, 3],
 [4, 2],
 [5, 2],
 [8, 17],
 [3, 17],
 [7, 2],
 [7, 23],
 [2, 4],
 [8, 22],
 [5, 9],
 [3, 10],
 [6, 21],
 [8, 18],
 [2, 2],
 [5, 16],
 [3, 22],
 [8, 20],
 [6, 19],
 [5, 10],
 [6, 13],
 [2, 3],
 [6, 16],
 [9, 15],
 [9, 18],
 [7, 8],
 [4, 17],
 [6, 24],
 [3, 23],
 [2, 5],
 [9, 11],
 [5, 21],
 [9, 8],
 [6, 10],
 [9, 13],
 [1, 1],
 [6, 18],
 [2, 6],
 [9, 20],
 [6, 22],
 [8, 3],
 [7, 21],
 [4, 12],
 [8, 6],
 [1, 2],
 [9, 23],
 [3, 14],
 [5, 3],
 [3, 13],
 [9, 5],
 [5, 4],
 [4, 13],
 [6, 7],
 [7, 15],
 [8, 7],
 [8, 9],
 [6, 12],
 [3, 19],
 [9, 14],
 [5, 17],
 [4, 18],
 [8, 1

In [110]:
bot.recommend_episode()

This might take a minute...

JERRY: You know the message you're sending out to the world with these sweatpants? You're telling the world, 'I give up. I can't compete in normal society. I'm miserable, so I might as well be comfortable.'
                        
[('S07E01', 0.9232348912109218), ('S02E09', 0.9226422593990806), ('S05E08', 0.9218102442975412)]


In [140]:
scores = bot.similarity_scores_

In [145]:
episode_rec_chat(scores)

How many episodes do you want me to recommend? 3


S07E01
S02E09
S05E08


In [198]:
def episode_rec_chat(scores):
    scores_list = []
    for i in range(len(scores)):
        scores_list.append([int(scores[i][0][1:3]), int(scores[i][0][-2:])])
    for i in range(len(scores_list)):
        episode = series['episodes'][scores_list[i][0]][scores_list[i][1]]
        title = episode['title']
        plot = episode['plot']
        res = input(f'Based on your chat dialogue, I would recommend you check out Seinfeld Season {scores_list[0][0]}, episode {scores_list[0][1]}, "{title}". Do you want to know the plot?')
        if res == 'yes':
            print(plot)
#         res = input('Do you want a link to the show?')
#         if res == 
        res = input('Do you want another recommendation?')
        if res == 'no':
            print('OK. Thanks for chatting')
            break
    return scores_list


In [200]:
scores_list = episode_rec_chat(scores)

Based on your chat dialogue, I would recommend you check out Seinfeld Season 7, episode 1, "The Engagement". Do you want to know the plot? no
Do you want another recommendation? yes
Based on your chat dialogue, I would recommend you check out Seinfeld Season 7, episode 1, "The Deal". Do you want to know the plot? no
Do you want another recommendation? ad
Based on your chat dialogue, I would recommend you check out Seinfeld Season 7, episode 1, "The Barber". Do you want to know the plot? yes



    Jerry regrets giving his longtime barber the heave-ho after he gets a ridiculous-looking haircut. George gets a job interview but the boss leaves him with a very cryptic ending. Kramer begs Elaine to let him take part in her charity bachelor auction.    


Do you want another recommendation? no


In [190]:
series['episodes'][scores_list[0][0]][scores_list[0][1]]['title']

'The Engagement'

In [188]:
scores_list[0][0]

7

In [111]:
import imdb
ia = imdb.IMDb()

2020-11-07 17:15:12,134 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:262: using lxml
INFO:imdbpy.parser.http.piculet:using lxml


In [114]:
series = ia.get_movie('0098904')

2020-11-07 17:17:16,130 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 17:17:16,464 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder


In [120]:
ia.update(series, 'episodes')
sorted(series['episodes'].keys())


2020-11-07 17:19:24,327 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 17:19:24,866 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 17:19:25,582 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 17:19:26,236 INFO [imdbpy.parser.http.piculet] /Users/alexander.fioto/opt/anaconda3/lib/python3.7/site-packages/imdb/parser/http/piculet.py:681: using lxml html builder
INFO:imdbpy.parser.http.piculet:using lxml html builder
2020-11-07 17:19:27,057 INFO [imdbpy.parser.http.piculet

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [135]:
episode = series['episodes'][7][5]

In [178]:
episode['title']

'The Hot Tub'

In [182]:
series[]

<Movie id:0098904[http] title:_"Seinfeld" (1989)_>