In [1]:
import pandas as pd
import numpy as np
import nltk
import networkx as nx
import re
import string
from nltk import word_tokenize
from nltk.corpus import stopwords
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem import WordNetLemmatizer

In [2]:
# Load the line-by-line script and the episode-name lookup table.
script_csv = 'the-office-lines (drive) - scripts.csv'
episode_csv = 'The Office episode names2.csv'

df = pd.read_csv(script_csv)
df_episode_full = pd.read_csv(episode_csv)

In [3]:
# Remove every bracketed span "[...]" (shortest match) from each line.
bracketed = re.compile(r'\[.*?\]')
df['line_text'] = df['line_text'].apply(lambda text: bracketed.sub('', text))

In [4]:
# Display the script frame after bracketed spans were removed from line_text.
df

Unnamed: 0,id,season,episode,scene,line_text,speaker,deleted
0,1,1,1,1,All right Jim. Your quarterlies look very good...,Michael,False
1,2,1,1,1,"Oh, I told you. I couldn't close it. So...",Jim,False
2,3,1,1,1,So you've come to the master for guidance? Is ...,Michael,False
3,4,1,1,1,"Actually, you called me in here, but yeah.",Jim,False
4,5,1,1,1,"All right. Well, let me show you how it's done.",Michael,False
5,6,1,1,2,"Yes, I'd like to speak to your office manager...",Michael,False
6,7,1,1,3,"I've, uh, I've been at Dunder Mifflin for 12 y...",Michael,False
7,8,1,1,3,Well. I don't know.,Pam,False
8,9,1,1,3,"If you think she's cute now, you should have s...",Michael,False
9,10,1,1,3,What?,Pam,False


In [5]:
# Drop the first and last character of every title.
# NOTE: re-running this cell trims the titles again.
trimmed_titles = df_episode_full['title'].str.slice(1, -1)
df_episode_full['title'] = trimmed_titles

# Lookup table without the 'date' column.
df_episode_list = df_episode_full.drop('date', axis=1)

In [6]:
# Keep only lines whose 'deleted' flag is False; the flag column is then redundant.
df = df[df['deleted'] == False]
df = df.drop(columns='deleted').reset_index(drop=True)

# Renumber ids 0..n-1 to match the fresh row index. The previous code assigned
# the whole frame returned by reset_index() to this single column, which
# current pandas rejects with "Columns must be same length as key".
df['id'] = df.index

### Preprocess the text

In [7]:
# Text preprocessing steps - remove tokens containing digits, a stray special
# character, punctuation and capital letters.
def alphanumeric(x):
    """Delete every run of word characters that contains at least one digit."""
    # Raw string: '\w' and '\d' are invalid escapes in a normal string literal.
    return re.sub(r'\w*\d\w*', '', x)


def special_char(x):
    """Delete every occurrence of the '�' character."""
    return re.sub('�', '', x)


def punc_lower(x):
    """Lower-case the text, then delete every character in string.punctuation."""
    return re.sub('[%s]' % re.escape(string.punctuation), '', x.lower())

# Run the three cleaners over line_text in order: digits, special char, punctuation/case.
cleaned = df['line_text']
for step in (alphanumeric, special_char, punc_lower):
    cleaned = cleaned.map(step)
df['line_text'] = cleaned

In [8]:
# Lower-case every speaker name.
def lower(x):
    return x.lower()

df['speaker'] = df['speaker'].map(lower)

### Creating documents by episode

In [9]:
# One document per (season, episode): its lines joined with single spaces.
episode_text = df.groupby(['season', 'episode'])['line_text'].apply(' '.join)
df_episode = episode_text.reset_index()

df_episode.shape

(186, 3)

### Creating documents by scene

In [10]:
# One document per (season, episode, scene): its lines joined with single spaces.
scene_text = df.groupby(['season', 'episode', 'scene'])['line_text'].apply(' '.join)
df_scene = scene_text.reset_index()

df_scene.shape

(8849, 4)

### Creating documents by character and season (appearing in at least 4 seasons)

In [11]:
# One document per (season, speaker).
season_speaker_text = df.groupby(['season', 'speaker'])['line_text'].apply(' '.join)
df_char_season = season_speaker_text.reset_index()

#4 seasons: keep speakers that have more than 3 (season, speaker) rows
rows_per_speaker = df_char_season['speaker'].value_counts()
recurring_speakers = rows_per_speaker[rows_per_speaker > 3].index
df_char_season2 = df_char_season[df_char_season['speaker'].isin(recurring_speakers)]

In [12]:
# Row/column counts before and after the recurring-speaker filter.
for frame in (df_char_season, df_char_season2):
    print(frame.shape)

(1139, 3)
(293, 3)


### Creating documents by speaker

In [13]:
# Speakers retained for the per-speaker documents.
characters = (
    'andy angela darryl dwight jan jim kelly '
    'kevin michael oscar pam phyllis ryan toby'
).split()

In [14]:
# One document per speaker, then restrict to the names in `characters`.
speaker_text = df.groupby(['speaker'])['line_text'].apply(' '.join)
df_speaker = speaker_text.reset_index()

is_listed = df_speaker['speaker'].isin(characters)
df_speaker2 = df_speaker[is_listed]

In [15]:
# Row/column counts before and after restricting to the listed characters.
for frame in (df_speaker, df_speaker2):
    print(frame.shape)

(759, 2)
(14, 2)


### Creating documents by character and episode

In [16]:
# One document per (season, episode, speaker).
char_episode_text = df.groupby(['season', 'episode', 'speaker'])['line_text'].apply(' '.join)
df_char_episode = char_episode_text.reset_index()

In [17]:
# Attach the columns of df_episode_list by (season, episode).
# NOTE: this overwrites df_char_episode, so re-running the cell merges again.
df_char_episode = pd.merge(df_char_episode, df_episode_list, on=['season', 'episode'])

In [18]:
# First rows of the per-(episode, speaker) documents after the merge with df_episode_list.
df_char_episode.head()

Unnamed: 0,season,episode,speaker,line_text,title
0,1,1,angela,i bet its gonna be me probably gonna be me,Pilot
1,1,1,dwight,shall i play for you pa rum pump um pum i ha...,Pilot
2,1,1,jan,just before lunch that would be great alright...,Pilot
3,1,1,jim,oh i told you i couldnt close it so actually y...,Pilot
4,1,1,kevin,yeah itll be you,Pilot


In [19]:
# (rows, columns) of the merged per-(episode, speaker) frame.
df_char_episode.shape

(4044, 5)

In [20]:
# Keep only documents whose text is longer than 119 characters.
is_long_enough = df_char_episode['line_text'].str.len().gt(119)
df_char_episode2 = df_char_episode[is_long_enough]

In [21]:
# First rows that passed the document-length filter.
df_char_episode2.head()

Unnamed: 0,season,episode,speaker,line_text,title
1,1,1,dwight,shall i play for you pa rum pump um pum i ha...,Pilot
2,1,1,jan,just before lunch that would be great alright...,Pilot
3,1,1,jim,oh i told you i couldnt close it so actually y...,Pilot
6,1,1,michael,all right jim your quarterlies look very good ...,Pilot
7,1,1,oscar,you guys better update your resumes just like ...,Pilot


In [22]:
# Episode title of every retained document (used as the row index of the topic table).
df_titles = df_char_episode2['title']

In [23]:
# Speaker of every retained document, in row order.
speakers = df_char_episode2['speaker']

In [24]:
# Season of every retained document, in row order.
seasons = df_char_episode2.season

In [25]:
# # if speaker appears in at least 4 seasons
# df_char_episode2 = df_char_episode[df_char_episode['speaker'].isin(df_char_episode['speaker'].value_counts()[df_char_episode['speaker'].value_counts()>3].index)]

# Stop words

In [26]:
# Start the custom stop-word set from NLTK's English stop words.
stoplist = set(stopwords.words('english'))



# add words that aren't in the NLTK stopwords list

new_stopwords = (
    "isnt youve shant youre wont wasnt mightnt mustnt couldnt "
    "wouldnt hadnt shouldnt neednt werent havent thatll its "
    "shouldve arent hasnt youll doesnt didnt dont youd shes "
    "im ive hes na da lets whoa alright hello hi wanna ah also ask "
    "three theyre name id place everybody ta room huh blah done wan "
    "seen coming stuff every robert uhhuh took try saying looking made "
    "four nothing mom guess saw use later sometimes yet meant "
    "gabe erin creed senator malone meredith deangelo someone cooky nope "
    "fine point gun head hold still open list "
    "stop cool scene question five minute week"
).split()
#                  'gotta', 'id', 'getting','theyre','huh','gets']
                 
# new_stopwords = ["isnt", "youve", "shant", "youre", "wont",  "wasnt", "mightnt", "mustnt", "couldnt",
#                  "wouldnt", "hadnt", "shouldnt",  "neednt","werent", "havent","thatll", "its",  
#                  "shouldve", "arent", "hasnt", "youll","doesnt", "didnt",  "dont", "youd", "shes",'im',
#                  'oh','uh', 'um', 'think', 'thats','okay', 'know', 'yeah', 'right', 'like', 'well', 
#                  'go', 'hey', 'ok', 'going', 'get', 'good', 'really', 'want', 'one','gonna','hes', 'would','see',
#                  'yes','could','cant','come','got','say','lets','back', 'look','alright','guys','back', 'guys',  
#                  'great', 'man','time','whoa','take','pum','ill','na','wait','us','said','da','make','ive','theres',
#                  'whats','need','sure','lot','nope','way','yall','tell','theyre',
#                  'thank', 'little', 'people', 'thing', 'something', 'two', 'let','three','even','thought']


# Optional character-name stop words (currently none).
char_stopwords = []
# char_stopwords = ['michael', 'jim', 'dwight', 'pam', 'ryan', 'andy', 'erin','robert', 'darryl', 
# 'meredith', 'jessica', 'phyllis', 'deangelo','deangelos','gabe', 'jan', 'josh','karen','roy',
# 'toby','holly','hollys','kevin','angela','kelly','mike','oscar']


# Final stop-word set: NLTK words plus both custom lists.
new_stopwords_list = stoplist | set(new_stopwords) | set(char_stopwords)

# print(new_stopwords_list)
len(new_stopwords_list)

276

# TFIDF

In [27]:
# Number of NMF components; also used to label the topic columns in later cells.
topic_count = 9 #9 is good

In [28]:
class LemmaTokenizer:
    """Callable tokenizer: word-tokenize a document, then lemmatize each token."""

    def __init__(self):
        # One lemmatizer instance is reused for every document.
        self.wnl = WordNetLemmatizer()

    def __call__(self, articles):
        """Return the list of lemmatized tokens of the string `articles`."""
        lemmatize = self.wnl.lemmatize
        return list(map(lemmatize, word_tokenize(articles)))

In [29]:
# TF-IDF features over the per-(episode, speaker) documents.
# stop_words is passed as a sorted list: scikit-learn documents the parameter
# as 'english', a list or None, and recent releases reject a set.
# NOTE(review): the warning printed by this cell ("... 'stop_words.' % sorted(inconsistent)")
# looks like scikit-learn reporting that the stop words are not run through the
# same lemmatizing tokenizer as the documents - confirm whether that matters here.
cv_tfidf = TfidfVectorizer(
    tokenizer=LemmaTokenizer(),
    max_df=.12911,
    min_df=12,
    stop_words=sorted(new_stopwords_list),
)

X_tfidf = cv_tfidf.fit_transform(df_char_episode2.line_text).toarray()

# get_feature_names() was removed in scikit-learn 1.2; use its replacement when
# it exists and fall back to the old method on older installations.
_feature_names = getattr(cv_tfidf, 'get_feature_names_out', None) or cv_tfidf.get_feature_names
df3 = pd.DataFrame(X_tfidf, columns=list(_feature_names()))
df3.shape

  'stop_words.' % sorted(inconsistent))


(2694, 2088)

In [30]:
#TFIDF
# Fix random_state so that re-running the notebook gives the same factorization.
nmf_model = NMF(n_components=topic_count, random_state=0)
doc_topic = nmf_model.fit_transform(X_tfidf) #enter model here

# get_feature_names() was removed in scikit-learn 1.2; use its replacement when
# it exists and fall back to the old method on older installations.
_feature_names = getattr(cv_tfidf, 'get_feature_names_out', None) or cv_tfidf.get_feature_names

# Topic-by-term weight table (rows: topic ids, columns: vocabulary terms).
topic_word = pd.DataFrame(nmf_model.components_.round(3),
             index = list(range(topic_count)),
             columns = list(_feature_names()))
# topic_word

In [31]:
def display_topics(model, feature_names, no_top_words, topic_names=None):
    """Print the highest-weighted terms of every topic in `model.components_`.

    model          -- object exposing `components_` (one weight row per topic)
    feature_names  -- term for each column index of `components_`
    no_top_words   -- how many terms to print per topic
    topic_names    -- optional labels by topic position; a missing or empty
                      label falls back to the topic's number
    """
    for ix, weights in enumerate(model.components_):
        label = topic_names[ix] if topic_names else None
        if label:
            print("\nTopic: '", label, "'")
        else:
            print("\nTopic ", ix)
        # Column indices ordered from largest to smallest weight.
        strongest = weights.argsort()[::-1][:no_top_words]
        print(", ".join(feature_names[i] for i in strongest))

# Print the 10 strongest terms per topic. The term labels come from the
# topic_word table because TfidfVectorizer.get_feature_names() was removed in
# scikit-learn 1.2.
display_topics(nmf_model, topic_word.columns, 10)


Topic  0
wow, night, last, cause, pretty, darryl, car, around, home, keep

Topic  1
party, christmas, phyllis, committee, planning, santa, merry, birthday, cake, gift

Topic  2
baby, cute, daddy, check, picture, shower, kind, hospital, night, child

Topic  3
paper, company, dunder, mifflin, business, client, sale, david, manager, branch

Topic  4
kevin, hot, toby, cupcake, cookie, birthday, inappropriate, hand, oscar, pie

Topic  5
ryan, kelly, business, howard, temp, married, awesome, message, kapoor, fire

Topic  6
mr, scott, dream, sir, true, schrute, relationship, around, halpert, excuse

Topic  7
angela, oscar, gay, wedding, phyllis, wrong, husband, angelas, always, cat

Topic  8
jan, friend, woman, holly, kid, relationship, might, york, excuse, bos


# Count Vectorizer

In [32]:
# topic_count = 15 #12 is better #13 was good. # 14 was hazy

In [33]:
# from sklearn.feature_extraction.text import CountVectorizer

# cv1 = CountVectorizer(max_df=.12, min_df=15, stop_words=new_stopwords_list)

# X = cv1.fit_transform(df_char_episode2.line_text).toarray()

# df2 = pd.DataFrame(X, columns= cv1.get_feature_names())

In [34]:
# nmf_model = NMF(topic_count)
# doc_topic = nmf_model.fit_transform(X) #enter model here

# topic_word = pd.DataFrame(nmf_model.components_.round(3),
#              index = [x for x in range(topic_count)],
#              columns = cv1.get_feature_names())
# # topic_word

In [35]:
# def display_topics(model, feature_names, no_top_words, topic_names=None):
#     for ix, topic in enumerate(model.components_):
#         if not topic_names or not topic_names[ix]:
#             print("\nTopic ", ix)
#         else:
#             print("\nTopic: '",topic_names[ix],"'")
#         print(", ".join([feature_names[i]
#                         for i in topic.argsort()[:-no_top_words - 1:-1]]))

# display_topics(nmf_model, cv1.get_feature_names(), 10)

# START HERE - Squishing the episodes and getting topic distribution

### Seeing the topics

### Averaging groupby by title

In [36]:
# Per-document topic weights (rounded), with the episode title as row label.
topic_ids = list(range(topic_count))
doc_title = pd.DataFrame(doc_topic.round(2), index=df_titles, columns=topic_ids)
# doc_title

# For each topic, the title of the document with the largest weight.
doc_title.idxmax()

0               Jury Duty
1      Moroccan Christmas
2               New Leads
3                   Broke
4          Suit Warehouse
5          Branch Closing
6            Scott's Tots
7          Gay Witch Hunt
8    Women's Appreciation
dtype: object

In [37]:
# Average the topic weights of all documents that share a title.
topic_columns = list(range(topic_count))
grouped_title = doc_title.groupby('title')[topic_columns].mean()

# grouped_title.head()

# Divide every row by its own sum so the topic weights of a title add up to 1.
title_totals = grouped_title.sum(axis=1)
grouped_title_scaled = grouped_title.div(title_totals, axis=0)

grouped_title_scaled.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A Benihana Christmas,0.158537,0.533537,0.064024,0.021341,0.02439,0.094512,0.012195,0.060976,0.030488
A.A.R.M.,0.34188,0.008547,0.08547,0.145299,0.200855,0.008547,0.064103,0.089744,0.055556
After Hours,0.459302,0.005814,0.180233,0.034884,0.011628,0.046512,0.017442,0.122093,0.122093
Andy's Ancestry,0.544554,0.049505,0.089109,0.138614,0.049505,0.029703,0.009901,0.059406,0.029703
Andy's Play,0.448276,0.017241,0.12931,0.017241,0.008621,0.008621,0.189655,0.103448,0.077586


In [38]:
# Title-indexed lookup without the 'episode' column.
df_episode_list2 = df_episode_list.drop(columns='episode').set_index('title')

# Join it onto the scaled topic table and index the result by (season, title).
season_title_df = (
    grouped_title_scaled
    .join(df_episode_list2)
    .reset_index()
    .set_index(['season', 'title'])
)

season_title_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8
season,title,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3,A Benihana Christmas,0.158537,0.533537,0.064024,0.021341,0.02439,0.094512,0.012195,0.060976,0.030488
9,A.A.R.M.,0.34188,0.008547,0.08547,0.145299,0.200855,0.008547,0.064103,0.089744,0.055556
8,After Hours,0.459302,0.005814,0.180233,0.034884,0.011628,0.046512,0.017442,0.122093,0.122093
9,Andy's Ancestry,0.544554,0.049505,0.089109,0.138614,0.049505,0.029703,0.009901,0.059406,0.029703
7,Andy's Play,0.448276,0.017241,0.12931,0.017241,0.008621,0.008621,0.189655,0.103448,0.077586


In [39]:
##Inside function now
# title_search = "A Benihana Christmas"

# a = np.array(grouped_title_scaled.loc[title_search]).reshape(1,-1)
# # a.shape
# b = np.array(grouped_title_scaled)
# # b.shape
# cos = cosine_similarity(b,a)
# grouped_title_scaled.iloc[cos.argsort(axis=0)[-2]]

def similar_episode(epi, top_n=10):
    """Return the rows of grouped_title_scaled most similar to episode `epi`.

    Similarity is the cosine similarity between topic-weight rows. The
    highest-scoring row is skipped (it is expected to be `epi` itself) and
    the next `top_n` rows are returned, most similar first.

    epi    -- episode title; must be a row label of grouped_title_scaled
              (an unknown title raises KeyError from `.loc`)
    top_n  -- number of episodes to return (default 10)
    """
    query = np.array(grouped_title_scaled.loc[epi]).reshape(1, -1)
    scores = cosine_similarity(np.array(grouped_title_scaled), query).ravel()

    # Positions sorted by descending score; position 0 is the best match.
    # Plain numpy indexing replaces pd.np, which was removed in pandas 2.0.
    ranked = scores.argsort()[::-1][1:top_n + 1]
    return grouped_title_scaled.iloc[ranked]

In [40]:
# Example: episodes whose topic mix is closest to 'A Benihana Christmas'.
similar_episode('A Benihana Christmas')

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Moroccan Christmas,0.117949,0.687179,0.005128,0.005128,0.020513,0.025641,0.010256,0.097436,0.030769
Dwight Christmas,0.175879,0.582915,0.005025,0.020101,0.090452,0.01005,0.01005,0.095477,0.01005
Classy Christmas,0.22623,0.409836,0.04918,0.022951,0.013115,0.019672,0.032787,0.101639,0.12459
Christmas Party,0.100877,0.372807,0.078947,0.02193,0.065789,0.179825,0.004386,0.171053,0.004386
Christmas Wishes,0.248731,0.329949,0.015228,0.030457,0.111675,0.06599,0.0,0.081218,0.116751
Secret Santa,0.118919,0.351351,0.021622,0.102703,0.07027,0.032432,0.005405,0.248649,0.048649
Welcome Party,0.277778,0.282407,0.0,0.050926,0.013889,0.106481,0.027778,0.138889,0.101852
Survivor Man,0.265823,0.272152,0.018987,0.037975,0.094937,0.202532,0.0,0.050633,0.056962
Michael's Birthday,0.19084,0.267176,0.007634,0.030534,0.274809,0.045802,0.007634,0.015267,0.160305
Spooked,0.3125,0.182292,0.151042,0.020833,0.057292,0.067708,0.015625,0.104167,0.088542


# Averaging groupby by character Preprocessing

In [41]:
# Work on a deep copy of the per-document topic weights and tag each row with
# the speaker and season of its document.
doc_merge = doc_title.copy(deep=True)
doc_merge['speaker'] = speakers.values
doc_merge['season'] = seasons.values

# Average the topic weights per (season, speaker) and index by that pair.
topic_columns = list(range(topic_count))
doc_merge2 = (
    doc_merge
    .groupby(['season', 'speaker'])[topic_columns]
    .mean()
    .reset_index()
    .set_index(['season', 'speaker'])
)

# doc_merge2.head()

In [42]:
# Divide every row by its own sum, then drop rows containing NaN.
row_totals = doc_merge2.sum(axis=1)
doc_merge_scaled = doc_merge2.div(row_totals, axis=0).dropna()

# doc_merge_scaled.head()

# if speaker appears in at least 5 seasons
speaker_level = doc_merge_scaled.index.get_level_values(1)
rows_per_speaker = speaker_level.value_counts()
frequent_speakers = rows_per_speaker[rows_per_speaker > 4].index
doc_merge_scaled2 = doc_merge_scaled[speaker_level.isin(frequent_speakers)]

# looking at similar char including themselves

In [43]:
# Look up the row for (season 1, michael) and compare it with every row.
input_season = 1
input_char = 'michael'
char_search = (input_season, input_char)

a = doc_merge_scaled2.loc[char_search].values.reshape(1, -1)
b = doc_merge_scaled2.values
cos = cosine_similarity(b, a)

# Second-highest score: the highest is expected to be the query row itself.
runner_up = cos.argsort(axis=0)[-2]
doc_merge_scaled2.iloc[runner_up]

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8
season,speaker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3,dwight,0.252055,0.065753,0.010959,0.186301,0.013699,0.142466,0.057534,0.112329,0.158904


# function to get similar, excluding self

In [44]:
def similar_char(season, char, top_n=10):
    """Return the characters of `season` whose topic mix is closest to `char`.

    Candidates are the rows of doc_merge_scaled2 for the same season,
    excluding `char`. They are ranked by cosine similarity to the
    (season, char) row and the best `top_n` are returned, most similar first.

    season -- season value as stored in level 0 of the index
    char   -- speaker name as stored in level 1 of the index; the pair
              (season, char) must exist, otherwise `.loc` raises KeyError
    top_n  -- maximum number of rows to return (default 10)
    """
    index = doc_merge_scaled2.index
    is_candidate = (index.get_level_values(1) != char) & (index.get_level_values(0) == season)
    test_doc = doc_merge_scaled2[is_candidate]

    query = np.array(doc_merge_scaled2.loc[(season, char)]).reshape(1, -1)
    if test_doc.empty:
        # Nobody to compare against; cosine_similarity would reject an empty array.
        return test_doc

    scores = cosine_similarity(np.array(test_doc), query).ravel()

    # Positions sorted by descending score. Plain numpy indexing replaces
    # pd.np, which was removed in pandas 2.0.
    ranked = scores.argsort()[::-1][:top_n]
    return test_doc.iloc[ranked]

# Example: season-1 characters ranked by similarity to jim.
similar_char(1,'jim')

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8
season,speaker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,michael,0.265896,0.028902,0.011561,0.150289,0.040462,0.156069,0.092486,0.138728,0.115607
1,dwight,0.298701,0.012987,0.077922,0.337662,0.012987,0.012987,0.051948,0.103896,0.090909
1,pam,0.170455,0.102273,0.0,0.147727,0.102273,0.034091,0.113636,0.136364,0.193182
1,stanley,0.555556,0.111111,0.0,0.0,0.111111,0.0,0.111111,0.0,0.111111
1,kevin,0.846154,0.0,0.0,0.0,0.0,0.0,0.076923,0.076923,0.0
1,darryl,0.8,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0
1,phyllis,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,jan,0.085714,0.028571,0.057143,0.514286,0.0,0.0,0.114286,0.0,0.2
1,meredith,0.25,0.166667,0.0,0.0,0.166667,0.166667,0.0,0.083333,0.166667
1,ryan,0.375,0.0,0.03125,0.0625,0.0,0.5,0.0,0.0,0.03125


In [51]:
# # getting character pair vectors
# # getting character pair avg vector
# # Getting episode for all seasons, no season column
# # Getting all episodes in all seasons
# # creating error for non-significant char in flask
# # adding both insignificant
def get_episode_char(season, char, char2):
    """Find the episodes of `season` closest to the average of two characters.

    The topic rows of `char` and `char2` for `season` are taken from
    doc_merge_scaled2 and averaged; the episodes of that season in
    season_title_df are ranked by cosine similarity to the average.

    Returns a DataFrame of up to 11 episodes indexed by (season, title),
    most similar first. When a character has no row for the season, returns
    a one-element list holding a "Sorry! ..." message instead.
    """
    topic_cols = list(range(topic_count))

    in_season = doc_merge_scaled2[doc_merge_scaled2.index.get_level_values(0) == season]
    names = in_season.index.get_level_values(1)
    pair_df = in_season[(names == char2) | (names == char)]

    if pair_df.shape[0] == 0:
        return ["Sorry! Both characters are not significant this season"]

    # Exact comparison: str.contains would treat `char` as a regex and also
    # accept names that merely contain it.
    if not (pair_df.index.get_level_values(1) == char).any():
        return ["Sorry! First character is not significant this season"]

    if pair_df.shape[0] == 1:
        return ["Sorry! Second character is not significant this season"]

    # Average topic vector of the pair (row order does not affect the mean).
    pair_vec = np.array(pair_df[topic_cols].mean(axis=0)).reshape(1, -1)

    # Rank this season's episodes against the pair vector.
    test_episode = season_title_df[season_title_df.index.get_level_values(0) == season]
    scores = cosine_similarity(np.array(test_episode), pair_vec).ravel()

    # Positions sorted by descending score. Plain numpy indexing replaces
    # pd.np, which was removed in pandas 2.0.
    ranked = scores.argsort()[::-1][:11]
    return test_episode.iloc[ranked]
    
# Example: season-1 episodes ranked against the jim/pam average topic vector.
get_episode_char(1,'jim','pam')

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8
season,title,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Diversity Day,0.300971,0.009709,0.019417,0.087379,0.048544,0.019417,0.349515,0.135922,0.029126
1,The Alliance,0.313559,0.135593,0.025424,0.127119,0.084746,0.042373,0.016949,0.177966,0.076271
1,Hot Girl,0.460784,0.0,0.04902,0.137255,0.088235,0.088235,0.019608,0.078431,0.078431
1,Pilot,0.134615,0.019231,0.153846,0.224359,0.0,0.141026,0.141026,0.032051,0.153846
1,Health Care,0.164286,0.014286,0.014286,0.171429,0.357143,0.028571,0.064286,0.057143,0.128571
1,Basketball,0.329545,0.011364,0.159091,0.136364,0.022727,0.170455,0.0,0.170455,0.0


In [52]:
# Titles only, in ranked order, as a plain Python list.
get_episode_char(1, 'michael', 'ryan').reset_index()['title'].tolist()

['Basketball',
 'Hot Girl',
 'The Alliance',
 'Pilot',
 'Diversity Day',
 'Health Care']

# JAVASCRIPT FLASK

In [53]:
import flask
import numpy as np
import pandas as pd
from copy import deepcopy

# Initialize the app
# Flask application object; the route decorators below register handlers on it.
app = flask.Flask(__name__)

@app.route("/")
def viz_page():
    """Return the text of rec.html as the response for the root URL."""
    page = open("rec.html", 'r')
    try:
        return page.read()
    finally:
        page.close()

@app.route("/gof", methods=["POST"])
def score():
    """
    Handle a POST request whose JSON body has a 'grid' entry of the form
    [season, first character, second character].

    Responds with JSON {'grid': [...]} holding either the episode titles
    returned by get_episode_char, or its single "Sorry! ..." message.
    """
    data = flask.request.json
    a = data['grid']
    print (a)

    # NOTE(review): a[0] is compared against the season level of the index
    # inside get_episode_char - confirm the client sends a number, not a string.
    # Run the lookup once; it was previously evaluated twice per request.
    r = get_episode_char(a[0], a[1], a[2])
    if not isinstance(r, list):
        # A DataFrame came back: reply with its titles in ranked order.
        r = list(r.reset_index()['title'])
    return flask.jsonify({'grid': r})

In [54]:
# Start the Flask development server on port 5000.
# NOTE(review): host='0.0.0.0' listens on every network interface - confirm this
# exposure is intended. The call also blocks the cell until the server stops.
app.run(host='0.0.0.0', port=5000)