In [17]:
import pandas as pd
import numpy as np
np.set_printoptions(suppress=True) # Suppress scientific notation where possible

from ast import literal_eval

import re
import string

In [18]:
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [19]:
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk

In [20]:
import spacy

In [21]:
from sklearn.decomposition import TruncatedSVD

from scipy.sparse import csr_matrix

from sklearn.preprocessing import StandardScaler

In [22]:
import seaborn as sns

from matplotlib import pyplot as plt
%config InlineBackend.figure_formats = ['retina']  # or svg
%matplotlib inline

In [23]:
# Make some room to see stuff (i.e. drop display limits on Pandas rows & cols - be careful w/ big df's!)

pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)

## 1) Read in the data

In [24]:
movies = pd.read_csv('final_dataset.csv', lineterminator='\n', index_col=0)
print(movies.shape)
movies.head()

(47723, 8)


Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors
0,1,862,114709,Toy Story,"['Adventure', 'Animation', 'Children', 'Comedy...","Led by Woody, Andy's toys live happily in his ...",john_lasseter,"['tom_hanks', 'tim_allen', 'don_rickles', 'jim..."
1,2,8844,113497,Jumanji,"['Adventure', 'Children', 'Fantasy']",When siblings Judy and Peter discover an encha...,joe_johnston,"['robin_williams', 'kirsten_dunst', 'bradley_p..."
2,3,15602,113228,Grumpier Old Men,"['Comedy', 'Romance']",A family wedding reignites the ancient feud be...,howard_deutch,"['walter_matthau', 'jack_lemmon', 'ann-margret..."
3,4,31357,114885,Waiting to Exhale,"['Comedy', 'Drama', 'Romance']","Cheated on, mistreated and stepped on, the wom...",forest_whitaker,"['whitney_houston', 'angela_bassett', 'loretta..."
4,5,11862,113041,Father of the Bride Part II,['Comedy'],Just when George Banks has recovered from his ...,charles_shyer,"['steve_martin', 'diane_keaton', 'martin_short..."


#### Lists in columns got turned into text strings during save to csv; need to undo that

In [25]:
movies['genres'] = movies['genres'].map(literal_eval)
movies['actors'] = movies['actors'].map(literal_eval)
movies.head()

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors
0,1,862,114709,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]","Led by Woody, Andy's toys live happily in his ...",john_lasseter,"[tom_hanks, tim_allen, don_rickles, jim_varney]"
1,2,8844,113497,Jumanji,"[Adventure, Children, Fantasy]",When siblings Judy and Peter discover an encha...,joe_johnston,"[robin_williams, kirsten_dunst, bradley_pierce..."
2,3,15602,113228,Grumpier Old Men,"[Comedy, Romance]",A family wedding reignites the ancient feud be...,howard_deutch,"[walter_matthau, jack_lemmon, ann-margret, sop..."
3,4,31357,114885,Waiting to Exhale,"[Comedy, Drama, Romance]","Cheated on, mistreated and stepped on, the wom...",forest_whitaker,"[whitney_houston, angela_bassett, loretta_devi..."
4,5,11862,113041,Father of the Bride Part II,[Comedy],Just when George Banks has recovered from his ...,charles_shyer,"[steve_martin, diane_keaton, martin_short, kim..."


## 2) Tokenize the 'overview' column

#### Let's continue to brush aside the Named Entity issue in 'overview'

#### Start by removing digits, capital letters and punctuation

In [26]:
alpha = lambda x: re.sub('\w*\d\w*', ' ', x)
punc_and_lower = lambda x: re.sub('[%s]' % re.escape(string.punctuation), ' ', x.lower())

movies['plot_tokens'] = movies['overview'].map(alpha).map(punc_and_lower)
movies.head()

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors,plot_tokens
0,1,862,114709,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]","Led by Woody, Andy's toys live happily in his ...",john_lasseter,"[tom_hanks, tim_allen, don_rickles, jim_varney]",led by woody andy s toys live happily in his ...
1,2,8844,113497,Jumanji,"[Adventure, Children, Fantasy]",When siblings Judy and Peter discover an encha...,joe_johnston,"[robin_williams, kirsten_dunst, bradley_pierce...",when siblings judy and peter discover an encha...
2,3,15602,113228,Grumpier Old Men,"[Comedy, Romance]",A family wedding reignites the ancient feud be...,howard_deutch,"[walter_matthau, jack_lemmon, ann-margret, sop...",a family wedding reignites the ancient feud be...
3,4,31357,114885,Waiting to Exhale,"[Comedy, Drama, Romance]","Cheated on, mistreated and stepped on, the wom...",forest_whitaker,"[whitney_houston, angela_bassett, loretta_devi...",cheated on mistreated and stepped on the wom...
4,5,11862,113041,Father of the Bride Part II,[Comedy],Just when George Banks has recovered from his ...,charles_shyer,"[steve_martin, diane_keaton, martin_short, kim...",just when george banks has recovered from his ...


In [27]:
movies['genres'] = movies['genres'].map(lambda x: [el.lower() for el in x])
movies.head()

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors,plot_tokens
0,1,862,114709,Toy Story,"[adventure, animation, children, comedy, fantasy]","Led by Woody, Andy's toys live happily in his ...",john_lasseter,"[tom_hanks, tim_allen, don_rickles, jim_varney]",led by woody andy s toys live happily in his ...
1,2,8844,113497,Jumanji,"[adventure, children, fantasy]",When siblings Judy and Peter discover an encha...,joe_johnston,"[robin_williams, kirsten_dunst, bradley_pierce...",when siblings judy and peter discover an encha...
2,3,15602,113228,Grumpier Old Men,"[comedy, romance]",A family wedding reignites the ancient feud be...,howard_deutch,"[walter_matthau, jack_lemmon, ann-margret, sop...",a family wedding reignites the ancient feud be...
3,4,31357,114885,Waiting to Exhale,"[comedy, drama, romance]","Cheated on, mistreated and stepped on, the wom...",forest_whitaker,"[whitney_houston, angela_bassett, loretta_devi...",cheated on mistreated and stepped on the wom...
4,5,11862,113041,Father of the Bride Part II,[comedy],Just when George Banks has recovered from his ...,charles_shyer,"[steve_martin, diane_keaton, martin_short, kim...",just when george banks has recovered from his ...


#### Tokenize plot summaries - start with simple NLTK word_tokenize:

In [28]:
movies['plot_tokens'] = movies['plot_tokens'].map(word_tokenize)
movies.head()

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors,plot_tokens
0,1,862,114709,Toy Story,"[adventure, animation, children, comedy, fantasy]","Led by Woody, Andy's toys live happily in his ...",john_lasseter,"[tom_hanks, tim_allen, don_rickles, jim_varney]","[led, by, woody, andy, s, toys, live, happily,..."
1,2,8844,113497,Jumanji,"[adventure, children, fantasy]",When siblings Judy and Peter discover an encha...,joe_johnston,"[robin_williams, kirsten_dunst, bradley_pierce...","[when, siblings, judy, and, peter, discover, a..."
2,3,15602,113228,Grumpier Old Men,"[comedy, romance]",A family wedding reignites the ancient feud be...,howard_deutch,"[walter_matthau, jack_lemmon, ann-margret, sop...","[a, family, wedding, reignites, the, ancient, ..."
3,4,31357,114885,Waiting to Exhale,"[comedy, drama, romance]","Cheated on, mistreated and stepped on, the wom...",forest_whitaker,"[whitney_houston, angela_bassett, loretta_devi...","[cheated, on, mistreated, and, stepped, on, th..."
4,5,11862,113041,Father of the Bride Part II,[comedy],Just when George Banks has recovered from his ...,charles_shyer,"[steve_martin, diane_keaton, martin_short, kim...","[just, when, george, banks, has, recovered, fr..."


In [29]:
movies['director'] = movies['director'].map(lambda x: [x])
movies.head()

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors,plot_tokens
0,1,862,114709,Toy Story,"[adventure, animation, children, comedy, fantasy]","Led by Woody, Andy's toys live happily in his ...",[john_lasseter],"[tom_hanks, tim_allen, don_rickles, jim_varney]","[led, by, woody, andy, s, toys, live, happily,..."
1,2,8844,113497,Jumanji,"[adventure, children, fantasy]",When siblings Judy and Peter discover an encha...,[joe_johnston],"[robin_williams, kirsten_dunst, bradley_pierce...","[when, siblings, judy, and, peter, discover, a..."
2,3,15602,113228,Grumpier Old Men,"[comedy, romance]",A family wedding reignites the ancient feud be...,[howard_deutch],"[walter_matthau, jack_lemmon, ann-margret, sop...","[a, family, wedding, reignites, the, ancient, ..."
3,4,31357,114885,Waiting to Exhale,"[comedy, drama, romance]","Cheated on, mistreated and stepped on, the wom...",[forest_whitaker],"[whitney_houston, angela_bassett, loretta_devi...","[cheated, on, mistreated, and, stepped, on, th..."
4,5,11862,113041,Father of the Bride Part II,[comedy],Just when George Banks has recovered from his ...,[charles_shyer],"[steve_martin, diane_keaton, martin_short, kim...","[just, when, george, banks, has, recovered, fr..."


#### Combine all tokens into one 'corpus' column

In [30]:
cols = ['genres', 'director', 'actors', 'plot_tokens']

movies['corpus'] = ''

for n, row in movies.iterrows():
    val = ''
    for col in cols:
        val += ' '.join(row[col])
        val += ' '
    movies.at[n,'corpus'] = val

In [31]:
movies.head()

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors,plot_tokens,corpus
0,1,862,114709,Toy Story,"[adventure, animation, children, comedy, fantasy]","Led by Woody, Andy's toys live happily in his ...",[john_lasseter],"[tom_hanks, tim_allen, don_rickles, jim_varney]","[led, by, woody, andy, s, toys, live, happily,...",adventure animation children comedy fantasy jo...
1,2,8844,113497,Jumanji,"[adventure, children, fantasy]",When siblings Judy and Peter discover an encha...,[joe_johnston],"[robin_williams, kirsten_dunst, bradley_pierce...","[when, siblings, judy, and, peter, discover, a...",adventure children fantasy joe_johnston robin_...
2,3,15602,113228,Grumpier Old Men,"[comedy, romance]",A family wedding reignites the ancient feud be...,[howard_deutch],"[walter_matthau, jack_lemmon, ann-margret, sop...","[a, family, wedding, reignites, the, ancient, ...",comedy romance howard_deutch walter_matthau ja...
3,4,31357,114885,Waiting to Exhale,"[comedy, drama, romance]","Cheated on, mistreated and stepped on, the wom...",[forest_whitaker],"[whitney_houston, angela_bassett, loretta_devi...","[cheated, on, mistreated, and, stepped, on, th...",comedy drama romance forest_whitaker whitney_h...
4,5,11862,113041,Father of the Bride Part II,[comedy],Just when George Banks has recovered from his ...,[charles_shyer],"[steve_martin, diane_keaton, martin_short, kim...","[just, when, george, banks, has, recovered, fr...",comedy charles_shyer steve_martin diane_keaton...


In [32]:
movies['corpus'][0]

'adventure animation children comedy fantasy john_lasseter tom_hanks tim_allen don_rickles jim_varney led by woody andy s toys live happily in his room until andy s birthday brings buzz lightyear onto the scene afraid of losing his place in andy s heart woody plots against buzz but when circumstances separate buzz and woody from their owner the duo eventually learns to put aside their differences '

#### Initially, let's skip stemming and just go straight to vectorizing

#### The trend seems to be fewer columns BEFORE SVD is better 

#### Use a customized class extension of CountVectorizer this time 
#### (See https://towardsdatascience.com/hacking-scikit-learns-vectorizers-9ef26a7170af )

In [35]:
# defines a custom vectorizer class
class CustomVectorizer(CountVectorizer): 
    
    # overwrite the build_analyzer method, allowing one to
    # create a custom analyzer for the vectorizer
    def build_analyzer(self):
        
        # load stop words using CountVectorizer's built in method
        stop_words = self.get_stop_words()
        
        # create the analyzer that will be returned by this method
        def analyser(doc):
            
            # load spaCy's model for english language
            spacy.load('en_core_web_sm')
            
            # instantiate a spaCy tokenizer
            lemmatizer = spacy.lang.en.English()
            
            # apply the preprocessing and tokenzation steps
            doc_clean = doc.lower()
            tokens = lemmatizer(doc_clean)
            lemmatized_tokens = [token.lemma_ for token in tokens]
            
            # use CountVectorizer's _word_ngrams built in method
            # to remove stop words and extract n-grams
            return(self._word_ngrams(lemmatized_tokens, stop_words))
        return(analyser)


In [36]:
cv1 = CustomVectorizer(stop_words='english', max_features=50, ngram_range=(1, 1))

X_cv1 = cv1.fit_transform(movies['corpus'])

doc_term = pd.DataFrame(X_cv1.toarray(), columns=cv1.get_feature_names())
doc_term.shape

(47723, 1)

#### Wow, if this is even real (meaningful), no dimensionality reduction is needed

In [39]:
doc_term.value_counts()

32     919
33     834
28     832
31     822
29     807
27     800
30     799
36     798
34     785
26     756
35     752
25     747
37     732
38     723
40     704
44     701
39     668
46     667
43     661
47     656
41     652
42     648
23     641
24     629
45     621
49     596
50     589
48     573
51     542
22     531
59     516
54     511
71     505
60     500
61     498
52     491
55     489
70     488
57     487
21     483
77     479
68     475
66     474
80     473
56     473
74     473
53     473
73     471
69     471
76     468
58     465
75     464
63     456
79     452
78     450
62     448
72     443
64     435
67     435
65     426
20     406
83     391
81     384
82     373
84     353
87     324
86     314
19     312
85     298
88     290
89     288
18     249
91     248
90     241
92     217
93     217
94     202
95     200
97     195
96     193
17     187
100    177
99     174
102    155
108    152
101    149
98     149
107    148
110    148
106    145
104    140

#### Now apply dimensionality reduction to the dataset

#### Compute Cosine Similarities in this reduced space

In [40]:
cosine_sim = cosine_similarity(doc_term, doc_term)

print(cosine_sim.shape)
print(cosine_sim)
np.save('cosine_similarities_2b.npy', cosine_sim)

(47723, 47723)
[[1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 ...
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]


In [41]:
cosine_sim = np.load('cosine_similarities_2b.npy')

In [42]:
def recommend(movie_id, len=20):
    movie_idx = movies[movies['movieId'] == movie_id].index[0]
    rec_indices = cosine_sim[movie_idx,:].argsort()[::-1][0:len+1]
    rec_indices = [rec_idx for rec_idx in rec_indices if rec_idx != movie_idx]
    print('Recommendations based on your interest in {}:'
               .format(movies[movies['movieId'] == movie_id]['title'].values[0]))
    recommendations = movies.iloc[rec_indices, np.r_[3:5, 6:8]]
    recommendations['relevancy'] = cosine_sim[movie_idx, rec_indices]
    return recommendations

In [43]:
recommend(1, 10)

Recommendations based on your interest in Toy Story:


Unnamed: 0,title,genres,director,actors,relevancy
47722,Leal,"[action, crime, drama]",[rodrigo_salomón],"[luis_aguirre, fini_bocchino, bruno_sosa_bofin...",1.0
15904,Treeless Mountain,[drama],[so_yong_kim],"[kim_mi-hyang, kim_hee-yeon, kim_seong-hee, le...",1.0
15913,Birdwatchers,[drama],[marco_bechis],"[claudio_santamaria, alicélia_batista_cabreira...",1.0
15912,Poetry,[drama],[lee_chang-dong],"[yoon_jeong-hee, lee_da-wit, kim_hee-ra, ahn_n...",1.0
15911,S21: The Khmer Rouge Death Machine,"[documentary, war]",[rithy_panh],"[chum_mey, khieu_'poev'_ches, yeay_cheu, nhiem...",1.0
15910,The Nun,[drama],[jacques_rivette],"[anna_karina, liselotte_pulver, micheline_pres...",1.0
15909,Congorama,"[comedy, drama]",[philippe_falardeau],"[paul_ahmarani, olivier_gourmet, jean-pierre_c...",1.0
15908,Throw Down,[drama],[johnnie_to],"[louis_koo, aaron_kwok, cherrie_ying, tony_leu...",1.0
15907,Phone Call from a Stranger,[drama],[jean_negulesco],"[gary_merrill, shelley_winters, michael_rennie...",1.0
15906,Beware of a Holy Whore,"[comedy, drama]",[rainer_werner_fassbinder],"[lou_castel, eddie_constantine, marquard_bohm,...",1.0


#### Useless!

In [34]:
movies[movies['title'] == 'Chinatown']

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors,plot_tokens,corpus
1161,1252,829,71315,Chinatown,"[crime, film-noir, mystery, thriller]",Private eye Jake Gittes lives off of the murky...,[roman_polanski],"[jack_nicholson, faye_dunaway, john_huston, pe...","[private, eye, jake, gittes, lives, off, of, t...",crime film-noir mystery thriller roman_polansk...


In [44]:
recommend(1252, 10)

Recommendations based on your interest in Chinatown:


Unnamed: 0,title,genres,director,actors,relevancy
47722,Leal,"[action, crime, drama]",[rodrigo_salomón],"[luis_aguirre, fini_bocchino, bruno_sosa_bofin...",1.0
15904,Treeless Mountain,[drama],[so_yong_kim],"[kim_mi-hyang, kim_hee-yeon, kim_seong-hee, le...",1.0
15913,Birdwatchers,[drama],[marco_bechis],"[claudio_santamaria, alicélia_batista_cabreira...",1.0
15912,Poetry,[drama],[lee_chang-dong],"[yoon_jeong-hee, lee_da-wit, kim_hee-ra, ahn_n...",1.0
15911,S21: The Khmer Rouge Death Machine,"[documentary, war]",[rithy_panh],"[chum_mey, khieu_'poev'_ches, yeay_cheu, nhiem...",1.0
15910,The Nun,[drama],[jacques_rivette],"[anna_karina, liselotte_pulver, micheline_pres...",1.0
15909,Congorama,"[comedy, drama]",[philippe_falardeau],"[paul_ahmarani, olivier_gourmet, jean-pierre_c...",1.0
15908,Throw Down,[drama],[johnnie_to],"[louis_koo, aaron_kwok, cherrie_ying, tony_leu...",1.0
15907,Phone Call from a Stranger,[drama],[jean_negulesco],"[gary_merrill, shelley_winters, michael_rennie...",1.0
15906,Beware of a Holy Whore,"[comedy, drama]",[rainer_werner_fassbinder],"[lou_castel, eddie_constantine, marquard_bohm,...",1.0


In [36]:
movies[movies['title'] == 'Raging Bull']

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors,plot_tokens,corpus
1139,1228,1578,81398,Raging Bull,[drama],"The life of boxer Jake LaMotta, whose violence...",[martin_scorsese],"[robert_de_niro, joe_pesci, cathy_moriarty, fr...","[the, life, of, boxer, jake, lamotta, whose, v...",drama martin_scorsese robert_de_niro joe_pesci...


In [45]:
recommend(1228, 10)

Recommendations based on your interest in Raging Bull:


Unnamed: 0,title,genres,director,actors,relevancy
47722,Leal,"[action, crime, drama]",[rodrigo_salomón],"[luis_aguirre, fini_bocchino, bruno_sosa_bofin...",1.0
15904,Treeless Mountain,[drama],[so_yong_kim],"[kim_mi-hyang, kim_hee-yeon, kim_seong-hee, le...",1.0
15913,Birdwatchers,[drama],[marco_bechis],"[claudio_santamaria, alicélia_batista_cabreira...",1.0
15912,Poetry,[drama],[lee_chang-dong],"[yoon_jeong-hee, lee_da-wit, kim_hee-ra, ahn_n...",1.0
15911,S21: The Khmer Rouge Death Machine,"[documentary, war]",[rithy_panh],"[chum_mey, khieu_'poev'_ches, yeay_cheu, nhiem...",1.0
15910,The Nun,[drama],[jacques_rivette],"[anna_karina, liselotte_pulver, micheline_pres...",1.0
15909,Congorama,"[comedy, drama]",[philippe_falardeau],"[paul_ahmarani, olivier_gourmet, jean-pierre_c...",1.0
15908,Throw Down,[drama],[johnnie_to],"[louis_koo, aaron_kwok, cherrie_ying, tony_leu...",1.0
15907,Phone Call from a Stranger,[drama],[jean_negulesco],"[gary_merrill, shelley_winters, michael_rennie...",1.0
15906,Beware of a Holy Whore,"[comedy, drama]",[rainer_werner_fassbinder],"[lou_castel, eddie_constantine, marquard_bohm,...",1.0


In [38]:
movies[movies['title'] == 'Happy Gilmore']

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors,plot_tokens,corpus
102,104,9614,116483,Happy Gilmore,[comedy],Failed hockey player-turned-golf whiz Happy Gi...,[dennis_dugan],"[adam_sandler, christopher_mcdonald, julie_bow...","[failed, hockey, player, turned, golf, whiz, h...",comedy dennis_dugan adam_sandler christopher_m...


In [46]:
recommend(104, 10)

Recommendations based on your interest in Happy Gilmore:


Unnamed: 0,title,genres,director,actors,relevancy
47722,Leal,"[action, crime, drama]",[rodrigo_salomón],"[luis_aguirre, fini_bocchino, bruno_sosa_bofin...",1.0
15904,Treeless Mountain,[drama],[so_yong_kim],"[kim_mi-hyang, kim_hee-yeon, kim_seong-hee, le...",1.0
15913,Birdwatchers,[drama],[marco_bechis],"[claudio_santamaria, alicélia_batista_cabreira...",1.0
15912,Poetry,[drama],[lee_chang-dong],"[yoon_jeong-hee, lee_da-wit, kim_hee-ra, ahn_n...",1.0
15911,S21: The Khmer Rouge Death Machine,"[documentary, war]",[rithy_panh],"[chum_mey, khieu_'poev'_ches, yeay_cheu, nhiem...",1.0
15910,The Nun,[drama],[jacques_rivette],"[anna_karina, liselotte_pulver, micheline_pres...",1.0
15909,Congorama,"[comedy, drama]",[philippe_falardeau],"[paul_ahmarani, olivier_gourmet, jean-pierre_c...",1.0
15908,Throw Down,[drama],[johnnie_to],"[louis_koo, aaron_kwok, cherrie_ying, tony_leu...",1.0
15907,Phone Call from a Stranger,[drama],[jean_negulesco],"[gary_merrill, shelley_winters, michael_rennie...",1.0
15906,Beware of a Holy Whore,"[comedy, drama]",[rainer_werner_fassbinder],"[lou_castel, eddie_constantine, marquard_bohm,...",1.0


In [40]:
movies[movies['title'] == "You've Got Mail"]

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors,plot_tokens,corpus
2242,2424,9489,128853,You've Got Mail,"[comedy, romance]","Book superstore magnate, Joe Fox and independe...",[nora_ephron],"[tom_hanks, meg_ryan, greg_kinnear, parker_posey]","[book, superstore, magnate, joe, fox, and, ind...",comedy romance nora_ephron tom_hanks meg_ryan ...


In [47]:
recommend(2424, 10)

Recommendations based on your interest in You've Got Mail:


Unnamed: 0,title,genres,director,actors,relevancy
47722,Leal,"[action, crime, drama]",[rodrigo_salomón],"[luis_aguirre, fini_bocchino, bruno_sosa_bofin...",1.0
15904,Treeless Mountain,[drama],[so_yong_kim],"[kim_mi-hyang, kim_hee-yeon, kim_seong-hee, le...",1.0
15913,Birdwatchers,[drama],[marco_bechis],"[claudio_santamaria, alicélia_batista_cabreira...",1.0
15912,Poetry,[drama],[lee_chang-dong],"[yoon_jeong-hee, lee_da-wit, kim_hee-ra, ahn_n...",1.0
15911,S21: The Khmer Rouge Death Machine,"[documentary, war]",[rithy_panh],"[chum_mey, khieu_'poev'_ches, yeay_cheu, nhiem...",1.0
15910,The Nun,[drama],[jacques_rivette],"[anna_karina, liselotte_pulver, micheline_pres...",1.0
15909,Congorama,"[comedy, drama]",[philippe_falardeau],"[paul_ahmarani, olivier_gourmet, jean-pierre_c...",1.0
15908,Throw Down,[drama],[johnnie_to],"[louis_koo, aaron_kwok, cherrie_ying, tony_leu...",1.0
15907,Phone Call from a Stranger,[drama],[jean_negulesco],"[gary_merrill, shelley_winters, michael_rennie...",1.0
15906,Beware of a Holy Whore,"[comedy, drama]",[rainer_werner_fassbinder],"[lou_castel, eddie_constantine, marquard_bohm,...",1.0


In [42]:
movies[movies['title'] == "Reservoir Dogs"]

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors,plot_tokens,corpus
1020,1089,500,105236,Reservoir Dogs,"[crime, mystery, thriller]",A botched robbery indicates a police informant...,[quentin_tarantino],"[harvey_keitel, tim_roth, michael_madsen, chri...","[a, botched, robbery, indicates, a, police, in...",crime mystery thriller quentin_tarantino harve...


In [48]:
recommend(1089, 10)

Recommendations based on your interest in Reservoir Dogs:


Unnamed: 0,title,genres,director,actors,relevancy
47722,Leal,"[action, crime, drama]",[rodrigo_salomón],"[luis_aguirre, fini_bocchino, bruno_sosa_bofin...",1.0
15904,Treeless Mountain,[drama],[so_yong_kim],"[kim_mi-hyang, kim_hee-yeon, kim_seong-hee, le...",1.0
15913,Birdwatchers,[drama],[marco_bechis],"[claudio_santamaria, alicélia_batista_cabreira...",1.0
15912,Poetry,[drama],[lee_chang-dong],"[yoon_jeong-hee, lee_da-wit, kim_hee-ra, ahn_n...",1.0
15911,S21: The Khmer Rouge Death Machine,"[documentary, war]",[rithy_panh],"[chum_mey, khieu_'poev'_ches, yeay_cheu, nhiem...",1.0
15910,The Nun,[drama],[jacques_rivette],"[anna_karina, liselotte_pulver, micheline_pres...",1.0
15909,Congorama,"[comedy, drama]",[philippe_falardeau],"[paul_ahmarani, olivier_gourmet, jean-pierre_c...",1.0
15908,Throw Down,[drama],[johnnie_to],"[louis_koo, aaron_kwok, cherrie_ying, tony_leu...",1.0
15907,Phone Call from a Stranger,[drama],[jean_negulesco],"[gary_merrill, shelley_winters, michael_rennie...",1.0
15906,Beware of a Holy Whore,"[comedy, drama]",[rainer_werner_fassbinder],"[lou_castel, eddie_constantine, marquard_bohm,...",1.0


In [44]:
movies[movies['title'] == "Star Wars"]

Unnamed: 0,movieId,tmdbId,imdbId,title,genres,overview,director,actors,plot_tokens,corpus
252,260,11,76759,Star Wars,"[action, adventure, sci-fi]",Princess Leia is captured and held hostage by ...,[george_lucas],"[mark_hamill, harrison_ford, carrie_fisher, pe...","[princess, leia, is, captured, and, held, host...",action adventure sci-fi george_lucas mark_hami...


In [49]:
recommend(260, 10)

Recommendations based on your interest in Star Wars:


Unnamed: 0,title,genres,director,actors,relevancy
47722,Leal,"[action, crime, drama]",[rodrigo_salomón],"[luis_aguirre, fini_bocchino, bruno_sosa_bofin...",1.0
15904,Treeless Mountain,[drama],[so_yong_kim],"[kim_mi-hyang, kim_hee-yeon, kim_seong-hee, le...",1.0
15913,Birdwatchers,[drama],[marco_bechis],"[claudio_santamaria, alicélia_batista_cabreira...",1.0
15912,Poetry,[drama],[lee_chang-dong],"[yoon_jeong-hee, lee_da-wit, kim_hee-ra, ahn_n...",1.0
15911,S21: The Khmer Rouge Death Machine,"[documentary, war]",[rithy_panh],"[chum_mey, khieu_'poev'_ches, yeay_cheu, nhiem...",1.0
15910,The Nun,[drama],[jacques_rivette],"[anna_karina, liselotte_pulver, micheline_pres...",1.0
15909,Congorama,"[comedy, drama]",[philippe_falardeau],"[paul_ahmarani, olivier_gourmet, jean-pierre_c...",1.0
15908,Throw Down,[drama],[johnnie_to],"[louis_koo, aaron_kwok, cherrie_ying, tony_leu...",1.0
15907,Phone Call from a Stranger,[drama],[jean_negulesco],"[gary_merrill, shelley_winters, michael_rennie...",1.0
15906,Beware of a Holy Whore,"[comedy, drama]",[rainer_werner_fassbinder],"[lou_castel, eddie_constantine, marquard_bohm,...",1.0


#### It's just the same list every time - USELESS!