# Movie Recomendation - Similarity Model

The purpose of this motel is to get recomendations of movies based on a selected movie and related features.

In [1]:
import pandas as pd
import numpy as np

## Importing Database

In [2]:
data = pd.read_csv('../raw_data/treated-db/data.csv')

In [3]:
data.dropna(inplace=True)

In [4]:
data

Unnamed: 0,movieId,tmdbId,title,title_clean,genres,actors,directors,overview,keywords,popularity,vote_count,vote_average,runtime,release
0,1.0,862,Toy Story (1995),['toy story'],"['adventure', 'animation', 'children', 'comedy...","['tomhanks', 'timallen', 'donrickles']",['johnlasseter'],"led by woody, andy's toys live happily in his ...","['martialarts', 'jealousy', 'toy', 'friendship...",70.910,13173.0,7.9,81.0,1995-10-30
1,2.0,8844,Jumanji (1995),['jumanji'],"['adventure', 'children', 'fantasy']","['robinwilliams', 'jonathanhyde', 'kirstendunst']",['joejohnston'],when siblings judy and peter discover an encha...,"['giantinsect', 'boardgame', 'jungle', 'disapp...",11.919,7711.0,7.2,104.0,1995-12-15
2,3.0,15602,Grumpier Old Men (1995),['grumpier old men'],"['comedy', 'romance']","['waltermatthau', 'jacklemmon', 'ann-margret']",['howarddeutch'],a family wedding reignites the ancient feud be...,"['fishing', 'oldman', 'bestfriend', 'duringcre...",13.122,217.0,6.6,101.0,1995-12-22
3,4.0,31357,Waiting to Exhale (1995),['waiting to exhale'],"['comedy', 'drama', 'romance']","['whitneyhouston', 'angelabassett', 'lorettade...",['forestwhitaker'],"cheated on, mistreated and stepped on, the wom...","['basedonnovelorbook', 'interracialrelationshi...",5.123,79.0,6.2,127.0,1995-12-22
4,5.0,11862,Father of the Bride Part II (1995),['father of the bride part ii'],['comedy'],"['stevemartin', 'dianekeaton', 'martinshort']",['charlesshyer'],just when george banks has recovered from his ...,"['parentchildrelationship', 'baby', 'midlifecr...",15.142,446.0,6.2,106.0,1995-12-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23308,208843.0,553882,An Acceptable Loss (2019),['an acceptable loss'],"['drama', 'thriller']","['tikasumpter', 'jamieleecurtis', 'bentavassoli']",['joechappelle'],"former top u.s. security adviser elizabeth ""li...","['regret', 'politician', 'betrayal', 'threat',...",7.757,42.0,6.3,102.0,2018-10-13
23309,208863.0,31006,Homicide: The Movie (2000),['homicide: the movie'],"['crime', 'drama']","['danielbaldwin', 'nedbeatty', 'richardbelzer']",['jeandesegonzac'],retired police lieutenant al giardello is runn...,"['usa', 'baltimore']",3.374,12.0,6.9,89.0,2000-02-13
23310,208915.0,639832,Undercover Brother 2 (2019),['undercover brother 2'],"['action', 'comedy']","['michaeljaiwhite', 'barrybostwick', 'jeffdani...",['lesliesmall'],"sixteen years ago, undercover brother and his ...",['spy'],19.101,14.0,3.9,85.0,2019-11-05
23311,208917.0,186021,Caicedo (with Pole) (1894),['caicedo (with pole)'],['documentary'],['juana.caicedo'],['williamk.l.dickson'],king of the slack wire. his daring feats of ba...,"['silentfilm', 'vaudeville', 'acrobats']",1.591,21.0,5.8,1.0,1894-07-25


## Database setup

In [5]:
import ast
def api_to_dict(x):
    return ast.literal_eval(x)

In [6]:
df = data.copy()

In [7]:
df.head(1)

Unnamed: 0,movieId,tmdbId,title,title_clean,genres,actors,directors,overview,keywords,popularity,vote_count,vote_average,runtime,release
0,1.0,862,Toy Story (1995),['toy story'],"['adventure', 'animation', 'children', 'comedy...","['tomhanks', 'timallen', 'donrickles']",['johnlasseter'],"led by woody, andy's toys live happily in his ...","['martialarts', 'jealousy', 'toy', 'friendship...",70.91,13173.0,7.9,81.0,1995-10-30


In [8]:
m = df['vote_count'].quantile(0.85)
m

596.0

In [9]:
df= df[df['vote_count']>=m]

In [10]:
df.shape

(3499, 14)

In [11]:
import ast
# df.drop(columns=['popularity','movieId','vote_count','vote_average','runtime','release'],inplace = True)

df['actors'] = df['actors'].apply(api_to_dict)
df['keywords'] = df['keywords'].apply(api_to_dict)
df['genres'] = df['genres'].apply(api_to_dict)

df.head(1)

Unnamed: 0,movieId,tmdbId,title,title_clean,genres,actors,directors,overview,keywords,popularity,vote_count,vote_average,runtime,release
0,1.0,862,Toy Story (1995),['toy story'],"[adventure, animation, children, comedy, fantasy]","[tomhanks, timallen, donrickles]",['johnlasseter'],"led by woody, andy's toys live happily in his ...","[martialarts, jealousy, toy, friendship, bully...",70.91,13173.0,7.9,81.0,1995-10-30


### Overview setup

#### Remove ponctuation and stopwords

In [12]:
import string 
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [13]:
def remove_punctuation(x):
    for i in string.punctuation:
        x = x.replace(i, '')
    return x

In [14]:
def stop_words(x):
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(x)
    x = [w for w in word_tokens if not w in stop_words] 
  
    return x

In [15]:
df['title_clean'] = df['title_clean'].apply(remove_punctuation)
df['title_clean'] = df['title_clean'].apply(stop_words)
df['overview'] = df['overview'].apply(remove_punctuation)
df['overview'] = df['overview'].apply(stop_words)
df['directors'] = df['directors'].apply(remove_punctuation)
df['directors'] = df['directors'].apply(stop_words)

In [16]:
def to_string(x):
    x = ' '.join(x)
    return x

In [17]:
#Tudão Bolado
df['soup_1'] = (df['genres'] + df['actors'] + df['keywords'] + df['overview'] + df['directors'] + df['title_clean']).map(to_string)

#Sem title
df['soup_2'] = (df['genres'] + df['actors'] + df['keywords'] + df['overview'] + df['directors']).map(to_string)

#Sem overview e title
df['soup_3'] = (df['genres'] + df['actors'] + df['keywords'] + df['directors']).map(to_string)

#Sem Key words
df['soup_4'] = (df['genres'] + df['actors'] + df['overview'] + df['directors'] + df['title_clean']).map(to_string)

#Sem Key words e Title
df['soup_5'] = (df['genres'] + df['actors'] + df['overview'] + df['directors']).map(to_string)

#Sem Actors e Directors e titulo
df['soup_6'] = (df['genres'] + df['keywords'] + df['overview']).map(to_string)

#Genres e KW
df['soup_7'] = (df['genres'] + df['keywords']).map(to_string)

#Genres e Overview
df['soup_8'] = (df['genres'] + df['overview']).map(to_string)

In [18]:
df.head(2)

Unnamed: 0,movieId,tmdbId,title,title_clean,genres,actors,directors,overview,keywords,popularity,...,runtime,release,soup_1,soup_2,soup_3,soup_4,soup_5,soup_6,soup_7,soup_8
0,1.0,862,Toy Story (1995),"[toy, story]","[adventure, animation, children, comedy, fantasy]","[tomhanks, timallen, donrickles]",[johnlasseter],"[led, woody, andys, toys, live, happily, room,...","[martialarts, jealousy, toy, friendship, bully...",70.91,...,81.0,1995-10-30,adventure animation children comedy fantasy to...,adventure animation children comedy fantasy to...,adventure animation children comedy fantasy to...,adventure animation children comedy fantasy to...,adventure animation children comedy fantasy to...,adventure animation children comedy fantasy ma...,adventure animation children comedy fantasy ma...,adventure animation children comedy fantasy le...
1,2.0,8844,Jumanji (1995),[jumanji],"[adventure, children, fantasy]","[robinwilliams, jonathanhyde, kirstendunst]",[joejohnston],"[siblings, judy, peter, discover, enchanted, b...","[giantinsect, boardgame, jungle, disappearance...",11.919,...,104.0,1995-12-15,adventure children fantasy robinwilliams jonat...,adventure children fantasy robinwilliams jonat...,adventure children fantasy robinwilliams jonat...,adventure children fantasy robinwilliams jonat...,adventure children fantasy robinwilliams jonat...,adventure children fantasy giantinsect boardga...,adventure children fantasy giantinsect boardga...,adventure children fantasy siblings judy peter...


In [19]:
df.iloc[0].soup_1

'adventure animation children comedy fantasy tomhanks timallen donrickles martialarts jealousy toy friendship bullying elementaryschool friends rivalry rescue mission walkietalkie boynextdoor newtoy neighborhood toycomestolife resourcefulness led woody andys toys live happily room andys birthday brings buzz lightyear onto scene afraid losing place andys heart woody plots buzz circumstances separate buzz woody owner duo eventually learns put aside differences johnlasseter toy story'

In [20]:
# Import CountVectorizer and create the count matrix
from sklearn.feature_extraction.text import CountVectorizer

count1 = CountVectorizer()
count_matrix1 = count1.fit_transform(df['soup_1'])

count2 = CountVectorizer()
count_matrix2 = count2.fit_transform(df['soup_2'])

count3 = CountVectorizer()
count_matrix3 = count3.fit_transform(df['soup_3'])

count4 = CountVectorizer()
count_matrix4 = count4.fit_transform(df['soup_4'])

count5 = CountVectorizer()
count_matrix5 = count5.fit_transform(df['soup_5'])

count6 = CountVectorizer()
count_matrix6 = count6.fit_transform(df['soup_6'])

count7 = CountVectorizer()
count_matrix7 = count7.fit_transform(df['soup_7'])

count8 = CountVectorizer()
count_matrix8 = count8.fit_transform(df['soup_8'])

In [21]:
# Compute the Cosine Similarity matrix based on the count_matrix
from sklearn.metrics.pairwise import cosine_similarity

cosine_sim1 = cosine_similarity(count_matrix1, count_matrix1)
cosine_sim2 = cosine_similarity(count_matrix2, count_matrix2)
cosine_sim3 = cosine_similarity(count_matrix3, count_matrix3)
cosine_sim4 = cosine_similarity(count_matrix4, count_matrix4)
# cosine_sim5 = cosine_similarity(count_matrix5, count_matrix5)
# cosine_sim6 = cosine_similarity(count_matrix6, count_matrix6)
# cosine_sim7 = cosine_similarity(count_matrix7, count_matrix7)
# cosine_sim8 = cosine_similarity(count_matrix8, count_matrix8)

In [22]:
cosine_sim1.shape

(3499, 3499)

In [23]:
df2 = df.copy()

In [24]:
# Reset index of our main DataFrame and construct reverse mapping as before
df2 = df2.reset_index()
indices = pd.Series(df2.index, index=df2['title'])

In [25]:
indices

title
Toy Story (1995)                0
Jumanji (1995)                  1
Heat (1995)                     2
GoldenEye (1995)                3
Balto (1995)                    4
                             ... 
Countdown (2019)             3494
Let It Snow (2019)           3495
Midway (2019)                3496
Lady and the Tramp (2019)    3497
Klaus (2019)                 3498
Length: 3499, dtype: int64

In [26]:
sim_scores = list(enumerate(cosine_sim1[0]))
sim_scores

[(0, 0.9999999999999991),
 (1, 0.05773502691896257),
 (2, 0.0),
 (3, 0.013085598064755342),
 (4, 0.060241449667687415),
 (5, 0.013085598064755342),
 (6, 0.0),
 (7, 0.05175491695067656),
 (8, 0.015504341823651057),
 (9, 0.03405574568898749),
 (10, 0.024999999999999998),
 (11, 0.01507556722888818),
 (12, 0.0),
 (13, 0.0),
 (14, 0.044721359549995794),
 (15, 0.01419904585617662),
 (16, 0.03627381250550058),
 (17, 0.014085904245475274),
 (18, 0.03773659540699707),
 (19, 0.0),
 (20, 0.032274861218395144),
 (21, 0.026537244621713762),
 (22, 0.025993762245501817),
 (23, 0.021926450482675733),
 (24, 0.01386750490563073),
 (25, 0.025482359571881275),
 (26, 0.0291111254869791),
 (27, 0.016137430609197572),
 (28, 0.0),
 (29, 0.01455556274348955),
 (30, 0.0),
 (31, 0.060241449667687415),
 (32, 0.013459547551454136),
 (33, 0.01889822365046136),
 (34, 0.0654279903237767),
 (35, 0.03388154635894692),
 (36, 0.05212860351426869),
 (37, 0.0),
 (38, 0.0),
 (39, 0.0),
 (40, 0.0),
 (41, 0.024544034683690798

In [27]:
# Function that takes in movie title as input and outputs most similar movies
def get_recommendations(title, cosine_sim):
    
    # idx Get the index of the movie that matches the title
    idx = indices[title]

    # Sim_scores creates a list of all movies and the cosine similarity related to the movie selected in 'title'
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    # Sort the movies based on the similarity scores
    # Note: Key is the field that the sort fuction will use to do the sort (position 1 of the tuple)
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the X most similar movies (position 0 will be the selected movie)
    sim_scores = sim_scores[1:30]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]
    
    # Get the movie cosine scores:
    cosine_scores = [i[1] for i in sim_scores]
    
    # Return the top 10 most similar movies
    #return df2['title'].iloc[movie_indices]
    a=pd.DataFrame(df2[['genres','title','actors','directors','vote_average','popularity']].iloc[movie_indices]).reset_index(drop=True)
    b=pd.DataFrame(cosine_scores, columns=['cosine_score'])
    
    return pd.concat([a,b], axis = 1).set_index('title')

In [28]:
pd.DataFrame(df2[['title','vote_average','popularity']].iloc[0]).transpose()

Unnamed: 0,title,vote_average,popularity
0,Toy Story (1995),7.9,70.91


In [29]:
df2.keys()

Index(['index', 'movieId', 'tmdbId', 'title', 'title_clean', 'genres',
       'actors', 'directors', 'overview', 'keywords', 'popularity',
       'vote_count', 'vote_average', 'runtime', 'release', 'soup_1', 'soup_2',
       'soup_3', 'soup_4', 'soup_5', 'soup_6', 'soup_7', 'soup_8'],
      dtype='object')

In [30]:
df2.shape

(3499, 23)

In [31]:
print('Everything stronger')
print('=============================================================')
get_recommendations('Dark Knight Rises, The (2012)', cosine_sim1)

Everything stronger


Unnamed: 0_level_0,genres,actors,directors,vote_average,popularity,cosine_score
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Dark Knight, The (2008)","[action, crime, drama, imax]","[christianbale, heathledger, michaelcaine]",[christophernolan],8.5,66.543,0.355161
Batman Begins (2005),"[action, crime, imax]","[christianbale, liamneeson, michaelcaine]",[christophernolan],7.7,49.706,0.308899
Batman Forever (1995),"[action, adventure, comedy, crime]","[valkilmer, tommyleejones, jimcarrey]",[joelschumacher],5.4,21.65,0.223353
Batman (1989),"[action, crime, thriller]","[jacknicholson, michaelkeaton, kimbasinger]",[timburton],7.2,43.047,0.21031
Batman & Robin (1997),"[action, adventure, fantasy, thriller]","[georgeclooney, arnoldschwarzenegger, chriso'd...",[joelschumacher],4.3,22.372,0.202597
Batman v Superman: Dawn of Justice (2016),"[action, adventure, fantasy, sci-fi]","[benaffleck, henrycavill, jesseeisenberg]",[zacksnyder],5.9,70.312,0.197939
Batman Returns (1992),"[action, crime]","[michaelkeaton, dannydevito, michellepfeiffer]",[timburton],6.8,27.871,0.196221
Teenage Mutant Ninja Turtles (2014),"[action, adventure, comedy]","[meganfox, willarnett, williamfichtner]",[jonathanliebesman],5.9,76.397,0.186591
"Mortal Instruments: City of Bones, The (2013)","[action, adventure, drama, imax]","[lilycollins, kevinzegers, jemimawest]",[haraldzwart],6.4,34.956,0.16855
Sin City (2005),"[action, crime, film-noir, mystery, thriller]","[brucewillis, jessicaalba, cliveowen]","[robertrodriguez, quentintarantino, frankmiller]",7.4,20.302,0.165823


In [32]:
print('Everything stronger')
print('=============================================================')
get_recommendations('Inglourious Basterds (2009)', cosine_sim1)

Everything stronger


Unnamed: 0_level_0,genres,actors,directors,vote_average,popularity,cosine_score
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dunkirk (2017),"[action, drama, thriller, war]","[fionnwhitehead, tomglynn-carney, aneurinbarnard]",[christophernolan],7.5,50.39,0.219469
Midway (2019),"[action, drama, war]","[edskrein, patrickwilson, woodyharrelson]",[rolandemmerich],7.1,39.889,0.186852
"Dirty Dozen, The (1967)","[action, drama, war]","[leemarvin, ernestborgnine, charlesbronson]",[robertaldrich],7.7,12.171,0.184588
Fury (2014),"[action, drama, war]","[bradpitt, shialabeouf, loganlerman]",[davidayer],7.5,66.98,0.181956
U-571 (2000),"[action, thriller, war]","[matthewmcconaughey, billpaxton, harveykeitel]",[jonathanmostow],6.4,22.193,0.177512
Unbroken (2014),"[drama, war]","[jacko'connell, alexrussell, domhnallgleeson]",[angelinajolie],7.5,25.874,0.174608
Schindler's List (1993),"[drama, war]","[liamneeson, benkingsley, ralphfiennes]",[stevenspielberg],8.6,29.169,0.173925
Allied (2016),"[action, drama, romance, thriller, war]","[bradpitt, marioncotillard, jaredharris]",[robertzemeckis],6.7,24.548,0.173925
Jojo Rabbit (2019),"[comedy, war]","[romangriffindavis, thomasinmckenzie, scarlett...",[taikawaititi],8.1,54.073,0.173205
"Great Escape, The (1963)","[action, adventure, drama, war]","[stevemcqueen, jamesgarner, richardattenborough]",[johnsturges],8.0,14.546,0.172853


In [33]:
print('Without title')
print('=============================================================')
get_recommendations('Dark Knight Rises, The (2012)', cosine_sim2)

Without title


Unnamed: 0_level_0,genres,actors,directors,vote_average,popularity,cosine_score
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Dark Knight, The (2008)","[action, crime, drama, imax]","[christianbale, heathledger, michaelcaine]",[christophernolan],8.5,66.543,0.326377
Batman Begins (2005),"[action, crime, imax]","[christianbale, liamneeson, michaelcaine]",[christophernolan],7.7,49.706,0.303615
Teenage Mutant Ninja Turtles (2014),"[action, adventure, comedy]","[meganfox, willarnett, williamfichtner]",[jonathanliebesman],5.9,76.397,0.212486
Batman (1989),"[action, crime, thriller]","[jacknicholson, michaelkeaton, kimbasinger]",[timburton],7.2,43.047,0.200232
Batman & Robin (1997),"[action, adventure, fantasy, thriller]","[georgeclooney, arnoldschwarzenegger, chriso'd...",[joelschumacher],4.3,22.372,0.196813
Batman Forever (1995),"[action, adventure, comedy, crime]","[valkilmer, tommyleejones, jimcarrey]",[joelschumacher],5.4,21.65,0.19356
Batman v Superman: Dawn of Justice (2016),"[action, adventure, fantasy, sci-fi]","[benaffleck, henrycavill, jesseeisenberg]",[zacksnyder],5.9,70.312,0.191655
Batman Returns (1992),"[action, crime]","[michaelkeaton, dannydevito, michellepfeiffer]",[timburton],6.8,27.871,0.190099
"Mortal Instruments: City of Bones, The (2013)","[action, adventure, drama, imax]","[lilycollins, kevinzegers, jemimawest]",[haraldzwart],6.4,34.956,0.145436
Star Wars: Episode VII - The Force Awakens (2015),"[action, adventure, fantasy, sci-fi, imax]","[harrisonford, markhamill, carriefisher]",[jjabrams],7.4,89.419,0.145191


In [34]:
print('Without title')
print('=============================================================')
get_recommendations('Catch Me If You Can (2002)', cosine_sim2)

Without title


Unnamed: 0_level_0,genres,actors,directors,vote_average,popularity,cosine_score
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
American Hustle (2013),"[crime, drama]","[christianbale, bradleycooper, jeremyrenner]",[davidorussell],6.8,17.433,0.2032
"X-Files: I Want to Believe, The (2008)","[drama, mystery, sci-fi, thriller]","[davidduchovny, gilliananderson, amandapeet]",[chriscarter],5.6,14.209,0.191237
Black Mass (2015),"[crime, drama]","[johnnydepp, joeledgerton, juliannenicholson]",[scottcooper],6.4,11.496,0.173205
Punisher: War Zone (2008),"[action, crime, drama, thriller]","[raystevenson, dominicwest, juliebenz]",[lexialexander],5.7,18.373,0.171791
Mississippi Burning (1988),"[crime, drama, thriller]","[genehackman, willemdafoe, francesmcdormand]",[alanparker],7.7,10.558,0.169031
Manhunter (1986),"[action, crime, drama, horror, thriller]","[williampetersen, kimgreist, dennisfarina]",[michaelmann],7.0,0.6,0.168655
Everybody's Fine (2009),[drama],"[robertdeniro, drewbarrymore, katebeckinsale]",[kirkjones],6.7,10.91,0.165703
Frailty (2001),"[crime, drama, thriller]","[billpaxton, matthewmcconaughey, powersboothe]",[billpaxton],7.0,14.017,0.161624
Miss Congeniality 2: Armed and Fabulous (2005),"[adventure, comedy, crime]","[sandrabullock, reginaking, enriquemurciano]",[johnpasquin],5.7,17.052,0.161624
Secret in Their Eyes (2015),"[crime, drama, mystery]","[chiwetelejiofor, nicolekidman, juliaroberts]",[billyray],6.4,18.31,0.160357


In [40]:
print('Everything except overview and title')
print('=============================================================')
get_recommendations('Dark Knight Rises, The (2012)', cosine_sim3)

Everything except overview and title


Unnamed: 0_level_0,genres,actors,directors,vote_average,popularity,cosine_score
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Dark Knight, The (2008)","[action, crime, drama, imax]","[christianbale, heathledger, michaelcaine]",[christophernolan],8.5,66.543,0.503953
Batman Begins (2005),"[action, crime, imax]","[christianbale, liamneeson, michaelcaine]",[christophernolan],7.7,49.706,0.466667
Superman Returns (2006),"[action, adventure, sci-fi, imax]","[brandonrouth, kevinspacey, katebosworth]",[bryansinger],5.6,24.329,0.267652
Man of Steel (2013),"[action, adventure, fantasy, sci-fi, imax]","[henrycavill, amyadams, michaelshannon]",[zacksnyder],6.5,55.745,0.267652
Kick-Ass 2 (2013),"[action, comedy, crime]","[aarontaylor-johnson, chloëgracemoretz, christ...",[jeffwadlow],6.4,29.675,0.258199
Thor: The Dark World (2013),"[action, adventure, fantasy, imax]","[chrishemsworth, natalieportman, tomhiddleston]",[alantaylor],6.6,46.641,0.25
Batman v Superman: Dawn of Justice (2016),"[action, adventure, fantasy, sci-fi]","[benaffleck, henrycavill, jesseeisenberg]",[zacksnyder],5.9,70.312,0.25
Star Wars: Episode VII - The Force Awakens (2015),"[action, adventure, fantasy, sci-fi, imax]","[harrisonford, markhamill, carriefisher]",[jjabrams],7.4,89.419,0.242536
Eagle Eye (2008),"[action, crime, thriller, imax]","[shialabeouf, michellemonaghan, rosariodawson]",[djcaruso],6.4,19.066,0.242536
Iron Man 2 (2010),"[action, adventure, sci-fi, thriller, imax]","[robertdowneyjr., gwynethpaltrow, doncheadle]",[jonfavreau],6.8,61.634,0.242536


In [36]:
print('Everything except overview and title')
print('=============================================================')
get_recommendations('Godfather, The (1972)', cosine_sim3)

Everything except overview and title


Unnamed: 0_level_0,genres,actors,directors,vote_average,popularity,cosine_score
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Godfather: Part II, The (1974)","[crime, drama]","[alpacino, robertduvall, dianekeaton]",[francisfordcoppola],8.6,36.06,0.400501
"Godfather: Part III, The (1990)","[crime, drama, mystery, thriller]","[alpacino, dianekeaton, taliashire]",[francisfordcoppola],7.4,28.596,0.350438
Scarface (1983),"[action, crime, drama]","[alpacino, stevenbauer, michellepfeiffer]",[briandepalma],8.2,44.93,0.256776
The Irishman (2019),"[crime, drama]","[robertdeniro, alpacino, joepesci]",[martinscorsese],7.7,46.494,0.243132
The Hate U Give (2018),"[crime, drama]","[amandlastenberg, reginahall, russellhornsby]",[georgetillmanjr],8.3,23.227,0.231455
Donnie Brasco (1997),"[crime, drama]","[johnnydepp, alpacino, michaelmadsen]",[mikenewell],7.5,14.776,0.218218
Live by Night (2017),"[crime, drama]","[benaffleck, ellefanning, brendangleeson]",[benaffleck],6.2,12.862,0.211702
Goodfellas (1990),"[crime, drama]","[rayliotta, robertdeniro, joepesci]",[martinscorsese],8.4,31.314,0.20261
All the Money in the World (2017),"[crime, drama, mystery, thriller]","[timothyhutton, christopherplummer, michellewi...",[ridleyscott],6.4,13.42,0.20025
Murder on the Orient Express (2017),"[crime, drama, mystery]","[kennethbranagh, penélopecruz, willemdafoe]",[kennethbranagh],6.7,21.539,0.199205


In [37]:
print('Everything except key words')
print('=============================================================')
get_recommendations('Dark Knight Rises, The (2012)', cosine_sim4)

Everything except key words


Unnamed: 0_level_0,genres,actors,directors,vote_average,popularity,cosine_score
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Dark Knight, The (2008)","[action, crime, drama, imax]","[christianbale, heathledger, michaelcaine]",[christophernolan],8.5,66.543,0.33758
Batman Begins (2005),"[action, crime, imax]","[christianbale, liamneeson, michaelcaine]",[christophernolan],7.7,49.706,0.281695
Batman Forever (1995),"[action, adventure, comedy, crime]","[valkilmer, tommyleejones, jimcarrey]",[joelschumacher],5.4,21.65,0.277735
Batman (1989),"[action, crime, thriller]","[jacknicholson, michaelkeaton, kimbasinger]",[timburton],7.2,43.047,0.209923
Batman Returns (1992),"[action, crime]","[michaelkeaton, dannydevito, michellepfeiffer]",[timburton],6.8,27.871,0.198479
"Mortal Instruments: City of Bones, The (2013)","[action, adventure, drama, imax]","[lilycollins, kevinzegers, jemimawest]",[haraldzwart],6.4,34.956,0.176643
Batman & Robin (1997),"[action, adventure, fantasy, thriller]","[georgeclooney, arnoldschwarzenegger, chriso'd...",[joelschumacher],4.3,22.372,0.175412
Unfriended: Dark Web (2018),[horror],"[colinwoodell, bettygabriel, rebeccarittenhouse]",[stephensusco],6.3,19.648,0.164591
Batman v Superman: Dawn of Justice (2016),"[action, adventure, fantasy, sci-fi]","[benaffleck, henrycavill, jesseeisenberg]",[zacksnyder],5.9,70.312,0.164288
Sin City (2005),"[action, crime, film-noir, mystery, thriller]","[brucewillis, jessicaalba, cliveowen]","[robertrodriguez, quentintarantino, frankmiller]",7.4,20.302,0.15881


In [38]:
print('Everything except key words')
print('=============================================================')
get_recommendations('Catch Me If You Can (2002)', cosine_sim4)

Everything except key words


Unnamed: 0_level_0,genres,actors,directors,vote_average,popularity,cosine_score
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
The Irishman (2019),"[crime, drama]","[robertdeniro, alpacino, joepesci]",[martinscorsese],7.7,46.494,0.190885
Everybody's Fine (2009),[drama],"[robertdeniro, drewbarrymore, katebeckinsale]",[kirkjones],6.7,10.91,0.181164
Frank (2014),"[comedy, drama, mystery]","[domhnallgleeson, maggiegyllenhaal, scootmcnairy]",[lennyabrahamson],6.9,7.869,0.16343
Robot & Frank (2012),"[comedy, drama, sci-fi]","[franklangella, livtyler, jamesmarsden]",[jakeschreier],6.9,10.191,0.159333
Frailty (2001),"[crime, drama, thriller]","[billpaxton, matthewmcconaughey, powersboothe]",[billpaxton],7.0,14.017,0.157019
"Iceman, The (2012)","[crime, drama, thriller]","[michaelshannon, winonaryder, rayliotta]",[arielvromen],6.4,15.097,0.155347
"Punisher, The (2004)","[action, crime, thriller]","[thomasjane, johntravolta, willpatton]",[jonathanhensleigh],6.2,31.4,0.154083
Manhunter (1986),"[action, crime, drama, horror, thriller]","[williampetersen, kimgreist, dennisfarina]",[michaelmann],7.0,0.6,0.148675
Hachiko: A Dog's Story (a.k.a. Hachi: A Dog's Tale) (2009),[drama],"[richardgere, joanallen, sarahroemer]",[lassehallström],8.0,28.831,0.148675
Black Mass (2015),"[crime, drama]","[johnnydepp, joeledgerton, juliannenicholson]",[scottcooper],6.4,11.496,0.148675


In [39]:
print('Everything except key words and title')
print('=============================================================')
get_recommendations('Dark Knight Rises, The (2012)', cosine_sim5)

Everything except key words and title


NameError: name 'cosine_sim5' is not defined

In [None]:
print('Everything except key words and title')
print('=============================================================')
get_recommendations('Catch Me If You Can (2002)', cosine_sim5)

In [None]:
print('Everything except actors and directors')
print('=============================================================')
get_recommendations('Dark Knight Rises, The (2012)', cosine_sim6)

In [None]:
print('Everything except actors and directors')
print('=============================================================')
get_recommendations('Catch Me If You Can (2002)', cosine_sim6)

In [None]:
print('Only Genres and Keywords')
print('=============================================================')
get_recommendations('Inglourious Basterds (2009)', cosine_sim7)

In [None]:
print('Only Genres and Keywords')
print('=============================================================')
get_recommendations('Catch Me If You Can (2002)', cosine_sim7)

In [None]:
print('Only Genres and Overview')
print('=============================================================')
get_recommendations('Dark Knight Rises, The (2012)', cosine_sim8)

In [None]:
print('Only Genres and Overview')
print('=============================================================')
get_recommendations('Inglourious Basterds (2009)', cosine_sim8)