# Systeme de recommendation

# Simple recommenders

In [1]:
# Import Pandas
import pandas as pd

# Load Movies Metadata
metadata = pd.read_csv('movies_metadata.csv', low_memory=False)

# Print the first three rows
metadata.head(3)

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0


In [2]:
#metadata.info()
metadata.columns
#metadata.index

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count'],
      dtype='object')

W e i gh t e dR a t i n g( W R ) = ((v/(v + m))⋅ R ) + ((m/(v + m))⋅ C )
Dans l'équation ci-dessus,

- v est le nombre de votes pour le film;

- m est le nombre minimum de votes requis pour figurer dans le tableau;

- R est la note moyenne du film;

- C est le vote moyen sur l'ensemble du rapport.

Vous disposez déjà des valeurs v (vote_count)et R (vote_average)pour chaque film du jeu de données. Il est également possible de calculer directement C à partir de ces données.



In [3]:
# Calculate mean of vote average column
C = metadata['vote_average'].mean()
print(C)

5.618207215134185


In [4]:
# Calculate the minimum number of votes required to be in the chart, m
m = metadata['vote_count'].quantile(0.90)
print(m)

160.0


In [5]:
# Filter out all qualified movies into a new DataFrame
q_movies = metadata.copy().loc[metadata['vote_count'] >= m]
q_movies.shape

(4555, 24)

In [6]:
metadata.shape

(45466, 24)

In [7]:
# Function that computes the weighted rating of each movie
def weighted_rating(x, m=m, C=C):
    v = x['vote_count']
    R = x['vote_average']
    # Calculation based on the IMDB formula
    return (v/(v+m) * R) + (m/(m+v) * C)

In [8]:
# Define a new feature 'score' and calculate its value with `weighted_rating()`
q_movies['score'] = q_movies.apply(weighted_rating, axis=1)

In [9]:
#Sort movies based on score calculated above
q_movies = q_movies.sort_values('score', ascending=False)

#Print the top 15 movies
q_movies[['title', 'vote_count', 'vote_average', 'score']].head(20)

Unnamed: 0,title,vote_count,vote_average,score
314,The Shawshank Redemption,8358.0,8.5,8.445869
834,The Godfather,6024.0,8.5,8.425439
10309,Dilwale Dulhania Le Jayenge,661.0,9.1,8.421453
12481,The Dark Knight,12269.0,8.3,8.265477
2843,Fight Club,9678.0,8.3,8.256385
292,Pulp Fiction,8670.0,8.3,8.251406
522,Schindler's List,4436.0,8.3,8.206639
23673,Whiplash,4376.0,8.3,8.205404
5481,Spirited Away,3968.0,8.3,8.196055
2211,Life Is Beautiful,3643.0,8.3,8.187171


# Content-Based Recommender

In [10]:
#Print plot overviews of the first 5 movies.
metadata['overview'].head()


0    Led by Woody, Andy's toys live happily in his ...
1    When siblings Judy and Peter discover an encha...
2    A family wedding reignites the ancient feud be...
3    Cheated on, mistreated and stepped on, the wom...
4    Just when George Banks has recovered from his ...
Name: overview, dtype: object

In [11]:
#Import TfIdfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer

#Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

#Replace NaN with an empty string
metadata['overview'] = metadata['overview'].fillna('')

#Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(metadata['overview'])

#Output the shape of tfidf_matrix
tfidf_matrix.shape

(45466, 75827)

In [12]:
#Array mapping from feature integer indices to feature name.
tfidf.get_feature_names()[5000:5010]

['avails',
 'avaks',
 'avalanche',
 'avalanches',
 'avallone',
 'avalon',
 'avant',
 'avanthika',
 'avanti',
 'avaracious']

In [13]:
tfidf.get_feature_names()[10000:10010]
print(type(tfidf))
print(type(tfidf_matrix))

<class 'sklearn.feature_extraction.text.TfidfVectorizer'>
<class 'scipy.sparse.csr.csr_matrix'>


In [14]:
print(tfidf_matrix[18252])

  (0, 48535)	0.18049429545229065
  (0, 5738)	0.19585698339220603
  (0, 55759)	0.13584863622917445
  (0, 23549)	0.15706441466021728
  (0, 8762)	0.16113959761837482
  (0, 37195)	0.15587999618007012
  (0, 59357)	0.18272563914280895
  (0, 4540)	0.14218310171712914
  (0, 55594)	0.13049860729111584
  (0, 55828)	0.17352792407794762
  (0, 66498)	0.12046380389722414
  (0, 64285)	0.1465502629835388
  (0, 17064)	0.1231203821914654
  (0, 6214)	0.2975632133481142
  (0, 17032)	0.37635127884449665
  (0, 28939)	0.14958787836314244
  (0, 18457)	0.12810093658637228
  (0, 27103)	0.3166557021739029
  (0, 20934)	0.11076875808479081
  (0, 24163)	0.1031405544728993
  (0, 51321)	0.08428187140453369
  (0, 36331)	0.14032690939902004
  (0, 31025)	0.13687291406658086
  (0, 71380)	0.1476331019965685
  (0, 37769)	0.10096447720065604
  (0, 21013)	0.11296034410021596
  (0, 4767)	0.2529050947195973
  (0, 15076)	0.12363661373028578
  (0, 12528)	0.1585323542008791
  (0, 37773)	0.08944992162800533
  (0, 16327)	0.07863298

In [15]:
print("le nombre de mots dans tout le data = {} mots".format(len(tfidf.get_feature_names())))

le nombre de mots dans tout le data = 75827 mots


on va utiliser **cosine similarity** pour calculer une quantité numérique indiquant la similitude entre deux films, car il est indépendant de la magnitude et est relativement facile et rapide à calculer (en particulier lorsqu'il est utilisé en conjonction avec les scores TF-IDF)  

![equation](https://res.cloudinary.com/dyd911kmh/image/upload/f_auto,q_auto:best/v1590782185/cos_aalkpq.png)  


on va sklearn's linear_kernel()au lieu de cosine_similarities()car il est plus rapide.  


chaque film sera un vecteur de colonne 1x45466 où chaque colonne sera un score de similitude avec chaque film

## Utiliser cosine_similarity pour faire le produit d'une matrice et d'un vecteur

In [16]:
# Import linear_kernel
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

# Compute the cosine similarity matrix
#cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix) à ne pas executer car plante le noyau

# avec l'argument "dense_output=False" ça passe mais sur google colab car il nous transforme la matrice dense
# en matrice sparce(creuse)
#cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix, dense_output=False)

# une autre façcon de calculer la 
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix[18252])
#on le fait pour avoir une dimension de(45466,45466) au lieu de(45466,75827)qui(nbfilm,nbmots)
print(cosine_sim.shape)



(45466, 1)


In [17]:
print(cosine_sim) 
print(type(cosine_sim))

[[0.        ]
 [0.00599988]
 [0.        ]
 ...
 [0.        ]
 [0.00642247]
 [0.00746305]]
<class 'numpy.ndarray'>


In [18]:
# affiche la première ligne de la matrice cosine_sim
import numpy as np

cosine_sim[1]
#print(cosine_sim[0:4,0:2])
#fff = np.array(cosine_sim[0:4,0:2])
#print(fff)
# on obtient 
#(array([0.01504121, 1.        , 0.04681953, ..., 0.        , 0.02198641, 0.00929411])

array([0.00599988])

On va définir une fonction qui prend un titre de film comme entrée et génère une liste des 10 films les plus similaires.  

Tout d'abord, pour cela, on a besoin d'un mappage inversé des titres de films et des indices DataFrame.  
En d'autres termes, on a besoin d'un mécanisme pour identifier l'index d'un film dans votre metadataDataFrame, compte tenu de son titre.

pd.Series "Création d'une série, c'est à dire d'un vecteur de valeurs"

In [19]:
#Construct a reverse map of indices and movie titles
indices = pd.Series(metadata.index, index=metadata['title']).drop_duplicates()

indices[:10]
indices['The Dark Knight Rises']

18252

on définir une fonction de recommandation. Voici les étapes que vous suivrez:

Obtenez l'index du film en fonction de son titre.

Obtenez la liste des scores de similarité cosinus pour ce film particulier avec tous les films. 

Convertissez-le en une liste de tuples où le premier élément est sa position et le second est le score de similarité.

Trier la liste susmentionnée de tuples en fonction des scores de similitude; c'est-à-dire le deuxième élément.

Obtenez les 10 principaux éléments de cette liste. Ignorez le premier élément car il se réfère à soi (le film le plus similaire à un film particulier est le film lui-même).

Renvoie les titres correspondant aux indices des éléments supérieurs.



# Quelle est la complexité en mémoire de cette opération ?   

La complexité en temps compte le nombre d'étapes de calcul. Il y a plusieurs façons de définir ces étapes, par exemple le nombre d'opérations dans une machine RAM1, ou des mesures plus théoriques comme le nombre de comparaisons dans le cas d'un algorithme de tri ou le nombre de pas d'une machine de Turing.  

Ici on a un tableau de dimension 45000 x 45000

In [20]:
dim = 45000 * 45000
espace = (dim * 8 )/ 100000000#8 octet
print("espace minimum pour pouvoir faire ce calcul = {} en Gbit".format(espace)) 

espace minimum pour pouvoir faire ce calcul = 162.0 en Gbit


# **get_recommandations simple**

In [21]:
from time import time


# Function that takes in movie title as input and outputs most similar movies
def get_recommendations(title):
    start = time()
    # Get the index of the movie that matches the title
    idx = indices[title]
    #print("idx=",[idx.head(5)])

    #calculer cosine_similarity (vecteur * matrice)
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix[idx])

    # Get the pairwsie similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]

    t1 = time()-start
    t1 = round(t1,3)
    print("Time = {} secondes".format(t1))
    # Return the top 10 most similar movies
    return movie_indices, metadata['title'].iloc[movie_indices]#,print(idx.head(5))


In [22]:
get_recommendations('The Dark Knight Rises')
#get_recommendations('Sabrina')


Time = 2.025 secondes


([12481, 150, 1328, 15511, 585, 21194, 9230, 18035, 19792, 3095],
 12481                                      The Dark Knight
 150                                         Batman Forever
 1328                                        Batman Returns
 15511                           Batman: Under the Red Hood
 585                                                 Batman
 21194    Batman Unmasked: The Psychology of the Dark Kn...
 9230                    Batman Beyond: Return of the Joker
 18035                                     Batman: Year One
 19792              Batman: The Dark Knight Returns, Part 1
 3095                          Batman: Mask of the Phantasm
 Name: title, dtype: object)

In [23]:
get_recommendations('The Godfather')

Time = 3.015 secondes


([1178, 44030, 1914, 23126, 11297, 34717, 10821, 38030, 17729, 26293],
 1178               The Godfather: Part II
 44030    The Godfather Trilogy: 1972-1990
 1914              The Godfather: Part III
 23126                          Blood Ties
 11297                    Household Saints
 34717                   Start Liquidation
 10821                            Election
 38030            A Mother Should Be Loved
 17729                   Short Sharp Shock
 26293                  Beck 28 - Familjen
 Name: title, dtype: object)

# **get_recommandations with PCA(TruncatedSVD)**

In [24]:

from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD

# faire la pca avant avec n_components=500 pour tester
clf = TruncatedSVD(500)
X_pca = clf.fit_transform(tfidf_matrix)



In [25]:

print(X_pca.shape)

(45466, 500)


In [26]:
X_pca_shape = X_pca.reshape(-1,1)
X_pca_i_shape = (X_pca[18252]).reshape(1,-1)

print(len(X_pca[18252]))
print(len(X_pca))

500
45466


In [27]:
cosine_sim = cosine_similarity(X_pca ,X_pca_i_shape)
print(cosine_sim.shape)
print(cosine_sim)

(45466, 1)
[[-0.03212949]
 [ 0.01733888]
 [ 0.00034179]
 ...
 [-0.0184195 ]
 [ 0.04890582]
 [ 0.03220154]]


In [28]:
from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD
from time import time


# Function that takes in movie title as input and outputs most similar movies
def get_recommendations_PCA(title):#, cosine_sim=cosine_sim):
    start = time()
    # Get the index of the movie that matches the title
    idx = indices[title]

    # faire la pca avant avec TruncatedSVD avec n_components=10000
    clf = TruncatedSVD(1000)
    X_pca = clf.fit_transform(tfidf_matrix)

    # if faut faire un reshape(1,-1) sur le vecteur
    X_pca_i_shape = (X_pca[idx]).reshape(1,-1)

    #calculer cosine_similarity (vecteur * matrice)
    cosine_sim = cosine_similarity(X_pca, X_pca_i_shape)

    # Get the pairwsie similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]
    #movie_indices = sim_scores
    #print("movive_indices",movie_indices)

    # Return the top 10 most similar movies
    #print("idx =",idx.head(5))
    t1 = time()-start
    t1 = round(t1,3)
    print("Time = {} secondes".format(t1))
    print("Movies_indices = ", movie_indices)
    return metadata['title'].iloc[movie_indices]#,print(idx.head(5))


In [29]:
get_recommendations_PCA('The Dark Knight Rises')

Time = 361.908 secondes
Movies_indices =  [150, 11753, 585, 12481, 15511, 13838, 27521, 29881, 18035, 3267]


150                  Batman Forever
11753                     Slow Burn
585                          Batman
12481               The Dark Knight
15511    Batman: Under the Red Hood
13838     The File on Thelma Jordon
27521                Rage of Angels
29881                       Accused
18035              Batman: Year One
3267                            JFK
Name: title, dtype: object

In [29]:
get_recommendations_PCA('The Godfather')

Time = 95.884 secondes
Movies_indices =  [44030, 28163, 1178, 31973, 15716, 26293, 30487, 11297, 37934, 29]


44030                     The Godfather Trilogy: 1972-1990
28163    Eurocrime! The Italian Cop and Gangster Films ...
1178                                The Godfather: Part II
31973                                     Honor Thy Father
15716                                         Urban Menace
26293                                   Beck 28 - Familjen
30487                       Bonnie and Clyde Italian Style
11297                                     Household Saints
37934                                  Johnny Stool Pigeon
29                                          Shanghai Triad
Name: title, dtype: object

# **get_recomandations with "popularity filter"**

In [30]:
from time import time


# Function that takes in movie title as input and outputs most similar movies
def get_recommendations_popularity_filter(title):#, cosine_sim=cosine_sim):
    start = time()
    # Get the index of the movie that matches the title
    idx = indices[title]


    #calculer cosine_similarity (vecteur * matrice)
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix[idx])

    # Get the pairwsie similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]

    # convert metadata.popularity to float
    metadata.popularity = pd.to_numeric(metadata.popularity,errors='coerce')
    
    t1 = time()-start
    t1 = round(t1,3)
    print("Time = {} secondes".format(t1))
    print("Movies_indices = ", movie_indices)
    # Return the top 10 most similar movies
    return metadata[['title', 'popularity']].iloc[movie_indices].sort_values(by= 'popularity', ascending=False)


In [31]:
get_recommendations_popularity_filter('The Dark Knight Rises')

Time = 1.599 secondes
Movies_indices =  [12481, 150, 1328, 15511, 585, 21194, 9230, 18035, 19792, 3095]


Unnamed: 0,title,popularity
12481,The Dark Knight,123.167259
585,Batman,19.10673
1328,Batman Returns,15.001681
150,Batman Forever,13.321354
19792,"Batman: The Dark Knight Returns, Part 1",12.107182
9230,Batman Beyond: Return of the Joker,7.872438
3095,Batman: Mask of the Phantasm,7.29114
15511,Batman: Under the Red Hood,7.039325
18035,Batman: Year One,5.031094
21194,Batman Unmasked: The Psychology of the Dark Kn...,1.084614


In [32]:
get_recommendations_popularity_filter('The Godfather')

Time = 1.449 secondes
Movies_indices =  [1178, 44030, 1914, 23126, 11297, 34717, 10821, 38030, 17729, 26293]


Unnamed: 0,title,popularity
1178,The Godfather: Part II,36.629307
1914,The Godfather: Part III,17.185349
23126,Blood Ties,14.934555
10821,Election,3.222361
17729,Short Sharp Shock,1.463414
26293,Beck 28 - Familjen,0.956873
34717,Start Liquidation,0.623522
38030,A Mother Should Be Loved,0.095234
11297,Household Saints,0.055173
44030,The Godfather Trilogy: 1972-1990,0.001429


# **get_recommandation with score**

In [33]:
from time import time


# Function that takes in movie title as input and outputs most similar movies
def get_recommendations_score(title):#, cosine_sim=cosine_sim):
    start = time()
    # Get the index of the movie that matches the title
    idx = indices[title]


    #calculer cosine_similarity (vecteur * matrice)
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix[idx])

    # Get the pairwsie similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]

    # convert metadata.popularity to float
    #metadata.popularity = pd.to_numeric(metadata.popularity,errors='coerce')
    metadata['scores'] = metadata.apply(weighted_rating, axis=1)
    
    t1 = time()-start
    t1 = round(t1,3)
    print("Time = {} secondes".format(t1))
    print("Movies_indices = ", movie_indices)
    # Return the top 10 most similar movies
    df = metadata[['title', 'scores']].iloc[movie_indices].sort_values(by= 'scores', ascending=False)
    print("suggestion : {}".format(df.iloc[1]))
    return df


In [34]:
get_recommendations_score('The Dark Knight Rises')

Time = 4.395 secondes
Movies_indices =  [12481, 150, 1328, 15511, 585, 21194, 9230, 18035, 19792, 3095]
suggestion : title     Batman: The Dark Knight Returns, Part 1
scores                                    7.11564
Name: 19792, dtype: object


Unnamed: 0,title,scores
12481,The Dark Knight,8.265477
19792,"Batman: The Dark Knight Returns, Part 1",7.115637
15511,Batman: Under the Red Hood,7.087743
585,Batman,6.904084
3095,Batman: Mask of the Phantasm,6.645802
9230,Batman Beyond: Return of the Joker,6.534978
18035,Batman: Year One,6.528706
1328,Batman Returns,6.515816
21194,Batman Unmasked: The Psychology of the Dark Kn...,5.846967
150,Batman Forever,5.239617


# **get_recommandation with suggestion**

In [35]:
indices.index.isnull().any()
indices.index

Index([                  'Toy Story',                     'Jumanji',
                  'Grumpier Old Men',           'Waiting to Exhale',
       'Father of the Bride Part II',                        'Heat',
                           'Sabrina',                'Tom and Huck',
                      'Sudden Death',                   'GoldenEye',
       ...
                  'House of Horrors',   'Shadow of the Blair Witch',
               'The Burkittsville 7',             'Caged Heat 3000',
                        'Robin Hood',                      'Subdue',
               'Century of Birthing',                    'Betrayal',
                  'Satan Triumphant',                    'Queerama'],
      dtype='object', name='title', length=45466)

In [57]:
from time import time


# Function that takes in movie title as input and outputs most similar movies
def get_recommendations_suggestion(word):#, cosine_sim=cosine_sim):
    start = time()
    # Get the index of the movie that matches the title
    #idx = indices[title]
    idx = []
    for k,v in indices.items():
        if word in str(k):
            idx.append(v)
    for j in idx:

        #calculer cosine_similarity (vecteur * matrice)
        cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix[j])

        # Get the pairwsie similarity scores of all movies with that movie
        sim_scores = list(enumerate(cosine_sim))

        # Sort the movies based on the similarity scores
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

        # Get the scores of the 10 most similar movies
        sim_scores = sim_scores[1:11]

        # Get the movie indices
        movie_indices = [i[0] for i in sim_scores]

        # convert metadata.popularity to float
        #metadata.popularity = pd.to_numeric(metadata.popularity,errors='coerce')
        #metadata['scores'] = metadata.apply(weighted_rating, axis=1)
    
    t1 = time()-start
    t1 = round(t1,3)
    print("Time = {} secondes".format(t1))
    print("Movies_indices = ", movie_indices)
    # Return the top 10 most similar movies
    df = metadata[['title']].iloc[movie_indices]#.sort_values(by= 'scores', ascending=False)
    print("suggestion : {}".format(df.iloc[1]))
    return df

In [58]:
get_recommendations_suggestion('Batman')

Time = 53.474 secondes
Movies_indices =  [41982, 19792, 3095, 35983, 10122, 9230, 15511, 150, 1328, 21194]
suggestion : title    Batman: The Dark Knight Returns, Part 1
Name: 19792, dtype: object


Unnamed: 0,title
41982,Batman Beyond Darwyn Cooke's Batman 75th Anniv...
19792,"Batman: The Dark Knight Returns, Part 1"
3095,Batman: Mask of the Phantasm
35983,Batman: Bad Blood
10122,Batman Begins
9230,Batman Beyond: Return of the Joker
15511,Batman: Under the Red Hood
150,Batman Forever
1328,Batman Returns
21194,Batman Unmasked: The Psychology of the Dark Kn...


In [59]:
get_recommendations_suggestion('Psycho')

Time = 36.349 secondes
Movies_indices =  [37605, 41624, 44048, 7520, 578, 32989, 44182, 4272, 37735, 16808]
suggestion : title    Kevorkian
Name: 41624, dtype: object


Unnamed: 0,title
37605,The Stalker
41624,Kevorkian
44048,Simon's Cat
7520,After the Rehearsal
578,The Brady Bunch Movie
32989,Seven Dead in the Cat's Eye
44182,Cat in the Brain
4272,Donovan's Brain
37735,The Lure
16808,She and Her Cat: Their Standing Points
