In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/movieratingsbyusers/ratings_user-item.csv
/kaggle/input/movieratingsbyusers/movies_user-item.csv
/kaggle/input/movieratingsbyusers/tagsngenres.csv


# Content-Based Filtering


In content-based filtering we are going to compare the contents of films with eachother; such as the genre of films, the directors, film casts,... and after that, we can build a model that recommend similar films base on the current film.

In [2]:
#reading the data we need for content-based filtering
tag_gnr_df = pd.read_csv('../input/movieratingsbyusers/tagsngenres.csv')
tag_gnr_df

Unnamed: 0,movieId,title,genres,tags
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,pixar
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,pixar
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,fun
3,2,Jumanji (1995),Adventure|Children|Fantasy,fantasy
4,2,Jumanji (1995),Adventure|Children|Fantasy,magic board game
...,...,...,...,...
3678,187595,Solo: A Star Wars Story (2018),Action|Adventure|Children|Sci-Fi,star wars
3679,193565,Gintama: The Movie (2010),Action|Animation|Comedy|Sci-Fi,anime
3680,193565,Gintama: The Movie (2010),Action|Animation|Comedy|Sci-Fi,comedy
3681,193565,Gintama: The Movie (2010),Action|Animation|Comedy|Sci-Fi,gintama


As we can see, there is duplicated records; so we must drop them and reset the indices of data

In [3]:
#drop duplicated records and reseting the indices after deleting some rows
tag_gnr_df = tag_gnr_df.drop_duplicates()
tag_gnr_df = tag_gnr_df.reset_index()
tag_gnr_df

Unnamed: 0,index,movieId,title,genres,tags
0,0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,pixar
1,2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,fun
2,3,2,Jumanji (1995),Adventure|Children|Fantasy,fantasy
3,4,2,Jumanji (1995),Adventure|Children|Fantasy,magic board game
4,5,2,Jumanji (1995),Adventure|Children|Fantasy,Robin Williams
...,...,...,...,...,...
3218,3678,187595,Solo: A Star Wars Story (2018),Action|Adventure|Children|Sci-Fi,star wars
3219,3679,193565,Gintama: The Movie (2010),Action|Animation|Comedy|Sci-Fi,anime
3220,3680,193565,Gintama: The Movie (2010),Action|Animation|Comedy|Sci-Fi,comedy
3221,3681,193565,Gintama: The Movie (2010),Action|Animation|Comedy|Sci-Fi,gintama


In [4]:
tag_gnr_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3223 entries, 0 to 3222
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   index    3223 non-null   int64 
 1   movieId  3223 non-null   int64 
 2   title    3223 non-null   object
 3   genres   2820 non-null   object
 4   tags     2820 non-null   object
dtypes: int64(2), object(3)
memory usage: 126.0+ KB


There are some NaN values in genres and tags columns; Because there are not many of them and we have no other choice, we can drop them!

In [5]:
#droping nan values of data
tag_gnr_df = tag_gnr_df.dropna()
tag_gnr_df

Unnamed: 0,index,movieId,title,genres,tags
0,0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,pixar
1,2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,fun
2,3,2,Jumanji (1995),Adventure|Children|Fantasy,fantasy
3,4,2,Jumanji (1995),Adventure|Children|Fantasy,magic board game
4,5,2,Jumanji (1995),Adventure|Children|Fantasy,Robin Williams
...,...,...,...,...,...
3218,3678,187595,Solo: A Star Wars Story (2018),Action|Adventure|Children|Sci-Fi,star wars
3219,3679,193565,Gintama: The Movie (2010),Action|Animation|Comedy|Sci-Fi,anime
3220,3680,193565,Gintama: The Movie (2010),Action|Animation|Comedy|Sci-Fi,comedy
3221,3681,193565,Gintama: The Movie (2010),Action|Animation|Comedy|Sci-Fi,gintama


As you can see, there is more than one tag for each movie_id; So I decide to rebuild the DataFrame and give a coherent structure to the data by collecting the tags related to each movie_id

For this, we can use a dictionary and convert it to a pandas DataFrame

In [6]:
#list of unique movie IDs
movie_ids = sorted(list(set(tag_gnr_df['movieId'].values)))
#use a dictionary to convert it to a pandas DataFrame later
movie_dic = {'movieId': [],
             'title': [],
             'genres': [],
             'tags': []}

for i in movie_ids:
    movie_dic['movieId'].append(i)
    movie_dic['title'].append(list(set(tag_gnr_df[tag_gnr_df.movieId == i]['title']))[0])
    movie_dic['genres'].append(list(set((tag_gnr_df[tag_gnr_df.movieId == i]['genres']))))
    movie_dic['tags'].append(list(set((tag_gnr_df[tag_gnr_df.movieId == i]['tags']))))
    
genres_df = pd.DataFrame(data=movie_dic)
genres_df

Unnamed: 0,movieId,title,genres,tags
0,1,Toy Story (1995),[Adventure|Animation|Children|Comedy|Fantasy],"[fun, pixar]"
1,2,Jumanji (1995),[Adventure|Children|Fantasy],"[Robin Williams, fantasy, magic board game, game]"
2,3,Grumpier Old Men (1995),[Comedy|Romance],"[moldy, old]"
3,5,Father of the Bride Part II (1995),[Comedy],"[pregnancy, remake]"
4,7,Sabrina (1995),[Comedy|Romance],[remake]
...,...,...,...,...
1164,183611,Game Night (2018),[Action|Comedy|Crime|Horror],"[Comedy, Rachel McAdams, funny]"
1165,184471,Tomb Raider (2018),[Action|Adventure|Fantasy],"[video game adaptation, adventure, Alicia Vika..."
1166,187593,Deadpool 2 (2018),[Action|Comedy|Sci-Fi],"[Ryan Reynolds, Josh Brolin, sarcasm]"
1167,187595,Solo: A Star Wars Story (2018),[Action|Adventure|Children|Sci-Fi],"[Emilia Clarke, star wars]"


In the first step, we collect the genre and tags for each movie and apply some ordinary text pre-processing on them such as removing non-alphabetic characters, lowering characters, split them word by word, stemming them and so on.

In [7]:
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

ps = PorterStemmer()
corpus = []
for i in range(len(genres_df)):
    txt = str(genres_df['genres'][i] + genres_df['tags'][i])
    review = re.sub('[^a-zA-Z]', ' ', txt)
    review = review.lower()
    review = review.split()
    
    review = [ps.stem(word) for word in review if not word in stopwords.words('english')]
    review = ' '.join(review)
    corpus.append(review)

#genres and tags of first 5 movie(head of data)
corpus[0:4]

['adventur anim children comedi fantasi fun pixar',
 'adventur children fantasi robin william fantasi magic board game game',
 'comedi romanc moldi old',
 'comedi pregnanc remak']

The next step is converting the corpus(list of strings for each film) to vector of numbers so that we can use them to find their similarity later; In this case, I am going to use TF-IDF method but you can also use countVectorizer

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(analyzer='word',
                             ngram_range=(1,2),
                             stop_words='english')

tfidf_mtrx = vectorizer.fit_transform(corpus)

Next, we use TF-IDF matrix to find the similarity between vectors(films)

In [9]:
from sklearn.metrics.pairwise import cosine_similarity

content_similarity = cosine_similarity(tfidf_mtrx)
print(content_similarity.shape,'\n')
print(content_similarity)

(1169, 1169) 

[[1.         0.09162203 0.01480371 ... 0.00950917 0.0552683  0.08977841]
 [0.09162203 1.         0.         ... 0.         0.07602975 0.        ]
 [0.01480371 0.         1.         ... 0.0110826  0.         0.02354222]
 ...
 [0.00950917 0.         0.0110826  ... 1.         0.06842756 0.13566084]
 [0.0552683  0.07602975 0.         ... 0.06842756 1.         0.07267864]
 [0.08977841 0.         0.02354222 ... 0.13566084 0.07267864 1.        ]]


And finally, we can use a function that get a movie name and return top 5 similar films

In [10]:
def contentBased_recommender(movie_name):
    #index of movie_name
    idx = genres_df[genres_df.title == movie_name].index[0]
    #get cosine similarity scores for that movie, sort them and get top 10 similar
    scores = list(enumerate(content_similarity[idx]))
    #note: sorted_scores[0] is the film itself so dont use it!
    sorted_scores = sorted(scores, key= lambda x:x[1], reverse=True)[1:11]
    rec_films_idx = [i[0] for i in sorted_scores]
    print("Top 10 suggestions:\n")
    return genres_df['title'].iloc[rec_films_idx]

In [11]:
contentBased_recommender('Jumanji (1995)')

Top 10 suggestions:



680        Harry Potter and the Chamber of Secrets (2002)
997                            Night at the Museum (2006)
755               Sinbad: Legend of the Seven Seas (2003)
193                            Alice in Wonderland (1951)
121                                      Pinocchio (1940)
258                                       Fantasia (1940)
1165                                   Tomb Raider (2018)
737     Laputa: Castle in the Sky (Tenkû no shiro Rapy...
630     Harry Potter and the Sorcerer's Stone (a.k.a. ...
189                                     Cinderella (1950)
Name: title, dtype: object

# Collaborative Filtering


In collaborative filtering, we use the scores given by users for the films; In other words users' behavior is effective in recommendation model.

In [12]:
#reading ratings data
rating_df = pd.read_csv('../input/movieratingsbyusers/ratings_user-item.csv')
rating_df

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0
...,...,...,...
100831,610,166534,4.0
100832,610,168248,5.0
100833,610,168250,5.0
100834,610,168252,5.0


In [13]:
rating_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 3 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   userId   100836 non-null  int64  
 1   movieId  100836 non-null  int64  
 2   rating   100836 non-null  float64
dtypes: float64(1), int64(2)
memory usage: 2.3 MB


In [14]:
#reading movie data
movie_df = pd.read_csv('../input/movieratingsbyusers/movies_user-item.csv')
movie_df

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)
...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017)
9738,193583,No Game No Life: Zero (2017)
9739,193585,Flint (2017)
9740,193587,Bungo Stray Dogs: Dead Apple (2018)


In [15]:
movie_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9742 entries, 0 to 9741
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  9742 non-null   int64 
 1   title    9742 non-null   object
dtypes: int64(1), object(1)
memory usage: 152.3+ KB


Hmmmm... There's a small problem with these datas; In 'movie_df' dataset there are some films that are not in 'rating_df' dataset but considering that our main data for recommendation model is 'rating_df', then there will be no problem!

In [16]:
movie_df_films = movie_df['movieId'].unique()
rating_df_films = rating_df['movieId'].unique()
drp_movie_id = [i for i in movie_df_films if i not in rating_df_films]
for i in drp_movie_id:
    print(movie_df[movie_df.movieId == i]['title'].values[0])

Innocents, The (1961)
Niagara (1953)
For All Mankind (1989)
Color of Paradise, The (Rang-e khoda) (1999)
I Know Where I'm Going! (1945)
Chosen, The (1981)
Road Home, The (Wo de fu qin mu qin) (1999)
Scrooge (1970)
Proof (1991)
Parallax View, The (1974)
This Gun for Hire (1942)
Roaring Twenties, The (1939)
Mutiny on the Bounty (1962)
In the Realms of the Unreal (2004)
Twentieth Century (1934)
Call Northside 777 (1948)
Browning Version, The (1951)
Chalet Girl (2011)


In the first step, we should convert 'rating_df' dataset to pivot version; This will be the data that we will use to build the model. The indeces of data are user IDs, columns are movie IDs and the values are ratings (scores) given by users to films.

In [17]:
rating_pvt_df = rating_df.pivot(index='userId', columns='movieId', values='rating')
rating_pvt_df

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


As we expected, most of the pivot data are NaN-values because users cannot see all the movies and rate them! Considering that, filling the NaN-values by ourselves makes such datas meaningless, I use the method I learned on YouTube for such cases;

First, we fill NaN-values with 0 using; Then we can use a standardization function which almost eliminates the effect of giving zero to NaN-values.

In [18]:
rating_pvt_df = rating_pvt_df.fillna(0)

def standardization(row):
    new_row = (row - row.mean()) / (row.max() - row.min())
    return new_row

rating_pvt_df = rating_pvt_df.apply(standardization)
rating_pvt_df

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.523607,-0.12377,0.744426,-0.009016,-0.049344,0.668033,-0.056393,-0.007541,-0.016393,-0.151311,...,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639
2,-0.276393,-0.12377,-0.055574,-0.009016,-0.049344,-0.131967,-0.056393,-0.007541,-0.016393,-0.151311,...,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639
3,-0.276393,-0.12377,-0.055574,-0.009016,-0.049344,-0.131967,-0.056393,-0.007541,-0.016393,-0.151311,...,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639
4,-0.276393,-0.12377,-0.055574,-0.009016,-0.049344,-0.131967,-0.056393,-0.007541,-0.016393,-0.151311,...,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639
5,0.523607,-0.12377,-0.055574,-0.009016,-0.049344,-0.131967,-0.056393,-0.007541,-0.016393,-0.151311,...,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.223607,-0.12377,-0.055574,-0.009016,-0.049344,-0.131967,0.443607,-0.007541,-0.016393,-0.151311,...,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639
607,0.523607,-0.12377,-0.055574,-0.009016,-0.049344,-0.131967,-0.056393,-0.007541,-0.016393,-0.151311,...,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639
608,0.223607,0.27623,0.344426,-0.009016,-0.049344,-0.131967,-0.056393,-0.007541,-0.016393,0.648689,...,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639
609,0.323607,-0.12377,-0.055574,-0.009016,-0.049344,-0.131967,-0.056393,-0.007541,-0.016393,0.648689,...,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639,-0.001639


**1- Item-to-Item collaborative filtering**

In this method, we use items(films) themselves to find similarity between them using people's ratings of those items; This can be done by transposing the data and apply cosine similarity to them.

In [19]:
#Item-based collaborative filtering
item_similarity = cosine_similarity(rating_pvt_df.T)
print(item_similarity.shape,'\n')
print(item_similarity)

(9724, 9724) 

[[ 1.          0.2313265   0.1732128  ... -0.0289057  -0.0289057
  -0.0289057 ]
 [ 0.2313265   1.          0.19194487 ... -0.01829073 -0.01829073
  -0.01829073]
 [ 0.1732128   0.19194487  1.         ... -0.01172923 -0.01172923
  -0.01172923]
 ...
 [-0.0289057  -0.01829073 -0.01172923 ...  1.          1.
  -0.00164204]
 [-0.0289057  -0.01829073 -0.01172923 ...  1.          1.
  -0.00164204]
 [-0.0289057  -0.01829073 -0.01172923 ... -0.00164204 -0.00164204
   1.        ]]


At the end, we can use a function which asks the user for the name of a movie and the rating he/she has given to it and return top 10 movies for him/her;

If the user gives a high rating(more than average) to a movie, movies similar to that movie will be suggested to the user and if the user gives it a low rating, similar movies will not be suggested to him/her; In fact, in this case, the movies that have the least similarity are suggested.


In [20]:
def item2item_CF_recommender(movie_name, user_rating):
    idx = movie_df[movie_df.title == movie_name].index[0]
    scores = list(enumerate(item_similarity[idx]*(user_rating-(rating_df['rating'].max()/2))))
    sorted_scores = sorted(scores, key= lambda x:x[1], reverse=True)[1:11]
    rec_items_idx = [i[0] for i in sorted_scores]
    print("Top 10 suggestions:\n")
    return movie_df['title'].iloc[rec_items_idx]

In [21]:
#Suggestions after the user gives a high rating to Jumanji movie
item2item_CF_recommender(movie_name='Jumanji (1995)' , user_rating=4)

Top 10 suggestions:



322                     Lion King, The (1994)
436                     Mrs. Doubtfire (1993)
325                          Mask, The (1994)
483    Nightmare Before Christmas, The (1993)
504                         Home Alone (1990)
131                             Casper (1995)
276                  Santa Clause, The (1994)
18      Ace Ventura: When Nature Calls (1995)
418                      Jurassic Park (1993)
512               Beauty and the Beast (1991)
Name: title, dtype: object

In [22]:
#Suggestions after the user gives a low rating to Jumanji movie
item2item_CF_recommender(movie_name='Jumanji (1995)' , user_rating=1)

Top 10 suggestions:



2067                        Little Shop of Horrors (1986)
1088                         Whole Wide World, The (1996)
8200                               Despicable Me 2 (2013)
1752                               Heart Condition (1990)
4161                                       Chicago (2002)
2145                               American Beauty (1999)
2557                                   School Daze (1988)
911     Star Wars: Episode VI - Return of the Jedi (1983)
1663                                   Simon Birch (1998)
1049                          Preacher's Wife, The (1996)
Name: title, dtype: object

**2- User-to-User collaborative filtering**

In this method, with the help of users' behavior in scoring movies, we find people with similar tastes and then based on their history, their movies are suggested to each other. As before, we use cosine similarity to find similar users.

In [23]:
user_similarity = cosine_similarity(rating_pvt_df)
print(user_similarity.shape,'\n')
print(user_similarity)

(610, 610) 

[[ 1.         -0.14730693 -0.09308068 ...  0.13381861 -0.12346957
   0.01239975]
 [-0.14730693  1.          0.27211578 ... -0.27181787  0.18605345
  -0.14980981]
 [-0.09308068  0.27211578  1.         ... -0.27222622  0.23457372
  -0.19386301]
 ...
 [ 0.13381861 -0.27181787 -0.27222622 ...  1.         -0.24503117
   0.19873891]
 [-0.12346957  0.18605345  0.23457372 ... -0.24503117  1.
  -0.22037256]
 [ 0.01239975 -0.14980981 -0.19386301 ...  0.19873891 -0.22037256
   1.        ]]


At the end, we can use a function which ask a user ID and then find the most similar user to him/her. Then the movies that the user has not seen while the person similar to him/her has seen them before, are suggested to the user.

In [24]:
def user2user_CF_recommender(user_id):
    #note: user_id starts from 1 while indexes start from zero!
    scores = list(enumerate(user_similarity[user_id-1]))
    sorted_scores = sorted(scores, key= lambda x:x[1], reverse=True)[1]
    sim_user_id = sorted_scores[0] + 1
    
    #the movie IDs that user has seen before
    user_seen_mov = list(set(rating_df[rating_df.userId == user_id]['movieId'].values))
    #the movie IDs that most similar person has seen before
    smlr_user_mov = list(set(rating_df[rating_df.userId == sim_user_id]['movieId'].values))
    #movie IDs for suggestion to the user
    rec_movie_ids = list(set(smlr_user_mov) - set(user_seen_mov))[:9]
    
    return movie_df['title'].iloc[rec_movie_ids]

In [25]:
user2user_CF_recommender(2)

4361                          Italian Job, The (2003)
524                            Pallbearer, The (1996)
3863    Cheech & Chong's The Corsican Brothers (1984)
1186                            Simple Wish, A (1997)
3107                                    Avalon (1990)
3363          Bill & Ted's Excellent Adventure (1989)
3752                       All the Right Moves (1983)
3510                                Indiscreet (1958)
3386                          Innocent Man, An (1989)
Name: title, dtype: object