# The Hybrid Recommender System

This system joins together our content based and collaborative recommender system through the following format:

1. Load the datasets and merge them together where need be
2. Select out movies in the ratings dataset that are also in the dataset for content-based filtering
3. When a user searches for a specific movie, they will be returned 'similar movies', ordered by their estimated rating of the movie.

I will be including movies that I have rated that are within the system, as well.

In [169]:
# import statements

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

import random

from surprise import Reader, Dataset, SVD, evaluate, accuracy
from surprise.model_selection import GridSearchCV, KFold, train_test_split
from surprise.model_selection.validation import cross_validate
from surprise.prediction_algorithms.matrix_factorization import SVDpp

import nltk
from rake_nltk import Rake
from nltk.corpus import stopwords

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

import time

import warnings; warnings.simplefilter('ignore')


%matplotlib inline

In [170]:
# Load our datasets

# Ratings dataset from MovieLens 20M
ratings = pd.read_csv('/Users/user/movielens-20m-dataset/rating.csv')

# Our information from our scraped dataset
movies = pd.read_csv('/Users/user/final-capstone/inprogress3.csv')

# Links dataset from MovieLens, which we will hopefully use to knit together the two datasets
links = pd.read_csv('/Users/user/movielens-20m-dataset/link.csv')

In [171]:
ratings.shape

(20000263, 4)

In [172]:
unique_movs = ratings['movieId'].unique().shape[0]
unique_movs

26744

In [173]:
movies.shape

(284303, 25)

In [174]:
links.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [175]:
links.shape

(27278, 3)

In [176]:
movies.columns

Index(['title', 'year', 'rated', 'released', 'runtime', 'genre', 'director',
       'writer', 'actors', 'plot', 'language', 'country', 'awards', 'poster',
       'ratings', 'metascore', 'imdb_rating', 'imdb_votes', 'imdb_id', 'type',
       'dvd', 'box_office', 'production', 'website', 'response'],
      dtype='object')

In [242]:
movies = movies[['title','year','rated','imdb_id', 'genre', 'director','writer','actors','plot','language','country']]

movies.head()

Unnamed: 0,title,year,rated,imdb_id,genre,director,writer,actors,plot,language,country
0,Toy Story,1995,G,tt0114709,"Animation, Adventure, Comedy, Family, Fantasy",John Lasseter,"John Lasseter (original story by), Pete Docter...","Tom Hanks, Tim Allen, Don Rickles, Jim Varney",A cowboy doll is profoundly threatened and jea...,English,USA
1,Jumanji,1995,PG,tt0113497,"Adventure, Comedy, Family, Fantasy",Joe Johnston,"Jonathan Hensleigh (screenplay by), Greg Taylo...","Robin Williams, Jonathan Hyde, Kirsten Dunst, ...",When two kids find and play a magical board ga...,"English, French",USA
2,Grumpier Old Men,1995,PG-13,tt0113228,"Comedy, Romance",Howard Deutch,"Mark Steven Johnson (characters), Mark Steven ...","Walter Matthau, Jack Lemmon, Sophia Loren, Ann...",John and Max resolve to save their beloved bai...,"English, Italian, German",USA
3,Waiting to Exhale,1995,R,tt0114885,"Comedy, Drama, Romance",Forest Whitaker,"Terry McMillan (novel), Terry McMillan (screen...","Whitney Houston, Angela Bassett, Loretta Devin...","Based on Terry McMillan's novel, this film fol...",English,USA
4,Father of the Bride Part II,1995,PG,tt0113041,"Comedy, Family, Romance",Charles Shyer,"Albert Hackett (screenplay ""Father's Little Di...","Steve Martin, Diane Keaton, Martin Short, Kimb...",George Banks must deal not only with the pregn...,English,USA


In [243]:
movies['imdb_id'] = movies['imdb_id'].str.replace('tt0','')
movies['imdb_id'] = movies['imdb_id'].str.replace('tt','')


In [244]:
movies = movies.dropna(subset=['imdb_id'],axis=0)
movies['imdb_id'].isnull().value_counts()

False    277321
Name: imdb_id, dtype: int64

In [245]:
movies['imdb_id'] = movies['imdb_id'].astype('int')

In [246]:
movies.columns = ['title','year','rated','imdbId', 'genre', 'director','writer','actors','plot','language','country']
movies.head()


Unnamed: 0,title,year,rated,imdbId,genre,director,writer,actors,plot,language,country
0,Toy Story,1995,G,114709,"Animation, Adventure, Comedy, Family, Fantasy",John Lasseter,"John Lasseter (original story by), Pete Docter...","Tom Hanks, Tim Allen, Don Rickles, Jim Varney",A cowboy doll is profoundly threatened and jea...,English,USA
1,Jumanji,1995,PG,113497,"Adventure, Comedy, Family, Fantasy",Joe Johnston,"Jonathan Hensleigh (screenplay by), Greg Taylo...","Robin Williams, Jonathan Hyde, Kirsten Dunst, ...",When two kids find and play a magical board ga...,"English, French",USA
2,Grumpier Old Men,1995,PG-13,113228,"Comedy, Romance",Howard Deutch,"Mark Steven Johnson (characters), Mark Steven ...","Walter Matthau, Jack Lemmon, Sophia Loren, Ann...",John and Max resolve to save their beloved bai...,"English, Italian, German",USA
3,Waiting to Exhale,1995,R,114885,"Comedy, Drama, Romance",Forest Whitaker,"Terry McMillan (novel), Terry McMillan (screen...","Whitney Houston, Angela Bassett, Loretta Devin...","Based on Terry McMillan's novel, this film fol...",English,USA
4,Father of the Bride Part II,1995,PG,113041,"Comedy, Family, Romance",Charles Shyer,"Albert Hackett (screenplay ""Father's Little Di...","Steve Martin, Diane Keaton, Martin Short, Kimb...",George Banks must deal not only with the pregn...,English,USA


In [247]:
movies = pd.merge(movies,links,on='imdbId')
movies.head()

Unnamed: 0,title,year,rated,imdbId,genre,director,writer,actors,plot,language,country,movieId,tmdbId
0,Toy Story,1995,G,114709,"Animation, Adventure, Comedy, Family, Fantasy",John Lasseter,"John Lasseter (original story by), Pete Docter...","Tom Hanks, Tim Allen, Don Rickles, Jim Varney",A cowboy doll is profoundly threatened and jea...,English,USA,1,862.0
1,Jumanji,1995,PG,113497,"Adventure, Comedy, Family, Fantasy",Joe Johnston,"Jonathan Hensleigh (screenplay by), Greg Taylo...","Robin Williams, Jonathan Hyde, Kirsten Dunst, ...",When two kids find and play a magical board ga...,"English, French",USA,2,8844.0
2,Grumpier Old Men,1995,PG-13,113228,"Comedy, Romance",Howard Deutch,"Mark Steven Johnson (characters), Mark Steven ...","Walter Matthau, Jack Lemmon, Sophia Loren, Ann...",John and Max resolve to save their beloved bai...,"English, Italian, German",USA,3,15602.0
3,Waiting to Exhale,1995,R,114885,"Comedy, Drama, Romance",Forest Whitaker,"Terry McMillan (novel), Terry McMillan (screen...","Whitney Houston, Angela Bassett, Loretta Devin...","Based on Terry McMillan's novel, this film fol...",English,USA,4,31357.0
4,Father of the Bride Part II,1995,PG,113041,"Comedy, Family, Romance",Charles Shyer,"Albert Hackett (screenplay ""Father's Little Di...","Steve Martin, Diane Keaton, Martin Short, Kimb...",George Banks must deal not only with the pregn...,English,USA,5,11862.0


In [248]:
movies['year'] = movies['year'].apply(lambda x: str(x))
movies['title_year'] = movies['title'].map(str) + ' (' + movies['year'] + ')'
movies = movies[['title_year','rated','imdbId','genre','director','writer','actors','plot','language','country',
                 'movieId','tmdbId', 'year']]
movies.head()

Unnamed: 0,title_year,rated,imdbId,genre,director,writer,actors,plot,language,country,movieId,tmdbId,year
0,Toy Story (1995),G,114709,"Animation, Adventure, Comedy, Family, Fantasy",John Lasseter,"John Lasseter (original story by), Pete Docter...","Tom Hanks, Tim Allen, Don Rickles, Jim Varney",A cowboy doll is profoundly threatened and jea...,English,USA,1,862.0,1995
1,Jumanji (1995),PG,113497,"Adventure, Comedy, Family, Fantasy",Joe Johnston,"Jonathan Hensleigh (screenplay by), Greg Taylo...","Robin Williams, Jonathan Hyde, Kirsten Dunst, ...",When two kids find and play a magical board ga...,"English, French",USA,2,8844.0,1995
2,Grumpier Old Men (1995),PG-13,113228,"Comedy, Romance",Howard Deutch,"Mark Steven Johnson (characters), Mark Steven ...","Walter Matthau, Jack Lemmon, Sophia Loren, Ann...",John and Max resolve to save their beloved bai...,"English, Italian, German",USA,3,15602.0,1995
3,Waiting to Exhale (1995),R,114885,"Comedy, Drama, Romance",Forest Whitaker,"Terry McMillan (novel), Terry McMillan (screen...","Whitney Houston, Angela Bassett, Loretta Devin...","Based on Terry McMillan's novel, this film fol...",English,USA,4,31357.0,1995
4,Father of the Bride Part II (1995),PG,113041,"Comedy, Family, Romance",Charles Shyer,"Albert Hackett (screenplay ""Father's Little Di...","Steve Martin, Diane Keaton, Martin Short, Kimb...",George Banks must deal not only with the pregn...,English,USA,5,11862.0,1995


In [249]:
movies.shape

(28047, 13)

In [185]:
# Make a copy of the movies dataframe before we process the text for our final product

callback = movies.copy()

In [250]:
extras = movies[['title_year','imdbId','movieId','tmdbId']]
extras.head()

Unnamed: 0,title_year,imdbId,movieId,tmdbId
0,Toy Story (1995),114709,1,862.0
1,Jumanji (1995),113497,2,8844.0
2,Grumpier Old Men (1995),113228,3,15602.0
3,Waiting to Exhale (1995),114885,4,31357.0
4,Father of the Bride Part II (1995),113041,5,11862.0


In [251]:
movies = movies[['title_year','rated','writer','genre','director','actors','plot','language','country', 'year']]

for column in movies:
    movies[column] = movies[column].apply(lambda x: str(x))
    
# discarding the commas between the actors' full names
movies['actors'] = movies['actors'].map(lambda x: x.split(','))

# putting the genres in a list of words
movies['genre'] = movies['genre'].map(lambda x: x.lower().split(','))

#movies['director'] = movies['director'].apply(lambda x: [x,x,x])

movies['director'] = movies['director'].map(lambda x: x.split(','))


# merging together first and last name for each actor and director, so it's considered as one word 
# and there is no mix up between people sharing a first name
for index, row in movies.iterrows():
    row['actors'] = [x.lower().replace(' ','') for x in row['actors']]
    row['director'] = [x.lower().replace(' ','') for x in row['director']]


# Getting rid of information between () in writers
movies['writer'] = movies['writer'].str.replace(r"\(.*\)","")

# discarding the commas between writer's names
movies['writer'] = movies['writer'].map(lambda x: x.split(','))

# putting the countries in a list of words
movies['country'] = movies['country'].map(lambda x: x.lower().split(','))

# putting the languages in a list of words
movies['language'] = movies['language'].map(lambda x: x.lower().split(','))

movies['rated'] = movies['rated'].str.replace("-","")
movies['rated'] = movies['rated'].apply(lambda x: str(x))
movies['rated'] = movies['rated'].apply(lambda x: [x,x] if x == 'G' or x == 'PG' else x.split(','))

movies['year'] = movies['year'].map(lambda x: x.split(','))

# merging together first and last name for each actor and director, so it's considered as one word 
# and there is no mix up between people sharing a first name
for index, row in movies.iterrows():
    row['writer'] = [x.lower().replace(' ','') for x in row['writer']]
    row['rated'] = [x.lower().replace(' ','') for x in row['rated']]

movies.head()

Unnamed: 0,title_year,rated,writer,genre,director,actors,plot,language,country,year
0,Toy Story (1995),"[g, g]",[johnlasseter],"[animation, adventure, comedy, family, fan...",[johnlasseter],"[tomhanks, timallen, donrickles, jimvarney]",A cowboy doll is profoundly threatened and jea...,[english],[usa],[1995]
1,Jumanji (1995),"[pg, pg]",[jonathanhensleigh],"[adventure, comedy, family, fantasy]",[joejohnston],"[robinwilliams, jonathanhyde, kirstendunst, br...",When two kids find and play a magical board ga...,"[english, french]",[usa],[1995]
2,Grumpier Old Men (1995),[pg13],"[markstevenjohnson, markstevenjohnson]","[comedy, romance]",[howarddeutch],"[waltermatthau, jacklemmon, sophialoren, ann-m...",John and Max resolve to save their beloved bai...,"[english, italian, german]",[usa],[1995]
3,Waiting to Exhale (1995),[r],[terrymcmillan],"[comedy, drama, romance]",[forestwhitaker],"[whitneyhouston, angelabassett, lorettadevine,...","Based on Terry McMillan's novel, this film fol...",[english],[usa],[1995]
4,Father of the Bride Part II (1995),"[pg, pg]",[alberthackett],"[comedy, family, romance]",[charlesshyer],"[stevemartin, dianekeaton, martinshort, kimber...",George Banks must deal not only with the pregn...,[english],[usa],[1995]


In [252]:
# initializing the new column
movies['Key_words'] = ""

for index, row in movies.iterrows():
    plot = row['plot']
    
    # instantiating Rake, by default is uses english stopwords from NLTK
    # and discard all puntuation characters
    r = Rake()

    # extracting the words by passing the text
    r.extract_keywords_from_text(plot)

    # getting the dictionary whith key words and their scores
    key_words_dict_scores = r.get_word_degrees()
    
    # assigning the key words to the new column
    row['Key_words'] = list(key_words_dict_scores.keys())

# dropping the Plot column
movies.drop(columns = ['plot'], inplace = True)

movies.shape

(28047, 10)

In [253]:
movies.set_index('title_year', inplace = True)

movies['bag_of_words'] = ''
columns = movies.columns
for index, row in movies.iterrows():
    words = ''
    for col in columns:
        #if col == 'director':
        #    words = words.join(row[col]) + ' '
        #else:
        words = words + ' '.join(row[col])+ ' '

    row['bag_of_words'] = words
    
movies.drop(columns = [col for col in movies.columns if col!= 'bag_of_words'], inplace = True)

movies['bag_of_words'] = movies['bag_of_words'].str.replace('nan','')

movies.head()

Unnamed: 0_level_0,bag_of_words
title_year,Unnamed: 1_level_1
Toy Story (1995),g g johnlasseter animation adventure comedy ...
Jumanji (1995),pg pg jonathanhensleigh adventure comedy fam...
Grumpier Old Men (1995),pg13 markstevenjohnson markstevenjohnson comed...
Waiting to Exhale (1995),r terrymcmillan comedy drama romance forestw...
Father of the Bride Part II (1995),pg pg alberthackett comedy family romance ch...


In [254]:
movies = pd.merge(movies, extras, on='title_year')
movies.head()

Unnamed: 0,title_year,bag_of_words,imdbId,movieId,tmdbId
0,Toy Story (1995),g g johnlasseter animation adventure comedy ...,114709,1,862.0
1,Jumanji (1995),pg pg jonathanhensleigh adventure comedy fam...,113497,2,8844.0
2,Grumpier Old Men (1995),pg13 markstevenjohnson markstevenjohnson comed...,113228,3,15602.0
3,Waiting to Exhale (1995),r terrymcmillan comedy drama romance forestw...,114885,4,31357.0
4,Father of the Bride Part II (1995),pg pg alberthackett comedy family romance ch...,113041,5,11862.0


In [255]:
movies = movies.drop_duplicates(subset='movieId')
movies.shape

(26701, 5)

In [192]:
ratings = ratings.drop('timestamp',axis=1)

In [256]:
movie_titles = movies[['title_year','movieId']]
movie_titles.head(1)

Unnamed: 0,title_year,movieId
0,Toy Story (1995),1


In [194]:
ratings = pd.merge(ratings,movie_titles,on='movieId')
ratings.head(5)

Unnamed: 0,userId,movieId,rating,title_year
0,1,2,3.5,Jumanji (1995)
1,5,2,3.0,Jumanji (1995)
2,13,2,3.0,Jumanji (1995)
3,29,2,3.0,Jumanji (1995)
4,34,2,3.0,Jumanji (1995)


In [195]:
# hooray

# 1. Creating the similarity matrix for movie content

Since creating the content recommender is the most computationally intensive and the most likely to overload my machine (though it shouldn't be as huge a problem with only 28k movies) I will calculate that first.

In [197]:
movies = movies.drop_duplicates(subset='movieId')
movies.shape

(26701, 5)

In [198]:
count = CountVectorizer()
count_matrix = count.fit_transform(movies['bag_of_words'])

In [199]:
movies = movies.set_index('title_year')
# creating a Series for the movie titles so they are associated to an ordered numerical
# list I will use later to match the indexes
indices = pd.Series(movies.index)
indices[:5]

0                      Toy Story (1995)
1                        Jumanji (1995)
2               Grumpier Old Men (1995)
3              Waiting to Exhale (1995)
4    Father of the Bride Part II (1995)
Name: title_year, dtype: object

In [200]:
start = time.time()

cosine_sim = cosine_similarity(count_matrix, count_matrix)
cos_sim_time = time.time() - start

print("Runtime %0.2f" % (time.time() - start))
print(cosine_sim)

Runtime 37.12
[[1.         0.22627786 0.15694121 ... 0.03390318 0.0758098  0.        ]
 [0.22627786 1.         0.11769798 ... 0.03178209 0.07106691 0.        ]
 [0.15694121 0.11769798 1.         ... 0.         0.06900656 0.        ]
 ...
 [0.03390318 0.03178209 0.         ... 1.         0.         0.24494897]
 [0.0758098  0.07106691 0.06900656 ... 0.         1.         0.18257419]
 [0.         0.         0.         ... 0.24494897 0.18257419 1.        ]]


In [352]:
# function that takes in movie title as input and returns the top 10 recommended movies
def recommendations(title, cosine_sim = cosine_sim):
    
    recommended_movies = []
    
    # gettin the index of the movie that matches the title
    idx = indices[indices == title].index[0]

    # creating a Series with the similarity scores in descending order
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = False)

    # getting the indexes of the 10 most similar movies
    ordered_indices = list(score_series.iloc[1:100].index)
    
    # populating the list with the titles of the best 15 matching movies
    for i in ordered_indices:
        recommended_movies.append(list(movies.index)[i])
        
    return recommended_movies



In [202]:
def return_recs(title, df, n, orderby=None):
    title_a = recommendations(title)
    title_recs = df.loc[title_a].reset_index(inplace=False)
    title_recs = title_recs.set_index('title_year')
    
    print("Here are the movie recommendations for {}: ".format(title))
    if orderby == None:
        if n != 0:
            return title_recs[['rated','genre','director','plot','language','year','movieId']].head(n)
        else:
            return title_recs[['rated','genre','director','plot','language','year','movieId']]
        
    else:
        if n!= 0:
            title_recs = title_recs.sort_values(by=[orderby], ascending=False)
            return title_recs[['rated','genre','director','plot','language','year','movieId']].head(n)
        else:
            title_recs = title_recs.sort_values(by=[orderby], ascending=False)
            return title_recs[['rated','genre','director','plot','language','year','movieId']]

In [203]:
test = test.drop_duplicates(subset='movieId')

# 2. Training the SVD model

In [205]:
reader = Reader(rating_scale=(1, 5))

In [206]:
ratings.head()

Unnamed: 0,userId,movieId,rating,title_year
0,1,2,3.5,Jumanji (1995)
1,5,2,3.0,Jumanji (1995)
2,13,2,3.0,Jumanji (1995)
3,29,2,3.0,Jumanji (1995)
4,34,2,3.0,Jumanji (1995)


In [207]:
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

In [208]:
# define a cross-validation iterator

start = time.time()

kf = KFold(n_splits=5)

algo = SVD(verbose=True)

for trainset, testset in kf.split(data):

    # train and test algorithm.
    algo.fit(trainset)
    predictions = algo.test(testset)

    # Compute and print Root Mean Squared Error
    accuracy.rmse(predictions, verbose=True)
    
print("Runtime %0.2f" % (time.time() - start))

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
RMSE: 0.7859
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
RMSE: 0.7870
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Proc

In [220]:
ratings.head()

Unnamed: 0,userId,movieId,rating,title_year
0,1,2,3.5,Jumanji (1995)
1,5,2,3.0,Jumanji (1995)
2,13,2,3.0,Jumanji (1995)
3,29,2,3.0,Jumanji (1995)
4,34,2,3.0,Jumanji (1995)


In [223]:
def check_system(Id,movieId,limit,df=ratings,userId='userId',algo=algo):
    '''
    This function checks the predicted rating against ratings made by the user
    And takes in an int UserId (Id)
    an int limit (the number of movies returned)
    dataframe columns necessary (movieId, userId)
    a dataframe of ratings (df)
    an algorithm (algo)
    '''
    
    # Takes a subsample of the user's ratings
    user_df = df[df['userId'] == Id]
    if user_df.shape[0] >= df[userId].value_counts().mean():
        user_df = user_df.sample(frac=.10)
    else:
        user_df = user_df.sample(frac=.50)

    # Builds the dataframe to be returned     
    user_df['est'] = user_df['movieId'].apply(lambda x: round(algo.predict(Id,x).est,2))
    user_df['error'] = user_df['est']-user_df['rating']
    user_df['avg_error'] = user_df['error'].mean()
    
    # Returns a dataframe dependent on what the limit is set to
    if limit == None:
        return user_df[[userId,movieId,'title_year','rating','est','error','avg_error']]
    else:
        if limit >= user_df.shape[0]:
            return user_df[[userId,movieId,'title_year','rating','est','error','avg_error']]
        else:
            user_df = user_df.head(limit)
            return user_df[[userId,movieId,'title_year','rating','est','error','avg_error']]

In [224]:
check_system(345,'movieId',10)

Unnamed: 0,userId,movieId,title_year,rating,est,error,avg_error
190561,345,223,Clerks (1994),3.0,3.17,0.17,-0.138
8470977,345,2355,A Bug's Life (1998),3.5,3.26,-0.24,-0.138
3705786,345,1220,The Blues Brothers (1980),3.5,3.31,-0.19,-0.138
6252071,345,1101,Top Gun (1986),3.0,3.14,0.14,-0.138
7889371,345,1923,There's Something About Mary (1998),3.0,3.34,0.34,-0.138
3725625,345,1221,The Godfather: Part II (1974),4.0,4.06,0.06,-0.138
6121931,345,750,Dr. Strangelove or: How I Learned to Stop Worr...,5.0,3.68,-1.32,-0.138
1737724,345,2716,Ghostbusters (1984),4.0,3.46,-0.54,-0.138
7047883,345,3793,X-Men (2000),4.0,3.61,-0.39,-0.138
8375781,345,1961,Rain Man (1988),3.0,3.59,0.59,-0.138


In [258]:
def predicted_top_n(Id, movieId, n, samples, df=ratings, userId='userId', algo=algo):
    '''
    This function returns n movies, sorted by predicted user rating, from a random sample of movies. 
    '''
    #df = df[[movieId,userId,'rating']]

    movie_choices = df[movieId].unique()
    
    # Take out movies the user has already watched
    temp_df = df[df[userId] == Id]
    watched_movs = temp_df[movieId].unique()
    unwatched = np.setdiff1d(movie_choices,watched_movs)
    
    # Select random movies according to 'samples'
    if samples == None:
        movs = unwatched
        samples = movs.shape[0]
    elif samples <= unwatched.shape[0]:
         movs = np.random.choice(unwatched,samples)
    else:
        print("The sample size exceeds the available movies. Reset to {} movies".format(unwatched.shape[0]))
        movs = unwatched
        samples = movs.shape[0]
        
    # Build the dataframe that we'll return
    predicted_df = pd.DataFrame()
    predicted_df[movieId] = movs
    predicted_df[userId] = Id
    predicted_df['est'] = predicted_df[movieId].apply(lambda x: round(algo.predict(Id,x).est,2))
    predicted_df = predicted_df.sort_values(by='est', ascending=False)
    
    if n >= samples:
        predicted_df = pd.merge(predicted_df,movie_titles,on=movieId)
        return predicted_df[[userId,'title_year','est']]
    else:
        predicted_df = predicted_df.head(n)
        predicted_df = pd.merge(predicted_df,movie_titles,on=movieId)
        return predicted_df[[userId,'title_year','est']]

In [260]:
predicted_top_n(1250, 'movieId', 10, None, df=ratings, userId='userId', algo=algo)

Unnamed: 0,userId,title_year,est
0,1250,Gladiator (2000),4.61
1,1250,Life Is Beautiful (1997),4.57
2,1250,The Sixth Sense (1999),4.52
3,1250,The Dark Knight Rises (2012),4.49
4,1250,Interstellar (2014),4.43
5,1250,Harry Potter and the Deathly Hallows: Part 2 (...,4.41
6,1250,The Green Mile (1999),4.4
7,1250,Pride and Prejudice (1995),4.4
8,1250,Crash (2004),4.39
9,1250,The Bourne Ultimatum (2007),4.39


# 3. Hybrid Recommender

Great! Now we can return all movies with estimated ratings. Now what we're going to do is go back to our big dataframe with plots and all, recommend a movie based on cosine similarity, then return the results with estimated ratings!

Note, at least for this first one, I am NOT taking into account that a user has watched a movie before.

In [319]:
movies.head()

Unnamed: 0,title_year,bag_of_words,imdbId,movieId,tmdbId
0,Toy Story (1995),g g johnlasseter animation adventure comedy ...,114709,1,862.0
1,Jumanji (1995),pg pg jonathanhensleigh adventure comedy fam...,113497,2,8844.0
2,Grumpier Old Men (1995),pg13 markstevenjohnson markstevenjohnson comed...,113228,3,15602.0
3,Waiting to Exhale (1995),r terrymcmillan comedy drama romance forestw...,114885,4,31357.0
4,Father of the Bride Part II (1995),pg pg alberthackett comedy family romance ch...,113041,5,11862.0


In [327]:
test.head()

Unnamed: 0,title_year,rated,imdbId,genre,director,writer,actors,plot,language,country,movieId,tmdbId,year
0,Toy Story (1995),G,114709,"Animation, Adventure, Comedy, Family, Fantasy",John Lasseter,"John Lasseter (original story by), Pete Docter...","Tom Hanks, Tim Allen, Don Rickles, Jim Varney",A cowboy doll is profoundly threatened and jea...,English,USA,1,862.0,1995
1,Jumanji (1995),PG,113497,"Adventure, Comedy, Family, Fantasy",Joe Johnston,"Jonathan Hensleigh (screenplay by), Greg Taylo...","Robin Williams, Jonathan Hyde, Kirsten Dunst, ...",When two kids find and play a magical board ga...,"English, French",USA,2,8844.0,1995
2,Grumpier Old Men (1995),PG-13,113228,"Comedy, Romance",Howard Deutch,"Mark Steven Johnson (characters), Mark Steven ...","Walter Matthau, Jack Lemmon, Sophia Loren, Ann...",John and Max resolve to save their beloved bai...,"English, Italian, German",USA,3,15602.0,1995
3,Waiting to Exhale (1995),R,114885,"Comedy, Drama, Romance",Forest Whitaker,"Terry McMillan (novel), Terry McMillan (screen...","Whitney Houston, Angela Bassett, Loretta Devin...","Based on Terry McMillan's novel, this film fol...",English,USA,4,31357.0,1995
4,Father of the Bride Part II (1995),PG,113041,"Comedy, Family, Romance",Charles Shyer,"Albert Hackett (screenplay ""Father's Little Di...","Steve Martin, Diane Keaton, Martin Short, Kimb...",George Banks must deal not only with the pregn...,English,USA,5,11862.0,1995


In [338]:
def new_movie_rec(title, Id, n, df=ratings, userId='userId', movieId='movieId', algo=algo, cosine_sim=cosine_sim):
    '''
    Returns movies by cosine similarity, with the user's estimated rating of them attached.
    '''
    
    recommended_movies = []

    # Grab the indices for the top 100 movies by cosine similarity
    idx = indices[indices == title].index[0]
    score_series = pd.Series(cosine_sim[3124]).sort_values(ascending = False)
    ordered_indices = list(score_series.iloc[1:100].index) # Filtering out itself
    for i in ordered_indices:
            recommended_movies.append(list(movies.index)[i])
        
    # Next, build the dataframe for these movies
    # Using 'movies' dataframe from earlier
    title_a = recommendations(title)
    title_recs = movies.loc[title_a].reset_index(inplace=False)
    title_recs = title_recs.set_index('title_year')
    
    # Predict the user ratings and sort by those ratings
    movs = title_recs[movieId].unique()
    title_recs['est'] = title_recs[movieId].apply(lambda x: round(algo.predict(Id,x).est,2))
    title_recs = title_recs.sort_values(by='est', ascending=False)
    
    # Finally, return n movies in the list
    if n == None:
        title_recs = title_recs[['est']]
        pretty_df = pd.merge(title_recs,test,on='title_year')
    elif n == 0:
        title_recs = title_recs[['est']]
        pretty_df = pd.merge(title_recs,test,on='title_year')
    else:
        title_recs = title_recs[['est']].head(n)
        pretty_df = pd.merge(title_recs,test,on='title_year')
    return pretty_df[['title_year','rated','year','genre','director','plot','language','country','est']]

In [355]:
new_movie_rec('Spirited Away (2001)', 1250, 10)

Unnamed: 0,title_year,rated,year,genre,director,plot,language,country,est
0,Harry Potter and the Sorcerer's Stone (2001),PG,2001,"Adventure, Family, Fantasy",Chris Columbus,An orphaned boy enrolls in a school of wizardr...,English,"UK, USA",4.31
1,Shrek (2001),PG,2001,"Animation, Adventure, Comedy, Family, Fantasy","Andrew Adamson, Vicky Jenson",A mean lord exiles fairytale creatures to the ...,English,USA,4.27
2,Harry Potter and the Prisoner of Azkaban (2004),PG,2004,"Adventure, Family, Fantasy, Mystery",Alfonso Cuarón,It's Harry's third year at Hogwarts; not only ...,English,"UK, USA",4.24
3,Harry Potter and the Half-Blood Prince (2009),PG,2009,"Adventure, Family, Fantasy, Mystery",David Yates,As Harry Potter begins his sixth year at Hogwa...,English,"UK, USA",4.23
4,Shrek 2 (2004),PG,2004,"Animation, Adventure, Comedy, Family, Fantasy,...","Andrew Adamson, Kelly Asbury, Conrad Vernon",Princess Fiona's parents invite her and Shrek ...,English,USA,4.12
5,Song of the Sea (2014),PG,2014,"Animation, Adventure, Drama, Family, Fantasy",Tomm Moore,"Ben, a young Irish boy, and his little sister ...","English, Irish","Ireland, Denmark, Belgium, Luxembourg, France",4.08
6,Howl's Moving Castle (2004),PG,2004,"Animation, Adventure, Family, Fantasy",Hayao Miyazaki,When an unconfident young woman is cursed with...,Japanese,Japan,4.05
7,The Incredibles (2004),PG,2004,"Animation, Action, Adventure, Family",Brad Bird,"A family of undercover superheroes, while tryi...","English, French",USA,3.97
8,Tangled (2010),PG,2010,"Animation, Adventure, Comedy, Family, Fantasy,...","Nathan Greno, Byron Howard",The magically long-haired Rapunzel has spent h...,English,USA,3.89
9,Kiki's Delivery Service (1989),G,1989,"Animation, Adventure, Drama, Family, Fantasy",Hayao Miyazaki,"A young witch, on her mandatory year of indepe...","Japanese, English",Japan,3.86


In [356]:
new_movie_rec('Spirited Away (2001)', 118205, 10)

Unnamed: 0,title_year,rated,year,genre,director,plot,language,country,est
0,My Neighbor Totoro (1988),G,1988,"Animation, Family, Fantasy",Hayao Miyazaki,When two girls move to the country to be near ...,Japanese,Japan,3.95
1,Song of the Sea (2014),PG,2014,"Animation, Adventure, Drama, Family, Fantasy",Tomm Moore,"Ben, a young Irish boy, and his little sister ...","English, Irish","Ireland, Denmark, Belgium, Luxembourg, France",3.84
2,Howl's Moving Castle (2004),PG,2004,"Animation, Adventure, Family, Fantasy",Hayao Miyazaki,When an unconfident young woman is cursed with...,Japanese,Japan,3.83
3,Kiki's Delivery Service (1989),G,1989,"Animation, Adventure, Drama, Family, Fantasy",Hayao Miyazaki,"A young witch, on her mandatory year of indepe...","Japanese, English",Japan,3.8
4,Shrek (2001),PG,2001,"Animation, Adventure, Comedy, Family, Fantasy","Andrew Adamson, Vicky Jenson",A mean lord exiles fairytale creatures to the ...,English,USA,3.8
5,Nausicaä of the Valley of the Wind (1984),PG,1984,"Animation, Adventure, Fantasy, Sci-Fi",Hayao Miyazaki,Warrior and pacifist Princess Nausicaä despera...,"Japanese, English","Japan, USA",3.75
6,The Incredibles (2004),PG,2004,"Animation, Action, Adventure, Family",Brad Bird,"A family of undercover superheroes, while tryi...","English, French",USA,3.75
7,Castle in the Sky (1986),PG,1986,"Animation, Adventure, Drama, Fantasy, Sci-Fi",Hayao Miyazaki,A young boy and a girl with a magic crystal mu...,Japanese,Japan,3.74
8,The Tale of The Princess Kaguya (2013),PG,2013,"Animation, Adventure, Drama, Family, Fantasy",Isao Takahata,Found inside a shining stalk of bamboo by an o...,Japanese,Japan,3.7
9,Porco Rosso (1992),PG,1992,"Animation, Adventure, Comedy, Fantasy, Romance",Hayao Miyazaki,"In 1930s Italy, a veteran World War I pilot is...",Japanese,Japan,3.67


In [357]:
new_movie_rec('Mean Girls (2004)', 1250, 10)

Unnamed: 0,title_year,rated,year,genre,director,plot,language,country,est
0,Ferris Bueller's Day Off (1986),PG-13,1986,Comedy,John Hughes,A high school wise guy is determined to have a...,"English, German",USA,4.02
1,Anne of Green Gables (1985),TV-G,1985,"Drama, Family",,"An orphan girl, sent to an elderly brother and...",English,"Canada, West Germany, USA",3.98
2,The Terminal (2004),PG-13,2004,"Comedy, Drama, Romance",Steven Spielberg,An Eastern European tourist unexpectedly finds...,"English, Bulgarian, Spanish, Russian, Mandarin...",USA,3.9
3,50 First Dates (2004),PG-13,2004,"Comedy, Drama, Romance",Peter Segal,Henry Roth is a man afraid of commitment up un...,"English, Hawaiian, Mandarin, None",USA,3.82
4,Forgetting Sarah Marshall (2008),R,2008,"Comedy, Drama, Romance",Nicholas Stoller,Devastated Peter takes a Hawaiian vacation in ...,"English, Hawaiian, German, Japanese",USA,3.77
5,Bush's Brain (2004),PG-13,2004,Documentary,"Joseph Mealey, Michael Shoob",An examination of political consultant Karl Ro...,English,USA,3.74
6,Some Kind of Wonderful (1987),PG-13,1987,"Drama, Romance",Howard Deutch,"When Keith goes out with Amanda, the girl of h...",English,USA,3.74
7,Saved! (2004),PG-13,2004,"Comedy, Drama",Brian Dannelly,When a girl attending a Christian high school ...,English,"USA, Canada",3.74
8,Midnight in Paris (2011),PG-13,2011,"Comedy, Fantasy, Romance",Woody Allen,While on a trip to Paris with his fiancée's fa...,"English, French, Spanish, German","Spain, USA, France",3.72
9,Nick and Norah's Infinite Playlist (2008),PG-13,2008,"Comedy, Drama, Music, Romance",Peter Sollett,"High school student Nick O'Leary, member of th...",English,USA,3.71


In [358]:
new_movie_rec('Mean Girls (2004)', 118205, 10)

Unnamed: 0,title_year,rated,year,genre,director,plot,language,country,est
0,Anne of Green Gables (1985),TV-G,1985,"Drama, Family",,"An orphan girl, sent to an elderly brother and...",English,"Canada, West Germany, USA",3.93
1,Midnight in Paris (2011),PG-13,2011,"Comedy, Fantasy, Romance",Woody Allen,While on a trip to Paris with his fiancée's fa...,"English, French, Spanish, German","Spain, USA, France",3.79
2,Ferris Bueller's Day Off (1986),PG-13,1986,Comedy,John Hughes,A high school wise guy is determined to have a...,"English, German",USA,3.78
3,The Winning Season (2009),PG-13,2009,"Comedy, Sport",Jim Strouse,A comedy centered on a has-been coach who is g...,English,USA,3.67
4,Clueless (1995),PG-13,1995,"Comedy, Romance",Amy Heckerling,A rich high school student tries to boost a ne...,"English, Spanish",USA,3.53
5,The Bachelor and the Bobby-Soxer (1947),Approved,1947,"Comedy, Romance",Irving Reis,"A high school girl falls for a playboy artist,...","English, French",USA,3.52
6,Scott Pilgrim vs. the World (2010),PG-13,2010,"Action, Comedy, Fantasy, Romance",Edgar Wright,Scott Pilgrim must defeat his new girlfriend's...,English,"USA, UK, Canada, Japan",3.49
7,Enough Said (2013),PG-13,2013,"Comedy, Drama, Romance",Nicole Holofcener,A divorced woman who decides to pursue the man...,English,USA,3.48
8,Three O'Clock High (1987),PG-13,1987,Comedy,Phil Joanou,A nerd gets himself in hot water with the new ...,English,USA,3.47
9,The Terminal (2004),PG-13,2004,"Comedy, Drama, Romance",Steven Spielberg,An Eastern European tourist unexpectedly finds...,"English, Bulgarian, Spanish, Russian, Mandarin...",USA,3.47


# 4. Unwatched Movies Only

Finally, let's make a recommender that only returns movies the user hasn't before watched.

In [306]:
movies_df = test.copy()

In [363]:
movies_df.head()

Unnamed: 0,title_year,rated,imdbId,genre,director,writer,actors,plot,language,country,movieId,tmdbId,year
0,Toy Story (1995),G,114709,"Animation, Adventure, Comedy, Family, Fantasy",John Lasseter,"John Lasseter (original story by), Pete Docter...","Tom Hanks, Tim Allen, Don Rickles, Jim Varney",A cowboy doll is profoundly threatened and jea...,English,USA,1,862.0,1995
1,Jumanji (1995),PG,113497,"Adventure, Comedy, Family, Fantasy",Joe Johnston,"Jonathan Hensleigh (screenplay by), Greg Taylo...","Robin Williams, Jonathan Hyde, Kirsten Dunst, ...",When two kids find and play a magical board ga...,"English, French",USA,2,8844.0,1995
2,Grumpier Old Men (1995),PG-13,113228,"Comedy, Romance",Howard Deutch,"Mark Steven Johnson (characters), Mark Steven ...","Walter Matthau, Jack Lemmon, Sophia Loren, Ann...",John and Max resolve to save their beloved bai...,"English, Italian, German",USA,3,15602.0,1995
3,Waiting to Exhale (1995),R,114885,"Comedy, Drama, Romance",Forest Whitaker,"Terry McMillan (novel), Terry McMillan (screen...","Whitney Houston, Angela Bassett, Loretta Devin...","Based on Terry McMillan's novel, this film fol...",English,USA,4,31357.0,1995
4,Father of the Bride Part II (1995),PG,113041,"Comedy, Family, Romance",Charles Shyer,"Albert Hackett (screenplay ""Father's Little Di...","Steve Martin, Diane Keaton, Martin Short, Kimb...",George Banks must deal not only with the pregn...,English,USA,5,11862.0,1995


In [387]:
def rec_unwatched(title, Id, n, df=ratings, userId='userId', movieId='movieId', algo=algo, cosine_sim=cosine_sim):
    '''
    Returns movies by cosine similarity, with the user's estimated rating of them attached.
    '''
    
    recommended_movies = []

    # Grab the indices for the top 100 movies by cosine similarity
    idx = indices[indices == title].index[0]
    score_series = pd.Series(cosine_sim[3124]).sort_values(ascending = False)
    ordered_indices = list(score_series.iloc[1:100].index) # Filtering out itself
    for i in ordered_indices:
            recommended_movies.append(list(movies.index)[i])
        
    # Next, build the dataframe for these movies
    # Using 'movies' dataframe from earlier
    title_a = recommendations(title)
    title_recs = movies.loc[title_a].reset_index(inplace=False)
    title_recs = title_recs.set_index('title_year')
    
    # Get rid of movies the user has previously seen
    sim_movs = title_recs[movieId].unique()
    temp_df = df[df[userId] == Id]
    watched_movs = temp_df[movieId].unique()
    unwatched = np.setdiff1d(sim_movs,watched_movs)
    
    new_movs = pd.DataFrame()
    new_movs['movieId'] = unwatched
    new_movs['est'] = new_movs[movieId].apply(lambda x: round(algo.predict(Id,x).est,2))
    new_movs = pd.merge(new_movs,movies_df,on='movieId')

    new_movs = new_movs.sort_values(by='est', ascending=False)
    new_movs = new_movs.set_index('title_year')
    
    # Finally, return n movies in the list
    if n == None:
        return new_movs[['rated','year','genre','director','plot','language','country','est']].dropna()
    elif n == 0:
        return new_movs[['rated','year','genre','director','plot','language','country','est']].dropna()
    else:
        return new_movs[['rated','year','genre','director','plot','language','country','est']].dropna().head(n)

In [388]:
rec_unwatched('Se7en (1995)', 118205, 10)

Unnamed: 0_level_0,rated,year,genre,director,plot,language,country,est
title_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Citizen X (1995),R,1995,"Biography, Crime, Drama, History, Thriller",Chris Gerolmo,"During the 1980s, Soviet authorities hunt for ...",English,"USA, Hungary",3.81
The Crimson Kimono (1959),APPROVED,1959,"Crime, Drama, Mystery",Samuel Fuller,Two detectives seek a stripper's killer in the...,"English, Japanese",USA,3.41
Maniac Cop 2 (1990),R,1990,"Crime, Horror, Action, Thriller",William Lustig,"A supernatural, maniac killer cop teams up wit...",English,USA,3.38
Far from Home (1989),R,1989,"Crime, Drama, Mystery, Thriller",Meiert Avis,A teenage girl and her father driving cross-co...,English,USA,3.37
B.T.K. (2008),R,2008,"Biography, Crime, Drama, Horror, Mystery, Thri...",Michael Feifer,"Inspired by the real-life serial killer, B.T.K...",English,USA,3.36
Whistling in Brooklyn (1943),PASSED,1943,"Comedy, Crime, Mystery",S. Sylvan Simon,"Radio crime show host ""The Fox"" finds himself ...",English,USA,3.31
The Frozen Ground (2013),R,2013,"Crime, Drama, Mystery, Thriller",Scott Walker,An Alaska State Trooper partners with a young ...,"English, French",USA,3.3
Lila & Eve (2015),R,2015,"Crime, Drama, Mystery, Thriller",Charles Stone III,"Two distraught mothers, whose children were gu...",English,USA,3.29
The Nature of the Beast (1995),R,1995,"Crime, Mystery, Thriller",Victor Salva,"Two men, both hiding a deadly secret, are on a...",English,"USA, Canada",3.28
Enduring Love (2004),R,2004,"Drama, Mystery, Romance, Thriller",Roger Michell,Two strangers become dangerously close after w...,English,UK,3.23


In [389]:
new_movie_rec('Se7en (1995)', 118205, 10)

Unnamed: 0,title_year,rated,year,genre,director,plot,language,country,est
0,Witness for the Prosecution (1957),Not Rated,1957,"Crime, Drama, Mystery, Thriller",Billy Wilder,A veteran British barrister must defend his cl...,"English, German",USA,4.07
1,And Then There Were None (1945),Approved,1945,"Crime, Drama, Mystery, Thriller",René Clair,"Seven guests, a newly hired personal secretary...",English,USA,3.91
2,The Spiral Staircase (1946),Approved,1946,"Drama, Mystery, Thriller",Robert Siodmak,"In 1916, a shadowy serial killer is targeting ...",English,USA,3.83
3,Citizen X (1995),R,1995,"Biography, Crime, Drama, History, Thriller",Chris Gerolmo,"During the 1980s, Soviet authorities hunt for ...",English,"USA, Hungary",3.81
4,Rope (1948),PG,1948,"Crime, Drama, Mystery, Thriller",Alfred Hitchcock,Two men attempt to prove they committed the pe...,English,USA,3.81
5,The Last of Sheila (1973),PG,1973,"Crime, Drama, Mystery, Thriller",Herbert Ross,A year after Sheila is killed in a hit-and-run...,"English, French",USA,3.77
6,Mississippi Burning (1988),R,1988,"Crime, Drama, History, Mystery, Thriller",Alan Parker,"Two F.B.I. Agents, with wildly different style...",English,USA,3.77
7,Monster (2003),R,2003,"Biography, Crime, Drama, Thriller",Patty Jenkins,"Based on the life of Aileen Wuornos, a Daytona...",English,"Germany, USA",3.74
8,Gone Baby Gone (2007),R,2007,"Crime, Drama, Mystery, Thriller",Ben Affleck,Two Boston area detectives investigate a littl...,English,USA,3.73
9,Dolores Claiborne (1995),R,1995,"Crime, Drama, Mystery, Thriller",Taylor Hackford,A big-city reporter travels to the small town ...,English,USA,3.66


In [390]:
new_movie_rec('Spirited Away (2001)', 118205, 10)

Unnamed: 0,title_year,rated,year,genre,director,plot,language,country,est
0,My Neighbor Totoro (1988),G,1988,"Animation, Family, Fantasy",Hayao Miyazaki,When two girls move to the country to be near ...,Japanese,Japan,3.95
1,Song of the Sea (2014),PG,2014,"Animation, Adventure, Drama, Family, Fantasy",Tomm Moore,"Ben, a young Irish boy, and his little sister ...","English, Irish","Ireland, Denmark, Belgium, Luxembourg, France",3.84
2,Howl's Moving Castle (2004),PG,2004,"Animation, Adventure, Family, Fantasy",Hayao Miyazaki,When an unconfident young woman is cursed with...,Japanese,Japan,3.83
3,Kiki's Delivery Service (1989),G,1989,"Animation, Adventure, Drama, Family, Fantasy",Hayao Miyazaki,"A young witch, on her mandatory year of indepe...","Japanese, English",Japan,3.8
4,Shrek (2001),PG,2001,"Animation, Adventure, Comedy, Family, Fantasy","Andrew Adamson, Vicky Jenson",A mean lord exiles fairytale creatures to the ...,English,USA,3.8
5,Nausicaä of the Valley of the Wind (1984),PG,1984,"Animation, Adventure, Fantasy, Sci-Fi",Hayao Miyazaki,Warrior and pacifist Princess Nausicaä despera...,"Japanese, English","Japan, USA",3.75
6,The Incredibles (2004),PG,2004,"Animation, Action, Adventure, Family",Brad Bird,"A family of undercover superheroes, while tryi...","English, French",USA,3.75
7,Castle in the Sky (1986),PG,1986,"Animation, Adventure, Drama, Fantasy, Sci-Fi",Hayao Miyazaki,A young boy and a girl with a magic crystal mu...,Japanese,Japan,3.74
8,The Tale of The Princess Kaguya (2013),PG,2013,"Animation, Adventure, Drama, Family, Fantasy",Isao Takahata,Found inside a shining stalk of bamboo by an o...,Japanese,Japan,3.7
9,Porco Rosso (1992),PG,1992,"Animation, Adventure, Comedy, Fantasy, Romance",Hayao Miyazaki,"In 1930s Italy, a veteran World War I pilot is...",Japanese,Japan,3.67


In [391]:
rec_unwatched('Spirited Away (2001)', 118205, 10)

Unnamed: 0_level_0,rated,year,genre,director,plot,language,country,est
title_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Song of the Sea (2014),PG,2014,"Animation, Adventure, Drama, Family, Fantasy",Tomm Moore,"Ben, a young Irish boy, and his little sister ...","English, Irish","Ireland, Denmark, Belgium, Luxembourg, France",3.84
Nausicaä of the Valley of the Wind (1984),PG,1984,"Animation, Adventure, Fantasy, Sci-Fi",Hayao Miyazaki,Warrior and pacifist Princess Nausicaä despera...,"Japanese, English","Japan, USA",3.75
The Tale of The Princess Kaguya (2013),PG,2013,"Animation, Adventure, Drama, Family, Fantasy",Isao Takahata,Found inside a shining stalk of bamboo by an o...,Japanese,Japan,3.7
Porco Rosso (1992),PG,1992,"Animation, Adventure, Comedy, Fantasy, Romance",Hayao Miyazaki,"In 1930s Italy, a veteran World War I pilot is...",Japanese,Japan,3.67
Millennium Actress (2001),PG,2001,"Animation, Drama, Fantasy, Romance",Satoshi Kon,A TV interviewer and his cameraman meet a form...,"Japanese, English",Japan,3.65
Ponyo (2008),G,2008,"Animation, Adventure, Comedy, Family, Fantasy",Hayao Miyazaki,A five-year-old boy develops a relationship wi...,Japanese,Japan,3.62
The Cat Returns (2002),G,2002,"Animation, Adventure, Comedy, Family, Fantasy,...",Hiroyuki Morita,"After helping a cat, a 17-year-old girl finds ...","Japanese, English",Japan,3.57
Rise of the Guardians (2012),PG,2012,"Animation, Action, Adventure, Comedy, Family, ...",Peter Ramsey,When the evil spirit Pitch launches an assault...,"English, French",USA,3.52
My Neighbors the Yamadas (1999),PG,1999,"Animation, Comedy, Family",Isao Takahata,The life and misadventures of a family in cont...,Japanese,Japan,3.51
Pom Poko (1994),PG,1994,"Animation, Comedy, Drama, Family, Fantasy",Isao Takahata,A community of magical shape-shifting raccoon ...,"Japanese, English",Japan,3.49
