# Build your own movie recommender system
Dataset: [MovieLens](https://grouplens.org/datasets/movielens/) (this notebook reads the `ml-latest-small` files from `./data/ml-latest-small/`).

In [8]:
import os

import numpy as np
import pandas as pd
import ipywidgets as widgets
from IPython.display import Javascript, display
from surprise import SVD

from CFRecommenderSystem.CFMovieSystem import CFMovieSystem
from CFRecommenderSystem.CFData import CFData
from CFRecommenderSystem.CFModel import CFModel

# Directory holding the MovieLens "ml-latest-small" CSV files (relative to the notebook).
dir_path = './data/ml-latest-small/'
# os.path.join keeps the paths valid whether or not dir_path ends with a separator.
movies_csv = os.path.join(dir_path, 'movies.csv')
ratings_csv = os.path.join(dir_path, 'ratings.csv')
links_csv = os.path.join(dir_path, 'links.csv')
#tags_csv = os.path.join(dir_path, 'tags.csv')

# Load the raw tables once; later cells derive their inputs from these frames.
df_movie = pd.read_csv(movies_csv)
df_rating = pd.read_csv(ratings_csv)
df_link = pd.read_csv(links_csv)
#df_tag = pd.read_csv(tags_csv)

# TMDB API key used for poster display. It is read from the TMDB_API_KEY
# environment variable so the secret never has to be typed into (and saved
# with) the notebook. When unset it stays '' and the recommendation cell
# falls back to text-only output.
my_tmdb_key = os.environ.get('TMDB_API_KEY', '')

In [2]:
# Preview the first three rows of the movie table (movieId, title, genres).
df_movie.head(3)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance


In [3]:
# Preview the first three rows of the rating table (userId, movieId, rating, timestamp).
df_rating.head(3)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182


# Prepare the dataset for training

In [4]:
# Reshape the rating data for the CFData class: keep only the
# (user, item, rating) columns and rename them to userID / itemID / rating.
# index=str passes every row label through str(), as in the original cell.
rating_column_map = {'userId': 'userID', 'movieId': 'itemID', 'rating': 'rating'}
df_data = (
    df_rating[['userId', 'movieId', 'rating']]
    .rename(index=str, columns=rating_column_map)
)

# Lookup table from item id to a human-readable movie title.
title_column_map = {'movieId': 'itemID', 'title': 'itemName'}
df_id_name_table = (
    df_movie[['movieId', 'title']]
    .rename(index=str, columns=title_column_map)
)

# Model training for collaborative filtering by Funk-SVD

In [5]:
# Create CFData instance from the prepared ratings and the id -> title table.
# NOTE(review): test_ratio=None presumably means no hold-out split, i.e. all
# ratings go into the trainset -- confirm in CFData.
data_movie = CFData(df_data, test_ratio=None, df_id_name_table=df_id_name_table, rating_scale=(0.5, 5))
# Create CFModel instance around surprise's SVD algorithm.
# NOTE(review): lr_all / reg_all / n_epochs are presumably forwarded to SVD
# as its hyperparameters -- confirm in CFModel.
model_svd = CFModel(SVD, lr_all=0.005, reg_all=0.4, n_epochs=30)
model_svd.fit(data_movie.trainset)
# Create CFMovieSystem instance that pairs the data with the fitted model.
cf_movie_sys = CFMovieSystem(data_movie, model_svd)

# Movie recommendation based on user-selected movie

In [6]:
def get_most_rated_movie(df_movie_in, df_rating_in, n_output):
    """Return the titles of the movies that received the most ratings.

    Parameters
    ----------
    df_movie_in : pandas.DataFrame
        Movie table; must contain the columns ``movieId`` and ``title``.
    df_rating_in : pandas.DataFrame
        Rating table; must contain the columns ``movieId`` and ``rating``.
    n_output : int
        Maximum number of titles to return.

    Returns
    -------
    pandas.Index
        Up to ``n_output`` titles, ordered from most to least rated.
        Movies without any rating are dropped by the inner join, and
        ratings are counted per title (not per ``movieId``).
    """
    # Use the frames that were passed in. The previous version ignored its
    # parameters and silently read the notebook globals df_movie / df_rating.
    ratings_per_title = (
        pd.merge(df_movie_in, df_rating_in, on='movieId', how='inner')
        .groupby('title')['rating']
        .count()
    )
    return ratings_per_title.sort_values(ascending=False).index[:n_output]
def run_next_cell(ev):
    """Button click handler that triggers execution of the following cell.

    Displays a JavaScript snippet that calls
    ``IPython.notebook.execute_cell_range`` with ``get_selected_index()+1``
    and ``get_selected_index()+2`` as its two arguments. ``ev`` is the
    argument supplied by the click callback; it is not used.

    NOTE(review): this relies on the ``IPython.notebook`` JavaScript object
    being present in the front end -- confirm it is available where the
    notebook is run (e.g. JupyterLab).
    """
    display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, \
                      IPython.notebook.get_selected_index()+2)'))

# Create a drop-down listing the 50 most rated movies.
movie_list_top_50 = get_most_rated_movie(df_movie, df_rating, 50)
movie_options = list(movie_list_top_50)

# Pre-select 'Forrest Gump (1994)' when it is available. Dropdown raises if
# `value` is not one of `options`, so fall back to the first option (or no
# selection at all) when a different ratings file does not contain that title.
default_movie = 'Forrest Gump (1994)'
if default_movie not in movie_options:
    default_movie = movie_options[0] if movie_options else None

selected_movie_name = widgets.Dropdown(options=movie_options, value=default_movie,
                                       description='Select a movie:')
button = widgets.Button(description="Top-10 movies recommended to you",
                        layout=widgets.Layout(width='40%', height='40px'))
# Clicking the button runs the next cell, which prints the recommendations.
button.on_click(run_next_cell)
widgets.VBox([selected_movie_name, button])

VkJveChjaGlsZHJlbj0oRHJvcGRvd24oZGVzY3JpcHRpb249dSdTZWxlY3QgYSBvdmllOicsIG9wdGlvbnM9KCdGb3JyZXN0IEd1bXAgKDE5OTQpJywgJ1B1bHAgRmljdGlvbiAoMTk5NCknLCDigKY=


In [7]:
# Read the movie currently chosen in the drop-down.
movie_name = selected_movie_name.value

# With a TMDB key, also pass the key and the link table so posters can be
# shown; without one, no extra arguments are passed (text-only recommendation).
poster_kwargs = {'tmdb_key': my_tmdb_key, 'df_ml_imdb_id': df_link} if my_tmdb_key else {}
cf_movie_sys.show_recommended_movies(movie_name, k=10, **poster_kwargs)

Movie you select is 'Forrest Gump (1994)'


Based on 'Forrest Gump (1994)', we recommend 10 movies below:


['Dances with Wolves (1990)', 'Darfur Now (2007)', 'Lifeboat (1944)', 'Summer Lovers (1982)', 'Die Hard (1988)', 'In the Mouth of Madness (1995)', 'Jungle Book (1942)', 'Client, The (1994)', 'No One Lives (2012)', 'Picture of Dorian Gray, The (1945)']
