# Load the Functions notebook

In [1]:
# Execute Functions.ipynb inside this kernel so the names it defines become
# available to the cells below.
# NOTE(review): presumably this is where Process_Avg_Rating,
# recommend_similar_items and AskForUserInput come from — confirm.
%run Functions.ipynb 

# Model 1: Collaborative Filtering (KNN with Means)

## Read the model from saved file

In [2]:
import pickle

# File holding the pickled collaborative-filtering (KNN with means) model.
knn_filename = 'Model_2_KNN_Means_ratingbased.sav'

# NOTE(security): unpickling can execute arbitrary code. Only load model files
# that were produced by this project / come from a trusted source.
# The context manager guarantees the file handle is closed after loading
# (the previous `pickle.load(open(...))` form leaked it).
with open(knn_filename, 'rb') as knn_file:
    knn_model_full = pickle.load(knn_file)

# The rest of the notebook works with the object stored in the `.model`
# attribute of the unpickled wrapper.
knn_model = knn_model_full.model

# Model 2: Content Based, Cosine Similarity

## Directly read Femke's notebook

> Important: running this notebook must define the following objects, which the hybrid model below relies on:
> `cosine_sim` and `cosine_sim_df`

In [3]:
# Execute the TF-IDF content-based notebook inside this kernel. The cells below
# expect it to leave `cosine_sim` and `cosine_sim_df` in the namespace.
# NOTE(review): `movies` and `genre_recommendations`, used by HybridModel,
# presumably also come from this notebook — confirm.
%run Model_3_tf-idf.ipynb 

Shape: (9742, 9742)


# Hybrid Model
The hybrid recommender leverages the best of both content-based and collaborative filtering techniques.

In [39]:
# Hybrid model
class HybridModel:
    """Naive hybrid recommender combining content-based and collaborative filtering.

    Candidates are fetched from both models and the hybrid result is the
    intersection of the two candidate lists, ranked by average rating.

    Attributes:
        content_model: Content-based model handed in by the caller. It is only
            stored; ``recommend_movies`` reads the notebook-level
            ``cosine_sim_df`` and ``movies`` objects for the content step.
        cf_model: Collaborative-filtering model passed to
            ``recommend_similar_items``.
    """

    # Number of candidates requested from each underlying model before the
    # two candidate lists are intersected.
    CANDIDATE_POOL_SIZE = 100

    def __init__(self, content_model, cf_model):
        self.content_model = content_model
        self.cf_model = cf_model

    def recommend_movies(self, user_input, movies_df, n=10):
        """Recommend movies similar to ``user_input``.

        Args:
            user_input: Movie title without the trailing "(year)" part, as it
                appears in ``movies_df["title"]``.
            movies_df: Ratings DataFrame; the ``title`` and ``year`` columns
                are read here, and the whole frame is passed on to
                ``Process_Avg_Rating`` and ``recommend_similar_items``.
            n: Maximum number of rows in the hybrid result (default 10).

        Returns:
            A 3-tuple ``(content_based, collaborative, hybrid)``:
            the content-based candidates merged with the rating summary,
            the collaborative-filtering candidates as returned by
            ``recommend_similar_items``, and the rows common to both, sorted
            by ``average rating`` in descending order and limited to ``n``.

        Raises:
            IndexError: If ``user_input`` does not occur in
                ``movies_df["title"]``.
        """
        # Per-movie rating summary (average rating, number of ratings).
        movies_df_summary = Process_Avg_Rating(movies_df)

        # --------------------------------------
        # Resolve the release year of the requested title.
        # If several movies share the title, take the latest year.
        # --------------------------------------
        matching_years = movies_df.loc[movies_df["title"] == user_input, "year"]
        if matching_years.empty:
            raise IndexError(
                "Movie title {!r} was not found in movies_df".format(user_input)
            )
        # max() really selects the latest year; the last element of unique()
        # is merely the last one encountered in row order.
        user_year = matching_years.max()

        user_title_year = user_input + " (" + str(user_year) + ")"
        print(user_title_year)

        # --------------------------------------
        # Content based
        # --------------------------------------
        # Copy the result so the column assignments below never write into a
        # frame (or a view of one) owned by the content-based notebook.
        similar_movies_cos_sim = genre_recommendations(
            user_title_year, cosine_sim_df, movies, self.CANDIDATE_POOL_SIZE
        ).copy()

        # Split "Title (YYYY)" back into separate title / year columns so the
        # candidates can be joined with the rating summary.
        # Year: drop every non-digit character and keep the last four digits.
        similar_movies_cos_sim["year"] = (
            similar_movies_cos_sim["title"]
            .replace(to_replace="[^0-9]", value="", regex=True)
            .str[-4:]
            .astype(int)
        )

        # Title: remove the parenthesised parts. regex=True must be explicit:
        # pandas >= 2.0 treats the pattern as a literal string by default,
        # which would leave the year in the title and break the merge below.
        similar_movies_cos_sim["title"] = (
            similar_movies_cos_sim["title"]
            .str.replace(r"(\(.*?\))", "", regex=True)
            .str.strip()
        )

        similar_movies_cos_sim_df = pd.merge(
            similar_movies_cos_sim,
            movies_df_summary,
            how="left",
            on=["title", "genres", "year"],
        )

        # --------------------------------------
        # Collaborative filtering
        # --------------------------------------
        # Use the model given to the constructor rather than a notebook global.
        similar_movies_knn = recommend_similar_items(
            user_input, movies_df, self.cf_model, self.CANDIDATE_POOL_SIZE
        )

        # --------------------------------------
        # Hybrid: movies proposed by both models, best rated first
        # --------------------------------------
        similar_movies_common = pd.merge(
            similar_movies_knn,
            similar_movies_cos_sim_df,
            how="inner",
            on=["title", "average rating", "number of ratings"],
        )
        similar_movies_common = similar_movies_common.sort_values(
            ["average rating"], ascending=[False]
        ).head(n)

        return similar_movies_cos_sim_df, similar_movies_knn, similar_movies_common

## Read the DataFrame and ask for input

In [5]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
# Load the preprocessed MovieLens ratings; the first CSV column becomes the index.
df = pd.read_csv(
    filepath_or_buffer="../../Data/ml-latest-small/PreprocessedData_ml_latest_year_small.csv",
    index_col=0,
)
# Optional (disabled): lower-case the titles before matching user input.
# df["title"] = df["title"].str.lower()
df.head()

Unnamed: 0,userId,movieId,rating,title,genres,year
0,1,1,4.0,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995
1,5,1,4.0,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995
2,7,1,4.5,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995
3,15,1,2.5,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995
4,17,1,4.5,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995


In [6]:
# Obtain the movie title interactively through the AskForUserInput helper.
user_input = AskForUserInput(df)
# For a non-interactive run, assign the title directly instead, e.g.:
# user_input = "Toy Story"

Enter your Favorite Movie: Harry Potter and the Order of the Phoenix



## Recommend movies


In [45]:
# Build the hybrid recommender from the content-based and KNN models.
hybrid_model = HybridModel(content_model=cosine_sim, cf_model=knn_model)

In [46]:
# One call yields all three result frames: content-based, CF and hybrid.
r_content, r_cf, r_hybrid = hybrid_model.recommend_movies(
    user_input, movies_df=df, n=10
)
print("Recommendations, Content based:")
r_content.head(n=10)

Harry Potter and the Order of the Phoenix (2007)
Recommendations, Content based:


  similar_movies_cos_sim["title"] = similar_movies_cos_sim["title"].str.replace("(\(.*?\))", "").str.strip()


Unnamed: 0,title,genres,year,average rating,number of ratings
0,Jack the Giant Slayer,Adventure|Fantasy|IMAX,2013,2.2,5
1,"Hobbit: The Desolation of Smaug, The",Adventure|Fantasy|IMAX,2013,3.58,25
2,"Hobbit: An Unexpected Journey, The",Adventure|Fantasy|IMAX,2012,3.81,40
3,Alice in Wonderland,Adventure|Fantasy|IMAX,2010,2.88,28
4,Harry Potter and the Prisoner of Azkaban,Adventure|Fantasy|IMAX,2004,3.91,93
5,Thor,Action|Adventure|Drama|Fantasy|IMAX,2011,3.51,34
6,"Twilight Saga: Breaking Dawn - Part 2, The",Adventure|Drama|Fantasy|Romance|IMAX,2012,1.88,4
7,Harry Potter and the Goblet of Fire,Adventure|Fantasy|Thriller|IMAX,2005,3.82,71
8,Oz the Great and Powerful,Action|Adventure|Fantasy|IMAX,2013,3.08,6
9,Wrath of the Titans,Action|Adventure|Fantasy|IMAX,2012,2.62,4


In [47]:
# Show the first ten collaborative-filtering candidates.
print("Recommendations based on CF:")
r_cf.head(n=10)

Recommendations based on CF:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,average rating,number of ratings
movieId,title,year,genres,Unnamed: 4_level_1,Unnamed: 5_level_1
17,Sense and Sensibility,1995,Drama|Romance,3.78,67
231,Dumb & Dumber,1994,Adventure|Comedy,3.06,133
316,Stargate,1994,Action|Adventure|Sci-Fi,3.38,140
356,Forrest Gump,1994,Comedy|Drama|Romance|War,4.16,329
364,"Lion King, The",1994,Adventure|Animation|Children|Drama|Musical|IMAX,3.94,172
520,Robin Hood: Men in Tights,1993,Comedy,3.13,69
592,Batman,1989,Action|Crime|Thriller,3.43,189
595,Beauty and the Beast,1991,Animation|Children|Fantasy|Musical|Romance|IMAX,3.77,146
597,Pretty Woman,1990,Comedy|Romance,3.49,135
780,Independence Day,1996,Action|Adventure|Sci-Fi|Thriller,3.45,202


In [48]:
# Show the movies proposed by both models, ordered by average rating.
print("Recommendations, naive hybrid:")
r_hybrid.head(n=10)

Recommendations, naive hybrid:


Unnamed: 0,title,average rating,number of ratings,genres,year
3,"Lord of the Rings: The Return of the King, The",4.12,185,Action|Adventure|Drama|Fantasy,2003
0,"Lord of the Rings: The Fellowship of the Ring,...",4.11,198,Adventure|Fantasy,2001
2,"Lord of the Rings: The Two Towers, The",4.02,188,Adventure|Fantasy,2002
10,Harry Potter and the Deathly Hallows: Part 1,3.99,47,Action|Adventure|Fantasy|IMAX,2010
4,Harry Potter and the Prisoner of Azkaban,3.91,93,Adventure|Fantasy|IMAX,2004
12,Harry Potter and the Deathly Hallows: Part 2,3.91,50,Action|Adventure|Drama|Fantasy|Mystery|IMAX,2011
8,Harry Potter and the Half-Blood Prince,3.89,58,Adventure|Fantasy|Mystery|Romance|IMAX,2009
13,"Avengers, The",3.87,69,Action|Adventure|Sci-Fi|IMAX,2012
18,Star Wars: Episode VII - The Force Awakens,3.85,41,Action|Adventure|Fantasy|Sci-Fi|IMAX,2015
6,Harry Potter and the Goblet of Fire,3.82,71,Adventure|Fantasy|Thriller|IMAX,2005
