# Recommendation Engine Hybrid Model

Anaysis by Brendan Bullivant & Frank Flavell

## Overview

To provide Disney Plus with a more effective recommendation engine that will promote their long tail intellectual property, we decided to merge our SVDpp Collaborative Filter system with our Content-Based system so we could balance out the advantages and disadvantages of both approaches.

The result is a list of movies similar to your favorites, regardless of their user ratings, as well as well rated classics from the Sleeping Giant subset, which removes the impact of outliers.

Recommendations often promote movies that match a user's tastes while also recommending more popular crowd pleasers from the past.

# Package Import

In [9]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats.stats import pearsonr
import scipy as sp
from surprise.model_selection import train_test_split
from surprise.prediction_algorithms import knns
from surprise.similarities import cosine, msd, pearson
from surprise import accuracy
from surprise.model_selection import cross_validate
from surprise import Reader, Dataset
from surprise.prediction_algorithms import SVD, SVDpp
from surprise.prediction_algorithms import KNNWithMeans, KNNBasic, KNNBaseline, KNNWithZScore
from surprise.model_selection import GridSearchCV
from surprise.prediction_algorithms.random_pred import NormalPredictor
from surprise.prediction_algorithms.baseline_only import BaselineOnly
#from pyspark.ml.recommendation import ALS
import random

# Data Import

We import the SVDpp model pickled from the SVDpp Notebook.

In [10]:
svd = pd.read_pickle('movie_rec_svd_rmse_81.pickle')

We import the movies.csv

In [11]:
df_movies = pd.read_csv('ml-latest-small/movies.csv')

We import the list of sleeping giants to improve the recommendations of the SVDpp model.

In [12]:
sl_giants = pd.read_pickle('sl_giants_ids.pickle')

In [15]:
def movie_rater(movie_df,num, genre=None):
    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen :\n')
        if rating == 'n':
            continue
        else:
            rating_one_movie = {'userId':userID,'movieId':movie['movieId'].values[0],'rating':rating}
            rating_list.append(rating_one_movie) 
            num -= 1
    return rating_list 

In [16]:
list_of_movies = []
for m_id in sl_giants:
    list_of_movies.append((m_id,svd.predict(1000,m_id)[3]))

In [17]:
ranked_movies = sorted(list_of_movies, key=lambda x:x[1], reverse=True)

In [18]:
def recommended_movies(user_ratings,movie_title_df,n):
        for idx, rec in enumerate(user_ratings):
            title = movie_title_df.loc[movie_title_df['movieId'] == int(rec[0])]['title']
            print('Recommendation # ', idx+1, ': ', title, '\n')
            n-= 1
            if n == 0:
                break
            
recommended_movies(ranked_movies,df_movies,5)

Recommendation #  1 :  906    Lawrence of Arabia (1962)
Name: title, dtype: object 

Recommendation #  2 :  841    Streetcar Named Desire, A (1951)
Name: title, dtype: object 

Recommendation #  3 :  947    Touch of Evil (1958)
Name: title, dtype: object 

Recommendation #  4 :  680    Philadelphia Story, The (1940)
Name: title, dtype: object 

Recommendation #  5 :  2582    Guess Who's Coming to Dinner (1967)
Name: title, dtype: object 



In [19]:
def top_recs(df_movies, num, genre=None):
    x=movie_rater(df_movies,num, genre=None)
    # make predictions for the user
    list_of_movies = []
    for m_id in sl_giants:
        list_of_movies.append((m_id,svd.predict(1000, m_id)[3]))
    # order the predictions from highest to lowest rated
    ranked_movies = sorted(list_of_movies, key=lambda x:x[1], reverse=True)
    top30_random10 = random.sample(ranked_movies[:30], 10)
    n=5
    new_list = []
    for idx, rec in enumerate(top30_random10):
        title = df_movies.loc[df_movies['movieId'] == int(rec[0])]['title'].item()
        new_list.append(title)   
        #print('Recommendation # ', idx+1, ': ', title, '\n')
        n-= 1
        if n == 0:
            return new_list

In [61]:
top_recs(df_movies, 4)

     movieId                             title                          genres
273      314  Secret of Roan Inish, The (1994)  Children|Drama|Fantasy|Mystery
How do you rate this movie on a scale of 1-5, press n if you have not seen :
3
      movieId           title  genres
2686     3597  Whipped (2000)  Comedy
How do you rate this movie on a scale of 1-5, press n if you have not seen :
4
      movieId                      title genres
5430    25923  Great Expectations (1946)  Drama
How do you rate this movie on a scale of 1-5, press n if you have not seen :
3
      movieId                             title  genres
2813     3760  Kentucky Fried Movie, The (1977)  Comedy
How do you rate this movie on a scale of 1-5, press n if you have not seen :
4


  del sys.path[0]


['Sunset Blvd. (a.k.a. Sunset Boulevard) (1950)',
 'In the Name of the Father (1993)',
 'Harold and Maude (1971)',
 'Streetcar Named Desire, A (1951)',
 'Boondock Saints, The (2000)']

In [23]:
movie_content_cleaned_df = pd.read_pickle('movie_content_cleaned.pickle')

In [24]:
indices = pd.Series(movie_content_cleaned_df.index)

In [41]:
indices

0                                Toy Story (1995)
1                                  Jumanji (1995)
2                         Grumpier Old Men (1995)
3                        Waiting to Exhale (1995)
4              Father of the Bride Part II (1995)
                          ...                    
9579     Jon Stewart Has Left the Building (2015)
9580    Black Butler: Book of the Atlantic (2017)
9581                 No Game No Life: Zero (2017)
9582                                 Flint (2017)
9583          Bungo Stray Dogs: Dead Apple (2018)
Name: title, Length: 9584, dtype: object

In [25]:
similarity_matrix = pd.read_pickle('cosine_sim_matrix.pickle')

In [37]:
def recommendations(name, df_movies, num, similarity_matrix = similarity_matrix, genre=None):
    x = top_recs(df_movies,4)
    recommended_movies = []
    
    # getting the index of the movie that matches the name
    idx = indices[indices == name].index[0]

    # creating a Series with the similarity scores in descending order
    score_series = pd.Series(similarity_matrix[idx]).sort_values(ascending = False)

    # getting the indexes of the 10 most similar movies except itself
    top_10_indexes = list(score_series.iloc[1:11].index)
    
    #Cross reference top n indexes with sleeping giants (and/or giants)
    
    # populating the list with the names of the top 10 matching movies
    for i in top_10_indexes:
        recommended_movies.append(list(movie_content_cleaned_df.index)[i])
    for mov in recommended_movies[0:5]:
        x.append(mov)
    #x = x.reverse()
    return x[::-1]

In [None]:
Jumanji (1995)

In [45]:
recomendations = input("what types of movies would you like me to recommend for ya?")
recommendations(recomendations, df_movies, 4)

what types of movies would you like me to recommend for ya?Jumanji (1995)
      movieId                     title   genres
8757   128360  The Hateful Eight (2015)  Western
How do you rate this movie on a scale of 1-5, press n if you have not seen :
5
      movieId                                           title       genres
4311     6299  Winged Migration (Peuple migrateur, Le) (2001)  Documentary
How do you rate this movie on a scale of 1-5, press n if you have not seen :
4
      movieId                           title                genres
5030     7832  Thin Man Goes Home, The (1945)  Comedy|Crime|Mystery
How do you rate this movie on a scale of 1-5, press n if you have not seen :
5
      movieId                           title genres
1077     1397  Bastard Out of Carolina (1996)  Drama
How do you rate this movie on a scale of 1-5, press n if you have not seen :
2


  del sys.path[0]


['Percy Jackson: Sea of Monsters (2013)',
 'Guardians of the Galaxy (2014)',
 'Doctor Who: Last Christmas (2014)',
 'Zathura (2005)',
 'Spiderwick Chronicles, The (2008)',
 'Hoop Dreams (1994)',
 'Raging Bull (1980)',
 'Ran (1985)',
 'Outlaw Josey Wales, The (1976)',
 'High Noon (1952)']