In [None]:
# Hybrid Model
# Combination of Content-based and Collaborative Filtering

In [1]:
# Run the collaborative-filtering script so the names it defines are available in this kernel.
# NOTE(review): hybrid() below uses `svd`, which is never defined in this notebook;
# presumably it is created by this run - confirm.
%run Collaborative_Filtering_Model_Based_Final

In [2]:
# Run the content-based (feature-based) script so the names it defines are available in this kernel.
# NOTE(review): hybrid() below uses `cosine_sim2`, which is never defined in this notebook;
# presumably it is created by this run - confirm.
%run Content_based_feature_based_Model

In [45]:
import pandas as pd
import numpy as np

# Load the input tables in one pass.
# movies_matched.csv is the cleaned IMDb movies metadata (see the data cleaning file);
# the remaining three files are the MovieLens movies, ratings and links tables.
movie_cleaned, MovieLens_movies, ML_ratings, ML_links = map(
    pd.read_csv,
    ['movies_matched.csv', 'movies.csv', 'ratings.csv', 'links.csv'],
)

In [None]:
# We have 8562 movies available in the combined metadata set

In [59]:
# Preview the links table: each row pairs a MovieLens movieId with its imdbId and tmdbId.
ML_links.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [46]:
# Column dtypes of the cleaned IMDb metadata as loaded; imdb_title_id is the join key used later.
movie_cleaned.dtypes

imdb_title_id           int64
title                  object
original_title         object
year                    int64
date_published         object
genre                  object
duration                int64
country                object
language               object
director               object
writer                 object
production_company     object
actors                 object
description            object
avg_vote              float64
votes                   int64
dtype: object

In [47]:
# Renumber the rows 0..n-1; the previous row labels are kept as a new 'index' column.
movie_cleaned = movie_cleaned.reset_index()
titles = movie_cleaned['original_title']
# Lookup from movie title to its row position in movie_cleaned.
indices = pd.Series(titles.index, index=titles)

In [48]:
# Display the title -> row-position lookup (`indices`) built in the previous cell.
# NOTE(review): nothing here enforces unique titles; a duplicated title would map to several rows.
indices

original_title
The Birth of a Nation                                  0
20,000 Leagues Under the Sea                           1
Intolerance: Love's Struggle Throughout the Ages       2
Snow White                                             3
Daddy-Long-Legs                                        4
                                                    ... 
Mamma Mia! Here We Go Again                         8557
Aos Teus Olhos                                      8558
BlacKkKlansman                                      8559
Superfly                                            8560
Bungou Stray Dogs: Dead Apple                       8561
Length: 8562, dtype: int64

In [49]:
# Check the id column dtypes before joining: imdbId must be comparable with movie_cleaned.imdb_title_id.
ML_links.dtypes

movieId      int64
imdbId       int64
tmdbId     float64
dtype: object

In [50]:
# Re-check the dtypes after reset_index(): the frame now carries an extra 'index' column.
movie_cleaned.dtypes

index                   int64
imdb_title_id           int64
title                  object
original_title         object
year                    int64
date_published         object
genre                  object
duration                int64
country                object
language               object
director               object
writer                 object
production_company     object
actors                 object
description            object
avg_vote              float64
votes                   int64
dtype: object

In [51]:
# Build the MovieLens id <-> IMDb id lookup.
# 1. keep only the two id columns of the links table;
# 2. rename imdbId to imdb_title_id so it matches the key column of the IMDb metadata;
# 3. inner-join with the metadata, which attaches each title and keeps only
#    movies whose imdb_title_id appears in both tables.
id_map = (
    ML_links[['movieId', 'imdbId']]
    .rename(columns={'imdbId': 'imdb_title_id'})
    .merge(
        movie_cleaned[['original_title', 'imdb_title_id']],
        how="inner",
        on='imdb_title_id',
    )
)

In [52]:
# Index id_map by movie title, so id_map.loc[title] gives that movie's
# MovieLens id (movieId) and IMDb id (imdb_title_id).
# NOTE: this cell is not idempotent - once 'original_title' has become the index it is
# no longer a column, so running the cell a second time raises KeyError.
id_map=id_map.set_index('original_title')

In [53]:
# Preview the title-indexed id lookup.
id_map.head(5)

Unnamed: 0_level_0,movieId,imdb_title_id
original_title,Unnamed: 1_level_1,Unnamed: 2_level_1
Toy Story,1,114709
Jumanji,2,113497
Grumpier Old Men,3,113228
Waiting to Exhale,4,114885
Father of the Bride Part II,5,113041


In [54]:
# indices_map gives the MovieLens movieId for each IMDb id (imdb_title_id).
# set_index replaces the current title index, so only the movieId column remains.
# Used later in hybrid() to retrieve the MovieLens movie id for collaborative filtering.
indices_map = id_map.set_index('imdb_title_id')

In [55]:
# Preview the IMDb id -> MovieLens movieId lookup.
indices_map.head(5)

Unnamed: 0_level_0,movieId
imdb_title_id,Unnamed: 1_level_1
114709,1
113497,2
113228,3
114885,4
113041,5


In [56]:
# Function that combines two models
def hybrid(userId, original_title, top_n=10):
    """Recommend movies similar to a title, ranked by a user's predicted rating.

    Step 1 (content-based): take the ``top_n`` movies whose similarity to
    ``original_title`` is highest according to ``cosine_sim2``.
    Step 2 (collaborative filtering): ask ``svd`` for the rating ``userId`` is
    predicted to give each candidate and sort the candidates by it.

    Relies on notebook-level names: ``indices``, ``movie_cleaned`` and
    ``indices_map`` (built in the cells above), plus ``cosine_sim2`` and ``svd``
    (not defined in this notebook; presumably provided by the ``%run`` cells
    at the top - confirm).

    Parameters
    ----------
    userId
        User identifier passed unchanged to ``svd.predict``.
    original_title : str
        Title of the seed movie, as it appears in ``movie_cleaned['original_title']``.
    top_n : int, default 10
        Number of similar movies to fetch and return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows with the columns original_title, year, genre,
        avg_vote, votes, imdb_title_id and ``est`` (the predicted rating),
        sorted by ``est`` in descending order.

    Raises
    ------
    KeyError
        If ``original_title`` is not in ``indices`` or a candidate's
        imdb_title_id is not in ``indices_map``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Get the row position of the movie that matches the title.
    idx = indices[original_title]
    # Titles are not guaranteed to be unique: a duplicated title makes the lookup
    # return a Series, which int() cannot convert. Use the first match.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # content based - feature-based similarity
    # Pair every other movie with its similarity to the seed movie. The seed is
    # excluded by position rather than by slicing off the first sorted entry,
    # because another movie with an equal (or higher) score could sort ahead of it.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim2[idx])
        if position != idx
    ]
    # Sort the movies by similarity score, most similar first (stable for ties).
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    # Row positions of the top_n most similar movies.
    movie_indices = [position for position, _ in sim_scores[:top_n]]

    # collaborative filtering
    # .copy() makes the selection an independent frame, so adding the 'est'
    # column below neither warns nor touches movie_cleaned.
    movie = movie_cleaned.iloc[movie_indices][
        ['original_title', 'year', 'genre', 'avg_vote', 'votes', 'imdb_title_id']
    ].copy()
    # IMDb id -> MovieLens movieId lookup, fetched once instead of per row.
    ml_movie_ids = indices_map['movieId']
    # Predict the user's rating for each candidate; .est retrieves the predicted rating.
    movie['est'] = movie['imdb_title_id'].apply(
        lambda imdb_id: svd.predict(userId, ml_movie_ids.loc[imdb_id]).est
    )
    # Return the candidates ordered by predicted rating, best first.
    return movie.sort_values('est', ascending=False)

In [60]:
# Example: movies most similar to 'Iron Man 2', ordered by the rating predicted for user 1.
hybrid(1, 'Iron Man 2')

Unnamed: 0,original_title,year,genre,avg_vote,votes,imdb_title_id,est
7931,Spider-Man: Homecoming,2017,"Action, Adventure, Sci-Fi",7.4,494060,2250912,4.98418
8418,Avengers: Infinity War,2018,"Action, Adventure, Sci-Fi",8.4,796486,4154756,4.653009
8020,Avengers: Age of Ultron,2015,"Action, Adventure, Sci-Fi",7.3,722685,2395427,4.58979
5403,Spider-Man 2,2004,"Action, Adventure, Sci-Fi",7.3,532565,316654,4.491766
6649,The Avengers,2012,"Action, Adventure, Sci-Fi",8.0,1241220,848228,4.487629
5754,Iron Man,2008,"Action, Adventure, Sci-Fi",7.9,920706,371746,4.455945
8303,Captain America: Civil War,2016,"Action, Adventure, Sci-Fi",7.8,644241,3498820,4.389638
7196,Iron Man Three,2013,"Action, Adventure, Sci-Fi",7.2,739816,1300854,4.29689
4348,Spider-Man,2002,"Action, Adventure, Sci-Fi",7.3,675212,145487,4.258243
1,"20,000 Leagues Under the Sea",1916,"Action, Adventure, Sci-Fi",6.2,1501,6333,4.046815


In [67]:
# Example: movies most similar to 'Star Trek', ordered by the rating predicted for user 7.
hybrid(7, 'Star Trek')

Unnamed: 0,original_title,year,genre,avg_vote,votes,imdb_title_id,est
8064,Star Wars: Episode VII - The Force Awakens,2015,"Action, Adventure, Sci-Fi",7.9,845102,2488496,3.61158
1640,Star Trek II: The Wrath of Khan,1982,"Action, Adventure, Sci-Fi",7.7,110769,84726,3.534954
5416,Mission: Impossible III,2006,"Action, Adventure, Thriller",6.9,324310,317919,3.420791
7304,Star Trek Into Darkness,2013,"Action, Adventure, Sci-Fi",7.7,458568,1408101,3.375448
8117,Star Trek Beyond,2016,"Action, Adventure, Sci-Fi",7.1,223293,2660888,3.047467
7565,Super 8,2011,"Action, Adventure, Sci-Fi",7.0,334427,1650062,3.026104
6041,Transformers,2007,"Action, Adventure, Sci-Fi",7.0,589274,418279,2.941666
2744,Star Trek VI: The Undiscovered Country,1991,"Action, Adventure, Sci-Fi",7.2,68998,102975,2.929466
1815,Star Trek III: The Search for Spock,1984,"Action, Adventure, Sci-Fi",6.7,73295,88170,2.90918
7734,The Amazing Spider-Man 2,2014,"Action, Adventure, Sci-Fi",6.6,409370,1872181,2.549753


In [66]:
# Same seed movie for a different user (500): the candidate set comes from the
# content-based step, while the ordering follows this user's predicted ratings.
hybrid(500, 'Star Trek')

Unnamed: 0,original_title,year,genre,avg_vote,votes,imdb_title_id,est
1640,Star Trek II: The Wrath of Khan,1982,"Action, Adventure, Sci-Fi",7.7,110769,84726,3.555968
7304,Star Trek Into Darkness,2013,"Action, Adventure, Sci-Fi",7.7,458568,1408101,3.546248
8064,Star Wars: Episode VII - The Force Awakens,2015,"Action, Adventure, Sci-Fi",7.9,845102,2488496,3.515879
5416,Mission: Impossible III,2006,"Action, Adventure, Thriller",6.9,324310,317919,3.387793
2744,Star Trek VI: The Undiscovered Country,1991,"Action, Adventure, Sci-Fi",7.2,68998,102975,3.313709
7565,Super 8,2011,"Action, Adventure, Sci-Fi",7.0,334427,1650062,3.259024
6041,Transformers,2007,"Action, Adventure, Sci-Fi",7.0,589274,418279,3.177278
8117,Star Trek Beyond,2016,"Action, Adventure, Sci-Fi",7.1,223293,2660888,3.111857
1815,Star Trek III: The Search for Spock,1984,"Action, Adventure, Sci-Fi",6.7,73295,88170,2.958028
7734,The Amazing Spider-Man 2,2014,"Action, Adventure, Sci-Fi",6.6,409370,1872181,2.677312
