In [58]:
import pandas as pd
import scipy
import numpy as np
import re
import nltk

from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
from surprise import KNNWithMeans, Reader, Dataset
from surprise.model_selection import GridSearchCV

# COLLAB FILTERING USING KNN

In [53]:
ratings = pd.read_csv("userDatas.csv")
movies = pd.read_csv("netflix_titles.csv")
ratings = pd.merge(movies, ratings).drop(['date_added', 'release_year', 'rating', 'duration'], axis=1)
ratings.head()

Unnamed: 0,show_id,type,title,director,cast,country,listed_in,description,user_id,fav_genre,fav_cast,user_rating
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,Documentaries,"As her father nears the end of his life, filmm...",u5,Documentaries,,2
1,s33,TV Show,Sex Education,,"Asa Butterfield, Gillian Anderson, Ncuti Gatwa...",United Kingdom,"British TV Shows, International TV Shows, TV C...",Insecure Otis has all the answers when it come...,u9,Comedies,"Joel Courtney,Stephen Jennings",5
2,s46,Movie,My Heroes Were Cowboys,Tyler Greco,,,Documentaries,Robin Wiltshire's painful childhood was rescue...,u5,Documentaries,,5
3,s47,Movie,Safe House,Daniel Espinosa,"Denzel Washington, Ryan Reynolds, Vera Farmiga...","South Africa, United States, Japan",Action & Adventure,Young CIA operative Matt Weston must get a dan...,u3,Action & Adventure,"Chris Hemsworth,Rain,Matt Damon,Jason Statham,...",3
4,s83,TV Show,Lucifer,,"Tom Ellis, Lauren German, Kevin Alejandro, D.B...",United States,"Crime TV Shows, TV Comedies, TV Dramas","Bored with being the Lord of Hell, the devil r...",u9,Comedies,"Joel Courtney,Stephen Jennings",5


In [54]:
ratings['cast'] = ratings['cast'].fillna(' ')
ratings['fav_cast'] = ratings['fav_cast'].fillna(' ')
ratings['director'] = ratings['director'].fillna(' ')
ratings['country'] = ratings['country'].fillna(' ')

In [59]:
reader = Reader(rating_scale=(0, 5), line_format = 'user item rating')
gs_data = Dataset.load_from_df(ratings[['user_id', 'title', 'user_rating']], reader)

In [60]:
sim_options = {
    "name": ["cosine", "pearson"],
    "min_support": [3, 4, 5, 6],
}

param_grid = {
    "k": [10, 20, 30],
    "min_k": [1, 2, 3],
    "sim_options": sim_options
}

gs = GridSearchCV(KNNWithMeans, param_grid, measures=["rmse", "mae"], cv=10)
gs_res = gs.fit(gs_data)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing th

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing th

In [61]:
print(gs.best_params["rmse"])
print('rmse:', gs.best_score["rmse"])

{'k': 10, 'min_k': 1, 'sim_options': {'name': 'cosine', 'min_support': 3, 'user_based': True}}
rmse: 1.069912417470864


In [64]:
new_user1 = [("ONE PIECE",5),("Sword Art Online",2),("Sex Education",4),("Hunter X Hunter (2011)",5),("Attack on Titan",4)]
new_ratings = ratings[['user_id', 'title', 'user_rating']]

for movie,rating in new_user1:
    new_ratings = new_ratings.append({'user_id': 'x1', 'title': movie, 'user_rating': rating}, ignore_index=True)
    
data = Dataset.load_from_df(new_ratings, reader)
trainingSet = data.build_full_trainset()

# To use parms from GridSearchCV
sim_options = {
    "name": "cosine",
    "min_support": 3,
    "user_based": True
}

algo = KNNWithMeans(sim_options=sim_options, k=10, min_k=1)
predictions = algo.fit(trainingSet)

pred_movies = []
for title in ratings['title']:
    if title in list(map(lambda n: n[0], new_user1)):
        continue
    pred = algo.predict('x1', title)
    pred_movies.append(pred)

pred_analysis = pd.DataFrame(pred_movies).sort_values('est', ascending=False)[['iid', 'est']]
pred_analysis.drop_duplicates(subset=["iid"]).head(15)

Computing the cosine similarity matrix...
Done computing similarity matrix.


Unnamed: 0,iid,est
15,Record of Ragnarok,5.0
52,The Devil Is a Part-Timer!,5.0
41,Death Note,5.0
20,Durarara!!,4.181818
0,Dick Johnson Is Dead,4.0
31,Cops and Robbers,4.0
32,Teen Mom 2,4.0
33,60 Days In,4.0
34,The Impossible,4.0
35,The American Barbecue Showdown,4.0


# USER-USER COLLAB FILTERING USING GENRE

## MOVIE LENS

### Retrieving Data from CSV

In [2]:
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')
ratings = pd.merge(movies, ratings)

ratings.drop('timestamp', axis = 1, inplace=True)
ratings.head()

Unnamed: 0,movieId,title,genres,userId,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5


## Pre-processing of data

#### Filling non value data as 0

In [3]:
ratings['rating'] = ratings['rating'].fillna('0')
ratings.head()

Unnamed: 0,movieId,title,genres,userId,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5


#### Converting strings into lower case, removing of stop words and splitting of words

In [4]:
ratings['clean_genre'] = ratings['genres'].str.lower()
ratings['clean_genre'] = ratings['clean_genre'].apply(lambda x: re.sub(' ', '', x))
ratings['clean_genre'] = ratings['clean_genre'].apply(lambda x: re.sub('\s+', ' ', x))
ratings['clean_genre'] = ratings['clean_genre'].apply(lambda x: x.split('|'))
ratings

Unnamed: 0,movieId,title,genres,userId,rating,clean_genre
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,"[adventure, animation, children, comedy, fantasy]"
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,"[adventure, animation, children, comedy, fantasy]"
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,"[adventure, animation, children, comedy, fantasy]"
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,"[adventure, animation, children, comedy, fantasy]"
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,"[adventure, animation, children, comedy, fantasy]"
...,...,...,...,...,...,...
100831,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy,184,4.0,"[action, animation, comedy, fantasy]"
100832,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy,184,3.5,"[animation, comedy, fantasy]"
100833,193585,Flint (2017),Drama,184,3.5,[drama]
100834,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation,184,3.5,"[action, animation]"


In [5]:
genre_ratings = ratings[['title', 'userId', 'rating', 'clean_genre']].copy()
genre_ratings.head()

Unnamed: 0,title,userId,rating,clean_genre
0,Toy Story (1995),1,4.0,"[adventure, animation, children, comedy, fantasy]"
1,Toy Story (1995),5,4.0,"[adventure, animation, children, comedy, fantasy]"
2,Toy Story (1995),7,4.5,"[adventure, animation, children, comedy, fantasy]"
3,Toy Story (1995),15,2.5,"[adventure, animation, children, comedy, fantasy]"
4,Toy Story (1995),17,4.5,"[adventure, animation, children, comedy, fantasy]"


## Creating functions for recommending movies of similar genres

### Function that searches for genre type movies and returns the dataframe of movies

##### Get the similar genre out of the user input and the database

In [6]:
def get_similar_genre(genre):
    temp = []
    df = pd.DataFrame()
    for i, x in enumerate(genre_ratings['clean_genre']):
        if (all(elem in x for elem in genre)):
            temp.append(i)
                    
    temp = set(temp)
    temp = list(temp)
    df = genre_ratings.iloc[temp]
    return df

##### Print function for list

In [None]:
def printList(list):
    for x in list:
        print(x)

##### Function to get the recommendation for the user based on genre
##### The genre should be inserted in accordance to ranking, meaning index 0 should be the most favourite genre

In [51]:
def get_recommendation(genre, xList):
    df_genre = pd.DataFrame()
    df_genre = get_similar_genre(genre)
    index_list = xList
    
    #Creates a table that shows different user and their ratings to different movie titles
    recc_ratings = df_genre.pivot_table(index=['userId'],columns=['title'],values='rating')
    recc_ratings.fillna(0, inplace=True)
    
    #Sum the total ratings from all users for each movie title
    recc_columns = recc_ratings.sum(axis=0).sort_values(ascending=False)
    recc_columns = pd.DataFrame(recc_columns)
    recc_columns['sum'] = recc_ratings.sum(axis=0).sort_values(ascending=False)
    del recc_columns[recc_columns.columns[0]]
    
    #Count the number of users that have rated the movie
    recc_normalization = df_genre['title'].value_counts()
    recc_normalization = pd.DataFrame(recc_normalization)
    recc_normalization.rename(columns={'title':'count'}, inplace=True)
    
    recc_merge = recc_columns.join(recc_normalization)
    
    #Normalizing + removing count values <= 100
    recc_merge['normalize'] = (recc_merge['sum']/recc_merge['count'])
    recc_merge['count'].dtypes
    recc_merge = recc_merge[(recc_merge['count'] > 30)]
    if (len(recc_merge) < 5):
        get_recommendation(genre[:(len(genre)-1)], index_list)
    else :
        recc_merge.sort_values(by='count', ascending=False).head(10)
        for i in recc_merge.index.values.tolist():
            index_list.append(i)
        #index_list = (recc_merge.index.values.tolist())
        print("Top Recommendations: ")
        
    printList(index_list[:5])
    return index_list[:5]

In [52]:
get_recommendation(["crime", "drama"], [])

Top Recommendations: 
Shawshank Redemption, The (1994)
Pulp Fiction (1994)
Fight Club (1999)
Godfather, The (1972)
Fargo (1996)


['Shawshank Redemption, The (1994)',
 'Pulp Fiction (1994)',
 'Fight Club (1999)',
 'Godfather, The (1972)',
 'Fargo (1996)']

In [9]:
get_recommendation(["children", "action", "adventure", "drama", "horror"], [])

Top Recommendations: 


['Incredibles, The (2004)',
 'Goonies, The (1985)',
 'Kung Fu Panda (2008)',
 'Zootopia (2016)',
 'The Lego Movie (2014)']

#### Checking if the user is satisfied with the recommendation or not

In [10]:
def get_sat(list):
    tempdict = {}
    for i in list:
        selection = input("Are you satisfied with " + i + " recommendation? [Y/N]").lower()
        tempdict[i] = selection
        
    return tempdict

In [11]:
get_sat(get_recommendation(["children", "action"], []))

Top Recommendations: 
Are you satisfied with Incredibles, The (2004) recommendation? [Y/N]Y
Are you satisfied with Goonies, The (1985) recommendation? [Y/N]Y
Are you satisfied with Kung Fu Panda (2008) recommendation? [Y/N]Y
Are you satisfied with Zootopia (2016) recommendation? [Y/N]Y
Are you satisfied with The Lego Movie (2014) recommendation? [Y/N]Y


{'Incredibles, The (2004)': 'y',
 'Goonies, The (1985)': 'y',
 'Kung Fu Panda (2008)': 'y',
 'Zootopia (2016)': 'y',
 'The Lego Movie (2014)': 'y'}

# NETFLIX DATA 

In [12]:
netflix_ratings = pd.read_csv('userDatas.csv')
netflix_movies = pd.read_csv('netflix_titles.csv')
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

#movies

Pre-processing of IMDB movie data

In [13]:
movies.title = movies.title.str.replace('\d+', '')
movies.title = movies.title.str.replace('(', '')
movies.title = movies.title.str.replace(')', '')
movies['title'] = movies['title'].str.lower()
movies['title'] = movies['title'].str.strip()
movies

  movies.title = movies.title.str.replace('\d+', '')
  movies.title = movies.title.str.replace('(', '')
  movies.title = movies.title.str.replace(')', '')


Unnamed: 0,movieId,title,genres
0,1,toy story,Adventure|Animation|Children|Comedy|Fantasy
1,2,jumanji,Adventure|Children|Fantasy
2,3,grumpier old men,Comedy|Romance
3,4,waiting to exhale,Comedy|Drama|Romance
4,5,father of the bride part ii,Comedy
...,...,...,...
9737,193581,black butler: book of the atlantic,Action|Animation|Comedy|Fantasy
9738,193583,no game no life: zero,Animation|Comedy|Fantasy
9739,193585,flint,Drama
9740,193587,bungo stray dogs: dead apple,Action|Animation


Pre-processing of netflix data

In [14]:
netflix_movies['title'] = netflix_movies['title'].str.lower()
netflix_movies['title'] = netflix_movies['title'].apply(lambda x: re.sub(' ', '', x))
netflix_movies

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,dickjohnsonisdead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,blood&water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,jailbirdsneworleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,kotafactory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
...,...,...,...,...,...,...,...,...,...,...,...,...
8802,s8803,Movie,zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",United States,"November 20, 2019",2007,R,158 min,"Cult Movies, Dramas, Thrillers","A political cartoonist, a crime reporter and a..."
8803,s8804,TV Show,zombiedumb,,,,"July 1, 2019",2018,TV-Y7,2 Seasons,"Kids' TV, Korean TV Shows, TV Comedies","While living alone in a spooky town, a young g..."
8804,s8805,Movie,zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,"November 1, 2019",2009,R,88 min,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...
8805,s8806,Movie,zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."


Merge Movie Lens data and Netflix data to retrieve ratings for netflix shows for ratings

In [15]:
overall_movies = movies.merge(netflix_movies, on='title', how='left')
overall_movies

Unnamed: 0,movieId,title,genres,show_id,type,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,1,toy story,Adventure|Animation|Children|Comedy|Fantasy,,,,,,,,,,,
1,2,jumanji,Adventure|Children|Fantasy,,,,,,,,,,,
2,3,grumpier old men,Comedy|Romance,,,,,,,,,,,
3,4,waiting to exhale,Comedy|Drama|Romance,,,,,,,,,,,
4,5,father of the bride part ii,Comedy,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9737,193581,black butler: book of the atlantic,Action|Animation|Comedy|Fantasy,,,,,,,,,,,
9738,193583,no game no life: zero,Animation|Comedy|Fantasy,,,,,,,,,,,
9739,193585,flint,Drama,,,,,,,,,,,
9740,193587,bungo stray dogs: dead apple,Action|Animation,,,,,,,,,,,


In [16]:
overall_movies = overall_movies.dropna()

In [17]:
overall_movies = overall_movies[['movieId', 'title', 'genres']]
overall_movies = overall_movies.merge(ratings)
overall_movies = overall_movies.drop('timestamp', axis='columns')
overall_movies

Unnamed: 0,movieId,title,genres,userId,rating
0,7,sabrina,Comedy|Romance,6,4.0
1,7,sabrina,Comedy|Romance,14,3.0
2,7,sabrina,Comedy|Romance,19,2.0
3,7,sabrina,Comedy|Romance,31,4.0
4,7,sabrina,Comedy|Romance,32,4.0
...,...,...,...,...,...
3575,182823,bright,Action|Crime|Fantasy,212,4.0
3576,182823,bright,Action|Crime|Fantasy,249,3.5
3577,182823,bright,Action|Crime|Fantasy,380,3.0
3578,182823,bright,Action|Crime|Fantasy,567,1.0


Clean and split the genre in the merged dataframe

In [18]:
overall_movies['clean_genre'] = overall_movies['genres'].str.lower()
overall_movies['clean_genre'] = overall_movies['clean_genre'].apply(lambda x: re.sub(' ', '', x))
overall_movies['clean_genre'] = overall_movies['clean_genre'].apply(lambda x: re.sub('\s+', ' ', x))
overall_movies['clean_genre'] = overall_movies['clean_genre'].apply(lambda x: x.split('|'))
overall_movies

Unnamed: 0,movieId,title,genres,userId,rating,clean_genre
0,7,sabrina,Comedy|Romance,6,4.0,"[comedy, romance]"
1,7,sabrina,Comedy|Romance,14,3.0,"[comedy, romance]"
2,7,sabrina,Comedy|Romance,19,2.0,"[comedy, romance]"
3,7,sabrina,Comedy|Romance,31,4.0,"[comedy, romance]"
4,7,sabrina,Comedy|Romance,32,4.0,"[comedy, romance]"
...,...,...,...,...,...,...
3575,182823,bright,Action|Crime|Fantasy,212,4.0,"[action, crime, fantasy]"
3576,182823,bright,Action|Crime|Fantasy,249,3.5,"[action, crime, fantasy]"
3577,182823,bright,Action|Crime|Fantasy,380,3.0,"[action, crime, fantasy]"
3578,182823,bright,Action|Crime|Fantasy,567,1.0,"[action, crime, fantasy]"


#### Functions for the recommendation

In [19]:
def get_similar_netflix_genre(genre):
    temp = []
    df = pd.DataFrame()
    for i, x in enumerate(overall_movies['clean_genre']):
        if (all(elem in x for elem in genre)):
            temp.append(i)

                     
    temp = set(temp)
    temp = list(temp)
    df = overall_movies.iloc[temp]
    return df

In [49]:
def get_recommendation_netflix(genre, yList):
    df_netflix_genre = get_similar_netflix_genre(genre)
    index_list = yList
    
    recc_netflix_ratings = df_netflix_genre.pivot_table(index=['userId'],columns=['title'],values='rating')
    recc_netflix_ratings.fillna(0, inplace=True)
    
    recc_netflix_columns = recc_netflix_ratings.sum(axis=0).sort_values(ascending=False)
    recc_netflix_columns = pd.DataFrame(recc_netflix_columns)
    recc_netflix_columns['sum'] = recc_netflix_ratings.sum(axis=0).sort_values(ascending=False)
    del recc_netflix_columns[recc_netflix_columns.columns[0]]
    
    recc_netflix_normalization = df_netflix_genre['title'].value_counts()
    recc_netflix_normalization = pd.DataFrame(recc_netflix_normalization)
    recc_netflix_normalization.rename(columns={'title':'count'}, inplace=True)
    
    recc_netflix_merge = recc_netflix_columns.join(recc_netflix_normalization)
    
    recc_netflix_merge['normalize'] = (recc_netflix_merge['sum']/recc_netflix_merge['count'])
    recc_netflix_merge['count'].dtypes
    recc_netflix_merge = recc_netflix_merge[(recc_netflix_merge['count'] > 30)]
    
    if (len(recc_netflix_merge) < 5):
        get_recommendation(genre[:(len(genre)-1)], index_list)
    else :
        recc_netflix_merge.sort_values(by='count', ascending=False).head(10)
        for i in recc_netflix_merge.index.values.tolist():
            index_list.append(i)
            
        print("Top Recommendations: ")
        
    printList(index_list[:5])
    return index_list[:5]

In [50]:
get_recommendation_netflix(["action", "thriller"], [])

Top Recommendations: 
inception
goldeneye
cliffhanger
equilibrium
godzilla


['inception', 'goldeneye', 'cliffhanger', 'equilibrium', 'godzilla']

In [22]:
def get_sat_netflix(list):
    tempdict = {}
    for i in list:
        selection = input("Are you satisfied with " + i + " recommendation? [Y/N]").lower()
        tempdict[i] = selection
        
    return tempdict

In [23]:
get_sat_netflix(get_recommendation_netflix(["children", "adventure"], []))

Top Recommendations: 
Are you satisfied with Toy Story (1995) recommendation? [Y/N]Y
Are you satisfied with Aladdin (1992) recommendation? [Y/N]Y
Are you satisfied with Lion King, The (1994) recommendation? [Y/N]Y
Are you satisfied with Shrek (2001) recommendation? [Y/N]Y
Are you satisfied with Finding Nemo (2003) recommendation? [Y/N]Y


{'Toy Story (1995)': 'y',
 'Aladdin (1992)': 'y',
 'Lion King, The (1994)': 'y',
 'Shrek (2001)': 'y',
 'Finding Nemo (2003)': 'y'}

# USER-ITEM Collaborative Filtering Recommendation System

In [30]:
ratings = pd.read_csv("userDatas.csv")
netflixData = pd.read_csv("netflix_titles.csv")
ratings = ratings.merge(netflixData)

In [31]:
ratings['cast'] = ratings['cast'].fillna(' ')
ratings['fav_cast'] = ratings['fav_cast'].fillna(' ')
ratings['director'] = ratings['director'].fillna(' ')
ratings['country'] = ratings['country'].fillna(' ')

ratings.head(61)

Unnamed: 0,user_id,fav_genre,show_id,fav_cast,user_rating,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,u1,"Action & Adventure,Scifi & Fantasy",s144,"Tobey Maguire,James Franco",3,Movie,Green Lantern,Martin Campbell,"Ryan Reynolds, Blake Lively, Peter Sarsgaard, ...",United States,"September 1, 2021",2011,PG-13,114 min,"Action & Adventure, Sci-Fi & Fantasy",Test pilot Hal Jordan harnesses glowing new po...
1,u1,"Action & Adventure,Scifi & Fantasy",s6201,"Tobey Maguire,James Franco",5,Movie,Avengers: Infinity War,"Anthony Russo, Joe Russo","Robert Downey Jr., Josh Brolin, Mark Ruffalo, ...",United States,"December 25, 2018",2018,PG-13,150 min,"Action & Adventure, Sci-Fi & Fantasy",Superheroes amass to stop intergalactic sociop...
2,u3,Action & Adventure,s6201,"Chris Hemsworth,Rain,Matt Damon,Jason Statham,...",5,Movie,Avengers: Infinity War,"Anthony Russo, Joe Russo","Robert Downey Jr., Josh Brolin, Mark Ruffalo, ...",United States,"December 25, 2018",2018,PG-13,150 min,"Action & Adventure, Sci-Fi & Fantasy",Superheroes amass to stop intergalactic sociop...
3,u1,"Action & Adventure,Scifi & Fantasy",s8068,"Tobey Maguire,James Franco",4,Movie,Spider-Man 3,Sam Raimi,"Tobey Maguire, Kirsten Dunst, James Franco, Th...",United States,"November 1, 2019",2007,PG-13,139 min,"Action & Adventure, Sci-Fi & Fantasy",The seemingly invincible Spider-Man goes up ag...
4,u1,"Action & Adventure,Scifi & Fantasy",s8069,"Tobey Maguire,James Franco",5,Movie,Spider-Man: Into the Spider-Verse,"Peter Ramsey, Rodney Rothman, Bob Persichetti","Shameik Moore, Jake Johnson, Hailee Steinfeld,...",United States,"June 26, 2019",2018,PG,117 min,"Action & Adventure, Comedies","After being bitten by a radioactive spider, Br..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,u10,Dramas,s113,"Naomi Watts,Miachel Keaton,Zaria,Amy Ryan",5,Movie,Worth,Sara Colangelo,"Michael Keaton, Stanley Tucci, Amy Ryan, Shuno...",,"September 3, 2021",2021,PG-13,119 min,Dramas,"In the wake of the Sept. 11 attacks, a lawyer ..."
57,u10,Dramas,s1090,"Naomi Watts,Miachel Keaton,Zaria,Amy Ryan",3,Movie,Two Distant Strangers,"Travon Free, Martin Desmond Roe","Joey Bada$$, Andrew Howard, Zaria",United States,"April 9, 2021",2021,TV-MA,32 min,Dramas,"In this Oscar-nominated short film, a man tryi..."
58,u10,Dramas,s1485,"Naomi Watts,Miachel Keaton,Zaria,Amy Ryan",2,Movie,Cops and Robbers,"Arnon Manor, Timothy Ware-Hill",Timothy Ware-Hill,United States,"December 28, 2020",2020,PG-13,8 min,Dramas,Animation and activism unite in this multimedi...
59,u10,Dramas,s1436,"Naomi Watts,Miachel Keaton,Zaria,Amy Ryan",5,Movie,Pieces of a Woman,KornÃ©l MundruczÃ³,"Vanessa Kirby, Shia LaBeouf, Ellen Burstyn, Mo...","Canada, Hungary, United States","January 7, 2021",2020,R,128 min,Dramas,A heartbreaking home birth leaves a woman grap...


#### Create a table that shows user's rating of different netflix titles

In [32]:
userRatings = ratings.pivot_table(index=['user_id'],columns=['title'],values='user_rating')
print("Before: ",userRatings.shape)
userRatings.fillna(0, inplace=True)
print("After: ",userRatings.shape)
userRatings.head(10)

Before:  (10, 58)
After:  (10, 58)


title,60 Days In,Akame ga Kill!,Attack on Titan,Avengers: Infinity War,Blade Runner: The Final Cut,Bling Empire,"Bob Ross: Happy Accidents, Betrayal & Greed",Cops and Robbers,Cosmic Sin,Creating an Army of the Dead,...,The Kissing Booth 3,The Secret Life of Pets 2,The Seven Deadly Sins,The Seventh Day,The Show Must Go On: The Queen + Adam Lambert Story,Tinker Bell and the Legend of the NeverBeast,Truth or Dare,Two Distant Strangers,Why Did You Kill Me?,Worth
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
u1,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
u10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,5.0
u2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0
u3,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
u4,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
u5,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
u6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,4.0,0.0,0.0,5.0,0.0,0.0,0.0
u7,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
u8,2.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
u9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Movie Correlation data using pearson correlations

In [33]:
corrMatrix = userRatings.corr(method='pearson')
corrMatrix.head(10)

title,60 Days In,Akame ga Kill!,Attack on Titan,Avengers: Infinity War,Blade Runner: The Final Cut,Bling Empire,"Bob Ross: Happy Accidents, Betrayal & Greed",Cops and Robbers,Cosmic Sin,Creating an Army of the Dead,...,The Kissing Booth 3,The Secret Life of Pets 2,The Seven Deadly Sins,The Seventh Day,The Show Must Go On: The Queen + Adam Lambert Story,Tinker Bell and the Legend of the NeverBeast,Truth or Dare,Two Distant Strangers,Why Did You Kill Me?,Worth
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
60 Days In,1.0,-0.111111,-0.111111,-0.166667,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Akame ga Kill!,-0.111111,1.0,1.0,-0.166667,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Attack on Titan,-0.111111,1.0,1.0,-0.166667,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Avengers: Infinity War,-0.166667,-0.166667,-0.166667,1.0,-0.166667,-0.166667,-0.166667,-0.166667,0.666667,-0.166667,...,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667
Blade Runner: The Final Cut,-0.111111,-0.111111,-0.111111,-0.166667,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Bling Empire,1.0,-0.111111,-0.111111,-0.166667,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
"Bob Ross: Happy Accidents, Betrayal & Greed",-0.111111,-0.111111,-0.111111,-0.166667,-0.111111,-0.111111,1.0,-0.111111,-0.111111,1.0,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111
Cops and Robbers,-0.111111,-0.111111,-0.111111,-0.166667,-0.111111,-0.111111,-0.111111,1.0,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,1.0
Cosmic Sin,-0.111111,-0.111111,-0.111111,0.666667,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Creating an Army of the Dead,-0.111111,-0.111111,-0.111111,-0.166667,-0.111111,-0.111111,1.0,-0.111111,-0.111111,1.0,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111


In [34]:
def standardize(row):
    new_row = (row - row.mean())/(row.max()-row.min())
    return new_row

df_temp = userRatings
df_std = df_temp.apply(standardize)

user_similarity = cosine_similarity(df_std.T)
user_similarity

array([[ 1.        , -0.11111111, -0.11111111, ..., -0.11111111,
        -0.11111111, -0.11111111],
       [-0.11111111,  1.        ,  1.        , ..., -0.11111111,
        -0.11111111, -0.11111111],
       [-0.11111111,  1.        ,  1.        , ..., -0.11111111,
        -0.11111111, -0.11111111],
       ...,
       [-0.11111111, -0.11111111, -0.11111111, ...,  1.        ,
        -0.11111111,  1.        ],
       [-0.11111111, -0.11111111, -0.11111111, ..., -0.11111111,
         1.        , -0.11111111],
       [-0.11111111, -0.11111111, -0.11111111, ...,  1.        ,
        -0.11111111,  1.        ]])

In [35]:
from sklearn.metrics.pairwise import euclidean_distances
euclidean_dist = euclidean_distances(df_std.T)
euclidean_dist

array([[0.        , 1.41421356, 1.41421356, ..., 1.41421356, 1.41421356,
        1.41421356],
       [1.41421356, 0.        , 0.        , ..., 1.41421356, 1.41421356,
        1.41421356],
       [1.41421356, 0.        , 0.        , ..., 1.41421356, 1.41421356,
        1.41421356],
       ...,
       [1.41421356, 1.41421356, 1.41421356, ..., 0.        , 1.41421356,
        0.        ],
       [1.41421356, 1.41421356, 1.41421356, ..., 1.41421356, 0.        ,
        1.41421356],
       [1.41421356, 1.41421356, 1.41421356, ..., 0.        , 1.41421356,
        0.        ]])

In [36]:
user_similarity_df = pd.DataFrame(user_similarity,index=userRatings.columns,columns=userRatings.columns)
user_similarity_df

title,60 Days In,Akame ga Kill!,Attack on Titan,Avengers: Infinity War,Blade Runner: The Final Cut,Bling Empire,"Bob Ross: Happy Accidents, Betrayal & Greed",Cops and Robbers,Cosmic Sin,Creating an Army of the Dead,...,The Kissing Booth 3,The Secret Life of Pets 2,The Seven Deadly Sins,The Seventh Day,The Show Must Go On: The Queen + Adam Lambert Story,Tinker Bell and the Legend of the NeverBeast,Truth or Dare,Two Distant Strangers,Why Did You Kill Me?,Worth
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
60 Days In,1.0,-0.111111,-0.111111,-0.166667,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Akame ga Kill!,-0.111111,1.0,1.0,-0.166667,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Attack on Titan,-0.111111,1.0,1.0,-0.166667,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Avengers: Infinity War,-0.166667,-0.166667,-0.166667,1.0,-0.166667,-0.166667,-0.166667,-0.166667,0.666667,-0.166667,...,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667
Blade Runner: The Final Cut,-0.111111,-0.111111,-0.111111,-0.166667,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Bling Empire,1.0,-0.111111,-0.111111,-0.166667,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
"Bob Ross: Happy Accidents, Betrayal & Greed",-0.111111,-0.111111,-0.111111,-0.166667,-0.111111,-0.111111,1.0,-0.111111,-0.111111,1.0,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111
Cops and Robbers,-0.111111,-0.111111,-0.111111,-0.166667,-0.111111,-0.111111,-0.111111,1.0,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,1.0
Cosmic Sin,-0.111111,-0.111111,-0.111111,0.666667,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Creating an Army of the Dead,-0.111111,-0.111111,-0.111111,-0.166667,-0.111111,-0.111111,1.0,-0.111111,-0.111111,1.0,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111


In [37]:
def get_similar_cosine(movie_name, user_rating):
    similar_score = user_similarity_df[movie_name]
    similar_score = similar_score.sort_values(ascending=False)
    return similar_score

In [38]:
new_user1 = [("ONE PIECE",5),("Sword Art Online",2),("Sex Education",4),("Hunter X Hunter (2011)",5),("Attack on Titan",4)]

similar_movies = pd.DataFrame()
for movie,rating in new_user1:
    similar_movies = similar_movies.append(get_similar_cosine(movie,rating),ignore_index = True)

for (x,_) in new_user1:
    del similar_movies[x]
similar_movies.head()

Unnamed: 0,The Devil Is a Part-Timer!,DOTA: Dragon's Blood,The Seven Deadly Sins,Akame ga Kill!,Durarara!!,Death Note,Record of Ragnarok,The Karate Kid Part II,The Karate Kid,Blade Runner: The Final Cut,...,Spider-Man 3,Rogue Warfare: Death of a Nation,Pieces of a Woman,Ninja Assassin,Creating an Army of the Dead,Cosmic Sin,Cops and Robbers,Worth,Jiu Jitsu,Avengers: Infinity War
0,1.0,1.0,1.0,1.0,1.0,1.0,0.666667,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.164581,-0.166667
1,1.0,1.0,1.0,1.0,1.0,1.0,0.666667,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.164581,-0.166667
2,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.166667,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.164581,-0.166667
3,1.0,1.0,1.0,1.0,1.0,1.0,0.666667,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.164581,-0.166667
4,1.0,1.0,1.0,1.0,1.0,1.0,0.666667,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.164581,-0.166667


Top 10 netflix recommendations

In [39]:
similar_movies.sum().sort_values(ascending=False).head(10)

The Devil Is a Part-Timer!    3.888889
Durarara!!                    3.888889
Death Note                    3.888889
DOTA: Dragon's Blood          3.888889
Akame ga Kill!                3.888889
The Seven Deadly Sins         3.888889
Record of Ragnarok            2.500000
The Kissing Booth 3           0.555556
Lucifer                       0.555556
Friends                       0.555556
dtype: float64

In [40]:
corrMatrix = userRatings.corr(method='pearson')
corrMatrix.head(10)

title,60 Days In,Akame ga Kill!,Attack on Titan,Avengers: Infinity War,Blade Runner: The Final Cut,Bling Empire,"Bob Ross: Happy Accidents, Betrayal & Greed",Cops and Robbers,Cosmic Sin,Creating an Army of the Dead,...,The Kissing Booth 3,The Secret Life of Pets 2,The Seven Deadly Sins,The Seventh Day,The Show Must Go On: The Queen + Adam Lambert Story,Tinker Bell and the Legend of the NeverBeast,Truth or Dare,Two Distant Strangers,Why Did You Kill Me?,Worth
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
60 Days In,1.0,-0.111111,-0.111111,-0.166667,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Akame ga Kill!,-0.111111,1.0,1.0,-0.166667,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Attack on Titan,-0.111111,1.0,1.0,-0.166667,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Avengers: Infinity War,-0.166667,-0.166667,-0.166667,1.0,-0.166667,-0.166667,-0.166667,-0.166667,0.666667,-0.166667,...,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667
Blade Runner: The Final Cut,-0.111111,-0.111111,-0.111111,-0.166667,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Bling Empire,1.0,-0.111111,-0.111111,-0.166667,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
"Bob Ross: Happy Accidents, Betrayal & Greed",-0.111111,-0.111111,-0.111111,-0.166667,-0.111111,-0.111111,1.0,-0.111111,-0.111111,1.0,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111
Cops and Robbers,-0.111111,-0.111111,-0.111111,-0.166667,-0.111111,-0.111111,-0.111111,1.0,-0.111111,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,1.0
Cosmic Sin,-0.111111,-0.111111,-0.111111,0.666667,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111
Creating an Army of the Dead,-0.111111,-0.111111,-0.111111,-0.166667,-0.111111,-0.111111,1.0,-0.111111,-0.111111,1.0,...,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,-0.111111,1.0,-0.111111


In [41]:
def get_similar_pearson(movie_name, user_rating):
    similar_score = corrMatrix[movie_name]*(user_rating-2.5)
    similar_score = similar_score.sort_values(ascending=False)
    return similar_score

In [42]:
new_user1 = [("ONE PIECE",5),("Sword Art Online",2),("Sex Education",4),("Hunter X Hunter (2011)",5),("Attack on Titan",4)]
similar_movies = pd.DataFrame()
for movie,rating in new_user1:
    similar_movies = similar_movies.append(get_similar_pearson(movie,rating),ignore_index = True)

for (x,_) in new_user1:
    del similar_movies[x]
similar_movies.head()

Unnamed: 0,Akame ga Kill!,Durarara!!,Death Note,The Devil Is a Part-Timer!,The Seven Deadly Sins,DOTA: Dragon's Blood,Record of Ragnarok,The Impossible,Tarzan,Friends,...,Creating an Army of the Dead,The Seventh Day,Truth or Dare,Silent Hill: Revelation,The Final Destination,The Conjuring,Scream 2,Seaspiracy,Jiu Jitsu,Avengers: Infinity War
0,2.5,2.5,2.5,2.5,2.5,2.5,1.666667,-0.277778,-0.277778,-0.277778,...,-0.277778,-0.277778,-0.277778,-0.277778,-0.277778,-0.277778,-0.277778,-0.277778,-0.411452,-0.416667
1,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.333333,0.055556,0.055556,0.055556,...,0.055556,0.055556,0.055556,0.055556,0.055556,0.055556,0.055556,0.055556,0.08229,0.083333
2,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.25,-0.166667,-0.166667,1.5,...,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.246871,-0.25
3,2.5,2.5,2.5,2.5,2.5,2.5,1.666667,-0.277778,-0.277778,-0.277778,...,-0.277778,-0.277778,-0.277778,-0.277778,-0.277778,-0.277778,-0.277778,-0.277778,-0.411452,-0.416667
4,1.5,1.5,1.5,1.5,1.5,1.5,1.0,-0.166667,-0.166667,-0.166667,...,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.166667,-0.246871,-0.25


In [43]:
similar_movies.sum().sort_values(ascending=False).head(10)

Akame ga Kill!                5.833333
The Devil Is a Part-Timer!    5.833333
Durarara!!                    5.833333
Death Note                    5.833333
DOTA: Dragon's Blood          5.833333
The Seven Deadly Sins         5.833333
Record of Ragnarok            3.750000
The Kissing Booth 3           0.833333
Friends                       0.833333
Lucifer                       0.833333
dtype: float64

In [44]:
# action_lover = [("The Amazing Spider-Man (2012)",5),("Mission: Impossible III (2006)",4),("Toy Story 3 (2010)",2),("2 Fast 2 Furious (Fast and the Furious 2, The) (2003)",4)]
# similar_movies = pd.DataFrame()
# for movie,rating in action_lover:
#     similar_movies = similar_movies.append(get_similar_pearson(movie,rating),ignore_index = True)

# similar_movies.head(10)
# similar_movies.sum().sort_values(ascending=False).head(20)

#### Evalaution Metric : RMSE ( Root Mean Squared Error )

In [45]:
# Function to predict ratings
def predict(ratings, similarity, type='user'):
    if type == 'user':
        mean_user_rating = ratings.mean(axis=1)
        # Use np.newaxis so that mean_user_rating has same format as ratings
        ratings_diff = (ratings - mean_user_rating[:, np.newaxis])
        pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T
    elif type == 'item':
        pred = ratings.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
    return pred

In [46]:
from sklearn.metrics import mean_squared_error
from math import sqrt

# Function to calculate RMSE
def rmse(pred, actual):
    return sqrt(mean_squared_error(pred, actual))

# Predict ratings on the training data with both similarity score
user_prediction = predict(euclidean_dist, corrMatrix, type='user')
item_prediction = predict(euclidean_dist, corrMatrix, type='item')
# RMSE on the train data
print('User-based CF RMSE: ' + str(rmse(user_prediction, corrMatrix)))
print('Item-based CF RMSE: ' + str(rmse(item_prediction, corrMatrix)))

User-based CF RMSE: 1.4075937705410637
Item-based CF RMSE: 0.6234156009311755
