In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [15]:
movie_dataset=pd.read_csv('./dataset/movies.xls',sep=';',encoding='latin-1').drop('Unnamed: 3',axis=1)
print("Movie Dataset shape:", movie_dataset.shape)
movie_dataset.head()

Movie Dataset shape: (3883, 3)


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [20]:
ratings_dataset=pd.read_csv('./dataset/ratings.csv',sep=';')
print("Ratings Dataset shape:", ratings_dataset.shape)
ratings_dataset.head()

Ratings Dataset shape: (1000209, 4)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [21]:
user_dataset=pd.read_csv('./dataset/users.xls',sep=';',encoding='latin-1')
print("User Dataset shape:", user_dataset.shape)
user_dataset.head()

User Dataset shape: (6040, 5)


Unnamed: 0,userId,gender,age,occupation,zip-code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


Collaborative Filtering : It is considered to be one of the very smart recommender systems that work on the similarity between different users and also items that are widely used as an e-commerce website and also online movie websites. It checks about the taste of similar users and does recommendations.
# ![title](collb.png)

create pivot table

In [22]:
rating_pivot=ratings_dataset.pivot_table(index=['movieId'],columns=['userId'],values='rating').fillna(0)
print("Rating Pivot shape:", rating_pivot.shape)
rating_pivot.head()

Rating Pivot shape: (3706, 6040)


userId,1,2,3,4,5,6,7,8,9,10,...,6031,6032,6033,6034,6035,6036,6037,6038,6039,6040
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,5.0,5.0,...,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,3.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0



Machine Learning Model training for Recommending movies based on users ratings.


In [23]:
from sklearn.neighbors import NearestNeighbors
model=NearestNeighbors(metric='cosine')
model.fit(rating_pivot)


Developing the class of Collaborative filtering Recommendation Engine¶


In [24]:
class Recommender:
    def __init__(self):
        self.history=[]
        self.isHistory=False
    def recommend_on_movie(self, movie_name, n_recommend=5):
        self.isHistory=True
        movie_id=int(movie_dataset[movie_dataset['title']==movie_name]['movieId'])
        self.history.append(movie_id)
        distance,neighbors=model.kneighbors([rating_pivot.loc[movie_id]],n_neighbors=n_recommend+1)
        movieids = [rating_pivot.iloc[i].name for i in neighbors[0]]
        recommeds = [str(movie_dataset[movie_dataset['movieId']==mid]['title']).split('\n')[0].split('  ')[-1] for mid in movieids if mid not in [movie_id]]
        return recommeds[:n_recommend]
    def recommend_on_history(self,n_reccomend = 5):
        if self.isHistory == False:
            return print('No history found')
        history = np.array([list(rating_pivot.loc[mid]) for mid in self.history])
        distance,neighbors = model.kneighbors([np.average(history,axis=0)],n_neighbors=n_reccomend + len(self.history))
        movieids = [rating_pivot.iloc[i].name for i in neighbors[0]]
        recommeds = [str(movie_dataset[movie_dataset['movieId']==mid]['title']).split('\n')[0].split('  ')[-1] for mid in movieids if mid not in self.history]
        return recommeds[:n_reccomend]

In [25]:
recommender=Recommender()

In [26]:
recommender.recommend_on_history()

No history found


In [27]:
recommender.recommend_on_movie('Father of the Bride Part II (1995)')

['Home Alone (1990)',
 'Home Alone 2',
 'Mighty Ducks, The (1992)',
 'Mrs. Doubtfire (1993)',
 'Liar Liar (1997)']

In [28]:
recommender.recommend_on_history()

['Home Alone (1990)',
 'Home Alone 2',
 'Mighty Ducks, The (1992)',
 'Mrs. Doubtfire (1993)',
 'Liar Liar (1997)']

In [29]:
recommender.recommend_on_movie('Tigerland (2000)')

['Requiem for a Dream (2000)',
 'Yards, The (1999)',
 'Steal This Movie! (2000)',
 'Contender, The (2000)',
 'Dancer in the Dark (2000)']

In [30]:
recommender.recommend_on_history()

['Home Alone 2',
 'Home Alone (1990)',
 'Mrs. Doubtfire (1993)',
 'Liar Liar (1997)',
 'Mighty Ducks, The (1992)']

Recommendation System using Content Based Filtering
# # ![title](conn.png)

In [41]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(stop_words='english')
genres = vectorizer.fit_transform(movie_dataset.genres).toarray()
contents = pd.DataFrame(genres,columns=vectorizer.get_feature_names_out())
print('Shape of the content table :',contents.shape)
contents.head()


Shape of the content table : (3883, 347)


Unnamed: 0,1919,1956,1963,1968,1974,1977,1978,1979,1980,1981,...,wight,willowbrook,witch,worrying,wrath,years,yellow,yes,york,yu
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [42]:
model2=NearestNeighbors(metric='cosine')
model2.fit(contents)

In [43]:
class Recommender2:
    def __init__(self):
        # This list will stored movies that called atleast ones using recommend_on_movie method
        self.hist = [] 
        self.ishist = False # Check if history is empty
    
    # This method will recommend movies based on a movie that passed as the parameter
    def recommend_on_movie(self,movie,n_reccomend = 5):
        self.ishist = True
        iloc = movie_dataset[movie_dataset['title']==movie].index[0]
        self.hist.append(iloc)
        distance,neighbors = model2.kneighbors([contents.iloc[iloc]],n_neighbors=n_reccomend+1)
        recommeds = [movie_dataset.iloc[i]['title'] for i in neighbors[0] if i not in [iloc]]
        return recommeds[:n_reccomend]
    
    # This method will recommend movies based on history stored in self.hist list
    def recommend_on_history(self,n_reccomend = 5):
        if self.ishist == False:
            return print('No history found')
        history = np.array([list(contents.iloc[iloc]) for iloc in self.hist])
        distance,neighbors = model2.kneighbors([np.average(history,axis=0)],n_neighbors=n_reccomend + len(self.hist))
        recommeds = [movie_dataset.iloc[i]['title'] for i in neighbors[0] if i not in self.hist]
        return recommeds[:n_reccomend]

In [44]:
recommender2=Recommender2()

In [45]:
recommender2.recommend_on_history()

No history found


In [46]:
recommender2.recommend_on_movie('Father of the Bride Part II (1995)')



['Waiting for Guffman (1996)',
 'Jimmy Hollywood (1994)',
 'Kolya (1996)',
 'Life with Mikey (1993)',
 '8 1/2 Women (1999)']

In [47]:
recommender2.recommend_on_history()



['Waiting for Guffman (1996)',
 'Jimmy Hollywood (1994)',
 'Kolya (1996)',
 'Life with Mikey (1993)',
 '8 1/2 Women (1999)']

In [48]:
recommender2.recommend_on_movie('Tigerland (2000)')



['Breaking the Waves (1996)',
 'Jails, Hospitals & Hip-Hop (2000)',
 'They Bite (1996)',
 'Black Tights (Les Collants Noirs) (1960)',
 'Identification of a Woman (Identificazione di una donna) (1982)']

In [49]:
recommender2.recommend_on_history()



['Sleepover (1995)',
 'Seven Beauties (Pasqualino Settebellezze) (1976)',
 'Virgin Suicides, The (1999)',
 'Man on the Moon (1999)',
 'Two Girls and a Guy (1997)']

In [50]:
recommender2.recommend_on_movie('Two Girls and a Guy (1997)')



['Sleepover (1995)',
 'Seven Beauties (Pasqualino Settebellezze) (1976)',
 'Virgin Suicides, The (1999)',
 'Man on the Moon (1999)',
 "Swept Away (Travolti da un insolito destino nell'azzurro mare d'Agosto) (1975)"]