In [2]:
import numpy as np
import pandas as pd
movies = pd.read_csv('movies.csv.xls',sep=';',encoding='latin-1').drop('Unnamed: 3',axis=1)
print('Shape of this dataset :',movies.shape)
movies.head()


Shape of this dataset : (3883, 3)


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
ratings = pd.read_csv('ratings.csv',sep=';')
print('Shape of this dataset :',ratings.shape)
ratings.head()


Shape of this dataset : (94888, 4)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193.0,5.0,978300760.0
1,1,661.0,3.0,978302109.0
2,1,914.0,3.0,978301968.0
3,1,3408.0,4.0,978300275.0
4,1,2355.0,5.0,978824291.0


In [5]:
users = pd.read_csv('users.csv.xls',sep=';')
print('Shape of this dataset :',users.shape)
users.head()


Shape of this dataset : (6040, 5)


Unnamed: 0,userId,gender,age,occupation,zip-code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [6]:
rating_pivot = ratings.pivot_table(values='rating',columns='userId',index='movieId').fillna(0)
print('Shape of this pivot table :',rating_pivot.shape)
rating_pivot.head()


Shape of this pivot table : (3357, 579)


userId,1,2,3,4,5,6,7,8,9,10,...,1689,1690,1691,1692,1693,1694,1695,1696,1697,1698
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.0,5.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,5.0,5.0,...,0.0,0.0,0.0,0.0,0.0,3.0,5.0,0.0,5.0,0.0
2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,...,4.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0
3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
from sklearn.neighbors import NearestNeighbors
nn_algo = NearestNeighbors(metric='cosine')
nn_algo.fit(rating_pivot)


In [8]:
class Recommender:
    def __init__(self):
        # This list will stored movies that called atleast ones using recommend_on_movie method
        self.hist = []
        self.ishist = False # Check if history is empty

    # This method will recommend movies based on a movie that passed as the parameter
    def recommend_on_movie(self,movie,n_reccomend = 5):
        self.ishist = True
        movieid = int(movies[movies['title']==movie]['movieId'])
        self.hist.append(movieid)
        distance,neighbors = nn_algo.kneighbors([rating_pivot.loc[movieid]],n_neighbors=n_reccomend+1)
        movieids = [rating_pivot.iloc[i].name for i in neighbors[0]]
        recommeds = [str(movies[movies['movieId']==mid]['title']).split('\n')[0].split('  ')[-1] for mid in movieids if mid not in [movieid]]
        return recommeds[:n_reccomend]

    # This method will recommend movies based on history stored in self.hist list
    def recommend_on_history(self,n_reccomend = 5):
        if self.ishist == False:
            return print('No history found')
        history = np.array([list(rating_pivot.loc[mid]) for mid in self.hist])
        distance,neighbors = nn_algo.kneighbors([np.average(history,axis=0)],n_neighbors=n_reccomend + len(self.hist))
        movieids = [rating_pivot.iloc[i].name for i in neighbors[0]]
        recommeds = [str(movies[movies['movieId']==mid]['title']).split('\n')[0].split('  ')[-1] for mid in movieids if mid not in self.hist]
        return recommeds[:n_reccomend]


In [9]:
# linitializing the Recommender Object
recommender = Recommender()
# Recommendation based on past watched movies, but the object just initialized. So, therefore no history found
recommender.recommend_on_history()

No history found


In [10]:
# Recommendation based on this movie
recommender.recommend_on_movie('Father of the Bride Part II (1995)')

['Multiplicity (1996)',
 'Home Alone (1990)',
 'Vegas Vacation (1997)',
 'Problem Child (1990)',
 'Sister Act 2']

In [11]:
# Recommendation based on past watched movies, and this time a movie is there in the history.
recommender.recommend_on_history()


['Multiplicity (1996)',
 'Home Alone (1990)',
 'Vegas Vacation (1997)',
 'Problem Child (1990)',
 'Sister Act 2']

In [12]:
# Recommendation based on this movie
recommender.recommend_on_movie('Tigerland (2000)')


['Kissed (1996)',
 'Two Family House (2000)',
 'Steal This Movie! (2000)',
 'Yards, The (1999)',
 'Ballad of Narayama, The (Narayama Bushiko) (1982)']

In [13]:
# Recommendation based on past watched movies, and this time two movies is there in the history.
recommender.recommend_on_history()


['Multiplicity (1996)',
 'Home Alone (1990)',
 'Mrs. Doubtfire (1993)',
 "White Men Can't Jump (1992)",
 'Vegas Vacation (1997)']

In [14]:
# Recommendation based on this movie
recommender.recommend_on_movie('Dracula')


['Dead Man on Campus (1998)',
 'Deuce Bigalow',
 "Don't Be a Menace to South Central While Drink...",
 'Kissing a Fool (1998)',
 'Repossessed (1990)']

In [15]:
# Recommendation based on past watched movies, and this time three movies is there in the history.
recommender.recommend_on_history()


['Multiplicity (1996)',
 'Vegas Vacation (1997)',
 'Major Payne (1994)',
 'Police Academy (1984)',
 "White Men Can't Jump (1992)"]

In [16]:
# Recommendation based on this movie
recommender.recommend_on_movie('Money Train (1995)')


['Grass Harp, The (1995)',
 'Beverly Hills Cop III (1994)',
 'Demolition Man (1993)',
 'Blue Lagoon, The (1980)',
 'Renaissance Man (1994)']

In [17]:
# Recommendation based on past watched movies, and this time four movies is there in the history.
recommender.recommend_on_history()

['Major Payne (1994)',
 'Beverly Hills Cop III (1994)',
 'Renaissance Man (1994)',
 'Police Academy (1984)',
 'Problem Child (1990)']

In [18]:
# Recommendation based on this movie
recommender.recommend_on_movie('GoldenEye (1995)')


['Tomorrow Never Dies (1997)',
 'World Is Not Enough, The (1999)',
 'True Lies (1994)',
 'Enemy of the State (1998)',
 'Die Hard']

In [19]:
# Recommendation based on past watched movies, and this time five movies is there in the history.
recommender.recommend_on_history()

['Tomorrow Never Dies (1997)',
 'True Lies (1994)',
 'World Is Not Enough, The (1999)',
 'Batman Returns (1992)',
 'Fugitive, The (1993)']

In [21]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(stop_words='english')
genres = vectorizer.fit_transform(movies.genres).toarray()
contents = pd.DataFrame(genres,columns=vectorizer.get_feature_names_out())
print('Shape of the content table :',contents.shape)
contents.head()


Shape of the content table : (3883, 347)


Unnamed: 0,1919,1956,1963,1968,1974,1977,1978,1979,1980,1981,...,wight,willowbrook,witch,worrying,wrath,years,yellow,yes,york,yu
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
from sklearn.neighbors import NearestNeighbors
nn_algo = NearestNeighbors(metric='cosine')
nn_algo.fit(contents)


In [23]:
class Recommender:
    def __init__(self):
        # This list will stored movies that called atleast ones using recommend_on_movie method
        self.hist = []
        self.ishist = False # Check if history is empty

    # This method will recommend movies based on a movie that passed as the parameter
    def recommend_on_movie(self,movie,n_reccomend = 5):
        self.ishist = True
        iloc = movies[movies['title']==movie].index[0]
        self.hist.append(iloc)
        distance,neighbors = nn_algo.kneighbors([contents.iloc[iloc]],n_neighbors=n_reccomend+1)
        recommeds = [movies.iloc[i]['title'] for i in neighbors[0] if i not in [iloc]]
        return recommeds[:n_reccomend]

    # This method will recommend movies based on history stored in self.hist list
    def recommend_on_history(self,n_reccomend = 5):
        if self.ishist == False:
            return print('No history found')
        history = np.array([list(contents.iloc[iloc]) for iloc in self.hist])
        distance,neighbors = nn_algo.kneighbors([np.average(history,axis=0)],n_neighbors=n_reccomend + len(self.hist))
        recommeds = [movies.iloc[i]['title'] for i in neighbors[0] if i not in self.hist]
        return recommeds[:n_reccomend]


In [24]:
# linitializing the Recommender Object
recommender = Recommender()
# Recommendation based on past watched movies, but the object just initialized. So, therefore no history found
recommender.recommend_on_history()

No history found


In [25]:
# Recommendation based on this movie
recommender.recommend_on_movie('Father of the Bride Part II (1995)')




['Waiting for Guffman (1996)',
 'Jimmy Hollywood (1994)',
 'Kolya (1996)',
 'Life with Mikey (1993)',
 '8 1/2 Women (1999)']

In [26]:
# Recommendation based on past watched movies, and this time a movie is there in the history.
recommender.recommend_on_history()



['Waiting for Guffman (1996)',
 'Jimmy Hollywood (1994)',
 'Kolya (1996)',
 'Life with Mikey (1993)',
 '8 1/2 Women (1999)']

In [27]:
# Recommendation based on this movie
recommender.recommend_on_movie('Tigerland (2000)')




['Breaking the Waves (1996)',
 'Jails, Hospitals & Hip-Hop (2000)',
 'They Bite (1996)',
 'Black Tights (Les Collants Noirs) (1960)',
 'Identification of a Woman (Identificazione di una donna) (1982)']

In [28]:
# Recommendation based on past watched movies, and this time two movies is there in the history.
recommender.recommend_on_history()



['Sleepover (1995)',
 'Seven Beauties (Pasqualino Settebellezze) (1976)',
 'Virgin Suicides, The (1999)',
 'Man on the Moon (1999)',
 'Two Girls and a Guy (1997)']

In [29]:
# Recommendation based on this movie
recommender.recommend_on_movie('Dracula')




['Nemesis 2',
 'Best of the Best 3',
 'Mighty Morphin Power Rangers',
 'Gumby',
 'Die Hard']

In [30]:
# Recommendation based on past watched movies, and this time three movies is there in the history.
recommender.recommend_on_history()



['Play it to the Bone (1999)',
 'Seven Beauties (Pasqualino Settebellezze) (1976)',
 "Swept Away (Travolti da un insolito destino nell'azzurro mare d'Agosto) (1975)",
 'Muse, The (1999)',
 'Carriers Are Waiting, The (Les Convoyeurs Attendent) (1999)']

In [31]:
# Recommendation based on this movie
recommender.recommend_on_movie('Money Train (1995)')




['Shadow, The (1994)',
 'Black Mask (Hak hap) (1996)',
 'Stranger, The (1994)',
 'Shanghai Noon (2000)',
 'Thunderball (1965)']

In [32]:
# Recommendation based on past watched movies, and this time four movies is there in the history.
recommender.recommend_on_history()




['Get Shorty (1995)',
 'Buffalo 66 (1998)',
 'Faster Pussycat! Kill! Kill! (1965)',
 'Lethal Weapon 4 (1998)',
 'Lethal Weapon 2 (1989)']

In [33]:
# Recommendation based on this movie
recommender.recommend_on_movie('GoldenEye (1995)')




['Anaconda (1997)',
 'Clear and Present Danger (1994)',
 'Surviving the Game (1994)',
 'Chain Reaction (1996)',
 'Rock, The (1996)']

In [34]:
# Recommendation based on past watched movies, and this time five movies is there in the history.
recommender.recommend_on_history()




['Runaway Train (1985)',
 'Daylight (1996)',
 'Con Air (1997)',
 'Fire Down Below (1997)',
 'Outbreak (1995)']