# Movie Recommendation using Content Based Filtering

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Importing Dataset

In [16]:
movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")

In [17]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [18]:
'extracting year from title column and separating genres'
movies['year'] = movies.title.str.extract('(\(\d\d\d\d\))' , expand = False)
movies['year'] = movies.year.str.extract('(\d\d\d\d)' , expand = False)
movies['title'] = movies.title.str.replace('(\(\d\d\d\d\))' , '')
movies['title'] = movies.title.apply(lambda x : x.strip())
movies['genres'] = movies.genres.str.split('|')
movies.head()

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]",1995
1,2,Jumanji,"[Adventure, Children, Fantasy]",1995
2,3,Grumpier Old Men,"[Comedy, Romance]",1995
3,4,Waiting to Exhale,"[Comedy, Drama, Romance]",1995
4,5,Father of the Bride Part II,[Comedy],1995


In [19]:
'separating genres into one-hot encodings'
movies_genres = movies.copy()

for index,row in movies.iterrows():
    for genre in row['genres']:
        movies_genres.at[index , genre] = 1
movies_genres = movies_genres.fillna(0)
movies_genres.head()

Unnamed: 0,movieId,title,genres,year,Adventure,Animation,Children,Comedy,Fantasy,Romance,...,Horror,Mystery,Sci-Fi,IMAX,Documentary,War,Musical,Western,Film-Noir,(no genres listed)
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]",1995,1.0,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,Jumanji,"[Adventure, Children, Fantasy]",1995,1.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,Grumpier Old Men,"[Comedy, Romance]",1995,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,Waiting to Exhale,"[Comedy, Drama, Romance]",1995,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,Father of the Bride Part II,[Comedy],1995,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,169,2.5
1,1,2471,3.0
2,1,48516,5.0
3,2,2571,3.5
4,2,109487,4.0


user input part 

In [47]:
user_data = [
    {'title' : 'Back to the Future' , 'rating' : 4.9},
    {'title' : 'Back to the Future Part II' , 'rating' : 4.9},
    {'title': 'Harry Potter and the Chamber of Secrets' , 'rating' : 3.5},
    {'title' : 'Exorcist: The Beginning' , 'rating' : 4.5},
    {'title':'Iron Man' , 'rating' : 5},
    {'title':'Captain America: The First Avenger' , 'rating' : 4.8}
]

In [48]:
user_movies = pd.DataFrame(user_data)
user_movies

Unnamed: 0,title,rating
0,Back to the Future,4.9
1,Back to the Future Part II,4.9
2,Harry Potter and the Chamber of Secrets,3.5
3,Exorcist: The Beginning,4.5
4,Iron Man,5.0
5,Captain America: The First Avenger,4.8


In [49]:
user_ratings = user_movies['rating']
user_ratings

0    4.9
1    4.9
2    3.5
3    4.5
4    5.0
5    4.8
Name: rating, dtype: float64

'Adding other columns in user_movies dataframe'

In [50]:
user_movies = movies_genres[movies_genres['title'].isin(user_movies['title'].tolist())]
user_movies

Unnamed: 0,movieId,title,genres,year,Adventure,Animation,Children,Comedy,Fantasy,Romance,...,Horror,Mystery,Sci-Fi,IMAX,Documentary,War,Musical,Western,Film-Noir,(no genres listed)
1242,1270,Back to the Future,"[Adventure, Comedy, Sci-Fi]",1985,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1928,2011,Back to the Future Part II,"[Adventure, Comedy, Sci-Fi]",1989,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5718,5816,Harry Potter and the Chamber of Secrets,"[Adventure, Fantasy]",2002,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8133,8815,Exorcist: The Beginning,"[Horror, Thriller]",2004,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12646,59315,Iron Man,"[Action, Adventure, Sci-Fi]",2008,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17544,88140,Captain America: The First Avenger,"[Action, Adventure, Sci-Fi, Thriller, War]",2011,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [51]:
user_movies = user_movies.reset_index(drop=True)
user_genre = user_movies.drop('movieId', 1).drop('title', 1).drop('genres', 1).drop('year', 1)
user_genre

Unnamed: 0,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Mystery,Sci-Fi,IMAX,Documentary,War,Musical,Western,Film-Noir,(no genres listed)
0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


### profile of user

In [52]:
profile = user_genre.transpose().dot(user_ratings)
profile

Adventure             23.1
Animation              0.0
Children               0.0
Comedy                 9.8
Fantasy                3.5
Romance                0.0
Drama                  0.0
Action                 9.8
Crime                  0.0
Thriller               9.3
Horror                 4.5
Mystery                0.0
Sci-Fi                19.6
IMAX                   0.0
Documentary            0.0
War                    4.8
Musical                0.0
Western                0.0
Film-Noir              0.0
(no genres listed)     0.0
dtype: float64

In [53]:
genreTable = movies_genres.set_index(movies_genres['movieId'])
genreTable = genreTable.drop('movieId', 1).drop('title', 1).drop('genres', 1).drop('year', 1)
genreTable.head()

Unnamed: 0_level_0,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Mystery,Sci-Fi,IMAX,Documentary,War,Musical,Western,Film-Noir,(no genres listed)
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### User Recommendation Table 

In [55]:
recommendationTable = ((genreTable*profile).sum(axis=1))/(profile.sum())
recommendationTable = recommendationTable.sort_values(ascending=False)
recommendationTable.head()

movieId
116758    0.848341
27032     0.848341
96200     0.842417
115479    0.789100
87232     0.789100
dtype: float64

### Recommending best 10 movies according to the User Profile

In [57]:
movies.loc[movies['movieId'].isin(recommendationTable.head(10).keys())]

Unnamed: 0,movieId,title,genres,year
9180,27032,Who Am I? (Wo shi shei),"[Action, Adventure, Comedy, Sci-Fi, Thriller]",1998
10382,36509,"Cave, The","[Action, Adventure, Horror, Mystery, Sci-Fi, T...",2005
15691,79681,Invisible Agent,"[Adventure, Comedy, Sci-Fi, Thriller, War]",1942
17317,87232,X-Men: First Class,"[Action, Adventure, Sci-Fi, Thriller, War]",2011
17544,88140,Captain America: The First Avenger,"[Action, Adventure, Sci-Fi, Thriller, War]",2011
19411,96200,War of the Dead - Stone's War,"[Action, Adventure, Horror, Sci-Fi, Thriller, ...",2011
23650,111861,Battle of the Damned,"[Action, Adventure, Horror, Sci-Fi, Thriller]",2013
24565,115479,"Whip Hand, The","[Action, Adventure, Crime, Drama, Sci-Fi, Thri...",1951
24946,116758,Death Racers,"[Action, Adventure, Comedy, Sci-Fi, Thriller]",2008
26301,122280,Sabretooth,"[Action, Adventure, Horror, Sci-Fi, Thriller]",2002
