# Movie Recommendation System - Collaborative Filtering (Item-Item Based)

#### Importing the required libraries

In [1]:
# pandas provides the CSV loading, merging, grouping, pivoting and correlation used below.
import pandas as pd
#import numpy as np
import warnings
# Blanket filter: suppresses every warning category for the rest of the session.
warnings.filterwarnings('ignore') 


#### Loading the Dataset

In [3]:
# Read the ratings file and replace every missing cell with 0 in one chained step.
# NOTE(review): a filled-in 0 in `rating` would be treated as a real score by the
# later means and correlations - confirm the file has no gaps.
ratings_dataset = pd.read_csv(r"ratings.csv").fillna(0)

# Preview the first ten rows.
ratings_dataset.head(n=10)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
5,1,70,3.0,964982400
6,1,101,5.0,964980868
7,1,110,4.0,964982176
8,1,151,5.0,964984041
9,1,157,5.0,964984100


In [4]:
# (rows, columns) of the ratings table as loaded, before the sampling step.
ratings_dataset.shape 

(100836, 4)

In [5]:
# Down-sample to 100,000 ratings. The fixed random_state pins which rows are drawn,
# so Restart & Run All reproduces the same tables and correlations on every run
# (without it each execution samples a different subset).
ratings_dataset = ratings_dataset.sample(n=100000, random_state=42)

# Preview the sampled rows; the index keeps the original row labels.
ratings_dataset.head()

Unnamed: 0,userId,movieId,rating,timestamp
56715,377,2791,5.0,1340343300
84725,547,2194,3.0,942723549
76496,480,2353,2.5,1179178120
66369,428,104,3.0,1111623768
47212,307,3858,1.5,1186087735


In [6]:
# Movie metadata, looked up later by movieId and by title.
details = pd.read_csv(filepath_or_buffer=r"movies.csv")

# Preview the first five rows.
details.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [7]:
# (rows, columns) of the movie metadata table.
details.shape

(9742, 3)

#### Merging the data sets 

In [8]:
# Left join: every sampled rating is kept and gains the title/genres of its movieId.
new = ratings_dataset.merge(details, how='left', on='movieId')

# Preview the merged table.
new.head(n=20)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,377,2791,5.0,1340343300,Airplane! (1980),Comedy
1,547,2194,3.0,942723549,"Untouchables, The (1987)",Action|Crime|Drama
2,480,2353,2.5,1179178120,Enemy of the State (1998),Action|Thriller
3,428,104,3.0,1111623768,Happy Gilmore (1996),Comedy
4,307,3858,1.5,1186087735,Cecil B. DeMented (2000),Comedy
5,543,165,5.0,1377022834,Die Hard: With a Vengeance (1995),Action|Crime|Thriller
6,184,143355,2.5,1537109441,Wonder Woman (2017),Action|Adventure|Fantasy
7,448,3156,2.0,1019563582,Bicentennial Man (1999),Drama|Romance|Sci-Fi
8,68,56336,2.0,1261622964,Wrong Turn 2: Dead End (2007),Action|Horror|Thriller
9,275,1097,4.0,1049079449,E.T. the Extra-Terrestrial (1982),Children|Drama|Sci-Fi


#### Calculating the mean ratings 

In [9]:
# Group the rating column by title once, then take the per-title mean.
ratings_by_title = new.groupby('title')['rating']
avg = pd.DataFrame(ratings_by_title.mean())

# Preview the per-title means (index = title, column = 'rating').
avg.head(n=20)

Unnamed: 0_level_0,rating
title,Unnamed: 1_level_1
'71 (2014),4.0
'Hellboy': The Seeds of Creation (2004),4.0
'Round Midnight (1986),3.5
'Salem's Lot (2004),5.0
'Til There Was You (1997),4.0
'Tis the Season for Love (2015),1.5
"'burbs, The (1989)",3.176471
'night Mother (1986),3.0
(500) Days of Summer (2009),3.666667
*batteries not included (1987),3.285714


#### Counting the number of ratings for each movie

In [10]:
# Number of ratings each title received, aligned to `avg` on the shared title index.
title_counts = new.groupby('title')['rating'].count()
avg['Total Ratings'] = title_counts

# Preview mean rating next to rating count.
avg.head(n=10)

Unnamed: 0_level_0,rating,Total Ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
'71 (2014),4.0,1
'Hellboy': The Seeds of Creation (2004),4.0,1
'Round Midnight (1986),3.5,2
'Salem's Lot (2004),5.0,1
'Til There Was You (1997),4.0,2
'Tis the Season for Love (2015),1.5,1
"'burbs, The (1989)",3.176471,17
'night Mother (1986),3.0,1
(500) Days of Summer (2009),3.666667,42
*batteries not included (1987),3.285714,7


#### Building the user × title rating matrix for the correlation

In [11]:
# One row per user, one column per title, cell = that user's rating (NaN when unrated).
# pivot_table's default aggregation (mean) applies if a user/title pair occurs more than once.
movie = pd.pivot_table(new, values='rating', index='userId', columns='title')

# Preview the (mostly empty) matrix.
movie.head(n=20)

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,4.0,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
7,,,,,,,,,,,...,,,,,,,,,,
8,,,,,,,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,1.0,,,
10,,,,,,,,,,,...,,,,,,,,,,


#### Sorting movies by number of ratings (descending)

In [12]:
# Most-rated titles first; show the top five.
(
    avg
    .sort_values(by='Total Ratings', ascending=False)
    .head(n=5)
)

Unnamed: 0_level_0,rating,Total Ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
Forrest Gump (1994),4.161043,326
"Shawshank Redemption, The (1994)",4.429936,314
Pulp Fiction (1994),4.194444,306
"Matrix, The (1999)",4.192446,278
"Silence of the Lambs, The (1991)",4.166065,277


#### Taking a sample movie to test our system

In [13]:
# Reference title whose rating column every other title is correlated against.
reference_column = movie['Incredibles, The (2004)']
relation = movie.corrwith(reference_column)

# Preview; NaN marks titles for which no correlation could be computed.
relation.head(n=5)

title
'71 (2014)                                NaN
'Hellboy': The Seeds of Creation (2004)   NaN
'Round Midnight (1986)                    NaN
'Salem's Lot (2004)                       NaN
'Til There Was You (1997)                 NaN
dtype: float64

In [14]:
# Wrap the correlations in a one-column frame and discard titles whose correlation is NaN.
recommend = pd.DataFrame(relation, columns=['Correlation']).dropna()

# Preview the remaining titles.
recommend.head(n=5)

Unnamed: 0_level_0,Correlation
title,Unnamed: 1_level_1
"'burbs, The (1989)",-0.2
(500) Days of Summer (2009),0.181256
*batteries not included (1987),0.19033
...And Justice for All (1979),-1.0
10 Cent Pistol (2015),1.0


In [15]:
# Ten highest correlations, not yet filtered by how many ratings each title has.
(
    recommend
    .sort_values(by='Correlation', ascending=False)
    .head(n=10)
)

Unnamed: 0_level_0,Correlation
title,Unnamed: 1_level_1
"Glass Menagerie, The (1987)",1.0
Armour of God (Long xiong hu di) (1987),1.0
Ultimate Avengers (2006),1.0
High Crimes (2002),1.0
Marvel One-Shot: Item 47 (2012),1.0
I Love Trouble (1994),1.0
Ariel (1988),1.0
Under Siege 2: Dark Territory (1995),1.0
Cemetery Man (Dellamorte Dellamore) (1994),1.0
Outside Providence (1999),1.0


In [16]:
# Attach each title's rating count, matched on the shared title index.
# Any existing 'Total Ratings' column is dropped first so the cell can be re-run:
# a second plain join would fail because the column name already exists on the left.
recommend = (
    recommend
    .drop(columns='Total Ratings', errors='ignore')
    .join(avg['Total Ratings'])
)

In [17]:
# Preview the correlation table together with the attached rating counts.
recommend.head()

Unnamed: 0_level_0,Correlation,Total Ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"'burbs, The (1989)",-0.2,17
(500) Days of Summer (2009),0.181256,42
*batteries not included (1987),0.19033,7
...And Justice for All (1979),-1.0,3
10 Cent Pistol (2015),1.0,2


#### Recommending the movies 

In [18]:
# Keep only titles with more than 200 ratings, then rank them by correlation.
has_many_ratings = recommend['Total Ratings'] > 200
recommend.loc[has_many_ratings].sort_values(by='Correlation', ascending=False)

Unnamed: 0_level_0,Correlation,Total Ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
Toy Story (1995),0.642048,215
"Silence of the Lambs, The (1991)",0.412339,277
"Matrix, The (1999)",0.337363,278
Jurassic Park (1993),0.288024,238
Schindler's List (1993),0.245408,218
"Usual Suspects, The (1995)",0.243932,204
Forrest Gump (1994),0.185684,326
Pulp Fiction (1994),0.158758,306
American Beauty (1999),0.150473,203
Star Wars: Episode IV - A New Hope (1977),0.144524,248


In [19]:
# Ask for the movie title; it is used verbatim as a column label of the rating matrix.
prompt_text = "Enter a name of the movie u want to see the prediction "
a = input(prompt_text)

# Echo what was typed.
a

Enter a name of the movie u want to see the prediction Forrest Gump (1994)


'Forrest Gump (1994)'

In [20]:
# Fail early with a readable message when the typed title is not a column of the
# user x title matrix (same exception type that movie[a] would raise on its own).
if a not in movie.columns:
    raise KeyError(
        f"{a!r} is not a title in the rating matrix; "
        "check the spelling and the '(year)' suffix"
    )

# Correlate every title's rating column with the chosen title's column.
relation = movie.corrwith(movie[a])

# Drop titles with an undefined correlation and attach each title's rating count.
# The former bare `recommend.sort_values(...)` statement is removed: its result was
# neither assigned nor displayed, and the ranking is done after the rating-count filter.
recommend = (
    pd.DataFrame(relation, columns=['Correlation'])
    .dropna()
    .join(avg['Total Ratings'])
)

In [21]:
# Keep titles with more than 50 ratings and rank them by correlation, strongest first.
enough_ratings = recommend['Total Ratings'] > 50
result = recommend.loc[enough_ratings].sort_values(by='Correlation', ascending=False)

# Preview the top matches.
result.head(n=5)

Unnamed: 0_level_0,Correlation,Total Ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
Forrest Gump (1994),1.0,326
Mr. Holland's Opus (1995),0.65442,78
Grumpier Old Men (1995),0.573308,51
Pocahontas (1995),0.550118,68
"Few Good Men, A (1992)",0.522244,57


In [22]:
# Look up movieId and genres for each recommended title (left join keeps every recommendation).
result = pd.merge(result, details, how='left', on='title')

# Show the ten best matches with their metadata.
result.head(n=10)

Unnamed: 0,title,Correlation,Total Ratings,movieId,genres
0,Forrest Gump (1994),1.0,326,356,Comedy|Drama|Romance|War
1,Mr. Holland's Opus (1995),0.65442,78,62,Drama
2,Grumpier Old Men (1995),0.573308,51,3,Comedy|Romance
3,Pocahontas (1995),0.550118,68,48,Animation|Children|Drama|Musical|Romance
4,"Few Good Men, A (1992)",0.522244,57,2268,Crime|Drama|Thriller
5,Caddyshack (1980),0.520328,52,3552,Comedy
6,Field of Dreams (1989),0.501416,55,1302,Children|Drama|Fantasy
7,Big (1988),0.493824,91,2797,Comedy|Drama|Fantasy|Romance
8,Space Jam (1996),0.486882,52,673,Adventure|Animation|Children|Comedy|Fantasy|Sc...
9,Hook (1991),0.484676,52,3489,Adventure|Comedy|Fantasy
