<h1>Demo-1</h1>

<h1>Build a Movie Recommendation System Using Collaborative Filtering in Python</h1>

<h3>Importing Packages</h3>

In [10]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.decomposition import TruncatedSVD


<h3>Preparing the Data</h3>

In [14]:
# Column labels for the tab-separated ratings file, supplied explicitly via `names=`.
columns = ["user_id", "item_id", "rating", "timestamp"]
frame = pd.read_csv(
    "Downloads/ml-100k/u.data",
    sep="\t",
    names=columns,
)
frame.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [33]:
# Column labels for the pipe-delimited u.item file, supplied explicitly via `names=`.
# NOTE(review): several labels look misspelled ("release data", "IMBb URL",
# "Flim-Noir", "Si-Fi"); they are kept verbatim because other cells may select by them.
columns = [
    "item_id", "movie_title", "release data", "video release date", "IMBb URL",
    "unknown", "Action", "Adventure", "Animation", "Childrens", "Comedy", "Crime",
    "Documentary", "Drama", "Fantasy", "Flim-Noir", "Horror", "Musical", "Mystery",
    "Romance", "Si-Fi", "Thriller", "War", "Western",
]

movies = pd.read_csv(
    "Downloads/ml-100k/u.item",
    sep="|",
    names=columns,
    encoding="iso-8859-1",
)

# Subset of the dataset: keep only the item id and the movie title.
movie_names = movies.loc[:, ["item_id", "movie_title"]]
movie_names.head()


Unnamed: 0,item_id,movie_title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [34]:
# Join the ratings with the movie titles on their shared "item_id" column,
# so that every rating row also carries its movie_title.
combined_movies_data = frame.merge(movie_names, on="item_id")
combined_movies_data.head()

Unnamed: 0,user_id,item_id,rating,timestamp,movie_title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


In [35]:
# Count the ratings recorded for each item and show the most-rated items first.
(
    combined_movies_data
    .groupby("item_id")["rating"]
    .count()
    .sort_values(ascending=False)
    .head()
)

item_id
50     583
258    509
100    508
181    507
294    485
Name: rating, dtype: int64

In [37]:
# Look up the title(s) stored for item_id 50, the item with the highest rating count above.
# The mask is deliberately not named `filter`: that name would shadow Python's
# built-in filter() for every cell run afterwards in the same kernel.
is_item_50 = combined_movies_data["item_id"] == 50
# One .loc selection (rows by mask, a single column) replaces chained df[mask][col] indexing.
combined_movies_data.loc[is_item_50, "movie_title"].unique()

array(['Star Wars (1977)'], dtype=object)

<h3>Building Utility matrix</h3>

In [38]:
# Utility matrix: one row per user_id, one column per movie_title, cells hold the rating.
# User/movie combinations without a rating are filled with 0.
rating_crosstab = pd.pivot_table(
    combined_movies_data,
    values="rating",
    index="user_id",
    columns="movie_title",
    fill_value=0,
)
rating_crosstab.head()

movie_title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,2.0,5.0,0.0,0.0,3.0,4.0,0.0,0.0,...,0.0,0.0,0.0,5.0,3.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,2.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,0.0


<h3>Transposing the Matrix</h3>

In [40]:
# Dimensions of the utility matrix: rows are user_id values, columns are movie_title values.
rating_crosstab.shape

(943, 1664)

In [41]:
# Flip the utility matrix so that each row describes a movie and each column a user.
X = np.transpose(rating_crosstab.values)
X.shape

(1664, 943)

<h3>DECOMPOSING THE MATRIX</h3>

In [43]:
# Reduce every row of X to 12 components with truncated SVD.
# random_state is pinned so that re-running the cell uses the same seed.
SVD = TruncatedSVD(
    n_components=12,
    random_state=17,
)
resultant_matrix = SVD.fit_transform(X)
resultant_matrix.shape

(1664, 12)

<h3>Generating a Correlation Matrix</h3>

# Pearson correlation coefficient between every pair of rows of resultant_matrix.

# rowvar=True (NumPy's default) makes each row one variable and each column one observation.
corr_mat = np.corrcoef(resultant_matrix, rowvar=True)
corr_mat.shape

In [58]:
# Find the position of Star Wars among the movie titles so its row can be
# pulled out of the correlation matrix.
# NOTE: this rebinds movie_names, which earlier held the item_id/movie_title
# DataFrame; from here on it is the column index of the utility matrix.
movie_names = rating_crosstab.columns
movie_list = movie_names.tolist()

star_wars = movie_list.index("Star Wars (1977)")
print(star_wars)

1398


In [59]:
# Row of the correlation matrix at the Star Wars position:
# one coefficient per row of corr_mat.
corr_star_wars = corr_mat[star_wars, :]
corr_star_wars.shape

(1664,)

<h3>Recommending a Highly Correlated Movie</h3>

In [60]:
# Titles whose correlation with Star Wars exceeds 0.9.
# Star Wars itself is excluded by position rather than with `corr < 1.0`:
# floating-point rounding means its self-correlation is not guaranteed to be
# exactly 1.0, and any other movie that rounds to 1.0 would be dropped by mistake.
is_other_movie = np.arange(len(corr_star_wars)) != star_wars
list(movie_names[is_other_movie & (corr_star_wars > 0.9)])

['Empire Strikes Back, The (1980)',
 'Men in Black (1997)',
 'Raiders of the Lost Ark (1981)',
 'Return of the Jedi (1983)',
 'Star Trek: First Contact (1996)',
 'Toy Story (1995)']

In [61]:
# Titles whose correlation with Star Wars exceeds the stricter 0.95 cut-off.
# Star Wars itself is excluded by position rather than with `corr < 1.0`:
# floating-point rounding means its self-correlation is not guaranteed to be
# exactly 1.0, and any other movie that rounds to 1.0 would be dropped by mistake.
is_other_movie = np.arange(len(corr_star_wars)) != star_wars
list(movie_names[is_other_movie & (corr_star_wars > 0.95)])

['Return of the Jedi (1983)', 'Toy Story (1995)']