# Collaborative Filtering Recommendation System

## Task 1: Import Modules

In [10]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import sklearn

## Task 2: Import the Dataset

In [11]:
movie=pd.read_csv("Movie_data.csv")
movie_title=pd.read_csv("Movie_Id_Titles.csv")


## Task 3: Explore the Dataset

In [7]:
print('movie data')
print(movie.head())
print('movie title id')
print(movie_title.head())


movie data
     0  Shawn Wilson   50  5  881250949
0    0  Shawn Wilson  172  5  881250949
1    0  Shawn Wilson  133  1  881250949
2  196  Bessie White  242  3  881250949
3  196  Bessie White  393  4  881251863
4  196  Bessie White  381  4  881251728
movie title id
   item_id              title
0        1   Toy Story (1995)
1        2   GoldenEye (1995)
2        3  Four Rooms (1995)
3        4  Get Shorty (1995)
4        5     Copycat (1995)


In [19]:
col_names=['user_id','user_name','Movie_ID','rating','timestamp']
movies_df=movie=pd.read_csv("Movie_data.csv",sep=',',names=col_names)
movies_df

Unnamed: 0,user_id,user_name,Movie_ID,rating,timestamp
0,0,Shawn Wilson,50,5,881250949
1,0,Shawn Wilson,172,5,881250949
2,0,Shawn Wilson,133,1,881250949
3,196,Bessie White,242,3,881250949
4,196,Bessie White,393,4,881251863
...,...,...,...,...,...
99998,941,Thomas Warren,919,5,875048887
99999,941,Thomas Warren,273,3,875049038
100000,941,Thomas Warren,1,5,875049144
100001,941,Thomas Warren,294,4,875048532


In [26]:
movie_title_df=pd.read_csv("Movie_Id_Titles.csv")
movie_title_df.rename(columns={"item_id":'Movie_ID','title':'Movie_Title'},inplace=True)
movie_title_df

Unnamed: 0,Movie_ID,Movie_Title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)
...,...,...
1677,1678,Mat' i syn (1997)
1678,1679,B. Monkey (1998)
1679,1680,Sliding Doors (1998)
1680,1681,You So Crazy (1994)


In [29]:
Merged_df=pd.merge(movies_df,movie_title_df,on='Movie_ID')
Merged_df.head(5)

Unnamed: 0,user_id,user_name,Movie_ID,rating,timestamp,Movie_Title
0,0,Shawn Wilson,50,5,881250949,Star Wars (1977)
1,22,Robert Poulin,50,5,878887765,Star Wars (1977)
2,244,Laura Krulik,50,5,880604379,Star Wars (1977)
3,298,Loren Aucoin,50,5,884125578,Star Wars (1977)
4,115,Dominick Jenkins,50,5,881172049,Star Wars (1977)


In [32]:
np.shape(Merged_df)

(100003, 6)

In [33]:
Merged_df.describe()

Unnamed: 0,user_id,Movie_ID,rating,timestamp
count,100003.0,100003.0,100003.0,100003.0
mean,462.470876,425.520914,3.529864,883528800.0
std,266.622454,330.797791,1.125704,5343791.0
min,0.0,1.0,1.0,874724700.0
25%,254.0,175.0,3.0,879448700.0
50%,447.0,322.0,4.0,882826900.0
75%,682.0,631.0,4.0,888260000.0
max,943.0,1682.0,5.0,893286600.0


In [35]:
#min rating per user in ascending order
Merged_df.groupby('user_id')['rating'].count().sort_values(ascending=True).head()

user_id
0       3
166    20
418    20
34     20
441    20
Name: rating, dtype: int64

In [40]:
num_users=Merged_df.user_id.unique().shape[0]
num_movies=Merged_df.Movie_ID.unique().shape[0]
print('Total Users:'+str(num_users))
print('Total Movies:'+str(num_movies))

Total Users:944
Total Movies:1682


## Task 4: Create an Interaction Matrix

In [48]:
ratings=np.zeros((num_users,num_movies))
for row in Merged_df.itertuples():
    ratings[row[1],row[3]-1]=row[4]
print(ratings)    

[[0. 0. 0. ... 0. 0. 0.]
 [5. 3. 4. ... 0. 0. 0.]
 [4. 0. 0. ... 0. 0. 0.]
 ...
 [5. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 5. 0. ... 0. 0. 0.]]


## Task 5: Explore the Interaction Matrix

In [69]:
sparsity = float(len(ratings.nonzero()[0]))
sparsity /= (ratings.shape[0] * ratings.shape[1])
sparsity *= 100

print(sparsity)

6.298179628771237


## Task 6 : Create a Similarity Matrix

In [71]:
rating_similarity=cosine_similarity(ratings)
print(rating_similarity)

[[1.         0.11988816 0.11554032 ... 0.         0.18180857 0.11890394]
 [0.11988816 1.         0.16693098 ... 0.14861694 0.17950788 0.39817474]
 [0.11554032 0.16693098 1.         ... 0.16148478 0.17226781 0.10579788]
 ...
 [0.         0.14861694 0.16148478 ... 1.         0.1016418  0.09511958]
 [0.18180857 0.17950788 0.17226781 ... 0.1016418  1.         0.18246466]
 [0.11890394 0.39817474 0.10579788 ... 0.09511958 0.18246466 1.        ]]


## Task 7: Provide Recommendations

In [80]:
def recommended(user_item_m,X_user,user,k=10,top_n=10):
    user_similarity=X_user[user]
    most_similar_users = user_item_m.index[user_similarity.argpartition(-k)[-k:]]
    rec_movies=user_item_m.loc[most_similar_users].mean(0).sort_values(ascending=False)
    m_seen_movies=user_item_m.loc[user].gt(0)
    seen_movies = m_seen_movies.index[m_seen_movies].tolist()
    rec_movies = rec_movies.drop(seen_movies).head(top_n)
    rec_movies_a=rec_movies.index.to_frame().reset_index(drop=True)
    rec_movies_a.rename(columns={rec_movies_a.columns[0]: 'Movie_ID'}, inplace=True)
    return rec_movies_a

## Task 8: View the Provided Recommendations 

In [81]:
ratings_df=pd.DataFrame(ratings)


In [82]:
recommended(ratings_df,rating_similarity,user_id)

Unnamed: 0,Movie_ID
0,180
1,209
2,495
3,422
4,172
5,384
6,78
7,567
8,565
9,21


## Task 9: Create Wrapper Function

In [83]:
def movie_recommender_run(u_name):
    user_id=Mergerd_df.loc[Merged_df['user_name']==u_name].user_id.values[0]
    tmp=recommended(rating_df,rating_similarity,user_id)