# Recommender System

### Reading the Data and Importing necessary packages

In [458]:
import pandas as pd
import numpy as np
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import train_test_split 

In [459]:
movies= pd.read_csv("movies.csv",encoding='utf-8')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [460]:
ratings= pd.read_csv("ratings.csv",encoding='utf-8')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [461]:
links=pd.read_csv("links.csv",encoding='utf-8')
links.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [462]:
tags= pd.read_csv("tags.csv",encoding='utf-8')
tags.head()

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996
2,2,60756,will ferrell,1445714992
3,2,89774,Boxing story,1445715207
4,2,89774,MMA,1445715200


### 1. To create recommender system using ratings.csv file 

In [463]:
reader= Reader(rating_scale=(0.5,5))
data= Dataset.load_from_df(ratings[['userId','movieId','rating']],reader)

In [464]:
[trainset,testset]= train_test_split(data,test_size=0.15,shuffle=True)

In [465]:
recom= KNNWithMeans(k=5,sim_options={'name':'cosine','user_based':True})  
recom.fit(data.build_full_trainset())
test_pred= recom.test(testset)
RMSE= accuracy.rmse(test_pred)

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.6466


### 2. To display predicted rating for a particular userID and MovieID combinations (both taken as keyboard input)

In [466]:
user_Id= int(input())
movie_Id= int(input())

pre=recom.predict(user_Id,movie_Id)
print("Rating Value for Movie-id", movie_Id," by User-id ",user_Id," is ",pre[3])

3
2
Rating Value for Movie-id 2  by User-id  3  is  2.5468248484346407


### 3. To recommend a user to watch top 10 movies, which has not been watched yet (as per his rating predictions). Fix the threshold rating as 2.5

In [467]:
ratings_pivot=(ratings.pivot(index='movieId',columns='userId',values='rating').fillna(0))
ratings_pivot.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [480]:
user_Id=int(input())
unseen_movies=[]
for i in range(1,9725):
    try:
        if ratings_pivot[user_Id][i]==0:
            unseen_movies.append(i)
    except:
        i+=1    

10


In [481]:
ratings_for_unseen_movies=[]
for i in range(len(unseen_movies)):
    pre=recom.predict(user_Id,unseen_movies[i])
    ratings_for_unseen_movies.append(pre[3])

In [482]:
UnseenMovies = pd.DataFrame({'Unseen_movieId':unseen_movies,'Predicted_Ratings':ratings_for_unseen_movies})

In [483]:
UnseenMovies =UnseenMovies.sort_values(by='Predicted_Ratings', ascending=False)

In [484]:
UnseenMovies[:10]

Unnamed: 0,Unseen_movieId,Predicted_Ratings
2645,3567,5.0
2504,3379,5.0
3878,5490,5.0
5149,8477,5.0
4550,6818,5.0
2311,3086,4.977002
5005,7926,4.914887
3822,5416,4.886614
3778,5328,4.886614
2925,3951,4.886614


### 4. To display the MovieID, IMDB ID, Average User Rating (excluding predicted ratings), genres and tag of all the movies found in Step 3 as a data frame.

In [485]:
Merge1= pd.merge(UnseenMovies,movies,left_on='Unseen_movieId',right_on='movieId',how='left').drop('movieId',axis=1)

In [486]:
Merge2= pd.merge(Merge1,links,left_on='Unseen_movieId',right_on='movieId',how='left').drop('movieId',axis=1)

In [487]:
ratings_grouped= pd.DataFrame(ratings.groupby('movieId')['rating'].mean())
ratings_grouped= ratings_grouped.reset_index()

In [488]:
Merge3= pd.merge(Merge2,ratings_grouped,left_on='Unseen_movieId',right_on='movieId',how='left').drop('movieId',axis=1)
Merge3.head(10)

Unnamed: 0,Unseen_movieId,Predicted_Ratings,title,genres,imdbId,tmdbId,rating
0,3567,5.0,Bossa Nova (2000),Comedy|Drama|Romance,180837,19600.0,5.0
1,3379,5.0,On the Beach (1959),Drama,53137,35412.0,4.5
2,5490,5.0,The Big Bus (1976),Action|Comedy,74205,19133.0,5.0
3,8477,5.0,"Jetée, La (1962)",Romance|Sci-Fi,56119,662.0,4.5
4,6818,5.0,Come and See (Idi i smotri) (1985),Drama|War,91251,25237.0,5.0
5,3086,4.977002,Babes in Toyland (1934),Children|Comedy|Fantasy|Musical,24852,25898.0,5.0
6,7926,4.914887,High and Low (Tengoku to jigoku) (1963),Crime|Drama|Film-Noir|Thriller,57565,12493.0,4.0
7,5416,4.886614,Cherish (2002),Comedy|Drama|Thriller,298798,18408.0,5.0
8,5328,4.886614,Rain (2001),Drama|Romance,287645,69324.0,5.0
9,3951,4.886614,Two Family House (2000),Drama,202641,63956.0,5.0


In [499]:
tags_for_movies = pd.DataFrame(tags.groupby(['movieId'])['tag'].apply(list))
tags_for_movies =tags_for_movies.reset_index()
tags_for_movies.tag.replace('NaN',"No tag Available")
tags_for_movies.head(10)

Unnamed: 0,movieId,tag
0,1,"[pixar, pixar, fun]"
1,2,"[fantasy, magic board game, Robin Williams, game]"
2,3,"[moldy, old]"
3,5,"[pregnancy, remake]"
4,7,[remake]
5,11,"[politics, president]"
6,14,"[politics, president]"
7,16,[Mafia]
8,17,[Jane Austen]
9,21,[Hollywood]


In [500]:
Merge4= pd.merge(Merge3,tags_for_movies,left_on='Unseen_movieId',right_on='movieId',how='left').drop('movieId',axis=1)
Merge4.drop('Predicted_Ratings',inplace=True,axis=1)
Merge4.head(10)

Unnamed: 0,Unseen_movieId,title,genres,imdbId,tmdbId,rating,tag
0,3567,Bossa Nova (2000),Comedy|Drama|Romance,180837,19600.0,5.0,
1,3379,On the Beach (1959),Drama,53137,35412.0,4.5,
2,5490,The Big Bus (1976),Action|Comedy,74205,19133.0,5.0,
3,8477,"Jetée, La (1962)",Romance|Sci-Fi,56119,662.0,4.5,[post-apocalyptic]
4,6818,Come and See (Idi i smotri) (1985),Drama|War,91251,25237.0,5.0,"[atmospheric, bleak, disturbing, gritty, harsh]"
5,3086,Babes in Toyland (1934),Children|Comedy|Fantasy|Musical,24852,25898.0,5.0,
6,7926,High and Low (Tengoku to jigoku) (1963),Crime|Drama|Film-Noir|Thriller,57565,12493.0,4.0,[In Netflix queue]
7,5416,Cherish (2002),Comedy|Drama|Thriller,298798,18408.0,5.0,
8,5328,Rain (2001),Drama|Romance,287645,69324.0,5.0,
9,3951,Two Family House (2000),Drama,202641,63956.0,5.0,[In Netflix queue]


In [501]:
#Top10 Movies based on Predicted Ratings 
Top_10_Movies= pd.DataFrame(Merge4.head(10))
Top_10_Movies

Unnamed: 0,Unseen_movieId,title,genres,imdbId,tmdbId,rating,tag
0,3567,Bossa Nova (2000),Comedy|Drama|Romance,180837,19600.0,5.0,
1,3379,On the Beach (1959),Drama,53137,35412.0,4.5,
2,5490,The Big Bus (1976),Action|Comedy,74205,19133.0,5.0,
3,8477,"Jetée, La (1962)",Romance|Sci-Fi,56119,662.0,4.5,[post-apocalyptic]
4,6818,Come and See (Idi i smotri) (1985),Drama|War,91251,25237.0,5.0,"[atmospheric, bleak, disturbing, gritty, harsh]"
5,3086,Babes in Toyland (1934),Children|Comedy|Fantasy|Musical,24852,25898.0,5.0,
6,7926,High and Low (Tengoku to jigoku) (1963),Crime|Drama|Film-Noir|Thriller,57565,12493.0,4.0,[In Netflix queue]
7,5416,Cherish (2002),Comedy|Drama|Thriller,298798,18408.0,5.0,
8,5328,Rain (2001),Drama|Romance,287645,69324.0,5.0,
9,3951,Two Family House (2000),Drama,202641,63956.0,5.0,[In Netflix queue]
