# Recommender System Case Study

In [17]:
# Import libraries
import numpy as np
import pandas as pd

In [18]:
# Reading ratings file

ratings = pd.read_csv('ratings.csv', usecols=['userId','movieId','rating','timestamp'])
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


In [19]:
# Reading movies file

movies = pd.read_csv('movies.csv', usecols=['movieId','title','genres'])
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [20]:
# Find the unique number of users and movies in the ‘ratings’ dataset
n_users = ratings.userId.unique().shape[0]

n_movies = ratings.movieId.unique().shape[0]

print(f'Number of users = {n_users} and Number of movies = {n_movies}')

Number of users = 7120 and Number of movies = 14026


In [21]:
# Create a rating matrix for the ‘ratings’ dataset and store it in ‘Ratings’
Ratings = ratings.pivot(index = 'userId', columns ='movieId', values = 'rating').fillna(0)
Ratings.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,129350,129354,129428,129707,130052,130073,130219,130462,130490,130642
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
#conda install -c conda-forge scikit-surprise
!pip install surprise


Collecting surprise
  Using cached surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Installing collected packages: surprise
Successfully installed surprise-0.1


In [23]:
# Import libraries from Surprise package
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

# Load Reader library
reader = Reader()

# Load ratings dataset with Dataset library
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Use the SVD algorithm.
svd = SVD()

# Compute the RMSE of the SVD algorithm.
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8473  0.8438  0.8447  0.8452  0.0015  
MAE (testset)     0.6487  0.6460  0.6469  0.6472  0.0011  
Fit time          45.73   45.09   45.86   45.56   0.34    
Test time         3.67    3.44    3.55    3.55    0.09    


{'test_rmse': array([0.84725926, 0.84381113, 0.84466046]),
 'test_mae': array([0.64874301, 0.6460483 , 0.64688163]),
 'fit_time': (45.732199907302856, 45.08893704414368, 45.85891127586365),
 'test_time': (3.666422128677368, 3.4400546550750732, 3.5512640476226807)}

In [24]:
# Print the head of ratings dataset
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


# To find all the movies rated as more than 4 stars by user with userId = 1

In [25]:
ratings_1 = ratings[(ratings['userId'] == 5) & (ratings['rating'] == 5)]
ratings_1 = ratings_1.set_index('movieId')
ratings_1 = ratings_1.join(movies)['title']
ratings_1.head(10)

movieId
11                    Dracula: Dead and Loving It (1995)
62     Don't Be a Menace to South Central While Drink...
141                                         Gospa (1995)
150                                Addiction, The (1995)
260                             Ladybird Ladybird (1994)
318    Strawberry and Chocolate (Fresa y chocolate) (...
364                                      Maverick (1994)
368                                 Reality Bites (1994)
377                      When a Man Loves a Woman (1994)
380                                   Bad Company (1995)
Name: title, dtype: object

# Train an SVD to predict ratings for user with userId = 1

In [27]:
# Create a shallow copy for the movies dataset
user_5 = movies.copy()

#Reset the index for user_5 dataset
user_5 = user_5.reset_index()

# getting full dataset
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

#create a training set for svd
trainset = data.build_full_trainset()
svd.fit(trainset)

#Predict the ratings for user1
user_5['Estimate_Score'] = user_5['movieId'].apply(lambda x: svd.predict(1, x).est)

#Drop extra columns from the user1 data frame
user_5 = user_5.drop(['movieId','genres','index'], axis = 1)

# Sort predicted ratings for user1 in descending order
user_5 = user_5.sort_values('Estimate_Score', ascending=False)

#Print top 10 recommendations
print(user_5.head(10))

                                                   title  Estimate_Score
5853       Lord of the Rings: The Two Towers, The (2002)        4.679040
3003                                      Matewan (1987)        4.673445
7041   Lord of the Rings: The Return of the King, The...        4.647395
4897   Lord of the Rings: The Fellowship of the Ring,...        4.624148
2849                                Lady Eve, The (1941)        4.608477
2851                        Palm Beach Story, The (1942)        4.572075
10923              Devil and Daniel Johnston, The (2005)        4.542861
8937                     Decalogue, The (Dekalog) (1989)        4.511222
18990                                Black Mirror (2011)        4.505202
7356                             Band of Brothers (2001)        4.491050
