# Module 9 - Unsupervised Learning - Case Study 1 - Recommendation Engine Using SVD in Python

In [1]:
import numpy as np
import pandas as pd
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

In [2]:
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

In [3]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


In [4]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
ratings.shape, movies.shape

((1048575, 4), (27278, 3))

In [6]:
n_users = ratings.userId.unique().shape[0]
n_users

7120

In [7]:
Ratings = ratings.pivot(index = 'userId', columns ='movieId', values = 'rating').fillna(0)
Ratings.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,129350,129354,129428,129707,130052,130073,130219,130462,130490,130642
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
reader = Reader()
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
svd = SVD()
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8456  0.8446  0.8464  0.8455  0.0007  
MAE (testset)     0.6477  0.6475  0.6482  0.6478  0.0003  
Fit time          52.97   52.85   51.85   52.56   0.50    
Test time         3.90    3.86    3.94    3.90    0.03    


{'test_rmse': array([0.84561133, 0.84462853, 0.84639537]),
 'test_mae': array([0.64766898, 0.64745926, 0.64821991]),
 'fit_time': (52.969940423965454, 52.84589171409607, 51.85083603858948),
 'test_time': (3.899085760116577, 3.8588924407958984, 3.936843156814575)}

In [9]:
ratings_1 = ratings[(ratings['userId'] == 5) & (ratings['rating'] == 5)]
ratings_1 = ratings_1.set_index('movieId')
ratings_1 = ratings_1.join(movies)['title']
print('User has give 5* Rating to the following', ratings_1.shape[0], 'movies\n')
print(ratings_1)

User has give 5* Rating to the following 38 movies

movieId
11                     Dracula: Dead and Loving It (1995)
62      Don't Be a Menace to South Central While Drink...
141                                          Gospa (1995)
150                                 Addiction, The (1995)
260                              Ladybird Ladybird (1994)
318     Strawberry and Chocolate (Fresa y chocolate) (...
364                                       Maverick (1994)
368                                  Reality Bites (1994)
377                       When a Man Loves a Woman (1994)
380                                    Bad Company (1995)
440                    Even Cowgirls Get the Blues (1993)
454                   Geronimo: An American Legend (1993)
457                                        Go Fish (1994)
500                                      No Escape (1994)
508                            Puppet Masters, The (1994)
531                                     Short Cuts (1993)
588         

In [10]:
user_5 = movies.copy()
user_5 = user_5.reset_index()
user_5.head()

Unnamed: 0,index,movieId,title,genres
0,0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,2,Jumanji (1995),Adventure|Children|Fantasy
2,2,3,Grumpier Old Men (1995),Comedy|Romance
3,3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,4,5,Father of the Bride Part II (1995),Comedy


In [11]:
train = data.build_full_trainset()
svd.fit(train)

user_5['Estimate_Score'] = user_5['movieId'].apply(lambda x: svd.predict(1, x).est)
user_5 = user_5.drop(['movieId','genres','index'], axis = 1)
user_5 = user_5.sort_values('Estimate_Score', ascending=False)

In [12]:
print(user_5.head(10))

                                                   title  Estimate_Score
2849                                Lady Eve, The (1941)        4.625185
7041   Lord of the Rings: The Return of the King, The...        4.583071
4897   Lord of the Rings: The Fellowship of the Ring,...        4.562879
10923              Devil and Daniel Johnston, The (2005)        4.554848
5853       Lord of the Rings: The Two Towers, The (2002)        4.552802
7356                             Band of Brothers (2001)        4.507403
9497                Sea Inside, The (Mar adentro) (2004)        4.500031
2646                 Jules and Jim (Jules et Jim) (1961)        4.492981
8937                     Decalogue, The (Dekalog) (1989)        4.481553
6873   Passion of Joan of Arc, The (Passion de Jeanne...        4.479140
