In [1]:
from src import data_loader, features, model
import yaml

import pandas as pd
import numpy as np

from surprise.model_selection import train_test_split
from surprise import SVD

In [2]:
# Loading dataset
# Read the project configuration. The context manager closes the file handle
# as soon as parsing is done instead of leaving it open for the kernel's lifetime.
with open("config/config.yaml") as config_file:
    config = yaml.safe_load(config_file)

# Each call passes the path taken from the config plus the list of column names
# for that table to data_loader.load_data.
df_movies = data_loader.load_data(config['data']['movies'], ['movieId','title','genres'])
df_ratings = data_loader.load_data(config['data']['ratings'], ['userId','movieId','rating','timestamp'])

In [3]:
# Display the loaded movies table to inspect its columns (movieId, title, genres).
df_movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [4]:
# Display the loaded ratings table to inspect its columns (userId, movieId, rating, timestamp).
df_ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


# Pre-processing

In [5]:
# Build the user x movie ratings matrix: one row per userId, one column per
# movieId, each cell holding the rating taken from df_ratings.
Ratings = df_ratings.pivot(index='userId', columns='movieId', values='rating')

# User/movie pairs with no rating come out of the pivot as NaN; store them as 0.
Ratings = Ratings.fillna(0)

# Preview the first rows of the matrix.
Ratings.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Converting df_ratings to Surprise's format
# The column list names userId, movieId and rating; timestamp is not included.
# NOTE(review): the helper is spelled `format_surpsise` -- presumably a typo for
# "surprise"; any rename would have to start in src/features. Confirm before changing.
ratings = features.format_surpsise(df_ratings, ['userId', 'movieId', 'rating'])

# Cross-Validation

In [7]:
# 3 Fold
# Runs model.cv on `ratings`, requesting the RMSE and MAE measures; the trailing 3
# is presumably the number of folds (the printed report mentions 3 splits) -- confirm
# against model.cv. The returned value is kept in cv_3.
cv_3 = model.cv(ratings, ['RMSE', 'MAE'], 3)

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8734  0.8847  0.8809  0.8797  0.0047  
MAE (testset)     0.6744  0.6797  0.6765  0.6769  0.0022  
Fit time          0.48    0.43    0.35    0.42    0.05    
Test time         0.11    0.11    0.10    0.11    0.01    


In [8]:
# 5 Fold
# Same call as the 3-fold run but with 5 as the last argument, presumably the number
# of folds (the printed report mentions 5 splits) -- confirm against model.cv.
# The returned value is kept in cv_5.
cv_5 = model.cv(ratings, ['RMSE', 'MAE'], 5)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8755  0.8726  0.8752  0.8720  0.8705  0.8731  0.0019  
MAE (testset)     0.6722  0.6700  0.6739  0.6676  0.6662  0.6700  0.0028  
Fit time          0.44    0.42    0.45    0.42    0.42    0.43    0.01    
Test time         0.04    0.04    0.07    0.07    0.04    0.05    0.02    


# User's Liked Movies

In [9]:
# Show 5 entries (num_prefs=5) for user 1 via features.preferences.
# NOTE(review): the rendered output pairs movieId 1 with "Jumanji (1995)", while
# df_movies lists movieId 1 as "Toy Story (1995)" and movieId 2 as "Jumanji (1995)".
# The titles look shifted by one row -- check how features.preferences maps
# movieId to title (possible positional lookup instead of a key-based one).
features.preferences(df_movies, df_ratings, num_prefs=5, userID=1)

Unnamed: 0_level_0,title
movieId,Unnamed: 1_level_1
1,Jumanji (1995)
3,Waiting to Exhale (1995)
6,Sabrina (1995)
110,Jupiter's Wife (1994)
235,Mary Shelley's Frankenstein (Frankenstein) (1994)


# Recommendations for User

In [10]:
# Request 10 recommendations (num_recs=10) for user 1 via model.recomendations.
# NOTE(review): the rendered rows follow df_movies order (index 0-9) and are not
# sorted by Estimate_Score; they also include "Toy Story (1995)", which user 1
# has already rated (movieId 1). Verify whether model.recomendations is meant to
# rank by score and exclude already-rated titles.
model.recomendations(df_movies, ratings, num_recs=10, userID=1)

Unnamed: 0,title,genres,Estimate_Score
0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,4.74464
1,Jumanji (1995),Adventure|Children|Fantasy,4.208574
2,Grumpier Old Men (1995),Comedy|Romance,4.035252
3,Waiting to Exhale (1995),Comedy|Drama|Romance,3.697722
4,Father of the Bride Part II (1995),Comedy,3.814337
5,Heat (1995),Action|Crime|Thriller,4.663155
6,Sabrina (1995),Comedy|Romance,3.661443
7,Tom and Huck (1995),Adventure|Children,3.800016
8,Sudden Death (1995),Action,3.910447
9,GoldenEye (1995),Action|Adventure|Thriller,4.206205
