In [32]:
import pandas as pd
from surprise import SVD, KNNWithZScore, CoClustering, NMF, Dataset
from surprise.reader import Reader
from surprise.model_selection import cross_validate, GridSearchCV

In [3]:
# Read the ratings table from its JSON file into a DataFrame.
ratings_red = pd.read_json(path_or_buf='ratings_vreduced.json')

In [6]:
# Preview the first five rows of the loaded frame.
ratings_red.head(n=5)

Unnamed: 0,gameid,score,user
0,1,10,aSoso
1,1,7,Annagul
10,1,8,Rygel
1000,1,9,gutrie_es
10000,10,6,byturn


In [12]:
# Replace user names and game IDs with consecutive integer indices.

# Each distinct value is numbered 0, 1, 2, ... in order of first appearance.
userdict = {name: idx for idx, name in enumerate(ratings_red.user.unique())}
gamedict = {gid: idx for idx, gid in enumerate(ratings_red.gameid.unique())}

# Swap the raw identifiers for their indices, then keep only the three
# columns used downstream, ordered as (user, gameid, score).
ratings_red = ratings_red.assign(
    user=ratings_red['user'].map(userdict.get),
    gameid=ratings_red['gameid'].map(gamedict.get),
)[['user', 'gameid', 'score']]
ratings_red.head()

Unnamed: 0,user,gameid,score
0,0,0,10
1,1,0,7
10,2,0,8
1000,3,0,9
10000,4,1,6


In [13]:
# Declare the rating scale as 1-10 and wrap the ratings frame in a
# surprise Dataset. NOTE(review): load_from_df reads the columns
# positionally as (user, item, rating) - confirm the frame is in that order.
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(df=ratings_red, reader=reader)

In [23]:
# SVD with its default settings, scored on RMSE and MAE over 5 CV splits.
# The return value is bound to `_`; verbose=True prints the per-fold table.
algo = SVD()
_ = cross_validate(
    algo=algo,
    data=data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     0.8635  0.8631  0.8626  0.8617  0.8617  0.8625  0.0007  
RMSE (testset)    1.1718  1.1738  1.1715  1.1713  1.1705  1.1718  0.0011  
Fit time          64.24   63.30   64.17   64.31   63.16   63.84   0.50    
Test time         4.02    3.97    3.90    3.88    3.88    3.93    0.06    


In [26]:
# KNNWithZScore with its default settings, scored on RMSE and MAE over
# 5 CV splits. The return value is bound to `_`; verbose=True prints the
# per-fold table.
algo2 = KNNWithZScore()
_ = cross_validate(
    algo=algo2,
    data=data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithZScore on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     0.8888  0.8879  0.8867  0.8882  0.8886  0.8880  0.0007  
RMSE (testset)    1.2093  1.2078  1.2047  1.2077  1.2098  1.2078  0.0018  
Fit time          154.59  151.82  154.28  155.49  152.99  153.83  1.29    
Test time         319.70  321.83  324.62  314.65  315.22  319.20  3.82    


In [29]:
# CoClustering with its default settings, scored on RMSE and MAE over
# 5 CV splits. The return value is bound to `_`; verbose=True prints the
# per-fold table.
algo3 = CoClustering()
_ = cross_validate(
    algo=algo3,
    data=data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

Evaluating RMSE, MAE of algorithm CoClustering on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     0.9016  0.9050  0.9019  0.9022  0.9021  0.9026  0.0012  
RMSE (testset)    1.2101  1.2127  1.2094  1.2091  1.2094  1.2101  0.0013  
Fit time          36.84   37.51   37.84   37.66   37.96   37.56   0.39    
Test time         2.75    4.41    4.41    4.60    4.47    4.13    0.69    


In [31]:
# NMF with its default settings, scored on RMSE and MAE over 5 CV splits.
# The return value is bound to `_`; verbose=True prints the per-fold table.
algo4 = NMF()
_ = cross_validate(
    algo=algo4,
    data=data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     1.4608  1.4560  1.4538  1.4443  1.4436  1.4517  0.0067  
RMSE (testset)    1.7208  1.7179  1.7157  1.7065  1.7055  1.7133  0.0062  
Fit time          68.38   68.46   68.52   68.19   68.36   68.38   0.11    
Test time         5.19    5.48    4.57    5.03    5.10    5.08    0.30    


In [36]:
# Sweep SVD's `n_factors` over 20-80, scoring each candidate on RMSE and
# MAE with 3 CV splits.
param_grid = {'n_factors': [20, 40, 60, 80]}
gs = GridSearchCV(
    algo_class=SVD,
    param_grid=param_grid,
    measures=['RMSE', 'MAE'],
    cv=3,
)
gs.fit(data)

In [37]:
# Print the best RMSE score from the grid search, followed by the
# parameter set that achieved it.
for summary in (gs.best_score, gs.best_params):
    print(summary['rmse'])

1.1573581656361576
{'n_factors': 20}


In [44]:
# Sweep SVD's `n_factors` over the smaller values 8-14, scoring each
# candidate on RMSE and MAE with 3 CV splits.
param_grid = {'n_factors': [8, 10, 12, 14]}
gs = GridSearchCV(
    algo_class=SVD,
    param_grid=param_grid,
    measures=['RMSE', 'MAE'],
    cv=3,
)
gs.fit(data)

In [45]:
# Print the best RMSE score from the grid search, followed by the
# parameter set that achieved it.
for summary in (gs.best_score, gs.best_params):
    print(summary['rmse'])

1.1535392511931557
{'n_factors': 12}
