In [1]:
import random

import numpy as np
import pandas as pd
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate

In [3]:
# Load the built-in MovieLens 100k ratings, downloading them first if they
# are not already available locally.
data = Dataset.load_builtin(name='ml-100k')


In [4]:
# Use the famous SVD algorithm. No arguments are passed, so the model keeps
# the library's default hyperparameters.
algo = SVD()

In [5]:
# Seed the RNGs before evaluating. Per the Surprise FAQ on reproducible
# experiments, the cross-validation folds and the algorithm's initial
# parameters are drawn at random, so without a fixed seed this cell reports
# different scores on every run. Scores from a seeded run will differ slightly
# from those of an unseeded one.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Run 5-fold cross-validation and print results.
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9409  0.9360  0.9382  0.9313  0.9348  0.9363  0.0032  
MAE (testset)     0.7450  0.7387  0.7390  0.7294  0.7384  0.7381  0.0050  
Fit time          3.84    4.08    3.75    3.73    3.73    3.83    0.13    
Test time         0.16    0.12    0.13    0.10    0.12    0.12    0.02    


{'test_rmse': array([0.9409364 , 0.93601127, 0.93817767, 0.93129908, 0.93484235]),
 'test_mae': array([0.74500603, 0.73865044, 0.73897806, 0.72943888, 0.73837882]),
 'fit_time': (3.840437412261963,
  4.081766128540039,
  3.7547361850738525,
  3.7348825931549072,
  3.7328202724456787),
 'test_time': (0.15945887565612793,
  0.11605668067932129,
  0.12658381462097168,
  0.09699463844299316,
  0.12009644508361816)}

In [7]:
from surprise.model_selection import GridSearchCV

In [9]:
# Candidate SVD hyperparameters: two values for each of three settings.
param_grid = {
    'n_epochs': [5, 10],
    'lr_all': [0.002, 0.005],
    'reg_all': [0.4, 0.6],
}

# Evaluate the grid on RMSE and MAE using 3-fold cross-validation.
gs = GridSearchCV(
    algo_class=SVD,
    param_grid=param_grid,
    measures=['rmse', 'mae'],
    cv=3,
)
gs.fit(data)

In [10]:
# Best RMSE score found by the grid search.
best_rmse = gs.best_score['rmse']
print(best_rmse)

0.9637801329566301


In [11]:
# Hyperparameter combination that produced the best RMSE score.
best_rmse_params = gs.best_params['rmse']
print(best_rmse_params)

{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}


In [12]:
# Build a trainset from the whole dataset, then fit the estimator that the
# grid search selected for RMSE on it.
full_trainset = data.build_full_trainset()
algo = gs.best_estimator['rmse']
algo.fit(full_trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x25d3c3fae20>

In [14]:
# Collect the grid-search results (per-split scores, timings and the
# parameters tried) into a DataFrame for inspection. pandas is imported in the
# notebook's top import cell rather than here, so all dependencies are
# declared in one place.
results_df = pd.DataFrame.from_dict(gs.cv_results)

In [15]:
# Display the results table: one row per parameter combination tried.
results_df

Unnamed: 0,split0_test_rmse,split1_test_rmse,split2_test_rmse,mean_test_rmse,std_test_rmse,rank_test_rmse,split0_test_mae,split1_test_mae,split2_test_mae,mean_test_mae,std_test_mae,rank_test_mae,mean_fit_time,std_fit_time,mean_test_time,std_test_time,params,param_n_epochs,param_lr_all,param_reg_all
0,0.994065,1.000484,0.997585,0.997378,0.002625,7,0.805854,0.805543,0.806718,0.806038,0.000497,7,0.841784,0.025502,0.230188,0.029757,"{'n_epochs': 5, 'lr_all': 0.002, 'reg_all': 0.4}",5,0.002,0.4
1,0.999936,1.006253,1.003292,1.00316,0.002581,8,0.814226,0.814122,0.81517,0.814506,0.000471,8,0.822612,0.015063,0.225535,0.037408,"{'n_epochs': 5, 'lr_all': 0.002, 'reg_all': 0.6}",5,0.002,0.6
2,0.970355,0.976613,0.973925,0.973631,0.002564,3,0.781266,0.781028,0.783076,0.78179,0.000914,2,0.789614,0.019094,0.24109,0.017107,"{'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.4}",5,0.005,0.4
3,0.979073,0.985484,0.982546,0.982368,0.00262,5,0.792069,0.792325,0.793802,0.792732,0.000764,5,0.775163,0.045878,0.221267,0.020754,"{'n_epochs': 5, 'lr_all': 0.005, 'reg_all': 0.6}",5,0.005,0.6
4,0.974592,0.980925,0.97824,0.977919,0.002595,4,0.785609,0.785213,0.787279,0.786034,0.000896,4,1.58767,0.060764,0.211776,0.033201,"{'n_epochs': 10, 'lr_all': 0.002, 'reg_all': 0.4}",10,0.002,0.4
5,0.982813,0.989182,0.986414,0.986136,0.002607,6,0.79596,0.795844,0.797514,0.796439,0.000761,6,1.720859,0.190536,0.244464,0.070193,"{'n_epochs': 10, 'lr_all': 0.002, 'reg_all': 0.6}",10,0.002,0.6
6,0.960716,0.966758,0.963866,0.96378,0.002467,1,0.771927,0.772113,0.773774,0.772605,0.00083,1,1.977869,0.210116,0.239593,0.041171,"{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}",10,0.005,0.4
7,0.970472,0.976461,0.973832,0.973588,0.002451,2,0.783514,0.783849,0.785454,0.784272,0.000847,3,1.632751,0.037175,0.226875,0.024939,"{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.6}",10,0.005,0.6
