In [None]:
from surprise import Dataset
from surprise import Reader
reader = Reader(rating_scale=(0, 5))
raw = Dataset.load_builtin("ml-100k")

## Co-Clustering
Using GridSearchCV to find a good combination of parameters.

In [3]:
from surprise import CoClustering
from surprise.model_selection import GridSearchCV
ccparams = {"n_cltr_u": list(range(1, 6)), "n_cltr_i": list(range(1, 6))}
ccgrid = GridSearchCV(CoClustering, ccparams, cv=5)
ccgrid.fit(raw)

In [4]:
ccgrid.best_params

{'rmse': {'n_cltr_u': 5, 'n_cltr_i': 2}, 'mae': {'n_cltr_u': 5, 'n_cltr_i': 2}}

In [5]:
ccgrid.best_estimator

{'rmse': <surprise.prediction_algorithms.co_clustering.CoClustering at 0x72ab41940dd0>,
 'mae': <surprise.prediction_algorithms.co_clustering.CoClustering at 0x72ab75891610>}

In [6]:
rmse = ccgrid.best_estimator['rmse']
mae = ccgrid.best_estimator['mae']
from surprise.model_selection import cross_validate
cross_validate(rmse, raw, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm CoClustering on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9489  0.9488  0.9602  0.9635  0.9507  0.9544  0.0062  
MAE (testset)     0.7445  0.7382  0.7530  0.7540  0.7432  0.7466  0.0060  
Fit time          2.18    2.22    2.40    2.09    2.25    2.23    0.10    
Test time         0.16    0.18    0.31    0.19    0.16    0.20    0.06    


{'test_rmse': array([0.94892456, 0.94882581, 0.96015476, 0.96350833, 0.95071085]),
 'test_mae': array([0.74447682, 0.73819899, 0.75296697, 0.75395385, 0.743239  ]),
 'fit_time': (2.1805832386016846,
  2.216141939163208,
  2.4029719829559326,
  2.0935311317443848,
  2.2541615962982178),
 'test_time': (0.15508341789245605,
  0.17817902565002441,
  0.31122422218322754,
  0.1940610408782959,
  0.15544629096984863)}

In [7]:
cross_validate(mae, raw, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm CoClustering on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9505  0.9458  0.9555  0.9778  0.9560  0.9571  0.0110  
MAE (testset)     0.7439  0.7408  0.7473  0.7664  0.7459  0.7489  0.0090  
Fit time          2.24    2.19    2.13    2.17    2.31    2.21    0.06    
Test time         0.17    0.16    0.27    0.16    0.18    0.19    0.04    


{'test_rmse': array([0.95048057, 0.94584899, 0.95550953, 0.97783352, 0.95602211]),
 'test_mae': array([0.74387804, 0.7408266 , 0.74734955, 0.76642007, 0.7458593 ]),
 'fit_time': (2.235046625137329,
  2.194749355316162,
  2.132270097732544,
  2.1661503314971924,
  2.305774211883545),
 'test_time': (0.17203807830810547,
  0.15787315368652344,
  0.27431535720825195,
  0.1592111587524414,
  0.17817354202270508)}

## KNN with Means

In [12]:
from surprise import KNNWithMeans
knn = KNNWithMeans()

In [13]:
cross_validate(knn, raw, measures=["RMSE", "MAE"], cv=5, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9522  0.9523  0.9417  0.9572  0.9504  0.9508  0.0051  
MAE (testset)     0.7499  0.7491  0.7437  0.7534  0.7503  0.7493  0.0032  
Fit time          0.61    0.48    0.46    0.56    0.52    0.53    0.05    
Test time         4.08    4.32    4.61    3.97    4.27    4.25    0.22    


{'test_rmse': array([0.95221755, 0.95230211, 0.94169482, 0.9571584 , 0.95039039]),
 'test_mae': array([0.7499    , 0.74907718, 0.74367276, 0.75343872, 0.75028468]),
 'fit_time': (0.6090817451477051,
  0.48448610305786133,
  0.45854878425598145,
  0.5571496486663818,
  0.5180301666259766),
 'test_time': (4.08277440071106,
  4.316580295562744,
  4.605699300765991,
  3.9686379432678223,
  4.270847320556641)}

## SVD
Using GridSearchCV to find a good combination of parameters. 

In [14]:
import numpy as np
from surprise import SVD
svdparams = {"n_factors": np.arange(1, 100, 20), "n_epochs": np.arange(10, 40, 10), "biased": ["True", "False"], "lr_all": np.linspace(0.001, 0.05, 5)}
svdgrid = GridSearchCV(SVD, svdparams, cv=5)
svdgrid.fit(raw)

In [15]:
svdgrid.best_params

{'rmse': {'n_factors': 1,
  'n_epochs': 30,
  'biased': 'False',
  'lr_all': 0.013250000000000001},
 'mae': {'n_factors': 1,
  'n_epochs': 30,
  'biased': 'False',
  'lr_all': 0.013250000000000001}}

In [17]:
svdgrid.best_estimator

{'rmse': <surprise.prediction_algorithms.matrix_factorization.SVD at 0x72ab717a26d0>,
 'mae': <surprise.prediction_algorithms.matrix_factorization.SVD at 0x72ab1b2a3750>}

In [18]:
s_rmse = svdgrid.best_estimator['rmse']
s_mae = svdgrid.best_estimator['mae']
cross_validate(s_rmse, raw, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9430  0.9299  0.9263  0.9244  0.9126  0.9272  0.0098  
MAE (testset)     0.7423  0.7297  0.7273  0.7276  0.7166  0.7287  0.0082  
Fit time          0.86    0.89    0.87    0.97    0.85    0.89    0.04    
Test time         0.20    0.39    0.21    0.20    0.20    0.24    0.07    


{'test_rmse': array([0.9429702 , 0.9298615 , 0.92625523, 0.92437017, 0.91263989]),
 'test_mae': array([0.74227696, 0.72970723, 0.72729536, 0.72762174, 0.71656072]),
 'fit_time': (0.8615038394927979,
  0.8924663066864014,
  0.8694305419921875,
  0.9712057113647461,
  0.8495841026306152),
 'test_time': (0.20369315147399902,
  0.3903484344482422,
  0.20852971076965332,
  0.20216751098632812,
  0.19833755493164062)}

In [19]:
cross_validate(s_mae, raw, measures=["RMSE", "MAE"], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9252  0.9254  0.9158  0.9261  0.9200  0.9225  0.0040  
MAE (testset)     0.7314  0.7280  0.7187  0.7277  0.7238  0.7259  0.0043  
Fit time          1.04    0.94    0.91    0.86    0.88    0.93    0.07    
Test time         0.23    0.21    0.20    0.39    0.20    0.25    0.07    


{'test_rmse': array([0.92518352, 0.92543931, 0.91580343, 0.92614976, 0.91999475]),
 'test_mae': array([0.73138317, 0.72799225, 0.71873941, 0.72770596, 0.72376721]),
 'fit_time': (1.044694185256958,
  0.9365847110748291,
  0.9054262638092041,
  0.8577315807342529,
  0.8810992240905762),
 'test_time': (0.23189496994018555,
  0.2088789939880371,
  0.20484590530395508,
  0.38657569885253906,
  0.1961688995361328)}