In [1]:
from surprise import Dataset
from surprise import SVD
from surprise.model_selection import cross_validate

# Load the movielens-100k dataset
data = Dataset.load_builtin('ml-100k')

# Use the famous SVD algorithm
algo = SVD()

# Perform cross-validation
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)


Dataset ml-100k could not be found. Do you want to download it? [Y/n] 

 Y


Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to C:\Users\Dmytro/.surprise_data/ml-100k
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9354  0.9412  0.9355  0.9295  0.9442  0.9371  0.0051  
MAE (testset)     0.7390  0.7415  0.7391  0.7294  0.7420  0.7382  0.0046  
Fit time          0.77    0.77    0.77    0.77    0.78    0.77    0.00    
Test time         0.07    0.07    0.10    0.10    0.10    0.09    0.01    


{'test_rmse': array([0.93535652, 0.94124845, 0.93545333, 0.92950622, 0.94416133]),
 'test_mae': array([0.73904957, 0.74147836, 0.73909248, 0.72943359, 0.74203729]),
 'fit_time': (0.7664635181427002,
  0.7739138603210449,
  0.7721354961395264,
  0.7743098735809326,
  0.7806704044342041),
 'test_time': (0.07164669036865234,
  0.06847667694091797,
  0.09867310523986816,
  0.0993344783782959,
  0.10248613357543945)}

In [2]:
from surprise.model_selection import GridSearchCV

# Define a parameter grid to search over
param_grid = {
    'n_epochs': [5, 10, 20], # Number of epochs
    'lr_all': [0.002, 0.005, 0.01], # Learning rate
    'reg_all': [0.02, 0.1, 0.5] # Regularization term
}

# Set up GridSearchCV with SVD algorithm, using RMSE as the evaluation metric
gs = GridSearchCV(SVD, param_grid, measures=['RMSE'], cv=3)

# Perform the grid search on the data
gs.fit(data)

# Best RMSE score
print(f"Best RMSE score: {gs.best_score['rmse']}")

# Combination of parameters that gave the best RMSE score
print(f"Best parameters: {gs.best_params['rmse']}")


Best RMSE score: 0.9279010537341783
Best parameters: {'n_epochs': 20, 'lr_all': 0.01, 'reg_all': 0.1}


In [3]:
from surprise import SVDpp, NMF
from surprise.model_selection import cross_validate

# SVD++
print('Evaluating SVD++...')
algo_svdpp = SVDpp(n_epochs=20, lr_all=0.01, reg_all=0.1)
cross_validate(algo_svdpp, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

# NMF
print('Evaluating NMF...')
algo_nmf = NMF(n_epochs=20, reg_pu=0.1, reg_qi=0.1)  # Note: reg_pu & reg_qi are regularization terms for NMF
cross_validate(algo_nmf, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)


Evaluating SVD++...
Evaluating RMSE, MAE of algorithm SVDpp on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9279  0.9304  0.9279  0.9287  0.0012  
MAE (testset)     0.7356  0.7354  0.7344  0.7351  0.0005  
Fit time          8.13    8.12    8.10    8.12    0.01    
Test time         3.36    3.36    3.57    3.43    0.10    
Evaluating NMF...
Evaluating RMSE, MAE of algorithm NMF on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9714  0.9635  0.9607  0.9652  0.0045  
MAE (testset)     0.7655  0.7574  0.7579  0.7603  0.0037  
Fit time          0.47    0.45    0.43    0.45    0.02    
Test time         0.14    0.15    0.15    0.15    0.01    


{'test_rmse': array([0.97143893, 0.96351638, 0.96073886]),
 'test_mae': array([0.76550062, 0.75739384, 0.75789007]),
 'fit_time': (0.4718654155731201, 0.4509406089782715, 0.43446922302246094),
 'test_time': (0.13905739784240723, 0.1515824794769287, 0.14684224128723145)}

In [5]:
# SVD++ had a slightly lower RMSE than SVD, indicating better accuracy.
# NMF had a higher RMSE and MAE than both SVD variants, suggesting it may not predict as accurately in this context.
# NMF was faster in terms of fit time compared to SVD++, but given its lower accuracy, the speed advantage may not be beneficial.
# SVD was likely the fastest in terms of fit time and had decent accuracy, making it a good candidate for scenarios where speed is essential.


# SVD++ might be the best choice despite its longer training time.
# However, if you need a balance between speed and accuracy, the tuned SVD model might be the way to go

SyntaxError: invalid syntax (3210887690.py, line 7)