In [60]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate
from surprise import Dataset, Reader, SVD, SVDpp, NMF

In [61]:
table = pd.read_csv("ratings.csv")
table_new = table.drop(table.columns[-1], axis=1)

In [62]:
reader = Reader(rating_scale=(1, 5))

In [63]:
data = Dataset.load_from_df(pd.DataFrame(table_new, columns=['userId', 'movieId', 'rating']), reader)


In [64]:
trainset, testset = train_test_split(data, test_size=0.25)

In [65]:
model = SVD()

In [66]:
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x20c5bb03f90>

In [67]:
predictions = model.test(testset)

In [68]:
cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8696  0.8725  0.8684  0.8718  0.8833  0.8731  0.0053  
MAE (testset)     0.6678  0.6704  0.6663  0.6721  0.6775  0.6708  0.0039  
Fit time          0.99    0.89    0.92    0.90    0.93    0.93    0.04    
Test time         0.19    0.07    0.06    0.07    0.06    0.09    0.05    


{'test_rmse': array([0.86957667, 0.87246172, 0.86844883, 0.87182481, 0.88331809]),
 'test_mae': array([0.66778319, 0.67039109, 0.66626123, 0.67206862, 0.67754106]),
 'fit_time': (0.9933233261108398,
  0.8896629810333252,
  0.9154019355773926,
  0.9024519920349121,
  0.9323122501373291),
 'test_time': (0.1864323616027832,
  0.07301783561706543,
  0.06139183044433594,
  0.06954264640808105,
  0.062255144119262695)}

In [69]:
models = {
    'SVD': SVD(),
    'SVD++': SVDpp(),
    'NMF': NMF()
}

In [70]:
results = {}

In [72]:
for name, model in models.items():
    cv_results = cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=False)
    results[name] = {
        'RMSE': cv_results['test_rmse'].mean(),
        'MAE': cv_results['test_mae'].mean() }

In [73]:
for model_name, metrics in results.items():
    print(f"{model_name}: RMSE = {metrics['RMSE']:.4f}, MAE = {metrics['MAE']:.4f}")

SVD: RMSE = 0.8739, MAE = 0.6716
SVD++: RMSE = 0.8604, MAE = 0.6602
NMF: RMSE = 0.9208, MAE = 0.7062
