In [57]:
from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate

def run_surprise(filename, bias):
    # Load the movielens-100k dataset (download it if needed).
    data = Dataset.load_builtin('ml-100k')
    # Use to load our own data
    reader = Reader(line_format='user item rating', sep='\t')
    data = Dataset.load_from_file(filename, reader=reader)
    # Use the famous SVD algorithm.
    algo = SVD(biased=bias, n_factors=20)

    # Run 5-fold cross-validation and print results.
    cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

    U_surprise = algo.pu
    V_surprise = algo.qi
    
    return U_surprise, V_surprise

In [58]:
# biased training error
U1, V1 = run_surprise('train.txt', 'true')

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9430  0.9350  0.9377  0.9421  0.9358  0.9387  0.0032  
MAE (testset)     0.7455  0.7373  0.7386  0.7438  0.7396  0.7410  0.0032  
Fit time          1.94    1.93    2.03    2.00    2.11    2.00    0.06    
Test time         0.17    0.15    0.13    0.16    0.22    0.17    0.03    


In [59]:
# biased testing error
U2, V2 = run_surprise('test.txt', 'true')

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0089  0.9986  1.0033  1.0258  0.9972  1.0067  0.0104  
MAE (testset)     0.8046  0.8008  0.8100  0.8224  0.7940  0.8064  0.0096  
Fit time          0.26    0.28    0.24    0.24    0.21    0.25    0.02    
Test time         0.02    0.02    0.02    0.02    0.02    0.02    0.00    


In [60]:
# no bias training error
U3, V3 = run_surprise('train.txt', 'false')

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9272  0.9528  0.9333  0.9352  0.9406  0.9378  0.0086  
MAE (testset)     0.7294  0.7522  0.7377  0.7379  0.7440  0.7402  0.0076  
Fit time          2.28    2.46    2.46    2.51    2.36    2.41    0.08    
Test time         0.21    0.17    0.23    0.20    0.13    0.19    0.03    


In [61]:
# no bias test error
U3, V3 = run_surprise('test.txt', 'false')

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0007  1.0165  0.9895  0.9987  1.0070  1.0025  0.0090  
MAE (testset)     0.8046  0.8134  0.7918  0.8013  0.8052  0.8033  0.0070  
Fit time          0.20    0.20    0.20    0.20    0.20    0.20    0.00    
Test time         0.02    0.01    0.02    0.01    0.01    0.02    0.00    
