In [1]:
from surprise import SVD
from surprise import SVDpp
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise import KNNBasic
from surprise.prediction_algorithms.slope_one import SlopeOne

import pandas as pd

In [2]:
movie_data = Dataset.load_builtin('ml-1m')

In [3]:
jester_data = Dataset.load_builtin('jester')

In [None]:
df = pd.DataFrame(movie_data.raw_ratings)

In [None]:
print(df.head())

In [None]:
print("Users=" ,len(df[0].unique()))
print("Items=" ,len(df[1].unique()))
print("Ratings=", len(df))

In [4]:
movies_trainset, movies_testset = train_test_split(movie_data, test_size=.25)
jester_trainset, jester_testset = train_test_split(jester_data, test_size=.25)

In [26]:
def run_algo(algo, train_set, test_set):
    algo.fit(train_set)
    predictions = algo.test(test_set)
    accuracy.rmse(predictions);
    accuracy.mae(predictions);

In [17]:
%%time
run_algo(KNNBasic(), movies_trainset, movies_testset)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9276
MAE:  0.7318


In [6]:
%%time
run_algo(SlopeOne(), movies_trainset, movies_testset)

RMSE: 0.9085
MAE:  0.7161


In [55]:
%%time
run_algo(SVD(n_epochs=20, n_factors=100, reg_all=0.02), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_all=0.01), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_all=0.015), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_pu=0.01), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_pu=0.015), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_pu=0.025), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_qi=0.01), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_qi=0.015), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_qi=0.025), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_bi=0.01), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_bi=0.015), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_bi=0.025), movies_trainset, movies_testset)

RMSE: 0.8788
MAE:  0.6904
RMSE: 0.8972
MAE:  0.7027
RMSE: 0.8861
MAE:  0.6948
RMSE: 0.8874
MAE:  0.6963
RMSE: 0.8834
MAE:  0.6936
RMSE: 0.8776
MAE:  0.6898
RMSE: 0.8910
MAE:  0.6991
RMSE: 0.8834
MAE:  0.6938
RMSE: 0.8776
MAE:  0.6896
RMSE: 0.8802
MAE:  0.6910
RMSE: 0.8799
MAE:  0.6913
RMSE: 0.8792
MAE:  0.6911
CPU times: user 8min 54s, sys: 904 ms, total: 8min 55s
Wall time: 8min 54s


In [57]:
run_algo(SVD(n_epochs=20, n_factors=100, reg_bu=0.01), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_bu=0.015), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=20, n_factors=100, reg_bu=0.025), movies_trainset, movies_testset)

RMSE: 0.8799
MAE:  0.6912
RMSE: 0.8799
MAE:  0.6916
RMSE: 0.8797
MAE:  0.6909


In [58]:
%%time
# n_epochs=210, n_factors=4
run_algo(SVD(n_epochs=250, n_factors=4, lr_all=0.004), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=210, n_factors=4, lr_all=0.003), movies_trainset, movies_testset)
run_algo(SVD(n_epochs=210, n_factors=4, lr_all=0.006), movies_trainset, movies_testset)

RMSE: 0.8699
MAE:  0.6814
RMSE: 0.8669
MAE:  0.6786
RMSE: 0.8687
MAE:  0.6798
CPU times: user 6min 44s, sys: 180 ms, total: 6min 44s
Wall time: 6min 44s


In [11]:
%%time
algo = SVDpp()
algo.fit(movies_trainset)
predictions = algo.test(movies_testset)
accuracy.rmse(predictions);
accuracy.mae(predictions);

RMSE: 0.8667
MAE:  0.6768
CPU times: user 51min 1s, sys: 1.44 s, total: 51min 2s
Wall time: 50min 59s


In [8]:
%%time
temp = cross_validate(SVD(), movie_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8738  0.8742  0.8743  0.8760  0.8724  0.8741  0.0011  
MAE (testset)     0.6861  0.6869  0.6861  0.6873  0.6845  0.6862  0.0010  
Fit time          95.10   97.54   98.21   99.18   47.11   87.43   20.21   
Test time         3.99    4.16    3.97    2.61    1.66    3.28    0.98    
CPU times: user 15.7 s, sys: 500 ms, total: 16.2 s
Wall time: 2min 32s


In [9]:
%%time
temp = cross_validate(SlopeOne(), movie_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SlopeOne on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9056  0.9057  0.9071  0.9057  0.9085  0.9065  0.0011  
MAE (testset)     0.7133  0.7143  0.7147  0.7133  0.7165  0.7144  0.0012  
Fit time          22.94   21.89   23.05   22.40   14.14   20.88   3.40    
Test time         88.36   88.01   89.11   87.41   32.58   77.09   22.27   
CPU times: user 17.6 s, sys: 552 ms, total: 18.1 s
Wall time: 2min 42s
