In [128]:
from surprise import SVD
from surprise import Dataset
from surprise import accuracy

from surprise.model_selection import train_test_split
from surprise.model_selection import GridSearchCV
from surprise.model_selection import cross_validate

In [2]:
data = Dataset.load_builtin('ml-100k')

In [3]:
trainset, testset = train_test_split(data, test_size=.25)

In [83]:
def TrainAndTestModel(alg):
    alg.fit(trainset)
    predictions=alg.test(testset)
    accuracy.rmse(predictions)
    accuracy.mae(predictions)
    cross_validate(alg, data, measures=['RMSE', 'MAE', 'FCP'], cv=5, verbose=True)

## 0. Random

In [136]:
from surprise.prediction_algorithms.random_pred import NormalPredictor

algo_r = NormalPredictor()
TrainAndTestModel(algo_r)

RMSE: 1.5165
MAE:  1.2197
Evaluating RMSE, MAE, FCP of algorithm NormalPredictor on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.5187  1.5228  1.5228  1.5224  1.5086  1.5190  0.0055  
MAE (testset)     1.2184  1.2195  1.2273  1.2238  1.2101  1.2198  0.0058  
FCP (testset)     0.5037  0.5004  0.4993  0.4975  0.5067  0.5015  0.0033  
Fit time          0.38    0.38    0.25    0.29    0.27    0.31    0.06    
Test time         0.30    0.25    0.22    0.28    0.22    0.25    0.03    


## 1. SVD (Singular Value Decomposition)

In [98]:
algo1 = SVD()

In [134]:
TrainAndTestModel(algo1)

RMSE: 0.9296
MAE:  0.7332
Evaluating RMSE, MAE, FCP of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9351  0.9386  0.9307  0.9428  0.9370  0.9368  0.0040  
MAE (testset)     0.7364  0.7403  0.7310  0.7403  0.7405  0.7377  0.0037  
FCP (testset)     0.7054  0.7000  0.7012  0.6917  0.6962  0.6989  0.0047  
Fit time          9.96    9.92    10.13   9.80    9.57    9.88    0.18    
Test time         0.26    0.22    0.22    0.22    0.23    0.23    0.01    


In [116]:
#Optimizacija parametara SVD default(15,20,0.005,0.02)

In [124]:
param_grid = {'n_factors':[5,10,15],'n_epochs': [5, 10, 15,20], 'lr_all': [0.002, 0.005, 0.01, 0.3],
              'reg_all': [0.01,0.02,0.04,0.4,0.6]}

In [118]:
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)

In [119]:
gs.fit(data)

In [120]:
print(gs.best_score['rmse'])

0.9342943563328268


In [125]:
print(gs.best_params['rmse'])
algo = gs.best_estimator['rmse']

{'n_factors': 10, 'n_epochs': 20, 'lr_all': 0.01, 'reg_all': 0.04}


In [123]:
TrainAndTestModel(algo)

RMSE: 0.9225
MAE:  0.7292
Evaluating RMSE, MAE, FCP of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9279  0.9248  0.9300  0.9220  0.9204  0.9250  0.0036  
MAE (testset)     0.7317  0.7275  0.7284  0.7285  0.7247  0.7281  0.0022  
FCP (testset)     0.7021  0.7097  0.7045  0.7070  0.7174  0.7081  0.0053  
Fit time          4.18    4.01    3.99    3.68    3.11    3.79    0.38    
Test time         0.23    0.36    0.32    0.28    0.21    0.28    0.06    


## 2. SVD++ (Singular Value Decomposition)

In [75]:
from surprise.prediction_algorithms.matrix_factorization import SVDpp

In [76]:
algo2 = SVDpp()
algo2.fit(trainset)
predictions2 = algo2.test(testset)

In [77]:
accuracy.rmse(predictions2)
accuracy.mae(predictions2)

RMSE: 0.9166
MAE:  0.7212


0.7211969567252345

## 3. NMF ( Non-negative Matrix Factorization )

In [144]:
from surprise.prediction_algorithms.matrix_factorization import NMF
algo3 = NMF()
TrainAndTestModel(algo3)

RMSE: 0.9591
MAE:  0.7550
Evaluating RMSE, MAE, FCP of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9654  0.9652  0.9587  0.9662  0.9662  0.9643  0.0029  
MAE (testset)     0.7575  0.7591  0.7555  0.7595  0.7582  0.7580  0.0014  
FCP (testset)     0.6901  0.6905  0.6800  0.6865  0.6934  0.6881  0.0046  
Fit time          12.78   12.74   13.37   13.20   9.93    12.40   1.26    
Test time         0.24    0.24    0.26    0.23    0.24    0.24    0.01    


In [139]:
param_grid1 = {'n_factors':[5,10,15,20], 'n_epochs':[25,50,75]}

In [140]:
gs1 = GridSearchCV(NMF, param_grid1, measures=['rmse', 'mae'], cv=3)

In [None]:
gs1.fit(data)

In [142]:
print(gs1.best_score['rmse'])

0.973010364187657


In [138]:
print(gs1.best_params['rmse'])
algo3 = gs1.best_estimator['rmse']
algo3.fit(data.build_full_trainset())
TrainAndTestModel(algo3)

{'n_factors': 5, 'n_epochs': 75}
RMSE: 0.9573
MAE:  0.7418
Evaluating RMSE, MAE, FCP of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9486  0.9624  0.9577  0.9642  0.9546  0.9575  0.0056  
MAE (testset)     0.7374  0.7479  0.7412  0.7468  0.7395  0.7425  0.0041  
FCP (testset)     0.7050  0.6996  0.6925  0.6932  0.7002  0.6981  0.0047  
Fit time          12.21   12.46   12.71   12.49   11.60   12.30   0.38    
Test time         0.26    0.25    0.27    0.33    0.19    0.26    0.05    


## 4. SlopeOne 

In [145]:
from surprise.prediction_algorithms.slope_one import SlopeOne
algo4 = SlopeOne()
TrainAndTestModel(algo4)

RMSE: 0.9350
MAE:  0.7361
Evaluating RMSE, MAE, FCP of algorithm SlopeOne on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9385  0.9437  0.9509  0.9450  0.9445  0.9445  0.0039  
MAE (testset)     0.7396  0.7404  0.7452  0.7456  0.7417  0.7425  0.0025  
FCP (testset)     0.6960  0.6953  0.6954  0.7004  0.6994  0.6973  0.0022  
Fit time          2.41    2.31    2.49    2.02    1.85    2.22    0.24    
Test time         6.33    6.34    6.77    6.64    4.77    6.17    0.72    


## 5. KNNBasic 

In [146]:
from surprise.prediction_algorithms.knns import KNNBasic
algo5 = KNNBasic()
TrainAndTestModel(algo5)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9724
MAE:  0.7682
Evaluating RMSE, MAE, FCP of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9826  0.9791  0.9739  0.9756  0.9824  0.9787  0.0035  
MAE (testset)     0.7769  0.7731  0.7680  0.7720  0.7758  0.7732  0.0031  
FCP (testset)     0.7006  0.7134  0.7061  0.7136  0.7091  0.7085  0.0049  
Fit time          1.47    1.29    1.66    1.52    0.87    1.36    0.27    
Test time         5.14    5.02    5.36    5.13    4.68    5.07    0.22    


In [147]:
#40,1
param_grid2 = {'k':[10,20,40,60], 'min_k':[1,3,4,5]}

In [148]:
gs2 = GridSearchCV(KNNBasic, param_grid2, measures=['rmse', 'mae'], cv=3)

In [149]:
gs2.fit(data)

In [150]:
print(gs2.best_score['rmse'])

0.9861530744908782


In [151]:
print(gs2.best_params['rmse'])
algo5 = gs1.best_estimator['rmse']
algo5.fit(data.build_full_trainset())

{'k': 20, 'min_k': 3}


<surprise.prediction_algorithms.matrix_factorization.NMF at 0xa2fbe50>

## 6. KNNWithZScore 

In [152]:
from surprise.prediction_algorithms.knns import KNNWithZScore
algo6 = KNNWithZScore()
TrainAndTestModel(algo6)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9451
MAE:  0.7423
Evaluating RMSE, MAE, FCP of algorithm KNNWithZScore on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9369  0.9458  0.9637  0.9595  0.9519  0.9515  0.0096  
MAE (testset)     0.7332  0.7401  0.7553  0.7541  0.7495  0.7464  0.0085  
FCP (testset)     0.7074  0.7056  0.6968  0.7035  0.7004  0.7027  0.0038  
Fit time          1.32    1.29    1.39    1.49    1.38    1.37    0.07    
Test time         7.06    7.14    6.28    6.36    7.01    6.77    0.37    


In [40]:
param_grid3 = {'k':[20,40,60], 'min_k':[1,3,5]}

In [41]:
gs3 = GridSearchCV(KNNWithZScore, param_grid3, measures=['rmse', 'mae'], cv=3)

In [42]:
gs3.fit(data)

In [43]:
print(gs3.best_score['rmse'])

0.9548599873047982


In [44]:
print(gs3.best_params['rmse'])
algo6 = gs3.best_estimator['rmse']
algo6.fit(data.build_full_trainset())

{'k': 60, 'min_k': 3}
Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithZScore at 0xa360670>

## 7. KNNBaseline 

In [153]:
from surprise.prediction_algorithms.knns import KNNBaseline

algo7 = KNNBaseline()
TrainAndTestModel(algo7)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9230
MAE:  0.7277
Evaluating RMSE, MAE, FCP of algorithm KNNBaseline on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9260  0.9319  0.9328  0.9318  0.9319  0.9309  0.0025  
MAE (testset)     0.7292  0.7353  0.7351  0.7319  0.7347  0.7332  0.0024  
FCP (testset)     0.7027  0.7028  0.7050  0.7055  0.7070  0.7046  0.0017  
Fit time          1.37    1.32    1.45    1.14    1.14    1.28    0.13    
Test time         7.39    7.36    6.64    6.62    5.96    6.79    0.53    


In [46]:
param_grid4 = {'k':[20,40,60], 'min_k':[1,3,5]
              }

In [47]:
gs4 = GridSearchCV(KNNBaseline, param_grid4, measures=['rmse', 'mae'], cv=3)

In [48]:
gs4.fit(data)

In [49]:
print(gs4.best_score['rmse'])

0.9340694348797752


In [50]:
print(gs4.best_params['rmse'])
algo7 = gs4.best_estimator['rmse']
algo7.fit(data.build_full_trainset())

{'k': 60, 'min_k': 5}
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBaseline at 0xe58ef10>

## 8. CoClustering 

In [154]:
from surprise.prediction_algorithms.co_clustering import CoClustering

algo8 = CoClustering()
TrainAndTestModel(algo8)

RMSE: 0.9662
MAE:  0.7577
Evaluating RMSE, MAE, FCP of algorithm CoClustering on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9592  0.9710  0.9637  0.9662  0.9697  0.9660  0.0042  
MAE (testset)     0.7495  0.7603  0.7550  0.7561  0.7590  0.7560  0.0038  
FCP (testset)     0.7045  0.6865  0.6944  0.6949  0.6938  0.6948  0.0057  
Fit time          3.26    3.23    3.29    2.84    2.58    3.04    0.28    
Test time         0.24    0.23    0.18    0.19    0.16    0.20    0.03    


In [52]:
param_grid7 = {'n_epochs':[25,50,75] ,'n_cltr_u':[1,3,5,7], 'n_cltr_i':[1,3,5,7]
              }

In [53]:
gs7 = GridSearchCV(CoClustering, param_grid7, measures=['rmse', 'mae'], cv=3)

In [54]:
gs7.fit(data)

In [55]:
print(gs7.best_score['rmse'])

0.9684113570378144


In [56]:
print(gs7.best_params['rmse'])
algo8 = gs7.best_estimator['rmse']
algo8.fit(data.build_full_trainset())

{'n_epochs': 75, 'n_cltr_u': 5, 'n_cltr_i': 3}


<surprise.prediction_algorithms.co_clustering.CoClustering at 0x1189ba90>

## 9. KNNWithMeans 

In [155]:
from surprise.prediction_algorithms.knns import KNNWithMeans
algo9 = KNNWithMeans()
TrainAndTestModel(algo9)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9451
MAE:  0.7452
Evaluating RMSE, MAE, FCP of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9446  0.9597  0.9572  0.9476  0.9446  0.9508  0.0064  
MAE (testset)     0.7446  0.7551  0.7555  0.7494  0.7427  0.7495  0.0052  
FCP (testset)     0.7054  0.7041  0.7012  0.6999  0.7057  0.7033  0.0023  
Fit time          1.12    0.97    1.21    1.07    0.91    1.05    0.11    
Test time         4.91    5.15    4.98    5.08    4.81    4.99    0.12    


In [58]:
param_grid5 = {'k':[40,60,80], 'min_k':[1,3,5,7]
              }

In [59]:
gs5 = GridSearchCV(KNNWithMeans, param_grid5, measures=['rmse', 'mae'], cv=3)

In [60]:
gs5.fit(data)

In [61]:
print(gs5.best_score['rmse'])

0.9556160524946807


In [62]:
print(gs5.best_params['rmse'])
algo9 = gs5.best_estimator['rmse']
algo9.fit(data.build_full_trainset())

{'k': 60, 'min_k': 3}
Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x137456b0>

## 10. BaselineOnly 

In [63]:
from surprise.prediction_algorithms.baseline_only import BaselineOnly
algo10 = BaselineOnly()
algo10.fit(trainset)
predictions10 = algo10.test(testset)
accuracy.rmse(predictions10)
accuracy.mae(predictions10)

Estimating biases using als...
RMSE: 0.9362
MAE:  0.7435


0.743474660829392

In [137]:
from surprise.prediction_algorithms.baseline_only import BaselineOnly
algo10 = BaselineOnly()
TrainAndTestModel(algo10)

Estimating biases using als...
RMSE: 0.9362
MAE:  0.7435
Evaluating RMSE, MAE, FCP of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9316  0.9483  0.9491  0.9481  0.9444  0.9443  0.0066  
MAE (testset)     0.7383  0.7527  0.7503  0.7495  0.7524  0.7486  0.0053  
FCP (testset)     0.7005  0.6970  0.6997  0.6869  0.6893  0.6947  0.0055  
Fit time          0.19    0.22    0.16    0.09    0.08    0.15    0.05    
Test time         0.28    0.25    0.22    0.28    0.23    0.25    0.03    


In [64]:
param_grid6 = {'bsl_options': {'method': ['als', 'sgd'],
                              'reg': [1, 2]}
              }

In [65]:
gs6 = GridSearchCV(BaselineOnly, param_grid6, measures=['rmse', 'mae'], cv=3)

In [66]:
gs6.fit(data)

In [67]:
print(gs6.best_score['rmse'])

0.9475444004779824


In [68]:
print(gs6.best_params['rmse'])
algo10 = gs6.best_estimator['rmse']
algo10.fit(data.build_full_trainset())

{'bsl_options': {'method': 'als', 'reg': 1}}
Estimating biases using als...


<surprise.prediction_algorithms.baseline_only.BaselineOnly at 0x13e1fcb0>