In [1]:
import pickle
import random

import numpy as np
import pandas as pd
from surprise import prediction_algorithms as pa
from surprise import Dataset, Reader
from surprise import evaluate, print_perf

In [2]:
# Read the ratings table from the local movielens_small folder and keep its
# row count around for reference.
data = pd.read_csv('./movielens_small/ratings.csv')
number_of_rows = data.shape[0]

In [3]:
# Evaluate a default-parameter SVD with 2, 3, 4 and 5 cross-validation folds.

# Seed both RNGs up front so that re-running this cell reproduces the same
# numbers: the Surprise FAQ recommends seeding Python's `random` (used when
# drawing folds) and numpy (used by some algorithms to initialise parameters).
RANDOM_SEED = 0
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Neither the reader nor the dataset depends on the fold count, so they are
# built once here rather than re-parsing the ratings frame on every iteration.
reader = Reader(rating_scale=(0.5, 5))
train_data = Dataset.load_from_df(data[['userId','movieId','rating']], reader)

# fold count -> object returned by evaluate(); previously each iteration
# overwrote `perf`, so only the last run's results survived the loop.
perf_by_nfolds = {}
for nfolds in [2,3,4,5]:
    algo = pa.matrix_factorization.SVD()

    # Re-partition the same dataset into `nfolds` folds for this pass.
    train_data.split(n_folds=nfolds)

    perf = evaluate(algo, train_data, measures=['RMSE', 'MAE','FCP'])
    perf_by_nfolds[nfolds] = perf

Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9127
MAE:  0.7035
FCP:  0.6191
------------
Fold 2
RMSE: 0.9136
MAE:  0.7052
FCP:  0.6189
------------
------------
Mean RMSE: 0.9132
Mean MAE : 0.7043
Mean FCP : 0.6190
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9003
MAE:  0.6947
FCP:  0.6363
------------
Fold 2
RMSE: 0.9056
MAE:  0.6973
FCP:  0.6294
------------
Fold 3
RMSE: 0.9048
MAE:  0.6954
FCP:  0.6337
------------
------------
Mean RMSE: 0.9036
Mean MAE : 0.6958
Mean FCP : 0.6331
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.8940
MAE:  0.6906
FCP:  0.6407
------------
Fold 2
RMSE: 0.9049
MAE:  0.6957
FCP:  0.6361
------------
Fold 3
RMSE: 0.8999
MAE:  0.6926
FCP:  0.6386
------------
Fold 4
RMSE: 0.8983
MAE:  0.6910
FCP:  0.6385
------------
------------
Mean RMSE: 0.8993
Mean MAE : 0.6925
Mean FCP : 0.6385
------------
------------
Evaluating RMSE,

In [4]:
# Grid search over the number of latent factors and the regularisation
# strength applied to the user and item factors (reg_pu / reg_qi).
# Results are stored as res_tune[n_factors][reg] = object returned by evaluate().

# Seed both RNGs so the grid search is repeatable: the Surprise FAQ recommends
# seeding Python's `random` (fold generation) and numpy (parameter init).
RANDOM_SEED = 0
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# The reader and dataset are identical for every grid point, so build them
# once instead of re-parsing the ratings frame for each of the 42 combinations.
reader = Reader(rating_scale=(0.5, 5))
train_data = Dataset.load_from_df(data[['userId','movieId','rating']], reader)

res_tune = {}
for latent_factors in [5,10,15,20,25,30]:
    res_tune[latent_factors] = {}
    for reg_t in [1,0.5,0.2,0.1,0.05,0.02,0.01]:
        algo = pa.matrix_factorization.SVD(n_factors=latent_factors, n_epochs=100,reg_pu=reg_t, reg_qi=reg_t)

        # Re-split for every grid point, as the original loop did.
        train_data.split(n_folds=5)

        perf = evaluate(algo, train_data, measures=['RMSE', 'MAE','FCP'])

        res_tune[latent_factors][reg_t] = perf

Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.8878
MAE:  0.6832
FCP:  0.6574
------------
Fold 2
RMSE: 0.8910
MAE:  0.6842
FCP:  0.6558
------------
Fold 3
RMSE: 0.8853
MAE:  0.6810
FCP:  0.6563
------------
Fold 4
RMSE: 0.8859
MAE:  0.6797
FCP:  0.6632
------------
Fold 5
RMSE: 0.8956
MAE:  0.6891
FCP:  0.6645
------------
------------
Mean RMSE: 0.8891
Mean MAE : 0.6834
Mean FCP : 0.6594
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.8913
MAE:  0.6858
FCP:  0.6596
------------
Fold 2
RMSE: 0.8863
MAE:  0.6804
FCP:  0.6601
------------
Fold 3
RMSE: 0.8955
MAE:  0.6859
FCP:  0.6609
------------
Fold 4
RMSE: 0.8866
MAE:  0.6803
FCP:  0.6564
------------
Fold 5
RMSE: 0.8862
MAE:  0.6831
FCP:  0.6530
------------
------------
Mean RMSE: 0.8892
Mean MAE : 0.6831
Mean FCP : 0.6580
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.8928
MAE:  0.6829
FCP:  0.6540
----

RMSE: 0.9035
MAE:  0.6880
FCP:  0.6539
------------
Fold 4
RMSE: 0.9006
MAE:  0.6895
FCP:  0.6555
------------
Fold 5
RMSE: 0.9028
MAE:  0.6911
FCP:  0.6493
------------
------------
Mean RMSE: 0.9022
Mean MAE : 0.6891
Mean FCP : 0.6514
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9323
MAE:  0.7102
FCP:  0.6339
------------
Fold 2
RMSE: 0.9464
MAE:  0.7194
FCP:  0.6390
------------
Fold 3
RMSE: 0.9492
MAE:  0.7241
FCP:  0.6377
------------
Fold 4
RMSE: 0.9424
MAE:  0.7222
FCP:  0.6295
------------
Fold 5
RMSE: 0.9494
MAE:  0.7260
FCP:  0.6360
------------
------------
Mean RMSE: 0.9439
Mean MAE : 0.7204
Mean FCP : 0.6352
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9695
MAE:  0.7396
FCP:  0.6245
------------
Fold 2
RMSE: 0.9674
MAE:  0.7354
FCP:  0.6284
------------
Fold 3
RMSE: 0.9699
MAE:  0.7421
FCP:  0.6193
------------
Fold 4
RMSE: 0.9781
MAE:  0.7412
FCP:  0.6332
----------

------------
Fold 1
RMSE: 0.8800
MAE:  0.6775
FCP:  0.6592
------------
Fold 2
RMSE: 0.8930
MAE:  0.6857
FCP:  0.6581
------------
Fold 3
RMSE: 0.8900
MAE:  0.6835
FCP:  0.6619
------------
Fold 4
RMSE: 0.8922
MAE:  0.6851
FCP:  0.6619
------------
Fold 5
RMSE: 0.8881
MAE:  0.6836
FCP:  0.6550
------------
------------
Mean RMSE: 0.8887
Mean MAE : 0.6831
Mean FCP : 0.6592
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.8734
MAE:  0.6698
FCP:  0.6635
------------
Fold 2
RMSE: 0.8800
MAE:  0.6757
FCP:  0.6592
------------
Fold 3
RMSE: 0.8722
MAE:  0.6706
FCP:  0.6642
------------
Fold 4
RMSE: 0.8747
MAE:  0.6696
FCP:  0.6652
------------
Fold 5
RMSE: 0.8892
MAE:  0.6813
FCP:  0.6663
------------
------------
Mean RMSE: 0.8779
Mean MAE : 0.6734
Mean FCP : 0.6637
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9023
MAE:  0.6901
FCP:  0.6617
------------
Fold 2
RMSE: 0.9045
MAE:  0.6935
FCP

In [5]:
# Print one row per latent-factor count. Each entry is the factor count
# immediately followed by the regularisation value (no separator, so "50.5"
# means 5 factors with reg 0.5), then the mean RMSE over the folds.
for n_factors, per_reg in res_tune.items():
    entries = []
    for reg_value, fold_scores in per_reg.items():
        rmse_per_fold = fold_scores['rmse']
        mean_rmse = sum(rmse_per_fold)/len(rmse_per_fold)
        entries.append(str(n_factors)+str(reg_value)+" "+str(mean_rmse)+"     ")
    # The trailing newline plus print's own newline leaves a blank line between rows.
    print("".join(entries)+'\n')

# Persist the raw results. The context manager guarantees the file handle is
# flushed and closed; the previous bare open() call never closed it.
with open('svd_factors_regularization.dat','wb') as results_file:
    pickle.dump(res_tune, results_file)

51 0.889121725037     50.5 0.889177385221     50.2 0.888867760857     50.1 0.88369235254     50.05 0.89784159662     50.02 0.923865362303     50.01 0.936588211621     

101 0.888454519686     100.5 0.888553660999     100.2 0.888172230137     100.1 0.883274990633     100.05 0.902386500632     100.02 0.94014157937     100.01 0.959044609908     

151 0.888643110897     150.5 0.889348812403     150.2 0.888761649282     150.1 0.880950134311     150.05 0.902180209954     150.02 0.943934803819     150.01 0.971597235486     

201 0.888656884939     200.5 0.888881034619     200.2 0.887949570793     200.1 0.879143273673     200.05 0.902878880738     200.02 0.948755538804     200.01 0.97918770809     

251 0.889096788134     250.5 0.890183781904     250.2 0.888268308643     250.1 0.87762444114     250.05 0.900653219167     250.02 0.946508952223     250.01 0.979714426401     

301 0.888986366137     300.5 0.88919227449     300.2 0.888678227489     300.1 0.877893356916     300.05 0.896119173471    

In [6]:
# Grid search over the bias learning rate (lr_bu / lr_bi) and a single
# regularisation value shared by the factor and bias terms, with 15 factors
# and 200 epochs.
# Results are stored as res_reg_bias_tune[learn_bias][reg] = object returned by evaluate().

# Seed both RNGs so the grid search is repeatable: the Surprise FAQ recommends
# seeding Python's `random` (fold generation) and numpy (parameter init).
RANDOM_SEED = 0
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# The reader and dataset are the same for every grid point, so build them once
# instead of re-parsing the ratings frame inside the nested loop.
reader = Reader(rating_scale=(0.5, 5))
train_data = Dataset.load_from_df(data[['userId','movieId','rating']], reader)

res_reg_bias_tune = {}
for learn_bias in [0.02,0.01,0.005,0.002,0.001]:
    res_reg_bias_tune[learn_bias] = {}
    for reg in [0.1,0.05,0.02,0.01]:
        algo = pa.matrix_factorization.SVD(n_factors=15, n_epochs=200,reg_pu=reg, reg_qi=reg, biased = True, reg_bu=reg, reg_bi=reg, lr_bu=learn_bias, lr_bi=learn_bias )

        # Re-split for every grid point, as the original loop did.
        train_data.split(n_folds=5)

        perf = evaluate(algo, train_data, measures=['RMSE', 'MAE','FCP'])

        res_reg_bias_tune[learn_bias][reg] = perf

# Persist the raw results. The context manager guarantees the file handle is
# flushed and closed; the previous bare open() call never closed it.
with open('svd_bias_reg.dat','wb') as results_file:
    pickle.dump(res_reg_bias_tune, results_file)

Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.8900
MAE:  0.6816
FCP:  0.6513
------------
Fold 2
RMSE: 0.8926
MAE:  0.6816
FCP:  0.6655
------------
Fold 3
RMSE: 0.8996
MAE:  0.6892
FCP:  0.6647
------------
Fold 4
RMSE: 0.8943
MAE:  0.6838
FCP:  0.6620
------------
Fold 5
RMSE: 0.8928
MAE:  0.6834
FCP:  0.6556
------------
------------
Mean RMSE: 0.8939
Mean MAE : 0.6839
Mean FCP : 0.6598
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9283
MAE:  0.7113
FCP:  0.6523
------------
Fold 2
RMSE: 0.9358
MAE:  0.7130
FCP:  0.6443
------------
Fold 3
RMSE: 0.9277
MAE:  0.7072
FCP:  0.6341
------------
Fold 4
RMSE: 0.9252
MAE:  0.7092
FCP:  0.6503
------------
Fold 5
RMSE: 0.9320
MAE:  0.7122
FCP:  0.6460
------------
------------
Mean RMSE: 0.9298
Mean MAE : 0.7106
Mean FCP : 0.6454
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9739
MAE:  0.7452
FCP:  0.6415
----

RMSE: 0.9744
MAE:  0.7434
FCP:  0.6307
------------
Fold 4
RMSE: 0.9769
MAE:  0.7455
FCP:  0.6245
------------
Fold 5
RMSE: 0.9753
MAE:  0.7428
FCP:  0.6296
------------
------------
Mean RMSE: 0.9756
Mean MAE : 0.7440
Mean FCP : 0.6298
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 1.0147
MAE:  0.7738
FCP:  0.6107
------------
Fold 2
RMSE: 1.0162
MAE:  0.7691
FCP:  0.6306
------------
Fold 3
RMSE: 1.0077
MAE:  0.7672
FCP:  0.6319
------------
Fold 4
RMSE: 0.9945
MAE:  0.7574
FCP:  0.6200
------------
Fold 5
RMSE: 1.0061
MAE:  0.7664
FCP:  0.6293
------------
------------
Mean RMSE: 1.0079
Mean MAE : 0.7668
Mean FCP : 0.6245
------------
------------


In [7]:
# Print one row per bias learning rate. Each entry is the learning rate
# immediately followed by the regularisation value (no separator), then the
# mean RMSE over the folds.
for bias_lr, per_reg in res_reg_bias_tune.items():
    entries = []
    for reg_value, fold_scores in per_reg.items():
        rmse_per_fold = fold_scores['rmse']
        mean_rmse = sum(rmse_per_fold)/len(rmse_per_fold)
        entries.append(str(bias_lr)+str(reg_value)+" "+str(mean_rmse)+"     ")
    # The trailing newline plus print's own newline leaves a blank line between rows.
    print("".join(entries)+'\n')

0.020.1 0.893870392199     0.020.05 0.929787544866     0.020.02 0.98473410602     0.020.01 1.01296444693     

0.010.1 0.888356106577     0.010.05 0.924555895076     0.010.02 0.977568593969     0.010.01 1.01371717291     

0.0050.1 0.884900410686     0.0050.05 0.921940606908     0.0050.02 0.975636323611     0.0050.01 1.01241339308     

0.0020.1 0.881389375942     0.0020.05 0.921667071238     0.0020.02 0.973862984716     0.0020.01 1.00671612179     

0.0010.1 0.881963960373     0.0010.05 0.920745654422     0.0010.02 0.975577340105     0.0010.01 1.00785019701     

