In [36]:
import numpy as np
import pandas as pd
from nltk.corpus.reader import titles
from sklearn.model_selection import GridSearchCV
from surprise import Dataset, Reader, accuracy, SVD, KNNBaseline, KNNWithZScore, CoClustering, NormalPredictor, KNNBasic

In [37]:
from tabulate import tabulate
ratings = pd.read_csv('train-PDA2019.csv')
test = pd.read_csv('test-PDA2019.csv')
content = pd.read_csv('content-PDA2019.csv')
ratings.columns=['userID','itemID','rating','timeStamp']

In [38]:
ratings_full = ratings.pivot(index='userID',columns='itemID',
                             values='rating')
ratings_full.fillna(0).astype(int)


itemID,89,93,94,95,97,98,100,101,102,104,...,3929,3930,3931,3932,3937,3938,3945,3946,3950,3952
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12069,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12071,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12073,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12077,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [39]:
reader = Reader(rating_scale=(1,5))
data = Dataset.load_from_df(ratings[['userID','itemID','rating']],reader)
trainset = data.build_full_trainset()

In [40]:
num_items = trainset.n_items
num_users = trainset.n_users
num_ratings = trainset.n_ratings
mean_ = trainset.global_mean
mean_ = np.round(mean_,decimals=2)
density_ = num_ratings/(num_users*num_items)
density_ = np.round(density_,decimals=2)
sparsity_ = 1-density_
print('this dataset has ',num_users,'users')
print('this dataset has ',num_items,'items')
print('this dataset has ',num_ratings,'ratings')
print('mean rating: ',mean_)
print('density: ',density_*100,'%')
print('sparsity',sparsity_*100,'%')

this dataset has  5690 users
this dataset has  1824 items
this dataset has  470711 ratings
mean rating:  3.64
density:  5.0 %
sparsity 95.0 %


In [41]:
from surprise.model_selection import cross_validate,KFold
kf = KFold(n_splits=5,random_state=0)

In [42]:
recommenders = (SVD,KNNBasic,KNNBaseline,KNNWithZScore,CoClustering,NormalPredictor)

title = ('SVD','KNN-Basic','KNN-Baseline','KNNwithZscore','Co-Clustering','Random')

In [43]:
table = []
fold_n=0
for rec in recommenders:
    out = cross_validate(rec(),data,['rmse','mae','fcp'],kf)
    mean_rmse = '{:.3f}'.format(np.mean(out['test_rmse']))
    mean_mae = '{:.3f}'.format(np.mean(out['test_mae']))
    mean_fcp = '{:.3f}'.format(np.mean(out['test_fcp']))
    new_line = [titles[fold_n],mean_rmse,mean_mae,mean_fcp]
    table.append(new_line)
    fold_n +=1
        

KeyError: 0

In [44]:
header = ['Recommenders','Pred Accuracy(RMSE)','Pred Accuracy(MAE)','Rank Quality(FCP)']
print(tabulate(table,header,tablefmt='pipe'))



| Recommenders   | Pred Accuracy(RMSE)   | Pred Accuracy(MAE)   | Rank Quality(FCP)   |
||


In [46]:
param_grid = {'n_epochs': [5,20],
              'lr_all':[0.001,0.1],
              'reg_all':[0.01,0.5]}
gs = GridSearchCV(SVD,param_grid,measures=['rmse','mae','fcp'],cv=4)
gs.fit(data)
print('Best RMSE:',gs.best_score['rmse'])
print('Best parameters for RMSE:',gs.best_params['rmse'])
print('Best ranking (FCP):',gs.best_score['fcp'])

TypeError: __init__() got an unexpected keyword argument 'measures'