The implementations of all item recommendations are done using Cornac. 
- MostPop is a non-personalized model where the most popular/rated items are recommended to everyone.

- The user and item KNN are neighborhood models used for top-n recommendations.

- NCF (Neural Collaborative Filtering) is a generalization of the matrix factorization problem using a multilayer perceptron.

- BPR (Bayesian Personalised Ranking) is esentially a Matrix Factorization algorithm which is optimized with a Bayes Criterion (BPR-OPT) in order to make the recommendation list ranking as personalized to the user (hence as "correct") as possible. 

In [1]:
import numpy as np
import pandas as pd
import cornac
from cornac.eval_methods import CrossValidation, RatioSplit
from cornac.data import Reader
from cornac.data import Dataset
from cornac.hyperopt import Discrete, GridSearch
from tabulate import tabulate

## Read and load the data using Cornac

In [2]:
# Init cornac reader object
reader = Reader() # this binarises the data (turns it into implicit feedback)

# Read both the datasets from the files using cornac
movielens_data = reader.read(fpath="../data/u.data", sep="\t")
pda_data = reader.read(fpath="../data/train-PDA2018.csv", sep=",", skip_lines=1)
print("Movielens")
print(movielens_data[:5])
print()
print("PDA")
print(pda_data[:5])

Movielens
[('196', '242', 3.0), ('186', '302', 3.0), ('22', '377', 1.0), ('244', '51', 2.0), ('166', '346', 1.0)]

PDA
[('5', '648', 5.0), ('5', '1394', 5.0), ('5', '3534', 5.0), ('5', '104', 4.0), ('5', '2735', 5.0)]


## Define models

In [3]:
# Most Popular model
most_pop_model = cornac.models.MostPop()

# KNN Models
user_knn = cornac.models.UserKNN(
    k=80, similarity="pearson", name="UserKNN"
)

item_knn = cornac.models.ItemKNN(
    k=80, similarity="pearson", name="UserKNN"
)

# NCF model: We'll use the Pre-Trained NeuMF model, since it performs better as shown in https://arxiv.org/pdf/1708.05031.pdf
gmf = cornac.models.GMF(
    num_factors=8,
    num_epochs=10,
    learner="adam",
    batch_size=256,
    lr=0.001,
    num_neg=50,
    seed=123,
)
mlp = cornac.models.MLP(
    layers=[64, 32, 16, 8],
    act_fn="tanh",
    learner="adam",
    num_epochs=5,
    batch_size=256,
    lr=0.001,
    num_neg=50,
    seed=123,
)
ncf_model = cornac.models.NeuMF(
    name="NeuMF_pretrained",
    learner="adam",
    num_epochs=5,
    batch_size=256,
    lr=0.001,
    num_neg=50,
    seed=123,
    num_factors=gmf.num_factors,
    layers=mlp.layers,
    act_fn=mlp.act_fn,
).pretrain(gmf, mlp)

# BPR Model
bpr_model = cornac.models.BPR(
    k=10,
    learning_rate=0.01,
    lambda_reg=0.01
)

# Eval Metrics
pre_5 = cornac.metrics.Precision(k=5)
pre_10 = cornac.metrics.Precision(k=10)
rec_5 = cornac.metrics.Recall(k=5)
rec_10 = cornac.metrics.Recall(k=10)
ndcg = cornac.metrics.NDCG()
auc = cornac.metrics.AUC()

## Experiment Variables

In [4]:
results_table = [] # List that will contain the results
cv_n_folds = 5 # define number of k splits for cross validation
rating_threshold = 3 # This parameter is the threshold used for ranking metrics

# Algorithms we will be using
cornac_algorithms = {
    "MostPop": most_pop_model,
    "UserKNN": user_knn,
    "ItemKNN": item_knn,
    "BPR": bpr_model, 
    #"NCF": ncf_model
}
# Datasets
datasets = {
    "ML100": movielens_data,
    "PDA2018": pda_data
}

## 5-Fold Cross Validation

In [5]:
for dataset in datasets.keys():
    for algorithm in cornac_algorithms.keys():
        print("Running 5-fold cross validation with", algorithm, "on", dataset, "dataset ...\n\n")
        # Define Cornac cross validation object
        cv = CrossValidation(
            data=datasets[dataset],
            n_folds=cv_n_folds,
            rating_threshold=rating_threshold, # This parameter is the threshold used for ranking metrics
            seed = 0,
            verbose=True
        )
        # Define Cornac experiment (put everything together)
        experiment = cornac.Experiment(
            eval_method=cv,
            models=[cornac_algorithms[algorithm]],    
            metrics=[pre_5, pre_10, rec_5, rec_10, auc, ndcg],
        )
        experiment.run()
        for entry in experiment.result:
            results_dict = entry[0].metric_avg_results
            new_line = [dataset+"-"+algorithm, results_dict['Precision@5'], results_dict['Precision@10'], \
                        results_dict['Recall@5'], results_dict['Recall@10'], results_dict['NDCG@-1']]
            results_table.append(new_line)

Running 5-fold cross validation with MostPop on ML100 dataset ...


rating_threshold = 3.0
exclude_unknowns = True
Fold: 1
---
Training data:
Number of users = 943
Number of items = 1648
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 943
Number of items = 1382
Number of ratings = 19966
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 943
Number of items = 1382
Number of ratings = 19966
---
Total users = 943
Total items = 1648

[MostPop] Training started!

[MostPop] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 2
---
Training data:
Number of users = 943
Number of items = 1652
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1371
Number of ratings = 19967
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 942
Number of items = 1371
Number of ratings = 19967
---
Total users = 943
Total items = 1652

[MostPop] Training started!

[MostPop] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…


Fold: 3
---
Training data:
Number of users = 943
Number of items = 1651
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 940
Number of items = 1390
Number of ratings = 19965
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 940
Number of items = 1390
Number of ratings = 19965
---
Total users = 943
Total items = 1651

[MostPop] Training started!

[MostPop] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=940.0, style=ProgressStyle(description_widt…


Fold: 4
---
Training data:
Number of users = 943
Number of items = 1656
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 943
Number of items = 1397
Number of ratings = 19969
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 943
Number of items = 1397
Number of ratings = 19969
---
Total users = 943
Total items = 1656

[MostPop] Training started!

[MostPop] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 5
---
Training data:
Number of users = 943
Number of items = 1646
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1383
Number of ratings = 19959
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 942
Number of items = 1383
Number of ratings = 19959
---
Total users = 943
Total items = 1646

[MostPop] Training started!

[MostPop] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…



TEST:
...
[MostPop]
       |    AUC | NDCG@-1 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 0.8624 |  0.4056 |       0.0886 |      0.0946 |    0.0799 |   0.0431 |    0.0074 |   1.1289
Fold 1 | 0.8660 |  0.4058 |       0.0888 |      0.0939 |    0.0785 |   0.0426 |    0.0071 |   1.1203
Fold 2 | 0.8622 |  0.4000 |       0.0887 |      0.0898 |    0.0848 |   0.0397 |    0.0059 |   1.1240
Fold 3 | 0.8630 |  0.4054 |       0.0905 |      0.1020 |    0.0867 |   0.0472 |    0.0067 |   1.0908
Fold 4 | 0.8617 |  0.4050 |       0.0930 |      0.1036 |    0.0873 |   0.0466 |    0.0070 |   1.0839
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 0.8631 |  0.4044 |       0.0899 |      0.0968 |    0.0834 |   0.0438 |    0.0068 |   1.1096
Std    | 0.0015 |  0.0022 |       0.0017 |      0.0052 |    0.0036 | 

HBox(children=(FloatProgress(value=0.0, max=943.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 2
---
Training data:
Number of users = 943
Number of items = 1652
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1371
Number of ratings = 19967
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 942
Number of items = 1371
Number of ratings = 19967
---
Total users = 943
Total items = 1652

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=943.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…


Fold: 3
---
Training data:
Number of users = 943
Number of items = 1651
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 940
Number of items = 1390
Number of ratings = 19965
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 940
Number of items = 1390
Number of ratings = 19965
---
Total users = 943
Total items = 1651

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=943.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=940.0, style=ProgressStyle(description_widt…


Fold: 4
---
Training data:
Number of users = 943
Number of items = 1656
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 943
Number of items = 1397
Number of ratings = 19969
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 943
Number of items = 1397
Number of ratings = 19969
---
Total users = 943
Total items = 1656

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=943.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 5
---
Training data:
Number of users = 943
Number of items = 1646
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1383
Number of ratings = 19959
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 942
Number of items = 1383
Number of ratings = 19959
---
Total users = 943
Total items = 1646

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=943.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…



TEST:
...
[UserKNN]
       |    AUC | NDCG@-1 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 0.6447 |  0.2857 |       0.0005 |      0.0002 |    0.0004 |   0.0003 |    0.1496 |   4.3083
Fold 1 | 0.6474 |  0.2851 |       0.0000 |      0.0000 |    0.0000 |   0.0000 |    0.1606 |   4.3424
Fold 2 | 0.6524 |  0.2847 |       0.0006 |      0.0004 |    0.0002 |   0.0001 |    0.1511 |   4.3764
Fold 3 | 0.6503 |  0.2844 |       0.0002 |      0.0002 |    0.0000 |   0.0000 |    0.1520 |   4.4023
Fold 4 | 0.6466 |  0.2840 |       0.0003 |      0.0002 |    0.0001 |   0.0000 |    0.1428 |   4.3486
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 0.6483 |  0.2848 |       0.0003 |      0.0002 |    0.0002 |   0.0001 |    0.1512 |   4.3556
Std    | 0.0027 |  0.0006 |       0.0002 |      0.0001 |    0.0001 | 

HBox(children=(FloatProgress(value=0.0, max=1648.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 2
---
Training data:
Number of users = 943
Number of items = 1652
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1371
Number of ratings = 19967
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 942
Number of items = 1371
Number of ratings = 19967
---
Total users = 943
Total items = 1652

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=1652.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…


Fold: 3
---
Training data:
Number of users = 943
Number of items = 1651
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 940
Number of items = 1390
Number of ratings = 19965
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 940
Number of items = 1390
Number of ratings = 19965
---
Total users = 943
Total items = 1651

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=1651.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=940.0, style=ProgressStyle(description_widt…


Fold: 4
---
Training data:
Number of users = 943
Number of items = 1656
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 943
Number of items = 1397
Number of ratings = 19969
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 943
Number of items = 1397
Number of ratings = 19969
---
Total users = 943
Total items = 1656

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=1656.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 5
---
Training data:
Number of users = 943
Number of items = 1646
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1383
Number of ratings = 19959
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 942
Number of items = 1383
Number of ratings = 19959
---
Total users = 943
Total items = 1646

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=1646.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…



TEST:
...
[UserKNN]
       |    AUC | NDCG@-1 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 0.5422 |  0.2702 |       0.0011 |      0.0009 |    0.0018 |   0.0007 |    0.2706 |   7.0228
Fold 1 | 0.5458 |  0.2697 |       0.0006 |      0.0002 |    0.0004 |   0.0001 |    0.2764 |   6.9754
Fold 2 | 0.5470 |  0.2688 |       0.0004 |      0.0002 |    0.0004 |   0.0001 |    0.2691 |   6.8358
Fold 3 | 0.5458 |  0.2689 |       0.0011 |      0.0004 |    0.0015 |   0.0008 |    0.2755 |   6.9719
Fold 4 | 0.5389 |  0.2692 |       0.0007 |      0.0002 |    0.0011 |   0.0005 |    0.3181 |   7.1395
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 0.5440 |  0.2693 |       0.0008 |      0.0004 |    0.0011 |   0.0004 |    0.2819 |   6.9891
Std    | 0.0030 |  0.0005 |       0.0002 |      0.0002 |    0.0006 | 

HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 2
---
Training data:
Number of users = 943
Number of items = 1652
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1371
Number of ratings = 19967
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 942
Number of items = 1371
Number of ratings = 19967
---
Total users = 943
Total items = 1652

[BPR] Training started!

[BPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…


Fold: 3
---
Training data:
Number of users = 943
Number of items = 1651
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 940
Number of items = 1390
Number of ratings = 19965
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 940
Number of items = 1390
Number of ratings = 19965
---
Total users = 943
Total items = 1651

[BPR] Training started!

[BPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=940.0, style=ProgressStyle(description_widt…


Fold: 4
---
Training data:
Number of users = 943
Number of items = 1656
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 943
Number of items = 1397
Number of ratings = 19969
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 943
Number of items = 1397
Number of ratings = 19969
---
Total users = 943
Total items = 1656

[BPR] Training started!

[BPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 5
---
Training data:
Number of users = 943
Number of items = 1646
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1383
Number of ratings = 19959
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 942
Number of items = 1383
Number of ratings = 19959
---
Total users = 943
Total items = 1646

[BPR] Training started!

[BPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…



TEST:
...
[BPR]
       |    AUC | NDCG@-1 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 0.9337 |  0.4579 |       0.1132 |      0.1203 |    0.1189 |   0.0634 |    0.8503 |   1.3876
Fold 1 | 0.9341 |  0.4585 |       0.1160 |      0.1235 |    0.1200 |   0.0679 |    0.6756 |   1.3046
Fold 2 | 0.9318 |  0.4502 |       0.1137 |      0.1194 |    0.1155 |   0.0645 |    0.6251 |   1.3013
Fold 3 | 0.9332 |  0.4505 |       0.1143 |      0.1233 |    0.1131 |   0.0621 |    0.6392 |   1.3256
Fold 4 | 0.9319 |  0.4531 |       0.1140 |      0.1206 |    0.1101 |   0.0641 |    0.6470 |   1.3250
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 0.9330 |  0.4540 |       0.1143 |      0.1214 |    0.1155 |   0.0644 |    0.6874 |   1.3288
Std    | 0.0009 |  0.0035 |       0.0010 |      0.0017 |    0.0036 |   0.

HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5313.0, style=ProgressStyle(description_wid…


Fold: 2
---
Training data:
Number of users = 5685
Number of items = 1821
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5294
Number of items = 1791
Number of ratings = 94127
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5294
Number of items = 1791
Number of ratings = 94127
---
Total users = 5685
Total items = 1821

[MostPop] Training started!

[MostPop] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5294.0, style=ProgressStyle(description_wid…


Fold: 3
---
Training data:
Number of users = 5684
Number of items = 1823
Number of ratings = 376568
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5328
Number of items = 1798
Number of ratings = 94129
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5328
Number of items = 1798
Number of ratings = 94129
---
Total users = 5684
Total items = 1823

[MostPop] Training started!

[MostPop] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5328.0, style=ProgressStyle(description_wid…


Fold: 4
---
Training data:
Number of users = 5684
Number of items = 1822
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5303
Number of items = 1787
Number of ratings = 94125
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5303
Number of items = 1787
Number of ratings = 94125
---
Total users = 5684
Total items = 1822

[MostPop] Training started!

[MostPop] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5303.0, style=ProgressStyle(description_wid…


Fold: 5
---
Training data:
Number of users = 5682
Number of items = 1820
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5281
Number of items = 1799
Number of ratings = 94121
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5281
Number of items = 1799
Number of ratings = 94121
---
Total users = 5682
Total items = 1820

[MostPop] Training started!

[MostPop] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5281.0, style=ProgressStyle(description_wid…



TEST:
...
[MostPop]
       |    AUC | NDCG@-1 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 0.8300 |  0.3487 |       0.0701 |      0.0739 |    0.0698 |   0.0394 |    0.0096 |   6.0075
Fold 1 | 0.8296 |  0.3510 |       0.0710 |      0.0739 |    0.0701 |   0.0379 |    0.0097 |   6.3289
Fold 2 | 0.8275 |  0.3482 |       0.0678 |      0.0724 |    0.0687 |   0.0376 |    0.0094 |   6.9634
Fold 3 | 0.8256 |  0.3468 |       0.0684 |      0.0728 |    0.0660 |   0.0369 |    0.0106 |   8.1327
Fold 4 | 0.8250 |  0.3490 |       0.0697 |      0.0764 |    0.0666 |   0.0392 |    0.0084 |   6.3590
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 0.8276 |  0.3487 |       0.0694 |      0.0739 |    0.0682 |   0.0382 |    0.0095 |   6.7583
Std    | 0.0020 |  0.0014 |       0.0012 |      0.0014 |    0.0017 | 

HBox(children=(FloatProgress(value=0.0, max=5679.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5313.0, style=ProgressStyle(description_wid…


Fold: 2
---
Training data:
Number of users = 5685
Number of items = 1821
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5294
Number of items = 1791
Number of ratings = 94127
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5294
Number of items = 1791
Number of ratings = 94127
---
Total users = 5685
Total items = 1821

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=5685.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5294.0, style=ProgressStyle(description_wid…


Fold: 3
---
Training data:
Number of users = 5684
Number of items = 1823
Number of ratings = 376568
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5328
Number of items = 1798
Number of ratings = 94129
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5328
Number of items = 1798
Number of ratings = 94129
---
Total users = 5684
Total items = 1823

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=5684.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5328.0, style=ProgressStyle(description_wid…


Fold: 4
---
Training data:
Number of users = 5684
Number of items = 1822
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5303
Number of items = 1787
Number of ratings = 94125
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5303
Number of items = 1787
Number of ratings = 94125
---
Total users = 5684
Total items = 1822

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=5684.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5303.0, style=ProgressStyle(description_wid…


Fold: 5
---
Training data:
Number of users = 5682
Number of items = 1820
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5281
Number of items = 1799
Number of ratings = 94121
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5281
Number of items = 1799
Number of ratings = 94121
---
Total users = 5682
Total items = 1820

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=5682.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5281.0, style=ProgressStyle(description_wid…



TEST:
...
[UserKNN]
       |    AUC | NDCG@-1 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 0.6733 |  0.2626 |       0.0015 |      0.0003 |    0.0031 |   0.0004 |    3.1847 |  81.1177
Fold 1 | 0.6745 |  0.2640 |       0.0016 |      0.0004 |    0.0040 |   0.0004 |    2.8929 |  71.9417
Fold 2 | 0.6721 |  0.2630 |       0.0012 |      0.0003 |    0.0034 |   0.0003 |    2.3949 |  67.8826
Fold 3 | 0.6733 |  0.2628 |       0.0009 |      0.0002 |    0.0022 |   0.0004 |    2.7825 |  64.6914
Fold 4 | 0.6725 |  0.2630 |       0.0012 |      0.0002 |    0.0027 |   0.0002 |    2.2662 |  65.9374
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 0.6731 |  0.2631 |       0.0013 |      0.0003 |    0.0031 |   0.0004 |    2.7043 |  70.3141
Std    | 0.0008 |  0.0005 |       0.0002 |      0.0001 |    0.0006 | 

HBox(children=(FloatProgress(value=0.0, max=1823.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5313.0, style=ProgressStyle(description_wid…


Fold: 2
---
Training data:
Number of users = 5685
Number of items = 1821
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5294
Number of items = 1791
Number of ratings = 94127
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5294
Number of items = 1791
Number of ratings = 94127
---
Total users = 5685
Total items = 1821

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=1821.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5294.0, style=ProgressStyle(description_wid…


Fold: 3
---
Training data:
Number of users = 5684
Number of items = 1823
Number of ratings = 376568
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5328
Number of items = 1798
Number of ratings = 94129
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5328
Number of items = 1798
Number of ratings = 94129
---
Total users = 5684
Total items = 1823

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=1823.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5328.0, style=ProgressStyle(description_wid…


Fold: 4
---
Training data:
Number of users = 5684
Number of items = 1822
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5303
Number of items = 1787
Number of ratings = 94125
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5303
Number of items = 1787
Number of ratings = 94125
---
Total users = 5684
Total items = 1822

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=1822.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5303.0, style=ProgressStyle(description_wid…


Fold: 5
---
Training data:
Number of users = 5682
Number of items = 1820
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5281
Number of items = 1799
Number of ratings = 94121
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5281
Number of items = 1799
Number of ratings = 94121
---
Total users = 5682
Total items = 1820

[UserKNN] Training started!


HBox(children=(FloatProgress(value=0.0, max=1820.0), HTML(value='')))



[UserKNN] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5281.0, style=ProgressStyle(description_wid…



TEST:
...
[UserKNN]
       |    AUC | NDCG@-1 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 0.5712 |  0.2474 |       0.0055 |      0.0047 |    0.0033 |   0.0015 |    0.6648 |  58.0588
Fold 1 | 0.5690 |  0.2466 |       0.0041 |      0.0030 |    0.0024 |   0.0007 |    0.6971 |  45.2179
Fold 2 | 0.5693 |  0.2466 |       0.0045 |      0.0031 |    0.0029 |   0.0009 |    0.6363 |  48.7825
Fold 3 | 0.5684 |  0.2470 |       0.0051 |      0.0038 |    0.0033 |   0.0016 |    0.6334 |  48.2660
Fold 4 | 0.5683 |  0.2467 |       0.0042 |      0.0034 |    0.0022 |   0.0008 |    1.1042 |  49.5730
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 0.5692 |  0.2468 |       0.0047 |      0.0036 |    0.0028 |   0.0011 |    0.7472 |  49.9796
Std    | 0.0011 |  0.0003 |       0.0005 |      0.0006 |    0.0005 | 

HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5313.0, style=ProgressStyle(description_wid…


Fold: 2
---
Training data:
Number of users = 5685
Number of items = 1821
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5294
Number of items = 1791
Number of ratings = 94127
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5294
Number of items = 1791
Number of ratings = 94127
---
Total users = 5685
Total items = 1821

[BPR] Training started!

[BPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5294.0, style=ProgressStyle(description_wid…


Fold: 3
---
Training data:
Number of users = 5684
Number of items = 1823
Number of ratings = 376568
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5328
Number of items = 1798
Number of ratings = 94129
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5328
Number of items = 1798
Number of ratings = 94129
---
Total users = 5684
Total items = 1823

[BPR] Training started!

[BPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5328.0, style=ProgressStyle(description_wid…


Fold: 4
---
Training data:
Number of users = 5684
Number of items = 1822
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5303
Number of items = 1787
Number of ratings = 94125
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5303
Number of items = 1787
Number of ratings = 94125
---
Total users = 5684
Total items = 1822

[BPR] Training started!

[BPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5303.0, style=ProgressStyle(description_wid…


Fold: 5
---
Training data:
Number of users = 5682
Number of items = 1820
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5281
Number of items = 1799
Number of ratings = 94121
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 5281
Number of items = 1799
Number of ratings = 94121
---
Total users = 5682
Total items = 1820

[BPR] Training started!

[BPR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5281.0, style=ProgressStyle(description_wid…



TEST:
...
[BPR]
       |    AUC | NDCG@-1 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 0.9031 |  0.3904 |       0.0882 |      0.0962 |    0.1009 |   0.0570 |    3.3421 |   7.3767
Fold 1 | 0.9029 |  0.3912 |       0.0898 |      0.0954 |    0.0979 |   0.0536 |    4.4609 |   7.1950
Fold 2 | 0.9023 |  0.3885 |       0.0889 |      0.0931 |    0.0939 |   0.0514 |    4.9192 |   7.0318
Fold 3 | 0.8996 |  0.3874 |       0.0876 |      0.0938 |    0.0959 |   0.0514 |    3.8431 |   7.5820
Fold 4 | 0.9000 |  0.3885 |       0.0889 |      0.0943 |    0.0944 |   0.0536 |    3.6668 |   8.2425
------ + ------ + ------- + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 0.9016 |  0.3892 |       0.0887 |      0.0946 |    0.0966 |   0.0534 |    4.0464 |   7.4856
Std    | 0.0015 |  0.0014 |       0.0007 |      0.0011 |    0.0026 |   0.

In [6]:
# Display results of running the algorithms
results_table_headers = ["Recommender", "Pre@5", "Pre@10", "Rec@5", "Rec@10", "NDCG"]
print(tabulate(results_table, results_table_headers, tablefmt="pipe"))

| Recommender     |       Pre@5 |     Pre@10 |       Rec@5 |      Rec@10 |     NDCG |
|:----------------|------------:|-----------:|------------:|------------:|---------:|
| ML100-MostPop   | 0.0945802   | 0.0886291  | 0.0431486   | 0.0799196   | 0.405632 |
| ML100-UserKNN   | 0.00021254  | 0.00053135 | 0.000265675 | 0.000401886 | 0.285699 |
| ML100-ItemKNN   | 0.000850159 | 0.0010627  | 0.00071437  | 0.00178445  | 0.27017  |
| ML100-BPR       | 0.120298    | 0.113177   | 0.0634099   | 0.118859    | 0.45788  |
| PDA2018-MostPop | 0.0739311   | 0.0701476  | 0.0394338   | 0.0697588   | 0.348685 |
| PDA2018-UserKNN | 0.000264851 | 0.00145668 | 0.000414723 | 0.00311618  | 0.262592 |
| PDA2018-ItemKNN | 0.00469164  | 0.00548619 | 0.00149846  | 0.00333021  | 0.247353 |
| PDA2018-BPR     | 0.0962164   | 0.0881763  | 0.0569755   | 0.10091     | 0.390439 |


In [8]:
# Export data
# Export the results to a csv file
results_df = pd.DataFrame(results_table, columns=["Recommender", "Pre@5", "Pre@10", "Rec@5", "Rec@10", "NDCG"])
results_df.to_csv("../data/cornac_item_recommendation_results.csv")