In [21]:
import numpy as np
import pandas as pd
import cornac
import tensorflow as tf
from cornac.eval_methods import CrossValidation, RatioSplit
from cornac.data import Reader
from cornac.data import Dataset
from cornac.hyperopt import Discrete, GridSearch
from tabulate import tabulate

This implementation of Neural Collaborative Filtering is by using the Cornac package, which is similiar to Surprise, but has some more advanced algorithms. NCF is a generalization of the matrix factorization problem using a multilayer perceptron

# Read and load data

In [22]:
# Init cornac reader object
reader = Reader()

# Read both the datasets from the files using cornac
movielens_data = reader.read(fpath="../data/u.data", sep="\t")
pda_data = reader.read(fpath="../data/train-PDA2018.csv", sep=",", skip_lines=1)
print("Movielens")
print(movielens_data[:5])
print()
print("PDA")
print(pda_data[:5])

Movielens
[('196', '242', 3.0), ('186', '302', 3.0), ('22', '377', 1.0), ('244', '51', 2.0), ('166', '346', 1.0)]

PDA
[('5', '648', 5.0), ('5', '1394', 5.0), ('5', '3534', 5.0), ('5', '104', 4.0), ('5', '2735', 5.0)]


In [23]:
# Build the datasets (This is similar to how surprise uses build_full_trainset)
movielens_dataset = Dataset.build(movielens_data)
pda_dataset = Dataset.build(pda_data)
# Print out some basic information about the datasets
print("General information on the training sets we will be using \n")
print("1) Number of items in each dataset", " ML100k:", movielens_dataset.num_items, "PDA:", pda_dataset.num_items)
print("2) Number of users in each dataset", " ML100k:", movielens_dataset.num_users, "PDA:", pda_dataset.num_users)
print("3) Number of ratings in each dataset", " ML100k:", movielens_dataset.num_ratings, "PDA:", pda_dataset.num_ratings)
print("4) Mean rating", " ML100k:", movielens_dataset.global_mean, "PDA:", movielens_dataset.global_mean)

General information on the training sets we will be using 

1) Number of items in each dataset  ML100k: 1682 PDA: 1824
2) Number of users in each dataset  ML100k: 943 PDA: 5690
3) Number of ratings in each dataset  ML100k: 100000 PDA: 470711
4) Mean rating  ML100k: 3.52986 PDA: 3.52986


# NCF Model

In [4]:
# Now we have data that is ready to be fed to the model

# First of let's define the NCF model using cornac. Cornac gives us the possibility to tweak a lot of hyperparameters
# We'll be using the basic architecture described in the NCF Paper: https://arxiv.org/pdf/1708.05031.pdf
ncf_model = cornac.models.NeuMF(
    num_factors=8,
    act_fn="relu",
    learner="adam",
    num_epochs=15,
    batch_size=128,
    lr=0.001,
)

In [5]:
# Next we'll define the evaluation metrics for this model
mae = cornac.metrics.MAE()
rmse = cornac.metrics.RMSE()
rec_5 = cornac.metrics.Recall(k=5)
pre_5 = cornac.metrics.Precision(k=5)
rec_10 = cornac.metrics.Recall(k=10)
pre_10 = cornac.metrics.Precision(k=10)
ndcg_5 = cornac.metrics.NDCG(k=5)
ndcg_10 = cornac.metrics.NDCG(k=10)

In [12]:
# We created a simple model before so let us try and run a grid search for hyperparameter tuning

# Define the parameters we want to tune and their values
n_epochs_domain = Discrete(name="num_epochs", values=[5,10,15,20])
lr_domain = Discrete("lr", values=[0.001, 0.005, 0.01, 0.05])
reg_mf_domain = Discrete("reg_mf", values=[0.01, 0.05, 0.1, 0.5])
search_domain = [n_epochs_domain, lr_domain, reg_mf_domain]

# Define the evaluation methods that will be used for the grid search. For this part we'll use a normal 80-10-10 
# train-val-test split.

print("ML100K...")
ml_train_test = RatioSplit(
    data=movielens_data,
    val_size=0.1,
    test_size=0.1,
    rating_threshold=2.5, # This parameter is the threshold used for ranking metrics
    exclude_unknowns=False,
    verbose=True
)

# We will run the Grid Search for two measures: RMSE (Prediction) and NDCG (Ranking)
# Also for time and performance constraints we will only use the Movielens dataset for the grid search
# (since the two datasets are so similar)

# Define the two GridSearch objects, one for the rmse and the other for NDCG
pred_gs = GridSearch(model=ncf_model, space=search_domain, metric=rmse, eval_method=ml_train_test)
rank_gs = GridSearch(model=ncf_model, space=search_domain, metric=ndcg_10, eval_method=ml_train_test)

## Define GridSearch for Movielens
ncf_gridsearch = cornac.Experiment(
    eval_method=ml_train_test,
    models=[pred_gs, rank_gs],    
    metrics=[rmse, ndcg_10],
)

ML100K...
rating_threshold = 2.5
exclude_unknowns = False
---
Training data:
Number of users = 943
Number of items = 1645
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 920
Number of items = 1274
Number of ratings = 10000
Number of unknown users = 0
Number of unknown items = 17
---
Validation data:
Number of users = 917
Number of items = 1249
Number of ratings = 10000
---
Total users = 943
Total items = 1682


In [13]:
## RUN Grisearch for Movielens
gridsearch_results = ncf_gridsearch.run()


[GridSearch_NeuMF] Training started!
Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Best parameter settings: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.01}
RMSE = 2.7530

[GridSearch_NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=10000.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=920.0, style=ProgressStyle(description_widt…




IndexError: index 1668 is out of bounds for axis 0 with size 1662

In [14]:
pred_gs.best_params

{'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.01}

# 5-fold Cross Validation

In [24]:
# Use the model parameters that were found to work best by the grid search
ncf_model = cornac.models.NeuMF(
    num_factors=8,
    act_fn="relu",
    learner="adam",
    num_epochs=5,
    batch_size=256,
    lr=0.001,
    reg_mf=0.01
)

In [19]:
# Here we are creating the cross validation procedures for the evaluation. As we can see from the parameters
# we will be running 5f CV on our model on both datasets. The objects that we construct here will be used
# in the cornac experiments in the next cell.
print("ML100K...")
ml_cv = CrossValidation(
    data=movielens_data,
    n_folds=5,
    rating_threshold=2.5, # This parameter is the threshold used for ranking metrics
    seed = 0,
    exclude_unknowns=False,
    verbose=True
)
print()
print("PDA...")
pda_cv = CrossValidation(
    data=pda_data,
    n_folds=5,
    rating_threshold=2.5,
    seed = 0,
    exclude_unknowns=False,
    verbose=True
)

ML100K...
rating_threshold = 2.5
exclude_unknowns = False

PDA...
rating_threshold = 2.5
exclude_unknowns = False


In [25]:
# RUN 5-fold cross validation w/ Grid Search on Movielens
ml_ncf = cornac.Experiment(
    eval_method=ml_cv,
    models=[ncf_model],    
    metrics=[mae, rmse, ndcg_5, ndcg_10, pre_5, pre_10, rec_5, rec_10],
)
ml_ncf.run()

Fold: 1
---
Training data:
Number of users = 943
Number of items = 1648
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 943
Number of items = 1416
Number of ratings = 20000
Number of unknown users = 0
Number of unknown items = 34
---
Validation data:
Number of users = 943
Number of items = 1416
Number of ratings = 20000
---
Total users = 943
Total items = 1682

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=20000.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 2
---
Training data:
Number of users = 943
Number of items = 1652
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1401
Number of ratings = 20000
Number of unknown users = 0
Number of unknown items = 30
---
Validation data:
Number of users = 942
Number of items = 1401
Number of ratings = 20000
---
Total users = 943
Total items = 1682

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=20000.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…


Fold: 3
---
Training data:
Number of users = 943
Number of items = 1651
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 940
Number of items = 1421
Number of ratings = 20000
Number of unknown users = 0
Number of unknown items = 31
---
Validation data:
Number of users = 940
Number of items = 1421
Number of ratings = 20000
---
Total users = 943
Total items = 1682

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=20000.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=940.0, style=ProgressStyle(description_widt…


Fold: 4
---
Training data:
Number of users = 943
Number of items = 1656
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 943
Number of items = 1423
Number of ratings = 20000
Number of unknown users = 0
Number of unknown items = 26
---
Validation data:
Number of users = 943
Number of items = 1423
Number of ratings = 20000
---
Total users = 943
Total items = 1682

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=20000.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 5
---
Training data:
Number of users = 943
Number of items = 1646
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1419
Number of ratings = 20000
Number of unknown users = 0
Number of unknown items = 36
---
Validation data:
Number of users = 942
Number of items = 1419
Number of ratings = 20000
---
Total users = 943
Total items = 1682

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=20000.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…



TEST:
...
[NeuMF]
       |    MAE |   RMSE | NDCG@10 | NDCG@5 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------ + ------- + ------ + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 2.5988 | 2.7876 |  0.1059 | 0.1032 |       0.0880 |      0.0990 |    0.0833 |   0.0474 |   66.2111 |  20.0365
Fold 1 | 2.6018 | 2.7943 |  0.1369 | 0.1277 |       0.1145 |      0.1243 |    0.1120 |   0.0618 |   80.7319 |  20.0575
Fold 2 | 2.5886 | 2.7787 |  0.1275 | 0.1199 |       0.1043 |      0.1155 |    0.1070 |   0.0584 |   77.6107 |  19.4486
Fold 3 | 2.5774 | 2.7714 |  0.1355 | 0.1258 |       0.1141 |      0.1220 |    0.1098 |   0.0604 |   82.7890 |  19.0722
Fold 4 | 2.5876 | 2.7770 |  0.1341 | 0.1232 |       0.1105 |      0.1172 |    0.1090 |   0.0588 |   79.8157 |  20.3870
------ + ------ + ------ + ------- + ------ + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 2.5908 | 2.7818 |  

In [27]:
# RUN 5-fold cross validation on PDA
pda_ncf = cornac.Experiment(
    eval_method=pda_cv,
    models=[ncf_model],    
    metrics=[mae, rmse, ndcg_5, ndcg_10, pre_5, pre_10, rec_5, rec_10],
)
pda_ncf.run()

Fold: 1
---
Training data:
Number of users = 5679
Number of items = 1823
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5324
Number of items = 1781
Number of ratings = 94142
Number of unknown users = 11
Number of unknown items = 1
---
Validation data:
Number of users = 5324
Number of items = 1781
Number of ratings = 94142
---
Total users = 5690
Total items = 1824

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=94142.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5324.0, style=ProgressStyle(description_wid…


Fold: 2
---
Training data:
Number of users = 5685
Number of items = 1821
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5299
Number of items = 1794
Number of ratings = 94142
Number of unknown users = 5
Number of unknown items = 3
---
Validation data:
Number of users = 5299
Number of items = 1794
Number of ratings = 94142
---
Total users = 5690
Total items = 1824

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=94142.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5299.0, style=ProgressStyle(description_wid…


Fold: 3
---
Training data:
Number of users = 5684
Number of items = 1823
Number of ratings = 376568
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5334
Number of items = 1799
Number of ratings = 94143
Number of unknown users = 6
Number of unknown items = 1
---
Validation data:
Number of users = 5334
Number of items = 1799
Number of ratings = 94143
---
Total users = 5690
Total items = 1824

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=94143.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5334.0, style=ProgressStyle(description_wid…


Fold: 4
---
Training data:
Number of users = 5684
Number of items = 1822
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5309
Number of items = 1789
Number of ratings = 94142
Number of unknown users = 6
Number of unknown items = 2
---
Validation data:
Number of users = 5309
Number of items = 1789
Number of ratings = 94142
---
Total users = 5690
Total items = 1824

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=94142.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5309.0, style=ProgressStyle(description_wid…


Fold: 5
---
Training data:
Number of users = 5682
Number of items = 1820
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5289
Number of items = 1803
Number of ratings = 94142
Number of unknown users = 8
Number of unknown items = 4
---
Validation data:
Number of users = 5289
Number of items = 1803
Number of ratings = 94142
---
Total users = 5690
Total items = 1824

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=94142.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5289.0, style=ProgressStyle(description_wid…



TEST:
...
[NeuMF]
       |    MAE |   RMSE | NDCG@10 | NDCG@5 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------ + ------- + ------ + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 2.8236 | 2.9691 |  0.1025 | 0.0953 |       0.0817 |      0.0885 |    0.0872 |   0.0488 |  390.1943 |  95.1420
Fold 1 | 2.8269 | 2.9736 |  0.1091 | 0.1010 |       0.0860 |      0.0917 |    0.0918 |   0.0493 |  388.6798 |  97.3243
Fold 2 | 2.8348 | 2.9775 |  0.1060 | 0.0976 |       0.0825 |      0.0872 |    0.0892 |   0.0485 |  391.0382 |  93.5340
Fold 3 | 2.8301 | 2.9760 |  0.1019 | 0.0950 |       0.0806 |      0.0873 |    0.0841 |   0.0473 |  377.3545 | 112.6226
Fold 4 | 2.8279 | 2.9756 |  0.0979 | 0.0919 |       0.0777 |      0.0832 |    0.0795 |   0.0450 |  404.8905 |  99.2113
------ + ------ + ------ + ------- + ------ + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 2.8286 | 2.9744 |  

# Results

In [46]:
print("ML100 Results")
for entry in ml_ncf.result:
    print(entry[0].metric_avg_results)

OrderedDict([('MAE', 2.5987645546726577), ('RMSE', 2.7876102501735334), ('NDCG@10', 0.10588883036252389), ('NDCG@5', 0.10317558587685781), ('Precision@10', 0.08799149840595107), ('Precision@5', 0.09904357066950109), ('Recall@10', 0.08334936037982428), ('Recall@5', 0.047396971710349574), ('Train (s)', 66.21114301681519), ('Test (s)', 20.03649401664734)])


In [47]:
print("PDA2018 Results")
for entry in pda_ncf.result:
    print(entry[0].metric_avg_results)

PDA2018 Results
OrderedDict([('MAE', 2.8235679560935534), ('RMSE', 2.9691053157935787), ('NDCG@10', 0.10253133077935488), ('NDCG@5', 0.0952878940782564), ('Precision@10', 0.08172550500283378), ('Precision@5', 0.08846516896356174), ('Recall@10', 0.08718384069956338), ('Recall@5', 0.048804795802218345), ('Train (s)', 390.19425773620605), ('Test (s)', 95.14201927185059)])
