In [1]:
import numpy as np
import pandas as pd
import cornac
import tensorflow as tf
from cornac.eval_methods import CrossValidation, RatioSplit
from cornac.data import Reader
from cornac.data import Dataset
from cornac.hyperopt import Discrete, GridSearch
from tabulate import tabulate

This implementation of Neural Collaborative Filtering is done by using the Cornac package, which is similiar to Surprise, but has some more advanced algorithms. NCF is a generalization of the matrix factorization problem using a multilayer perceptron.

# Read and load data

In [2]:
# Init cornac reader object
reader = Reader()

# Read both the datasets from the files using cornac
movielens_data = reader.read(fpath="../data/u.data", sep="\t")
pda_data = reader.read(fpath="../data/train-PDA2018.csv", sep=",", skip_lines=1)
print("Movielens")
print(movielens_data[:5])
print()
print("PDA")
print(pda_data[:5])

Movielens
[('196', '242', 3.0), ('186', '302', 3.0), ('22', '377', 1.0), ('244', '51', 2.0), ('166', '346', 1.0)]

PDA
[('5', '648', 5.0), ('5', '1394', 5.0), ('5', '3534', 5.0), ('5', '104', 4.0), ('5', '2735', 5.0)]


In [3]:
# Build the datasets (This is similar to how surprise uses build_full_trainset)
movielens_dataset = Dataset.build(movielens_data)
pda_dataset = Dataset.build(pda_data)
# Print out some basic information about the datasets
print("General information on the training sets we will be using \n")
print("1) Number of items in each dataset", " ML100k:", movielens_dataset.num_items, "PDA:", pda_dataset.num_items)
print("2) Number of users in each dataset", " ML100k:", movielens_dataset.num_users, "PDA:", pda_dataset.num_users)
print("3) Number of ratings in each dataset", " ML100k:", movielens_dataset.num_ratings, "PDA:", pda_dataset.num_ratings)
print("4) Mean rating", " ML100k:", movielens_dataset.global_mean, "PDA:", movielens_dataset.global_mean)

General information on the training sets we will be using 

1) Number of items in each dataset  ML100k: 1682 PDA: 1824
2) Number of users in each dataset  ML100k: 943 PDA: 5690
3) Number of ratings in each dataset  ML100k: 100000 PDA: 470711
4) Mean rating  ML100k: 3.52986 PDA: 3.52986


# NCF Model

In [4]:
# Now we have data that is ready to be fed to the model

# First of let's define the NCF model using cornac. Cornac gives us the possibility to tweak a lot of hyperparameters
# We'll be using the basic architecture described in the NCF Paper: https://arxiv.org/pdf/1708.05031.pdf
ncf_model = cornac.models.NeuMF(
    num_factors=8,
    act_fn="relu",
    learner="adam",
    num_epochs=15,
    batch_size=128,
    lr=0.001,
)

In [5]:
# Next we'll define the evaluation metrics for this model
mae = cornac.metrics.MAE()
rmse = cornac.metrics.RMSE()
rec_5 = cornac.metrics.Recall(k=5)
pre_5 = cornac.metrics.Precision(k=5)
rec_10 = cornac.metrics.Recall(k=10)
pre_10 = cornac.metrics.Precision(k=10)
ndcg_5 = cornac.metrics.NDCG(k=5)
ndcg_10 = cornac.metrics.NDCG(k=10)

In [6]:
# We created a simple model before so let us try and run a grid search for hyperparameter tuning

# Define the parameters we want to tune and their values
n_epochs_domain = Discrete(name="num_epochs", values=[5,10,15,20])
lr_domain = Discrete("lr", values=[0.001, 0.005, 0.01, 0.05])
reg_mf_domain = Discrete("reg_mf", values=[0.01, 0.05, 0.1, 0.5])
search_domain = [n_epochs_domain, lr_domain, reg_mf_domain]

# Define the evaluation methods that will be used for the grid search. For this part we'll use a normal 80-10-10 
# train-val-test split.

print("ML100K...")
ml_train_test = RatioSplit(
    data=movielens_data,
    val_size=0.1,
    test_size=0.1,
    rating_threshold=3, # This parameter is the threshold used for ranking metrics
    verbose=True
)

# We will run the Grid Search for two measures: RMSE (Prediction) and NDCG (Ranking)
# Also for time and performance constraints we will only use the Movielens data for the grid search
# since the two datasets are very similar)

# Define the two GridSearch objects, one for the rmse and the other for NDCG
pred_gs = GridSearch(model=ncf_model, space=search_domain, metric=rmse, eval_method=ml_train_test)
rank_gs = GridSearch(model=ncf_model, space=search_domain, metric=ndcg_5, eval_method=ml_train_test)

## Define GridSearch for Movielens
ncf_gridsearch = cornac.Experiment(
    eval_method=ml_train_test,
    models=[pred_gs, rank_gs],    
    metrics=[rmse, ndcg_5],
)

ML100K...
rating_threshold = 3.0
exclude_unknowns = True
---
Training data:
Number of users = 943
Number of items = 1655
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 927
Number of items = 1242
Number of ratings = 9985
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 926
Number of items = 1250
Number of ratings = 9983
---
Total users = 943
Total items = 1655


In [7]:
## RUN Grisearch for Movielens
gridsearch_results = ncf_gridsearch.run()


[GridSearch_NeuMF] Training started!
Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Best parameter settings: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.01}
RMSE = 2.7685

[GridSearch_NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=9985.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=927.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Rating', max=9983.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=926.0, style=ProgressStyle(description_widt…



[GridSearch_NeuMF] Training started!
Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.001, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.005, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.01, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 5, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 10, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 15, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.01}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.05}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.1}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Evaluating: {'lr': 0.05, 'num_epochs': 20, 'reg_mf': 0.5}


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Best parameter settings: {'lr': 0.001, 'num_epochs': 15, 'reg_mf': 0.1}
NDCG@5 = 0.0765

[GridSearch_NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=9985.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=927.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Rating', max=9983.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=926.0, style=ProgressStyle(description_widt…



VALIDATION:
...
                 |   RMSE | NDCG@5 | Time (s)
---------------- + ------ + ------ + --------
GridSearch_NeuMF | 2.7440 | 0.0637 |   9.2396
GridSearch_NeuMF | 2.7440 | 0.0765 |   6.8326

TEST:
...
                 |   RMSE | NDCG@5 | Train (s) | Test (s)
---------------- + ------ + ------ + --------- + --------
GridSearch_NeuMF | 2.7363 | 0.0783 | 9021.7504 |   8.3793
GridSearch_NeuMF | 2.7363 | 0.0665 | 8747.0747 |   6.9001



In [8]:
pred_gs.best_params

{'lr': 0.001, 'num_epochs': 5, 'reg_mf': 0.01}

# 5-fold Cross Validation

In [9]:
# Use the model parameters that were found to work best by the grid search
ncf_model = cornac.models.NeuMF(
    num_factors=8,
    act_fn="relu",
    learner="adam",
    num_epochs=5,
    batch_size=256,
    lr=0.001,
    reg_mf=0.01
)

In [10]:
# Here we are creating the cross validation procedures for the evaluation. As we can see from the parameters
# we will be running 5f CV on our model on both datasets. The objects that we construct here will be used
# in the cornac experiments in the next cell.
print("ML100K...")
ml_cv = CrossValidation(
    data=movielens_data,
    n_folds=5,
    rating_threshold=2.5, # This parameter is the threshold used for ranking metrics
    seed = 0,
    exclude_unknowns=False,
    verbose=True
)
print()
print("PDA...")
pda_cv = CrossValidation(
    data=pda_data,
    n_folds=5,
    rating_threshold=2.5,
    seed = 0,
    exclude_unknowns=False,
    verbose=True
)

ML100K...
rating_threshold = 2.5
exclude_unknowns = False

PDA...
rating_threshold = 2.5
exclude_unknowns = False


In [11]:
# RUN 5-fold cross validation w/ Grid Search on Movielens
ml_ncf = cornac.Experiment(
    eval_method=ml_cv,
    models=[ncf_model],    
    metrics=[mae, rmse, ndcg_5, ndcg_10, pre_5, pre_10, rec_5, rec_10],
)
ml_ncf.run()

Fold: 1
---
Training data:
Number of users = 943
Number of items = 1648
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 943
Number of items = 1416
Number of ratings = 20000
Number of unknown users = 0
Number of unknown items = 34
---
Validation data:
Number of users = 943
Number of items = 1416
Number of ratings = 20000
---
Total users = 943
Total items = 1682

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=20000.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 2
---
Training data:
Number of users = 943
Number of items = 1652
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1401
Number of ratings = 20000
Number of unknown users = 0
Number of unknown items = 30
---
Validation data:
Number of users = 942
Number of items = 1401
Number of ratings = 20000
---
Total users = 943
Total items = 1682

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=20000.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…


Fold: 3
---
Training data:
Number of users = 943
Number of items = 1651
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 940
Number of items = 1421
Number of ratings = 20000
Number of unknown users = 0
Number of unknown items = 31
---
Validation data:
Number of users = 940
Number of items = 1421
Number of ratings = 20000
---
Total users = 943
Total items = 1682

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=20000.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=940.0, style=ProgressStyle(description_widt…


Fold: 4
---
Training data:
Number of users = 943
Number of items = 1656
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 943
Number of items = 1423
Number of ratings = 20000
Number of unknown users = 0
Number of unknown items = 26
---
Validation data:
Number of users = 943
Number of items = 1423
Number of ratings = 20000
---
Total users = 943
Total items = 1682

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=20000.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=943.0, style=ProgressStyle(description_widt…


Fold: 5
---
Training data:
Number of users = 943
Number of items = 1646
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 942
Number of items = 1419
Number of ratings = 20000
Number of unknown users = 0
Number of unknown items = 36
---
Validation data:
Number of users = 942
Number of items = 1419
Number of ratings = 20000
---
Total users = 943
Total items = 1682

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=20000.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=942.0, style=ProgressStyle(description_widt…



TEST:
...
[NeuMF]
       |    MAE |   RMSE | NDCG@10 | NDCG@5 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------ + ------- + ------ + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 2.5988 | 2.7876 |  0.1299 | 0.1245 |       0.1068 |      0.1152 |    0.0996 |   0.0573 |   51.1113 |  12.0869
Fold 1 | 2.6018 | 2.7943 |  0.1320 | 0.1224 |       0.1090 |      0.1165 |    0.1061 |   0.0585 |   51.2636 |  13.5429
Fold 2 | 2.5886 | 2.7787 |  0.1285 | 0.1202 |       0.1030 |      0.1115 |    0.1037 |   0.0572 |   51.7263 |  12.1934
Fold 3 | 2.5774 | 2.7714 |  0.1393 | 0.1342 |       0.1146 |      0.1248 |    0.1057 |   0.0627 |   51.2672 |  13.2277
Fold 4 | 2.5876 | 2.7770 |  0.1315 | 0.1234 |       0.1066 |      0.1157 |    0.1043 |   0.0587 |   51.7993 |  12.0925
------ + ------ + ------ + ------- + ------ + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 2.5908 | 2.7818 |  

In [12]:
# RUN 5-fold cross validation on PDA
pda_ncf = cornac.Experiment(
    eval_method=pda_cv,
    models=[ncf_model],    
    metrics=[mae, rmse, ndcg_5, ndcg_10, pre_5, pre_10, rec_5, rec_10],
)
pda_ncf.run()

Fold: 1
---
Training data:
Number of users = 5679
Number of items = 1823
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5324
Number of items = 1781
Number of ratings = 94142
Number of unknown users = 11
Number of unknown items = 1
---
Validation data:
Number of users = 5324
Number of items = 1781
Number of ratings = 94142
---
Total users = 5690
Total items = 1824

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=94142.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5324.0, style=ProgressStyle(description_wid…


Fold: 2
---
Training data:
Number of users = 5685
Number of items = 1821
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5299
Number of items = 1794
Number of ratings = 94142
Number of unknown users = 5
Number of unknown items = 3
---
Validation data:
Number of users = 5299
Number of items = 1794
Number of ratings = 94142
---
Total users = 5690
Total items = 1824

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=94142.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5299.0, style=ProgressStyle(description_wid…


Fold: 3
---
Training data:
Number of users = 5684
Number of items = 1823
Number of ratings = 376568
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5334
Number of items = 1799
Number of ratings = 94143
Number of unknown users = 6
Number of unknown items = 1
---
Validation data:
Number of users = 5334
Number of items = 1799
Number of ratings = 94143
---
Total users = 5690
Total items = 1824

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=94143.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5334.0, style=ProgressStyle(description_wid…


Fold: 4
---
Training data:
Number of users = 5684
Number of items = 1822
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5309
Number of items = 1789
Number of ratings = 94142
Number of unknown users = 6
Number of unknown items = 2
---
Validation data:
Number of users = 5309
Number of items = 1789
Number of ratings = 94142
---
Total users = 5690
Total items = 1824

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=94142.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5309.0, style=ProgressStyle(description_wid…


Fold: 5
---
Training data:
Number of users = 5682
Number of items = 1820
Number of ratings = 376569
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
---
Test data:
Number of users = 5289
Number of items = 1803
Number of ratings = 94142
Number of unknown users = 8
Number of unknown items = 4
---
Validation data:
Number of users = 5289
Number of items = 1803
Number of ratings = 94142
---
Total users = 5690
Total items = 1824

[NeuMF] Training started!


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[NeuMF] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=94142.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=5289.0, style=ProgressStyle(description_wid…



TEST:
...
[NeuMF]
       |    MAE |   RMSE | NDCG@10 | NDCG@5 | Precision@10 | Precision@5 | Recall@10 | Recall@5 | Train (s) | Test (s)
------ + ------ + ------ + ------- + ------ + ------------ + ----------- + --------- + -------- + --------- + --------
Fold 0 | 2.8236 | 2.9691 |  0.1047 | 0.0987 |       0.0827 |      0.0909 |    0.0877 |   0.0487 |  251.7973 |  58.7208
Fold 1 | 2.8269 | 2.9736 |  0.1039 | 0.0969 |       0.0823 |      0.0882 |    0.0855 |   0.0472 |  248.8930 |  58.7856
Fold 2 | 2.8348 | 2.9775 |  0.1069 | 0.0990 |       0.0835 |      0.0890 |    0.0891 |   0.0499 |  247.6447 |  59.7625
Fold 3 | 2.8301 | 2.9760 |  0.1007 | 0.0945 |       0.0807 |      0.0884 |    0.0843 |   0.0471 |  248.2407 |  58.5994
Fold 4 | 2.8279 | 2.9756 |  0.1014 | 0.0946 |       0.0812 |      0.0864 |    0.0823 |   0.0464 |  247.5853 |  64.0015
------ + ------ + ------ + ------- + ------ + ------------ + ----------- + --------- + -------- + --------- + --------
Mean   | 2.8286 | 2.9744 |  

# Results

In [13]:
print("ML100 Results")
for entry in ml_ncf.result:
    results_dict = entry[0].metric_avg_results
    ml_results_temp_df = pd.DataFrame(results_dict, index=['ML100-NCF' for key in results_dict.keys()])
    ml_results_df = ml_results_temp_df.iloc[0, :].T
    print(ml_results_df)

ML100 Results
MAE              2.598765
RMSE             2.787610
NDCG@10          0.129924
NDCG@5           0.124456
Precision@10     0.106801
Precision@5      0.115197
Recall@10        0.099639
Recall@5         0.057339
Train (s)       51.111275
Test (s)        12.086863
Name: ML100-NCF, dtype: float64


In [14]:
print("PDA2018 Results")
for entry in pda_ncf.result:
    results_dict = entry[0].metric_avg_results
    pda_results_temp_df = pd.DataFrame(results_dict, index=['PDA-NCF' for key in results_dict.keys()])
    pda_results_df = pda_results_temp_df.iloc[0, :].T
    print(pda_results_df)

PDA2018 Results
MAE               2.823568
RMSE              2.969105
NDCG@10           0.104685
NDCG@5            0.098732
Precision@10      0.082688
Precision@5       0.090919
Recall@10         0.087711
Recall@5          0.048714
Train (s)       251.797252
Test (s)         58.720833
Name: PDA-NCF, dtype: float64


In [15]:
# Export data
final_results_df = ml_results_df.append(pda_results_df)
final_results_df.to_csv("../data/ncf_results.csv")