In [2]:
import pandas as pd
import numpy as np
import sqlite3
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
from tqdm import tqdm

import cornac
from cornac.data import Reader
from cornac.eval_methods import BaseMethod, RatioSplit
from cornac.models import Recommender, BPR, WMF

# Notebook-wide setup: IPython magics, a version report, and shared constants.
# autoreload mode 2 picks up edits to imported modules without a kernel restart.
%load_ext autoreload
%autoreload 2
%matplotlib inline
# %tensorflow_version 1.x  (left disabled; presumably a Colab-only magic — confirm before enabling)
import tensorflow as tf

# Print library versions so results can be tied to a specific environment.
print(f"Cornac version: {cornac.__version__}")
print(f"Tensorflow version: {tf.__version__}")


# SEED is passed to BaseMethod.from_splits and to the BPR / WMF models in later cells.
SEED = 2020
# VERBOSE is defined here but not referenced by any cell visible in this notebook.
VERBOSE = False

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Cornac version: 1.7.1
Tensorflow version: 1.15.0


In [75]:
# Directory holding the interaction files. Edit this single constant to run
# the notebook against a different location.
DATA_DIR = 'D:/CS608 Project'

# One Reader is reused for all three files. Each file is read as tab-separated
# with its first line skipped (skip_lines=1).
reader = Reader()

# Full interaction data; split into train/validation/test by RatioSplit later on.
df = reader.read(f'{DATA_DIR}/filtered_triplets.csv', sep="\t", skip_lines=1)

# Pre-made train/test files; consumed by BaseMethod.from_splits later on.
train_data = reader.read(f'{DATA_DIR}/triplets_train.csv', sep="\t", skip_lines=1)
test_data = reader.read(f'{DATA_DIR}/triplets_test.csv', sep="\t", skip_lines=1)

In [86]:
# Metrics reported by every experiment in this notebook. All top-k metrics
# share the same cutoff, named once here.
TOP_K = 10

eval_metrics = [
    cornac.metrics.AUC(),
    cornac.metrics.Precision(k=TOP_K),
    cornac.metrics.Recall(k=TOP_K),
    cornac.metrics.FMeasure(k=TOP_K),
    cornac.metrics.NDCG(k=[TOP_K]),
    cornac.metrics.NCRR(k=[TOP_K]),
    cornac.metrics.MRR(),
    cornac.metrics.MAP(),
]

## Implicit Feedback Model

### 1. BPR

In [87]:
from cornac.hyperopt import Continuous, Discrete, GridSearch, RandomSearch

# Base BPR model; GridSearch varies k, learning_rate and lambda_reg per trial.
bpr = BPR(max_iter=100, verbose=True, seed=SEED)

# Random 80/10/10 train/validation/test split of the full interaction data.
# seed=SEED pins the split so it is identical after a kernel restart; without
# it every run draws a different split and the reported numbers are not
# reproducible. Outputs captured before this change came from an unseeded
# split, so re-running may shift the metrics slightly.
# NOTE(review): `early_stop` does not look like a documented RatioSplit
# argument and is presumably ignored — confirm and drop it if so.
rs = RatioSplit(
    data=df,
    test_size=0.1,
    val_size=0.1,
    early_stop=True,
    seed=SEED,
    verbose=True,
)

# Model selection criterion for the BPR search.
recall_10 = cornac.metrics.Recall(k=10)

# Wrap the BPR model inside GridSearch along with the search space
# (4 x 2 x 2 = 16 configurations).
gs_bpr = GridSearch(
    model=bpr,
    space=[
        Discrete("k", [20, 60, 100, 120]),
        Discrete("learning_rate", [1e-4, 1e-3]),
        Discrete("lambda_reg", [1e-2, 1e-3]),
    ],
    metric=recall_10,
    eval_method=rs,
)


rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 41943
Number of items = 164682
Number of ratings = 1537968
Max rating = 150.0
Min rating = 5.0
Global mean = 11.0
---
Test data:
Number of users = 40987
Number of items = 53903
Number of ratings = 184053
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 41080
Number of items = 53791
Number of ratings = 184415
---
Total users = 41943
Total items = 164682


In [89]:
# Run the BPR grid search under the ratio split and report every metric in
# eval_metrics (user_based=False, as in the original call).
bpr_experiment = cornac.Experiment(
    eval_method=rs,
    models=[gs_bpr],
    metrics=eval_metrics,
    user_based=False,
)
bpr_experiment.run()


[GridSearch_BPR] Training started!
Evaluating: {'k': 20, 'lambda_reg': 0.001, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 20, 'lambda_reg': 0.001, 'learning_rate': 0.001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 20, 'lambda_reg': 0.01, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 20, 'lambda_reg': 0.01, 'learning_rate': 0.001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 60, 'lambda_reg': 0.001, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 60, 'lambda_reg': 0.001, 'learning_rate': 0.001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 60, 'lambda_reg': 0.01, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 60, 'lambda_reg': 0.01, 'learning_rate': 0.001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.001, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.001, 'learning_rate': 0.001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.01, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.01, 'learning_rate': 0.001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 120, 'lambda_reg': 0.001, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 120, 'lambda_reg': 0.001, 'learning_rate': 0.001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 120, 'lambda_reg': 0.01, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 120, 'lambda_reg': 0.01, 'learning_rate': 0.001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Best parameter settings: {'k': 20, 'lambda_reg': 0.001, 'learning_rate': 0.0001}
Recall@10 = 0.0196

[GridSearch_BPR] Evaluation started!


HBox(children=(IntProgress(value=0, description='Ranking', max=40987, style=ProgressStyle(description_width='i…




HBox(children=(IntProgress(value=0, description='Ranking', max=41080, style=ProgressStyle(description_width='i…



VALIDATION:
...
               |    AUC |  F1@10 |    MAP |    MRR | NCRR@10 | NDCG@10 | Precision@10 | Recall@10 |  Time (s)
-------------- + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + ---------
GridSearch_BPR | 0.8833 | 0.0112 | 0.0110 | 0.0324 |  0.0126 |  0.0148 |       0.0085 |    0.0196 | 4815.5898

TEST:
...
               |    AUC |  F1@10 |    MAP |    MRR | NCRR@10 | NDCG@10 | Precision@10 | Recall@10 |  Train (s) |  Test (s)
-------------- + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + ---------- + ---------
GridSearch_BPR | 0.8824 | 0.0108 | 0.0107 | 0.0318 |  0.0122 |  0.0143 |       0.0082 |    0.0192 | 26902.6740 | 4721.2327



In [90]:
# Second, narrower BPR sweep: small latent dimensions, a single learning rate,
# and two regularisation strengths (3 x 1 x 2 = 6 configurations).
bpr_space_2 = [
    Discrete("k", [10, 20, 30]),
    Discrete('learning_rate', [1e-4]),
    Discrete("lambda_reg", [1e-4, 1e-3]),
]

# Same base model, selection metric and split as the first search.
gs_bpr_2 = GridSearch(model=bpr, space=bpr_space_2, metric=recall_10, eval_method=rs)

bpr_experiment_2 = cornac.Experiment(
    eval_method=rs,
    models=[gs_bpr_2],
    metrics=eval_metrics,
    user_based=False,
)
bpr_experiment_2.run()


[GridSearch_BPR] Training started!
Evaluating: {'k': 10, 'lambda_reg': 0.0001, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.001, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 20, 'lambda_reg': 0.0001, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 20, 'lambda_reg': 0.001, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 30, 'lambda_reg': 0.0001, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Evaluating: {'k': 30, 'lambda_reg': 0.001, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))


Optimization finished!
Best parameter settings: {'k': 10, 'lambda_reg': 0.0001, 'learning_rate': 0.0001}
Recall@10 = 0.0196

[GridSearch_BPR] Evaluation started!


HBox(children=(IntProgress(value=0, description='Ranking', max=40987, style=ProgressStyle(description_width='i…




HBox(children=(IntProgress(value=0, description='Ranking', max=41080, style=ProgressStyle(description_width='i…



VALIDATION:
...
               |    AUC |  F1@10 |    MAP |    MRR | NCRR@10 | NDCG@10 | Precision@10 | Recall@10 |  Time (s)
-------------- + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + ---------
GridSearch_BPR | 0.8823 | 0.0112 | 0.0110 | 0.0324 |  0.0126 |  0.0148 |       0.0085 |    0.0196 | 5009.8855

TEST:
...
               |    AUC |  F1@10 |    MAP |    MRR | NCRR@10 | NDCG@10 | Precision@10 | Recall@10 | Train (s) |  Test (s)
-------------- + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + ---------
GridSearch_BPR | 0.8817 | 0.0108 | 0.0107 | 0.0318 |  0.0122 |  0.0143 |       0.0082 |    0.0192 | 9495.3565 | 5076.5292



In [None]:
# Evaluate a fixed-hyperparameter BPR on the pre-made train/test files
# instead of the random ratio split.
base_method = BaseMethod.from_splits(
    train_data, test_data, exclude_unknowns=True, seed=SEED, verbose=True
)

# NOTE: this rebinds `bpr`, the name the grid-search cells pass as their base
# model. Re-running those cells after this one would search around this
# 200-iteration model instead of the original 100-iteration one.
bpr = BPR(
    k=20,
    max_iter=200,
    learning_rate=0.0001,
    lambda_reg=0.0001,
    verbose=True,
    seed=SEED,
    name="song_BPR",  # plain literal: the f-prefix had no placeholders
)

cornac.Experiment(eval_method=base_method, models=[bpr], metrics=eval_metrics).run()

### 2. WMF

In [100]:
from cornac.hyperopt import Continuous, Discrete, GridSearch, RandomSearch

# Base WMF model; GridSearch varies k, learning_rate and b per trial.
# The seed comes from the shared SEED constant, matching the other models in
# this notebook, rather than a repeated literal.
wmf = WMF(a=1.0, lambda_u=0.001, lambda_v=0.001, max_iter=100, verbose=True, seed=SEED)

# Model selection criterion for the WMF search (the BPR search used Recall@10).
auc = cornac.metrics.AUC()

# Wrap the WMF model inside GridSearch along with the search space
# (3 x 2 x 2 = 12 configurations), evaluated on the ratio split `rs`.
gs_wmf = GridSearch(
    model=wmf,
    space=[
        Discrete("k", [20, 40, 60]),
        Discrete("learning_rate", [1e-4, 1e-3]),
        Discrete("b", [1e-2, 1e-3]),
    ],
    metric=auc,
    eval_method=rs,
)

In [101]:
# Run the WMF grid search under the ratio split and report every metric in
# eval_metrics (user_based=False, as in the original call).
wmf_experiment = cornac.Experiment(
    eval_method=rs,
    models=[gs_wmf],
    metrics=eval_metrics,
    user_based=False,
)
wmf_experiment.run()


[GridSearch_WMF] Training started!
Evaluating: {'b': 0.001, 'k': 20, 'learning_rate': 0.0001}


HBox(children=(IntProgress(value=0), HTML(value='')))

KeyboardInterrupt: 

In [None]:
# Evaluate a fixed-hyperparameter WMF on the pre-made train/test files.
base_method = BaseMethod.from_splits(
    train_data,
    test_data,
    exclude_unknowns=True,
    seed=SEED,
    verbose=True,
)

# Hyperparameters are set by hand here; this cell does not use the grid search.
wmf = WMF(
    k=20,
    max_iter=200,
    a=1.0,
    b=0.01,
    learning_rate=0.001,
    lambda_u=0.01,
    lambda_v=0.01,
    verbose=True,
    seed=SEED,
)

wmf_fixed_experiment = cornac.Experiment(
    eval_method=base_method,
    models=[wmf],
    metrics=eval_metrics,
)
wmf_fixed_experiment.run()

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 41943
Number of items = 153590
Number of ratings = 1288050
Max rating = 150.0
Min rating = 5.0
Global mean = 11.0
---
Test data:
Number of users = 41943
Number of items = 87576
Number of ratings = 603916
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 41943
Total items = 153590

[WMF] Training started!


HBox(children=(IntProgress(value=0, max=200), HTML(value='')))