In [None]:
import os
import sys
sys.path.append("..")
from nbr.preparation import Preprocess, save_split, Corpus
from nbr.trainer import NBRTrainer
from nbr.model import BPR, SLRC, NBRKNN
import torch
import random
import numpy as np
import optuna
import warnings
warnings.filterwarnings("ignore")

# TaFeng

Fix seed:

In [None]:
# Seed every random number generator used by the experiments (PyTorch,
# Python's `random`, NumPy) with one shared value.
seed = 10
for seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    seed_rng(seed)

Read the interaction data, filtering out users with fewer than 5 transactions, users with a high purchase frequency, one-day users, and items with fewer than 10 transactions. The train dataset contains all baskets except the last two, the validation dataset is the second-to-last basket, and the test dataset is the last basket:

In [None]:
# Location of the raw data and the name of the dataset to process.
corpus_path = "./data/"
dataset_name = "ta_feng"

# NOTE(review): the two positional arguments (5, 10) presumably are the minimum
# transaction counts for users and items described in the text above -- confirm
# against Preprocess.load_data.
preprocessor = Preprocess(corpus_path, dataset_name)
preprocessor.load_data(5, 10, filt=True)
# Write the processed dataset to disk (see the "Saving dataset in ..." log line).
save_split(corpus_path, dataset_name, preprocessor)

Before preprocessing: #users = 32266, #items = 23812, #clicks = 817741 (#illegal records = 0)
After preprocessing: #users = 7358, #items = 11202, #clicks = 368951
Saving dataset in ./data//data_ta_feng/...


In [None]:
# Load the dataset identified by (corpus_path, dataset_name). The cells below
# read n_users, n_items, n_clicks and total_avg_interval from this object.
corpus = Corpus(corpus_path, dataset_name)
corpus.load_data()

Train SLRC model with best hyperparams (see ./testing_slrc.ipynb):

In [None]:
# Trainer used to fit SLRC: at most 20 epochs, with top-10 recommendations.
# NOTE(review): early_stop_num=3 presumably stops training after 3 epochs
# without improvement on the dev split -- confirm against NBRTrainer.
trainer = NBRTrainer(
    corpus=corpus,
    max_epochs=20,
    topk=10,
    early_stop_num=3
)

train dataset preparing...


100%|██████████| 7358/7358 [00:11<00:00, 638.75it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4386.31it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:02<00:00, 2830.84it/s]


In [None]:
# Best SLRC hyperparameters for this dataset (tuned in ./testing_slrc.ipynb).
slrc_best_params = {'emb_size': 64, 'batch_size': 64, 'lr': 0.0006142297613045982, 'l2_reg_coef': 0.0047331742711911855}

# Configuration forwarded to the BPR base model that SLRC wraps.
bpr_config = {
    "emb_size": slrc_best_params["emb_size"],
    "user_num": corpus.n_users,
    "item_num": corpus.n_items,
    "click_num": corpus.n_clicks,
}
slrc_model = SLRC(
    base_model_class=BPR,
    base_model_config=bpr_config,
    item_num=corpus.n_items,
    avg_repeat_interval=corpus.total_avg_interval,
)

# Model plus optimisation settings, in the form the trainer expects.
params = {"model": slrc_model}
for param_name in ("batch_size", "lr", "l2_reg_coef"):
    params[param_name] = slrc_best_params[param_name]

trainer.init_hyperparams(**params)

In [None]:
# Fit SLRC; dev-set precision / recall / NDCG are logged after every epoch.
trainer.train()

Epoch 1:


Batch loss = 0.648382: 100%|██████████| 4445/4445 [00:46<00:00, 95.77it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:18<00:00, 94.15it/s] 


 {'precision': 0.05296996058175887, 'recall': 0.11711213174337326, 'ndcg': 0.10312238567113029}
Epoch 2:



Batch loss = 0.620238: 100%|██████████| 4445/4445 [00:44<00:00, 99.37it/s] 


Evaluation (dev):



100%|██████████| 7357/7357 [01:19<00:00, 92.87it/s]


 {'precision': 0.055008835123012106, 'recall': 0.12688101431125864, 'ndcg': 0.10693213499769577}





Epoch 3:


Batch loss = 0.516222: 100%|██████████| 4445/4445 [00:44<00:00, 99.62it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:15<00:00, 97.55it/s] 


 {'precision': 0.05620497485388066, 'recall': 0.13271546807521978, 'ndcg': 0.1095805846360861}
Epoch 4:



Batch loss = 0.485183: 100%|██████████| 4445/4445 [00:44<00:00, 100.69it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 95.05it/s]


 {'precision': 0.05596030990893027, 'recall': 0.13146033295519385, 'ndcg': 0.10894357560113532}
Epoch 5:



Batch loss = 0.424968: 100%|██████████| 4445/4445 [00:44<00:00, 100.76it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:14<00:00, 98.15it/s] 


 {'precision': 0.055892347424221826, 'recall': 0.1296131429002266, 'ndcg': 0.10869963177684373}
Epoch 6:



Batch loss = 0.407367: 100%|██████████| 4445/4445 [00:44<00:00, 99.59it/s] 


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 95.38it/s] 


 {'precision': 0.05601467989669703, 'recall': 0.12955432394369182, 'ndcg': 0.10851308820929444}





SLRC(
  (base_model): BPR(
    (user_emb): Embedding(7358, 64)
    (item_emb): Embedding(11202, 64)
  )
)

Save the users' embeddings for the validation step (a user's embedding for the validation step is their vector of recommendation scores over all items at validation time):

In [None]:
# Predictions of the trained SLRC model on the dev split; passed to NBRKNN
# below as `user_emb`.
dev_user_emb = trainer.get_predictions(mode="dev")

100%|██████████| 7357/7357 [01:27<00:00, 84.54it/s] 


Tune KNN hyperparams on validation dataset:

In [None]:
# Fresh trainer for the KNN stage. It is only used through
# init_hyperparams() / evaluate() below, so max_epochs and early_stop_num
# are left as None.
trainer = NBRTrainer(
    corpus=corpus,
    max_epochs=None,
    topk=10,
    early_stop_num=None
)

train dataset preparing...


100%|██████████| 7358/7358 [00:11<00:00, 645.05it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4430.63it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 3869.20it/s]


In [None]:
def objective(trial):
    """Optuna objective: dev-set NDCG of an NBRKNN built from sampled hyperparameters.

    Samples `nearest_neighbors_num` (integer in [1, 200]) and `alpha`
    (0.0 to 1.0 in steps of 0.05), evaluates the resulting model on the dev
    split with the notebook-level `trainer`, and returns the "ndcg" metric.
    Relies on the notebook-level `corpus`, `trainer` and `dev_user_emb`.
    """
    # Sample in a fixed order (neighbours first, then alpha).
    neighbors_num = trial.suggest_int("nearest_neighbors_num", low=1, high=200)
    alpha_value = trial.suggest_float("alpha", 0.0, 1.0, step=0.05)

    knn_model = NBRKNN(
        item_num=corpus.n_items,
        user_num=corpus.n_users,
        nearest_neighbors_num=neighbors_num,
        alpha=alpha_value,
        user_emb=dev_user_emb,
    )

    trainer.init_hyperparams(model=knn_model)
    return trainer.evaluate(mode="dev")["ndcg"]

In [None]:
# Seeded TPE sampler; the study maximises the objective (dev-set NDCG)
# over 25 trials. The best values are read later from study.best_params.
sampler = optuna.samplers.TPESampler(seed=seed)
study = optuna.create_study(direction="maximize", sampler=sampler)
study.optimize(objective, n_trials=25)

[32m[I 2023-04-23 17:16:00,621][0m A new study created in memory with name: no-name-81d739ff-b480-460a-bfa0-1ef674af2e74[0m
100%|██████████| 7357/7357 [06:17<00:00, 19.47it/s]
[32m[I 2023-04-23 17:22:23,884][0m Trial 0 finished with value: 0.0653908618873418 and parameters: {'nearest_neighbors_num': 155, 'alpha': 0.0}. Best is trial 0 with value: 0.0653908618873418.[0m
100%|██████████| 7357/7357 [06:11<00:00, 19.79it/s]
[32m[I 2023-04-23 17:28:39,754][0m Trial 1 finished with value: 0.11081685513222486 and parameters: {'nearest_neighbors_num': 127, 'alpha': 0.75}. Best is trial 1 with value: 0.11081685513222486.[0m
100%|██████████| 7357/7357 [06:46<00:00, 18.10it/s]
[32m[I 2023-04-23 17:35:31,426][0m Trial 2 finished with value: 0.09549438245097476 and parameters: {'nearest_neighbors_num': 100, 'alpha': 0.2}. Best is trial 1 with value: 0.11081685513222486.[0m
100%|██████████| 7357/7357 [06:38<00:00, 18.45it/s]
[32m[I 2023-04-23 17:42:14,985][0m Trial 3 finished with valu

Test SLRCKNN (calculate scores for different seeds):

In [None]:
# One list per metric; every seed evaluated below appends its test score.
test_metrics = {metric_name: [] for metric_name in ("precision", "recall", "ndcg")}

In [None]:
# Re-train SLRC and evaluate the KNN model on the test split for several seeds.
# Loop-local names (`run_seed`, `seed_dev_user_emb`, `seed_test_user_emb`) are
# used so that the notebook-level `seed` and `dev_user_emb`, which the Optuna
# cells read, are not overwritten as a side effect of running this cell.
for run_seed in range(5):
    print(f"\n___SEED___{run_seed}")
    torch.manual_seed(run_seed)
    random.seed(run_seed)
    np.random.seed(run_seed)

    # Stage 1: train SLRC with the tuned hyperparameters.
    trainer = NBRTrainer(
        corpus=corpus,
        max_epochs=20,
        topk=10,
        early_stop_num=3
    )

    params = {
        "model": SLRC(
            base_model_class=BPR,
            base_model_config={
                "emb_size": slrc_best_params["emb_size"],
                "user_num": corpus.n_users,
                "item_num": corpus.n_items,
                "click_num": corpus.n_clicks
            },
            item_num=corpus.n_items,
            avg_repeat_interval=corpus.total_avg_interval
        ),
        "batch_size": slrc_best_params["batch_size"],
        "lr": slrc_best_params["lr"],
        "l2_reg_coef": slrc_best_params["l2_reg_coef"]
    }

    trainer.init_hyperparams(**params)
    trainer.train()

    # Predictions of this seed's SLRC model on the dev and test splits.
    seed_dev_user_emb = trainer.get_predictions(mode="dev")
    seed_test_user_emb = trainer.get_predictions(mode="test")

    # Stage 2: evaluate NBRKNN with the hyperparameters chosen by the study.
    trainer = NBRTrainer(
        corpus=corpus,
        max_epochs=None,
        topk=10,
        early_stop_num=None
    )

    params = {
        "model": NBRKNN(
            item_num=corpus.n_items,
            user_num=corpus.n_users,
            nearest_neighbors_num=study.best_params["nearest_neighbors_num"],
            alpha=study.best_params["alpha"],
            user_emb=seed_dev_user_emb
        )
    }
    # NOTE(review): the model is built with the dev embeddings and then handed
    # the test embeddings through set_emb -- confirm in NBRKNN how each is used.
    params["model"].set_emb(seed_test_user_emb)

    trainer.init_hyperparams(**params)

    metrics = trainer.evaluate(mode="test")

    for metric_name in ("precision", "recall", "ndcg"):
        test_metrics[metric_name].append(metrics[metric_name])
    # Show the scores accumulated so far (matches the recorded cell output).
    print(test_metrics)


___SEED___0
train dataset preparing...


100%|██████████| 7358/7358 [00:10<00:00, 679.69it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:02<00:00, 2826.54it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 3977.13it/s]


Epoch 1:


Batch loss = 0.647844: 100%|██████████| 4445/4445 [00:50<00:00, 87.92it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:19<00:00, 92.70it/s] 


 {'precision': 0.05306510806035069, 'recall': 0.11760657494409904, 'ndcg': 0.10338726165353773}
Epoch 2:



Batch loss = 0.613529: 100%|██████████| 4445/4445 [00:47<00:00, 93.57it/s] 


Evaluation (dev):



100%|██████████| 7357/7357 [01:18<00:00, 93.96it/s] 


 {'precision': 0.055280685061845865, 'recall': 0.12787036008393035, 'ndcg': 0.10752063171562534}
Epoch 3:



Batch loss = 0.484176: 100%|██████████| 4445/4445 [00:48<00:00, 92.16it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:20<00:00, 91.66it/s] 


 {'precision': 0.05630012233247248, 'recall': 0.13254850400181878, 'ndcg': 0.1095571150683061}
Epoch 4:



Batch loss = 0.412783: 100%|██████████| 4445/4445 [00:47<00:00, 93.04it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:18<00:00, 93.97it/s] 


 {'precision': 0.056069049884463776, 'recall': 0.13089943342672652, 'ndcg': 0.10860200062750996}
Epoch 5:



Batch loss = 0.40947: 100%|██████████| 4445/4445 [00:47<00:00, 93.05it/s] 


Evaluation (dev):



100%|██████████| 7357/7357 [01:20<00:00, 91.34it/s] 



 {'precision': 0.055579719994563, 'recall': 0.12869468613279975, 'ndcg': 0.1081829218646605}
Epoch 6:


Batch loss = 0.405149: 100%|██████████| 4445/4445 [00:47<00:00, 93.46it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:18<00:00, 93.92it/s]


 {'precision': 0.05598749490281364, 'recall': 0.13105695275419954, 'ndcg': 0.1091629061070286}



100%|██████████| 7357/7357 [01:18<00:00, 93.68it/s] 
100%|██████████| 7357/7357 [01:18<00:00, 93.98it/s] 


train dataset preparing...


100%|██████████| 7358/7358 [00:10<00:00, 678.91it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:02<00:00, 3540.92it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 3949.13it/s]
100%|██████████| 7357/7357 [04:58<00:00, 24.67it/s]


{'precision': [0.06450999048525215], 'recall': [0.15753115753591798], 'ndcg': [0.12668185595179357]}

___SEED___1
train dataset preparing...


100%|██████████| 7358/7358 [00:11<00:00, 652.38it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4670.75it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:02<00:00, 3349.24it/s]

Epoch 1:



Batch loss = 0.648115: 100%|██████████| 4445/4445 [00:48<00:00, 91.75it/s]



Evaluation (dev):


100%|██████████| 7357/7357 [01:17<00:00, 95.14it/s] 


 {'precision': 0.052983553078700556, 'recall': 0.11730884025233931, 'ndcg': 0.10326253450365228}
Epoch 2:



Batch loss = 0.618138: 100%|██████████| 4445/4445 [00:47<00:00, 92.79it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:21<00:00, 90.49it/s] 


 {'precision': 0.05518553758325404, 'recall': 0.12778208290447507, 'ndcg': 0.10736117864801158}
Epoch 3:



Batch loss = 0.49879: 100%|██████████| 4445/4445 [00:49<00:00, 90.33it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 94.56it/s] 


 {'precision': 0.05597390240587196, 'recall': 0.13212535969929048, 'ndcg': 0.10932099089009956}
Epoch 4:



Batch loss = 0.498291: 100%|██████████| 4445/4445 [00:50<00:00, 88.78it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:18<00:00, 93.30it/s] 


 {'precision': 0.056286529835530785, 'recall': 0.1320072938810533, 'ndcg': 0.10893477085005902}
Epoch 5:



Batch loss = 0.393214: 100%|██████████| 4445/4445 [00:47<00:00, 93.52it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:19<00:00, 92.76it/s]


 {'precision': 0.05566127497621313, 'recall': 0.128561368615953, 'ndcg': 0.10802543395474196}
Epoch 6:



Batch loss = 0.372129: 100%|██████████| 4445/4445 [00:48<00:00, 92.00it/s] 


Evaluation (dev):



100%|██████████| 7357/7357 [01:15<00:00, 97.29it/s] 


 {'precision': 0.05598749490281364, 'recall': 0.12938509283272898, 'ndcg': 0.1089098140710593}



100%|██████████| 7357/7357 [01:19<00:00, 92.89it/s] 
100%|██████████| 7357/7357 [01:15<00:00, 97.58it/s] 


train dataset preparing...


100%|██████████| 7358/7358 [00:11<00:00, 649.58it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4983.80it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4574.46it/s]
100%|██████████| 7357/7357 [04:55<00:00, 24.90it/s]


{'precision': [0.06450999048525215, 0.06457795296996058], 'recall': [0.15753115753591798, 0.15716937136322623], 'ndcg': [0.12668185595179357, 0.12680662331986717]}

___SEED___2
train dataset preparing...


100%|██████████| 7358/7358 [00:11<00:00, 644.42it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4832.44it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4173.96it/s]


Epoch 1:


Batch loss = 0.64851: 100%|██████████| 4445/4445 [00:48<00:00, 91.08it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 94.36it/s] 


 {'precision': 0.052942775587875496, 'recall': 0.11726026876172108, 'ndcg': 0.10313874782031339}
Epoch 2:



Batch loss = 0.613944: 100%|██████████| 4445/4445 [00:48<00:00, 91.04it/s]



Evaluation (dev):


100%|██████████| 7357/7357 [01:19<00:00, 92.21it/s] 


 {'precision': 0.0552263150740791, 'recall': 0.12784914315486998, 'ndcg': 0.10745646263383438}
Epoch 3:



Batch loss = 0.515254: 100%|██████████| 4445/4445 [00:47<00:00, 93.19it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 94.78it/s]


 {'precision': 0.05615060486611391, 'recall': 0.1321609177451479, 'ndcg': 0.10964041700697584}
Epoch 4:



Batch loss = 0.40877: 100%|██████████| 4445/4445 [00:48<00:00, 92.39it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:18<00:00, 94.31it/s] 


 {'precision': 0.05593312491504689, 'recall': 0.12980271658558626, 'ndcg': 0.10834286228903398}
Epoch 5:



Batch loss = 0.396547: 100%|██████████| 4445/4445 [00:48<00:00, 92.16it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:16<00:00, 96.21it/s] 


 {'precision': 0.056082642381405465, 'recall': 0.13033290741866962, 'ndcg': 0.10892434725283648}
Epoch 6:



Batch loss = 0.359622: 100%|██████████| 4445/4445 [00:49<00:00, 89.89it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 94.98it/s]


 {'precision': 0.056259344841647414, 'recall': 0.13130733438514197, 'ndcg': 0.1093480871326376}



100%|██████████| 7357/7357 [01:15<00:00, 97.47it/s] 
100%|██████████| 7357/7357 [01:19<00:00, 92.94it/s] 


train dataset preparing...


100%|██████████| 7358/7358 [00:10<00:00, 675.80it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 3865.39it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4311.82it/s]
100%|██████████| 7357/7357 [04:57<00:00, 24.71it/s]


{'precision': [0.06450999048525215, 0.06457795296996058, 0.06449639798831046], 'recall': [0.15753115753591798, 0.15716937136322623, 0.15741345312343014], 'ndcg': [0.12668185595179357, 0.12680662331986717, 0.12738603062291842]}

___SEED___3
train dataset preparing...


100%|██████████| 7358/7358 [00:10<00:00, 709.86it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:02<00:00, 3114.30it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4281.86it/s]

Epoch 1:



Batch loss = 0.648213: 100%|██████████| 4445/4445 [00:49<00:00, 90.30it/s] 


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 94.58it/s]


 {'precision': 0.05292918309093381, 'recall': 0.11722428625684035, 'ndcg': 0.10334206215700821}
Epoch 2:



Batch loss = 0.613215: 100%|██████████| 4445/4445 [00:48<00:00, 91.89it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 94.38it/s]


 {'precision': 0.055307870055729236, 'recall': 0.12817648204870014, 'ndcg': 0.10749804528714656}
Epoch 3:



Batch loss = 0.522081: 100%|██████████| 4445/4445 [00:48<00:00, 91.74it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:16<00:00, 96.58it/s] 


 {'precision': 0.056096234878347154, 'recall': 0.1320079945827257, 'ndcg': 0.10941848721221557}
Epoch 4:



Batch loss = 0.455008: 100%|██████████| 4445/4445 [00:48<00:00, 91.79it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:19<00:00, 92.09it/s] 


 {'precision': 0.05605545738752209, 'recall': 0.13052547489044342, 'ndcg': 0.10846417290285777}
Epoch 5:



Batch loss = 0.462366: 100%|██████████| 4445/4445 [00:48<00:00, 90.80it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:18<00:00, 93.83it/s] 


 {'precision': 0.05559331249150469, 'recall': 0.12858402374791061, 'ndcg': 0.10827293938903636}
Epoch 6:



Batch loss = 0.455538: 100%|██████████| 4445/4445 [00:49<00:00, 90.23it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:16<00:00, 96.60it/s] 


 {'precision': 0.056449639798831046, 'recall': 0.13171414304460102, 'ndcg': 0.10979318641736421}
Epoch 7:



Batch loss = 0.354771: 100%|██████████| 4445/4445 [00:50<00:00, 87.27it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 94.68it/s] 


 {'precision': 0.056123419872230525, 'recall': 0.13106735390259025, 'ndcg': 0.10969199779528051}
Epoch 8:



Batch loss = 0.431846: 100%|██████████| 4445/4445 [00:49<00:00, 89.61it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 95.02it/s] 


 {'precision': 0.056272937338589096, 'recall': 0.13061539721897414, 'ndcg': 0.10898434919210259}
Epoch 9:



Batch loss = 0.355546: 100%|██████████| 4445/4445 [00:49<00:00, 89.36it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 94.54it/s] 


 {'precision': 0.05598749490281364, 'recall': 0.1302838275905165, 'ndcg': 0.10907176493139453}



100%|██████████| 7357/7357 [01:15<00:00, 97.29it/s] 
100%|██████████| 7357/7357 [01:19<00:00, 92.43it/s] 


train dataset preparing...


100%|██████████| 7358/7358 [00:11<00:00, 647.55it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4879.01it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4177.82it/s]
100%|██████████| 7357/7357 [05:00<00:00, 24.47it/s]


{'precision': [0.06450999048525215, 0.06457795296996058, 0.06449639798831046, 0.06414299306782656], 'recall': [0.15753115753591798, 0.15716937136322623, 0.15741345312343014, 0.1542435238541085], 'ndcg': [0.12668185595179357, 0.12680662331986717, 0.12738603062291842, 0.12652970898499827]}

___SEED___4
train dataset preparing...


100%|██████████| 7358/7358 [00:11<00:00, 639.56it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4912.18it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4187.43it/s]

Epoch 1:



Batch loss = 0.648428: 100%|██████████| 4445/4445 [00:50<00:00, 88.35it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:18<00:00, 93.13it/s] 


 {'precision': 0.05283403561234199, 'recall': 0.11688602227934589, 'ndcg': 0.10315941097580893}
Epoch 2:



Batch loss = 0.617539: 100%|██████████| 4445/4445 [00:49<00:00, 89.81it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:19<00:00, 92.08it/s] 


 {'precision': 0.055280685061845865, 'recall': 0.12808553041197648, 'ndcg': 0.1075739078236309}
Epoch 3:



Batch loss = 0.526964: 100%|██████████| 4445/4445 [00:50<00:00, 88.85it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:17<00:00, 94.66it/s] 



 {'precision': 0.05610982737528884, 'recall': 0.1318116758528083, 'ndcg': 0.10937556278952634}
Epoch 4:


Batch loss = 0.467776: 100%|██████████| 4445/4445 [00:51<00:00, 87.11it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:16<00:00, 95.82it/s] 


 {'precision': 0.05589234742422183, 'recall': 0.13093346527700872, 'ndcg': 0.10873333472870504}
Epoch 5:



Batch loss = 0.472686: 100%|██████████| 4445/4445 [00:49<00:00, 89.56it/s]



Evaluation (dev):


100%|██████████| 7357/7357 [01:21<00:00, 90.67it/s] 


 {'precision': 0.05564768247927144, 'recall': 0.12844266576522984, 'ndcg': 0.10794656347332765}
Epoch 6:



Batch loss = 0.3681: 100%|██████████| 4445/4445 [00:49<00:00, 89.10it/s]


Evaluation (dev):



100%|██████████| 7357/7357 [01:18<00:00, 93.97it/s] 


 {'precision': 0.05593312491504689, 'recall': 0.12913710209741236, 'ndcg': 0.10902443637702317}



100%|██████████| 7357/7357 [01:18<00:00, 94.10it/s] 
100%|██████████| 7357/7357 [01:18<00:00, 93.50it/s] 


train dataset preparing...


100%|██████████| 7358/7358 [00:11<00:00, 640.01it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4902.25it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:01<00:00, 4065.62it/s]
100%|██████████| 7357/7357 [05:02<00:00, 24.28it/s]

{'precision': [0.06450999048525215, 0.06457795296996058, 0.06449639798831046, 0.06414299306782656, 0.06436047301889357], 'recall': [0.15753115753591798, 0.15716937136322623, 0.15741345312343014, 0.1542435238541085, 0.15622437697356328], 'ndcg': [0.12668185595179357, 0.12680662331986717, 0.12738603062291842, 0.12652970898499827, 0.12634257404156424]}





In [None]:
# Average every test metric over the seeds evaluated above.
{
    metric_name: np.array(test_metrics[metric_name]).mean()
    for metric_name in ("precision", "recall", "ndcg")
}

{'precision': 0.06441756150604867,
 'recall': 0.15651637657004921,
 'ndcg': 0.12674935858422834}

# TaoBao

Fix seed:

In [None]:
# Seed every random number generator used by the experiments (PyTorch,
# Python's `random`, NumPy) with one shared value.
seed = 10
for seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    seed_rng(seed)

Read the interaction data, filtering out users with fewer than 10 transactions, users with a high purchase frequency, one-day users, and items with fewer than 10 transactions. The train dataset contains all baskets except the last two, the validation dataset is the second-to-last basket, and the test dataset is the last basket:

In [None]:
# Location of the raw data and the name of the dataset to process.
corpus_path = "./data/"
dataset_name = "taobao"

# NOTE(review): the two positional arguments (10, 10) presumably are the minimum
# transaction counts for users and items described in the text above -- confirm
# against Preprocess.load_data.
preprocessor = Preprocess(corpus_path, dataset_name)
preprocessor.load_data(10, 10, filt=True)
# Write the processed dataset to disk (see the "Saving dataset in ..." log line).
save_split(corpus_path, dataset_name, preprocessor)

Before preprocessing: #users = 672404, #items = 638962, #clicks = 2015807 (#illegal records = 0)
After preprocessing: #users = 10092, #items = 22286, #clicks = 67991
Saving dataset in ./data//data_taobao/...


In [None]:
# Load the dataset identified by (corpus_path, dataset_name). The cells below
# read n_users, n_items, n_clicks and total_avg_interval from this object.
corpus = Corpus(corpus_path, dataset_name)
corpus.load_data()

Train SLRC model with best hyperparams (see ./testing_slrc.ipynb):

In [None]:
# Trainer used to fit SLRC: at most 20 epochs, with top-10 recommendations.
# NOTE(review): early_stop_num=3 presumably stops training after 3 epochs
# without improvement on the dev split -- confirm against NBRTrainer.
trainer = NBRTrainer(
    corpus=corpus,
    max_epochs=20,
    topk=10,
    early_stop_num=3
)

train dataset preparing...


100%|██████████| 10092/10092 [00:52<00:00, 192.79it/s]


dev dataset preparing...


100%|██████████| 9307/9307 [00:00<00:00, 28371.88it/s]


test dataset preparing...


100%|██████████| 9307/9307 [00:00<00:00, 17490.10it/s]


In [None]:
# Best SLRC hyperparameters for this dataset (tuned in ./testing_slrc.ipynb).
slrc_best_params = {'emb_size': 32, 'batch_size': 256, 'lr': 1.0851391597925009e-05, 'l2_reg_coef': 0.03239377807560215}

# Configuration forwarded to the BPR base model that SLRC wraps.
bpr_config = {
    "emb_size": slrc_best_params["emb_size"],
    "user_num": corpus.n_users,
    "item_num": corpus.n_items,
    "click_num": corpus.n_clicks,
}
slrc_model = SLRC(
    base_model_class=BPR,
    base_model_config=bpr_config,
    item_num=corpus.n_items,
    avg_repeat_interval=corpus.total_avg_interval,
)

# Model plus optimisation settings, in the form the trainer expects.
params = {"model": slrc_model}
for param_name in ("batch_size", "lr", "l2_reg_coef"):
    params[param_name] = slrc_best_params[param_name]

trainer.init_hyperparams(**params)

In [None]:
# Fit SLRC; dev-set precision / recall / NDCG are logged after every epoch.
trainer.train()

Epoch 1:


Batch loss = 0.671828: 100%|██████████| 191/191 [00:03<00:00, 57.04it/s]


Evaluation (dev):



100%|██████████| 9307/9307 [02:58<00:00, 52.10it/s]


 {'precision': 0.010443752014612655, 'recall': 0.09904193975860463, 'ndcg': 0.07283291962948076}





Epoch 2:


Batch loss = 0.671776: 100%|██████████| 191/191 [00:03<00:00, 63.08it/s]


Evaluation (dev):



100%|██████████| 9307/9307 [02:54<00:00, 53.42it/s]


 {'precision': 0.010433007413774578, 'recall': 0.09898821675441424, 'ndcg': 0.0728535311003784}
Epoch 3:



Batch loss = 0.671721: 100%|██████████| 191/191 [00:03<00:00, 56.73it/s]


Evaluation (dev):



100%|██████████| 9307/9307 [02:47<00:00, 55.46it/s]


 {'precision': 0.010433007413774578, 'recall': 0.09898821675441424, 'ndcg': 0.07282528539007627}
Epoch 4:



Batch loss = 0.671692: 100%|██████████| 191/191 [00:03<00:00, 56.97it/s]


Evaluation (dev):



100%|██████████| 9307/9307 [02:49<00:00, 55.01it/s]


 {'precision': 0.0104222628129365, 'recall': 0.09888077074603345, 'ndcg': 0.07280322602580534}
Epoch 5:



Batch loss = 0.671657: 100%|██████████| 191/191 [00:02<00:00, 64.91it/s]


Evaluation (dev):



100%|██████████| 9307/9307 [02:49<00:00, 54.78it/s]


 {'precision': 0.0104222628129365, 'recall': 0.09888077074603345, 'ndcg': 0.07288007857110268}
Epoch 6:



Batch loss = 0.671621: 100%|██████████| 191/191 [00:02<00:00, 64.77it/s]


Evaluation (dev):



100%|██████████| 9307/9307 [02:48<00:00, 55.09it/s]


 {'precision': 0.010411518212098422, 'recall': 0.09877332473765266, 'ndcg': 0.07279130981357429}
Epoch 7:



Batch loss = 0.671538: 100%|██████████| 191/191 [00:03<00:00, 62.79it/s]


Evaluation (dev):



100%|██████████| 9307/9307 [02:50<00:00, 54.51it/s]


 {'precision': 0.010411518212098422, 'recall': 0.09877332473765266, 'ndcg': 0.07284312215258729}
Epoch 8:



Batch loss = 0.671486: 100%|██████████| 191/191 [00:03<00:00, 55.48it/s]


Evaluation (dev):



100%|██████████| 9307/9307 [02:48<00:00, 55.36it/s]


 {'precision': 0.010411518212098422, 'recall': 0.09877332473765266, 'ndcg': 0.0728146605521322}





SLRC(
  (base_model): BPR(
    (user_emb): Embedding(10092, 32)
    (item_emb): Embedding(22286, 32)
  )
)

Save the users' embeddings for the validation step (a user's embedding for the validation step is their vector of recommendation scores over all items at validation time):

In [None]:
# Predictions of the trained SLRC model on the dev split; passed to NBRKNN
# below as `user_emb`.
dev_user_emb = trainer.get_predictions(mode="dev")

100%|██████████| 9307/9307 [02:48<00:00, 55.35it/s]


Tune KNN hyperparams on validation dataset:

In [None]:
# Fresh trainer for the KNN stage. It is only used through
# init_hyperparams() / evaluate() below, so max_epochs and early_stop_num
# are left as None.
trainer = NBRTrainer(
    corpus=corpus,
    max_epochs=None,
    topk=10,
    early_stop_num=None
)

train dataset preparing...


100%|██████████| 10092/10092 [00:36<00:00, 279.85it/s]


dev dataset preparing...


100%|██████████| 9307/9307 [00:00<00:00, 31671.81it/s]


test dataset preparing...


100%|██████████| 9307/9307 [00:00<00:00, 24917.03it/s]


In [None]:
def objective(trial):
    """Optuna objective: dev-set NDCG of an NBRKNN built from sampled hyperparameters.

    Search space:
      * nearest_neighbors_num -- 10, 20, ..., 200. The grid starts at 10
        rather than 0 so that a model with zero neighbours is never proposed,
        in line with the other dataset sections, whose lower bound is 1.
      * alpha -- 0.0 to 1.0 in steps of 0.05.

    Relies on the notebook-level `corpus`, `trainer` and `dev_user_emb`.
    Returns the "ndcg" entry of `trainer.evaluate(mode="dev")`.
    """
    params = {
        "model": NBRKNN(
            item_num=corpus.n_items,
            user_num=corpus.n_users,
            # (high - low) must stay a multiple of step: (200 - 10) % 10 == 0.
            nearest_neighbors_num=trial.suggest_int("nearest_neighbors_num", low=10, high=200, step=10),
            alpha=trial.suggest_float("alpha", 0.0, 1.0, step=0.05),
            user_emb=dev_user_emb
        )
    }

    trainer.init_hyperparams(**params)
    metrics = trainer.evaluate(mode="dev")
    score = metrics["ndcg"]
    return score

In [None]:
# Seeded TPE sampler; the study maximises the objective (dev-set NDCG)
# over 25 trials. The best values are read later from study.best_params.
sampler = optuna.samplers.TPESampler(seed=seed)
study = optuna.create_study(direction="maximize", sampler=sampler)
study.optimize(objective, n_trials=25)

[32m[I 2023-04-22 11:10:59,225][0m A new study created in memory with name: no-name-b2391149-ef11-47d7-bee4-7c156b63cb88[0m
100%|██████████| 9307/9307 [21:44<00:00,  7.14it/s]
[32m[I 2023-04-22 11:32:58,461][0m Trial 0 finished with value: 0.05488958889693859 and parameters: {'nearest_neighbors_num': 160, 'alpha': 0.0}. Best is trial 0 with value: 0.05488958889693859.[0m
100%|██████████| 9307/9307 [22:13<00:00,  6.98it/s]
[32m[I 2023-04-22 11:55:32,208][0m Trial 1 finished with value: 0.0739815303147157 and parameters: {'nearest_neighbors_num': 130, 'alpha': 0.75}. Best is trial 1 with value: 0.0739815303147157.[0m
100%|██████████| 9307/9307 [21:59<00:00,  7.05it/s]
[32m[I 2023-04-22 12:17:52,354][0m Trial 2 finished with value: 0.07189376061244337 and parameters: {'nearest_neighbors_num': 100, 'alpha': 0.2}. Best is trial 1 with value: 0.0739815303147157.[0m
100%|██████████| 9307/9307 [21:24<00:00,  7.25it/s]
[32m[I 2023-04-22 12:39:37,152][0m Trial 3 finished with value

Test SLRCKNN (calculate scores for different seeds):

In [None]:
# One list per metric; every seed evaluated below appends its test score.
test_metrics = {metric_name: [] for metric_name in ("precision", "recall", "ndcg")}

In [None]:
# Re-train SLRC and evaluate the KNN model on the test split for several seeds.
# Loop-local names (`run_seed`, `seed_dev_user_emb`, `seed_test_user_emb`) are
# used so that the notebook-level `seed` and `dev_user_emb`, which the Optuna
# cells read, are not overwritten as a side effect of running this cell.
for run_seed in range(2):
    print(f"\n___SEED___{run_seed}")
    torch.manual_seed(run_seed)
    random.seed(run_seed)
    np.random.seed(run_seed)

    # Stage 1: train SLRC with the tuned hyperparameters.
    trainer = NBRTrainer(
        corpus=corpus,
        max_epochs=20,
        topk=10,
        early_stop_num=3
    )

    params = {
        "model": SLRC(
            base_model_class=BPR,
            base_model_config={
                "emb_size": slrc_best_params["emb_size"],
                "user_num": corpus.n_users,
                "item_num": corpus.n_items,
                "click_num": corpus.n_clicks
            },
            item_num=corpus.n_items,
            avg_repeat_interval=corpus.total_avg_interval
        ),
        "batch_size": slrc_best_params["batch_size"],
        "lr": slrc_best_params["lr"],
        "l2_reg_coef": slrc_best_params["l2_reg_coef"]
    }

    trainer.init_hyperparams(**params)
    trainer.train()

    # Predictions of this seed's SLRC model on the dev and test splits.
    seed_dev_user_emb = trainer.get_predictions(mode="dev")
    seed_test_user_emb = trainer.get_predictions(mode="test")

    # Stage 2: evaluate NBRKNN with the hyperparameters chosen by the study.
    trainer = NBRTrainer(
        corpus=corpus,
        max_epochs=None,
        topk=10,
        early_stop_num=None
    )

    params = {
        "model": NBRKNN(
            item_num=corpus.n_items,
            user_num=corpus.n_users,
            nearest_neighbors_num=study.best_params["nearest_neighbors_num"],
            alpha=study.best_params["alpha"],
            user_emb=seed_dev_user_emb
        )
    }
    # NOTE(review): the model is built with the dev embeddings and then handed
    # the test embeddings through set_emb -- confirm in NBRKNN how each is used.
    params["model"].set_emb(seed_test_user_emb)

    trainer.init_hyperparams(**params)

    metrics = trainer.evaluate(mode="test")

    for metric_name in ("precision", "recall", "ndcg"):
        test_metrics[metric_name].append(metrics[metric_name])
    # Show the scores accumulated so far.
    print(test_metrics)

In [None]:
# Average every test metric over the seeds evaluated above.
{
    metric_name: np.array(test_metrics[metric_name]).mean()
    for metric_name in ("precision", "recall", "ndcg")
}

{'precision': 0.012189749650800474,
 'recall': 0.11793094803194727,
 'ndcg': 0.08094238210382983}

# Dunnhumby

Fix seed:

In [None]:
# Seed every random number generator used by the experiments (PyTorch,
# Python's `random`, NumPy) with one shared value.
seed = 10
for seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    seed_rng(seed)

Read the interaction data, filtering out users with fewer than 5 transactions, users with a high purchase frequency, one-day users, and items with fewer than 10 transactions. The train dataset contains all baskets except the last two, the validation dataset is the second-to-last basket, and the test dataset is the last basket:

In [None]:
# Location of the raw data and the name of the dataset to process.
corpus_path = "./data/"
dataset_name = "dunnhumby"

# NOTE(review): the two positional arguments (5, 10) presumably are the minimum
# transaction counts for users and items described in the text above -- confirm
# against Preprocess.load_data.
preprocessor = Preprocess(corpus_path, dataset_name)
preprocessor.load_data(5, 10, filt=True)
# Write the processed dataset to disk (see the "Saving dataset in ..." log line).
save_split(corpus_path, dataset_name, preprocessor)

Before preprocessing: #users = 2500, #items = 92339, #clicks = 2595370 (#illegal records = 0)
After preprocessing: #users = 2358, #items = 26756, #clicks = 1976796
Saving dataset in ./data//data_dunnhumby/...


In [None]:
# Load the dataset identified by (corpus_path, dataset_name). The cells below
# read n_users, n_items, n_clicks and total_avg_interval from this object.
corpus = Corpus(corpus_path, dataset_name)
corpus.load_data()

Train SLRC model with best hyperparams (see ./testing_slrc.ipynb):

In [None]:
# Trainer used to fit SLRC: at most 20 epochs, with top-10 recommendations.
# NOTE(review): early_stop_num=3 presumably stops training after 3 epochs
# without improvement on the dev split -- confirm against NBRTrainer.
trainer = NBRTrainer(
    corpus=corpus,
    max_epochs=20,
    topk=10,
    early_stop_num=3
)

train dataset preparing...


100%|██████████| 2358/2358 [00:10<00:00, 220.61it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:10<00:00, 219.72it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 199.37it/s]


In [None]:
# Best SLRC hyperparameters for this dataset (tuned in ./testing_slrc.ipynb).
slrc_best_params = {'emb_size': 32, 'batch_size': 256, 'lr': 0.0006366285017414498, 'l2_reg_coef': 0.09063752099202302}

# Configuration forwarded to the BPR base model that SLRC wraps.
bpr_config = {
    "emb_size": slrc_best_params["emb_size"],
    "user_num": corpus.n_users,
    "item_num": corpus.n_items,
    "click_num": corpus.n_clicks,
}
slrc_model = SLRC(
    base_model_class=BPR,
    base_model_config=bpr_config,
    item_num=corpus.n_items,
    avg_repeat_interval=corpus.total_avg_interval,
)

# Model plus optimisation settings, in the form the trainer expects.
params = {"model": slrc_model}
for param_name in ("batch_size", "lr", "l2_reg_coef"):
    params[param_name] = slrc_best_params[param_name]

trainer.init_hyperparams(**params)

In [None]:
# Fit SLRC; dev-set precision / recall / NDCG are logged after every epoch.
trainer.train()

Epoch 1:


Batch loss = 0.575686: 100%|██████████| 7530/7530 [02:11<00:00, 57.43it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:15<00:00, 31.39it/s]


 {'precision': 0.11904963937208317, 'recall': 0.182277700424532, 'ndcg': 0.1694145228041231}
Epoch 2:



Batch loss = 0.533279: 100%|██████████| 7530/7530 [02:14<00:00, 56.14it/s]



Evaluation (dev):


100%|██████████| 2357/2357 [01:11<00:00, 33.18it/s]


 {'precision': 0.11714043275350022, 'recall': 0.18073043804384614, 'ndcg': 0.16877583293808682}
Epoch 3:



Batch loss = 0.498366: 100%|██████████| 7530/7530 [02:16<00:00, 55.17it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:12<00:00, 32.48it/s]


 {'precision': 0.11709800593975392, 'recall': 0.18133129790695426, 'ndcg': 0.16794579661060294}
Epoch 4:



Batch loss = 0.459155: 100%|██████████| 7530/7530 [02:16<00:00, 55.09it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:12<00:00, 32.59it/s]


 {'precision': 0.11646160373355961, 'recall': 0.1801680333166432, 'ndcg': 0.16614035622344148}





SLRC(
  (base_model): BPR(
    (user_emb): Embedding(2358, 32)
    (item_emb): Embedding(26756, 32)
  )
)

Save the users' embeddings for the validation step (a user's embedding for the validation step is their vector of recommendation scores over all items at validation time):

In [None]:
# Predictions of the trained SLRC model on the dev split; passed to NBRKNN
# below as `user_emb`.
dev_user_emb = trainer.get_predictions(mode="dev")

100%|██████████| 2357/2357 [01:14<00:00, 31.70it/s]


Tune KNN hyperparams on validation dataset:

In [None]:
# Fresh trainer for the KNN stage. It is only used through
# init_hyperparams() / evaluate() below, so max_epochs and early_stop_num
# are left as None.
trainer = NBRTrainer(
    corpus=corpus,
    max_epochs=None,
    topk=10,
    early_stop_num=None
)

train dataset preparing...


100%|██████████| 2358/2358 [00:10<00:00, 221.38it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:10<00:00, 223.89it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:12<00:00, 195.26it/s]


In [None]:
def objective(trial):
    """Optuna objective: dev-set NDCG of NBRKNN for one sampled configuration.

    Samples `nearest_neighbors_num` (integer in [1, 200]) and `alpha`
    (float in [0.0, 1.0] on a 0.05 grid) from `trial`, builds an NBRKNN model
    on top of the notebook-level `dev_user_emb`, installs it into the
    notebook-level `trainer` and evaluates it in "dev" mode.

    Returns:
        The "ndcg" entry of the metrics returned by `trainer.evaluate`.
    """
    # Keep the int-then-float sampling order: the sampler draws sequentially.
    neighbors_num = trial.suggest_int("nearest_neighbors_num", low=1, high=200)
    alpha = trial.suggest_float("alpha", 0.0, 1.0, step=0.05)

    knn_model = NBRKNN(
        item_num=corpus.n_items,
        user_num=corpus.n_users,
        nearest_neighbors_num=neighbors_num,
        alpha=alpha,
        user_emb=dev_user_emb,
    )

    trainer.init_hyperparams(model=knn_model)
    return trainer.evaluate(mode="dev")["ndcg"]

In [None]:
# Seed the TPE sampler with the notebook-wide seed so the search is repeatable,
# then maximize the objective's dev NDCG over 25 trials.
sampler = optuna.samplers.TPESampler(seed=seed)
study = optuna.create_study(
    sampler=sampler,
    direction="maximize",
)
study.optimize(objective, n_trials=25)

[32m[I 2023-04-23 09:44:10,283][0m A new study created in memory with name: no-name-dab2f14e-1955-4c2c-a7e3-0be41ae025c0[0m
100%|██████████| 2357/2357 [03:09<00:00, 12.42it/s]
[32m[I 2023-04-23 09:47:25,259][0m Trial 0 finished with value: 0.12505135057878455 and parameters: {'nearest_neighbors_num': 155, 'alpha': 0.0}. Best is trial 0 with value: 0.12505135057878455.[0m
100%|██████████| 2357/2357 [02:48<00:00, 13.98it/s]
[32m[I 2023-04-23 09:50:17,249][0m Trial 1 finished with value: 0.17325260973753215 and parameters: {'nearest_neighbors_num': 127, 'alpha': 0.75}. Best is trial 1 with value: 0.17325260973753215.[0m
100%|██████████| 2357/2357 [02:45<00:00, 14.27it/s]
[32m[I 2023-04-23 09:53:05,837][0m Trial 2 finished with value: 0.16500951841871095 and parameters: {'nearest_neighbors_num': 100, 'alpha': 0.2}. Best is trial 1 with value: 0.17325260973753215.[0m
100%|██████████| 2357/2357 [02:35<00:00, 15.13it/s]
[32m[I 2023-04-23 09:55:47,644][0m Trial 3 finished with va

Test SLRCKNN (compute the test metrics for several different seeds):

In [None]:
# One independent, initially empty list of per-seed scores for each metric.
test_metrics = {metric: [] for metric in ("precision", "recall", "ndcg")}

In [None]:
# Run the full SLRC -> NBRKNN pipeline once per seed and collect test metrics.
#
# The loop variable is deliberately not called `seed`: that name holds the
# notebook-wide seed (set in the "Fix seed" cell and read by the Optuna sampler
# cell). Rebinding it here would leave it at 9 after the loop and silently
# change any cell that is run, or re-run, afterwards.
#
# NOTE(review): `trainer` and `dev_user_emb` are still rebound on every
# iteration; `objective` reads both as globals, so re-running the tuning cell
# after this loop would use the last iteration's objects.
for run_seed in range(10):
    print(f"\n___SEED___{run_seed}")
    # Seed every RNG source used by the notebook for this run.
    torch.manual_seed(run_seed)
    random.seed(run_seed)
    np.random.seed(run_seed)

    # Stage 1: train SLRC with the previously selected hyperparameters.
    trainer = NBRTrainer(
        corpus=corpus,
        max_epochs=20,
        topk=10,
        early_stop_num=3
    )

    params = {
        "model": SLRC(
            base_model_class=BPR,
            base_model_config={
                "emb_size": slrc_best_params["emb_size"],
                "user_num": corpus.n_users,
                "item_num": corpus.n_items,
                "click_num": corpus.n_clicks
            },
            item_num=corpus.n_items,
            avg_repeat_interval=corpus.total_avg_interval
        ),
        "batch_size": slrc_best_params["batch_size"],
        "lr": slrc_best_params["lr"],
        "l2_reg_coef": slrc_best_params["l2_reg_coef"]
    }

    trainer.init_hyperparams(**params)
    trainer.train()

    # SLRC predictions on both held-out splits serve as user embeddings for KNN.
    dev_user_emb = trainer.get_predictions(mode="dev")
    test_user_emb = trainer.get_predictions(mode="test")

    # Stage 2: evaluate NBRKNN with the hyperparameters chosen by the study.
    trainer = NBRTrainer(
        corpus=corpus,
        max_epochs=None,
        topk=10,
        early_stop_num=None
    )

    params = {
        "model": NBRKNN(
            item_num=corpus.n_items,
            user_num=corpus.n_users,
            nearest_neighbors_num=study.best_params["nearest_neighbors_num"],
            alpha=study.best_params["alpha"],
            user_emb=dev_user_emb
        )
    }
    # NOTE(review): presumably set_emb switches the model to the test-time
    # embeddings while the constructor's user_emb keeps the dev-time ones --
    # confirm against NBRKNN.
    params["model"].set_emb(test_user_emb)

    trainer.init_hyperparams(**params)

    metrics = trainer.evaluate(mode="test")

    # Accumulate per-seed scores; they are averaged in the next cell.
    test_metrics["precision"].append(metrics["precision"])
    test_metrics["recall"].append(metrics["recall"])
    test_metrics["ndcg"].append(metrics["ndcg"])


___SEED___5
train dataset preparing...


100%|██████████| 2358/2358 [00:10<00:00, 230.36it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 198.26it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 206.72it/s]

Epoch 1:



Batch loss = 0.576517: 100%|██████████| 7530/7530 [02:16<00:00, 55.27it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:11<00:00, 33.01it/s]


 {'precision': 0.11909206618582946, 'recall': 0.1816936625914881, 'ndcg': 0.16942664273979618}
Epoch 2:



Batch loss = 0.52811: 100%|██████████| 7530/7530 [02:16<00:00, 55.21it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:13<00:00, 32.22it/s]



 {'precision': 0.11726771319473907, 'recall': 0.18205315665071786, 'ndcg': 0.16932931108776003}
Epoch 3:


Batch loss = 0.502174: 100%|██████████| 7530/7530 [02:15<00:00, 55.70it/s]



Evaluation (dev):


100%|██████████| 2357/2357 [01:13<00:00, 32.08it/s]



 {'precision': 0.11701315231226135, 'recall': 0.18059159247070394, 'ndcg': 0.16785163645861054}
Epoch 4:


Batch loss = 0.461852: 100%|██████████| 7530/7530 [02:14<00:00, 55.82it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:11<00:00, 33.04it/s]


 {'precision': 0.11667373780229105, 'recall': 0.1804356088446267, 'ndcg': 0.16649578888396796}



100%|██████████| 2357/2357 [01:13<00:00, 32.15it/s]
100%|██████████| 2357/2357 [01:13<00:00, 32.06it/s]


train dataset preparing...


100%|██████████| 2358/2358 [00:08<00:00, 276.14it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 204.31it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 198.20it/s]
100%|██████████| 2357/2357 [02:00<00:00, 19.55it/s]



___SEED___6
train dataset preparing...


100%|██████████| 2358/2358 [00:09<00:00, 244.41it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:10<00:00, 226.17it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:12<00:00, 196.07it/s]


Epoch 1:


Batch loss = 0.575805: 100%|██████████| 7530/7530 [02:16<00:00, 54.97it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:11<00:00, 33.04it/s]


 {'precision': 0.1189647857445906, 'recall': 0.18178134620769365, 'ndcg': 0.16954631282716995}
Epoch 2:



Batch loss = 0.532363: 100%|██████████| 7530/7530 [02:15<00:00, 55.48it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:12<00:00, 32.64it/s]


 {'precision': 0.11739499363597794, 'recall': 0.1817094479443964, 'ndcg': 0.16923569394288773}
Epoch 3:



Batch loss = 0.499747: 100%|██████████| 7530/7530 [02:14<00:00, 55.98it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:12<00:00, 32.62it/s]


 {'precision': 0.11718285956724651, 'recall': 0.18091609397344938, 'ndcg': 0.167754981486724}
Epoch 4:



Batch loss = 0.462985: 100%|██████████| 7530/7530 [02:14<00:00, 55.98it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:10<00:00, 33.33it/s]


 {'precision': 0.11654645736105218, 'recall': 0.18121965610649654, 'ndcg': 0.16633207977679035}



100%|██████████| 2357/2357 [01:12<00:00, 32.45it/s]
100%|██████████| 2357/2357 [01:10<00:00, 33.22it/s]


train dataset preparing...


100%|██████████| 2358/2358 [00:10<00:00, 235.51it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 205.72it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 204.04it/s]
100%|██████████| 2357/2357 [02:00<00:00, 19.50it/s]



___SEED___7
train dataset preparing...


100%|██████████| 2358/2358 [00:08<00:00, 273.38it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 203.50it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 204.51it/s]

Epoch 1:



Batch loss = 0.576668: 100%|██████████| 7530/7530 [02:14<00:00, 55.95it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:11<00:00, 33.13it/s]


 {'precision': 0.118879932117098, 'recall': 0.1819256543850468, 'ndcg': 0.16957137538323058}





Epoch 2:


Batch loss = 0.5369: 100%|██████████| 7530/7530 [02:14<00:00, 55.96it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:12<00:00, 32.50it/s]


 {'precision': 0.11731014000848536, 'recall': 0.18189316691467738, 'ndcg': 0.16943840425944304}
Epoch 3:



Batch loss = 0.467617: 100%|██████████| 7530/7530 [02:14<00:00, 56.07it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:12<00:00, 32.62it/s]


 {'precision': 0.11688587187102249, 'recall': 0.1812946529555746, 'ndcg': 0.16804107086527448}
Epoch 4:



Batch loss = 0.474033: 100%|██████████| 7530/7530 [02:15<00:00, 55.57it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:11<00:00, 32.77it/s]


 {'precision': 0.11654645736105221, 'recall': 0.18039926126278322, 'ndcg': 0.1662084808753522}



100%|██████████| 2357/2357 [01:13<00:00, 31.99it/s]
100%|██████████| 2357/2357 [01:11<00:00, 32.97it/s]


train dataset preparing...


100%|██████████| 2358/2358 [00:08<00:00, 280.73it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 200.07it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 198.37it/s]
100%|██████████| 2357/2357 [02:00<00:00, 19.64it/s]



___SEED___8
train dataset preparing...


100%|██████████| 2358/2358 [00:10<00:00, 232.59it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 211.85it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:10<00:00, 220.89it/s]

Epoch 1:



Batch loss = 0.575454: 100%|██████████| 7530/7530 [02:16<00:00, 55.15it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:14<00:00, 31.54it/s]


 {'precision': 0.11909206618582945, 'recall': 0.18189637145436685, 'ndcg': 0.1694731387135353}
Epoch 2:



Batch loss = 0.531506: 100%|██████████| 7530/7530 [02:15<00:00, 55.65it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:13<00:00, 32.18it/s]


 {'precision': 0.11735256682223166, 'recall': 0.18180877962591882, 'ndcg': 0.16934792847171723}
Epoch 3:



Batch loss = 0.505593: 100%|██████████| 7530/7530 [02:17<00:00, 54.84it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:12<00:00, 32.45it/s]


 {'precision': 0.11718285956724651, 'recall': 0.181164966619237, 'ndcg': 0.16792980349814512}
Epoch 4:



Batch loss = 0.452233: 100%|██████████| 7530/7530 [02:18<00:00, 54.26it/s]



Evaluation (dev):


100%|██████████| 2357/2357 [01:13<00:00, 32.13it/s]


 {'precision': 0.11629189647857446, 'recall': 0.179298056851432, 'ndcg': 0.16540755906633184}



100%|██████████| 2357/2357 [01:12<00:00, 32.68it/s]
100%|██████████| 2357/2357 [01:14<00:00, 31.66it/s]


train dataset preparing...


100%|██████████| 2358/2358 [00:08<00:00, 276.05it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 207.49it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 206.00it/s]
100%|██████████| 2357/2357 [02:00<00:00, 19.49it/s]



___SEED___9
train dataset preparing...


100%|██████████| 2358/2358 [00:10<00:00, 233.56it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:10<00:00, 224.29it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 213.91it/s]

Epoch 1:



Batch loss = 0.575165: 100%|██████████| 7530/7530 [02:17<00:00, 54.82it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:12<00:00, 32.64it/s]


 {'precision': 0.1186677980483666, 'recall': 0.18133206525202128, 'ndcg': 0.169222446929408}
Epoch 2:



Batch loss = 0.531134: 100%|██████████| 7530/7530 [02:15<00:00, 55.49it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:11<00:00, 32.89it/s]


 {'precision': 0.11692829868476878, 'recall': 0.18118967642153244, 'ndcg': 0.16907854927875582}
Epoch 3:



Batch loss = 0.476006: 100%|██████████| 7530/7530 [02:15<00:00, 55.68it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:13<00:00, 31.93it/s]


 {'precision': 0.11671616461603734, 'recall': 0.18052541718280948, 'ndcg': 0.16759469297125762}
Epoch 4:



Batch loss = 0.461174: 100%|██████████| 7530/7530 [02:18<00:00, 54.26it/s]


Evaluation (dev):



100%|██████████| 2357/2357 [01:11<00:00, 32.86it/s]


 {'precision': 0.11658888417479847, 'recall': 0.1801133409705749, 'ndcg': 0.16616766212100362}



100%|██████████| 2357/2357 [01:13<00:00, 32.18it/s]
100%|██████████| 2357/2357 [01:12<00:00, 32.50it/s]


train dataset preparing...


100%|██████████| 2358/2358 [00:10<00:00, 232.09it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:11<00:00, 208.60it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:10<00:00, 222.01it/s]
100%|██████████| 2357/2357 [02:00<00:00, 19.54it/s]


In [None]:
# Average every collected metric over the evaluated seeds; the resulting dict
# is the cell's displayed value.
{
    metric: np.mean(test_metrics[metric])
    for metric in ("precision", "recall", "ndcg")
}

{'precision': 0.12135341535850656,
 'recall': 0.17906825951192068,
 'ndcg': 0.17062814846815227}