In [3]:
import os
import sys
sys.path.append("..")
from nbr.preparation import Preprocess, save_split, Corpus
from nbr.trainer import NBRTrainer
from nbr.model import TIFUKNNTimeDays
import torch
import random
import numpy as np
import optuna
import warnings
warnings.filterwarnings("ignore")

# TaFeng

Fix seed:

In [3]:
# Single seed shared by every global RNG in this notebook and by the Optuna sampler below.
seed = 10
# Seed Python's, PyTorch's and NumPy's global generators. NumPy stays last so the
# cell's final expression evaluates to None and nothing is echoed as cell output.
random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

Read the interaction data and filter out users with fewer than 5 transactions, users with a high purchase frequency, one-day users, and items with fewer than 10 transactions. Train set: all baskets except the last two; validation set: the second-to-last basket; test set: the last basket:

In [4]:
# Data root and dataset identifier; both are passed to Preprocess, save_split and Corpus.
corpus_path = "./data/"
dataset_name = "ta_feng"

preprocessor = Preprocess(corpus_path, dataset_name)
# Per the notebook text above, 5 is presumably the minimum number of transactions per user
# and 10 the minimum per item; filt=True presumably switches on the remaining user
# filters -- TODO confirm against nbr.preparation.
preprocessor.load_data(5, 10, filt=True)
# Persist the preprocessed dataset; the recorded output shows it being saved under corpus_path.
save_split(corpus_path, dataset_name, preprocessor)

Before preprocessing: #users = 32266, #items = 23812, #clicks = 817741 (#illegal records = 0)
After preprocessing: #users = 7358, #items = 11202, #clicks = 368951
Saving dataset in ./data//data_ta_feng/...


In [5]:
# Build the corpus object for this dataset and load its data.
# Presumably this reads the files written by save_split -- TODO confirm.
corpus = Corpus(corpus_path, dataset_name)
corpus.load_data()

Tune hyperparams on validation dataset:

In [6]:
# Trainer used for hyperparameter tuning; constructing it prepares the
# train/dev/test datasets (see the progress output of this cell).
# topk=10 is presumably the cutoff for the ranking metrics, and max_epochs /
# early_stop_num are left as None -- presumably unused by this model; TODO confirm.
trainer = NBRTrainer(
    corpus=corpus,
    max_epochs=None,
    topk=10,
    early_stop_num=None
)

train dataset preparing...


100%|██████████| 7358/7358 [00:15<00:00, 489.34it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:03<00:00, 2424.25it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:02<00:00, 3026.00it/s]


In [8]:
def objective(trial):
    """Score one Optuna trial with the validation NDCG of a TIFUKNNTimeDays model.

    Relies on the notebook-level ``corpus`` and ``trainer`` objects.

    Args:
        trial: Optuna trial used to sample the model hyperparameters.

    Returns:
        The ``"ndcg"`` entry of the metrics from ``trainer.evaluate(mode="dev")``.
    """
    # Sampling order is fixed: group size, within/group decay, neighbours, alpha, log flag.
    sampled = {
        "group_size_days": trial.suggest_int("group_size_days", 1, 365),
        "within_decay_rate": trial.suggest_float("within_decay_rate", 0.1, 1.0, step=0.1),
        "group_decay_rate": trial.suggest_float("group_decay_rate", 0.1, 1.0, step=0.1),
        "nearest_neighbors_num": trial.suggest_int("nearest_neighbors_num", low=25, high=500, step=25),
        "alpha": trial.suggest_float("alpha", 0.0, 1.0, step=0.1),
        "use_log": trial.suggest_categorical("use_log", [True, False]),
    }
    candidate = TIFUKNNTimeDays(
        item_num=corpus.n_items,
        user_num=corpus.n_users,
        corpus=corpus,
        **sampled,
    )

    # Hand the candidate model to the shared trainer, then score it on the dev split.
    trainer.init_hyperparams(model=candidate)
    return trainer.evaluate(mode="dev")["ndcg"]

In [10]:
# TPE sampler seeded with the notebook seed so the suggested hyperparameters are repeatable.
sampler = optuna.samplers.TPESampler(seed=seed)
# objective returns the dev-set NDCG, so the study maximises it.
study = optuna.create_study(direction="maximize", sampler=sampler)
# 25 trials; each trial configures a new model and evaluates it on the dev split.
study.optimize(objective, n_trials=25)

[32m[I 2023-04-29 10:05:16,973][0m A new study created in memory with name: no-name-edec6e64-1447-4b55-a29a-2a08c121cda2[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:07<00:00, 109.02it/s]
[32m[I 2023-04-29 10:06:31,602][0m Trial 0 finished with value: 0.08417837669486034 and parameters: {'group_size_days': 282, 'within_decay_rate': 0.1, 'group_decay_rate': 0.7000000000000001, 'nearest_neighbors_num': 375, 'alpha': 0.5, 'use_log': True}. Best is trial 0 with value: 0.08417837669486034.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:15<00:00, 97.73it/s] 
[32m[I 2023-04-29 10:07:52,680][0m Trial 1 finished with value: 0.08107169520651933 and parameters: {'group_size_days': 278, 'within_decay_rate': 0.2, 'group_decay_rate': 0.1, 'nearest_neighbors_num': 350, 'alpha': 1.0, 'use_log': False}. Best is trial 0 with value: 0.08417837669486034.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:07<00:00, 108.54it/s]
[32m[I 2023-04-29 10:09:06,135][0m Trial 2 finished with value: 0.09409736208740892 and parameters: {'group_size_days': 297, 'within_decay_rate': 0.7000000000000001, 'group_decay_rate': 0.8, 'nearest_neighbors_num': 150, 'alpha': 1.0, 'use_log': True}. Best is trial 2 with value: 0.09409736208740892.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:07<00:00, 109.43it/s]
[32m[I 2023-04-29 10:10:19,915][0m Trial 3 finished with value: 0.0899712994800866 and parameters: {'group_size_days': 52, 'within_decay_rate': 0.4, 'group_decay_rate': 0.7000000000000001, 'nearest_neighbors_num': 225, 'alpha': 0.4, 'use_log': True}. Best is trial 2 with value: 0.09409736208740892.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:08<00:00, 106.75it/s]
[32m[I 2023-04-29 10:11:34,286][0m Trial 4 finished with value: 0.09943049264273611 and parameters: {'group_size_days': 238, 'within_decay_rate': 0.7000000000000001, 'group_decay_rate': 0.9, 'nearest_neighbors_num': 275, 'alpha': 0.9, 'use_log': True}. Best is trial 4 with value: 0.09943049264273611.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:09<00:00, 105.69it/s]
[32m[I 2023-04-29 10:12:49,133][0m Trial 5 finished with value: 0.06575078563480784 and parameters: {'group_size_days': 110, 'within_decay_rate': 0.2, 'group_decay_rate': 0.9, 'nearest_neighbors_num': 25, 'alpha': 0.6000000000000001, 'use_log': False}. Best is trial 4 with value: 0.09943049264273611.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:09<00:00, 105.93it/s]
[32m[I 2023-04-29 10:14:03,558][0m Trial 6 finished with value: 0.11015189400663232 and parameters: {'group_size_days': 73, 'within_decay_rate': 0.9, 'group_decay_rate': 0.4, 'nearest_neighbors_num': 400, 'alpha': 0.30000000000000004, 'use_log': True}. Best is trial 6 with value: 0.11015189400663232.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:10<00:00, 104.68it/s]
[32m[I 2023-04-29 10:15:18,831][0m Trial 7 finished with value: 0.08315398270776554 and parameters: {'group_size_days': 61, 'within_decay_rate': 0.4, 'group_decay_rate': 0.1, 'nearest_neighbors_num': 425, 'alpha': 0.1, 'use_log': False}. Best is trial 6 with value: 0.11015189400663232.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:07<00:00, 108.49it/s]
[32m[I 2023-04-29 10:16:33,673][0m Trial 8 finished with value: 0.0930543366897407 and parameters: {'group_size_days': 361, 'within_decay_rate': 0.5, 'group_decay_rate': 0.9, 'nearest_neighbors_num': 150, 'alpha': 0.6000000000000001, 'use_log': True}. Best is trial 6 with value: 0.11015189400663232.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:08<00:00, 107.22it/s]
[32m[I 2023-04-29 10:17:49,334][0m Trial 9 finished with value: 0.06664441206384558 and parameters: {'group_size_days': 216, 'within_decay_rate': 0.1, 'group_decay_rate': 0.4, 'nearest_neighbors_num': 50, 'alpha': 0.30000000000000004, 'use_log': False}. Best is trial 6 with value: 0.11015189400663232.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:05<00:00, 111.63it/s]
[32m[I 2023-04-29 10:19:02,601][0m Trial 10 finished with value: 0.08973733291690814 and parameters: {'group_size_days': 139, 'within_decay_rate': 1.0, 'group_decay_rate': 0.4, 'nearest_neighbors_num': 450, 'alpha': 0.0, 'use_log': True}. Best is trial 6 with value: 0.11015189400663232.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:06<00:00, 111.01it/s]
[32m[I 2023-04-29 10:20:16,461][0m Trial 11 finished with value: 0.09290810725547442 and parameters: {'group_size_days': 9, 'within_decay_rate': 0.8, 'group_decay_rate': 0.5, 'nearest_neighbors_num': 300, 'alpha': 0.8, 'use_log': True}. Best is trial 6 with value: 0.11015189400663232.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:07<00:00, 108.96it/s]
[32m[I 2023-04-29 10:21:31,549][0m Trial 12 finished with value: 0.11295864483900496 and parameters: {'group_size_days': 197, 'within_decay_rate': 1.0, 'group_decay_rate': 0.30000000000000004, 'nearest_neighbors_num': 500, 'alpha': 0.2, 'use_log': True}. Best is trial 12 with value: 0.11295864483900496.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:08<00:00, 106.83it/s]
100%|██████████| 7357/7357 [11:17<00:00, 10.86it/s]
[32m[I 2023-04-29 10:34:05,885][0m Trial 13 finished with value: 0.11295864483900496 and parameters: {'group_size_days': 156, 'within_decay_rate': 1.0, 'group_decay_rate': 0.30000000000000004, 'nearest_neighbors_num': 500, 'alpha': 0.2, 'use_log': True}. Best is trial 12 with value: 0.11295864483900496.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:07<00:00, 108.33it/s]
100%|██████████| 7357/7357 [11:22<00:00, 10.78it/s]
[32m[I 2023-04-29 10:46:42,365][0m Trial 14 finished with value: 0.11312625017928453 and parameters: {'group_size_days': 190, 'within_decay_rate': 1.0, 'group_decay_rate': 0.2, 'nearest_neighbors_num': 475, 'alpha': 0.2, 'use_log': True}. Best is trial 14 with value: 0.11312625017928453.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:06<00:00, 110.06it/s]
100%|██████████| 7357/7357 [11:43<00:00, 10.45it/s]
[32m[I 2023-04-29 10:59:40,831][0m Trial 15 finished with value: 0.08903716627491305 and parameters: {'group_size_days': 195, 'within_decay_rate': 0.8, 'group_decay_rate': 0.2, 'nearest_neighbors_num': 500, 'alpha': 0.0, 'use_log': True}. Best is trial 14 with value: 0.11312625017928453.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:07<00:00, 108.40it/s]
100%|██████████| 7357/7357 [11:57<00:00, 10.25it/s]
[32m[I 2023-04-29 11:12:52,893][0m Trial 16 finished with value: 0.11295864483900496 and parameters: {'group_size_days': 164, 'within_decay_rate': 1.0, 'group_decay_rate': 0.2, 'nearest_neighbors_num': 500, 'alpha': 0.2, 'use_log': True}. Best is trial 14 with value: 0.11312625017928453.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:08<00:00, 106.70it/s]
100%|██████████| 7357/7357 [08:16<00:00, 14.83it/s]
[32m[I 2023-04-29 11:22:23,921][0m Trial 17 finished with value: 0.10830287780210922 and parameters: {'group_size_days': 245, 'within_decay_rate': 0.8, 'group_decay_rate': 0.30000000000000004, 'nearest_neighbors_num': 325, 'alpha': 0.2, 'use_log': True}. Best is trial 14 with value: 0.11312625017928453.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:09<00:00, 105.37it/s]
100%|██████████| 7357/7357 [11:31<00:00, 10.64it/s]
[32m[I 2023-04-29 11:35:14,049][0m Trial 18 finished with value: 0.08494748403722352 and parameters: {'group_size_days': 113, 'within_decay_rate': 0.6, 'group_decay_rate': 0.6, 'nearest_neighbors_num': 450, 'alpha': 0.4, 'use_log': False}. Best is trial 14 with value: 0.11312625017928453.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:08<00:00, 107.71it/s]
100%|██████████| 7357/7357 [07:46<00:00, 15.78it/s]
[32m[I 2023-04-29 11:44:16,456][0m Trial 19 finished with value: 0.1095121738354846 and parameters: {'group_size_days': 351, 'within_decay_rate': 0.9, 'group_decay_rate': 0.2, 'nearest_neighbors_num': 225, 'alpha': 0.1, 'use_log': True}. Best is trial 14 with value: 0.11312625017928453.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:08<00:00, 107.98it/s]
100%|██████████| 7357/7357 [11:13<00:00, 10.92it/s]
[32m[I 2023-04-29 11:56:45,133][0m Trial 20 finished with value: 0.1096700906066822 and parameters: {'group_size_days': 211, 'within_decay_rate': 0.9, 'group_decay_rate': 0.5, 'nearest_neighbors_num': 450, 'alpha': 0.7000000000000001, 'use_log': True}. Best is trial 14 with value: 0.11312625017928453.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:06<00:00, 110.52it/s]
100%|██████████| 7357/7357 [11:43<00:00, 10.45it/s]
[32m[I 2023-04-29 12:09:41,535][0m Trial 21 finished with value: 0.11295864483900496 and parameters: {'group_size_days': 161, 'within_decay_rate': 1.0, 'group_decay_rate': 0.30000000000000004, 'nearest_neighbors_num': 500, 'alpha': 0.2, 'use_log': True}. Best is trial 14 with value: 0.11312625017928453.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:06<00:00, 109.95it/s]
100%|██████████| 7357/7357 [11:37<00:00, 10.55it/s]
[32m[I 2023-04-29 12:22:34,287][0m Trial 22 finished with value: 0.11297487437897484 and parameters: {'group_size_days': 178, 'within_decay_rate': 1.0, 'group_decay_rate': 0.30000000000000004, 'nearest_neighbors_num': 500, 'alpha': 0.30000000000000004, 'use_log': True}. Best is trial 14 with value: 0.11312625017928453.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:06<00:00, 110.98it/s]
100%|██████████| 7357/7357 [10:43<00:00, 11.43it/s]
[32m[I 2023-04-29 12:34:30,339][0m Trial 23 finished with value: 0.1124514092509248 and parameters: {'group_size_days': 187, 'within_decay_rate': 0.9, 'group_decay_rate': 0.1, 'nearest_neighbors_num': 400, 'alpha': 0.4, 'use_log': True}. Best is trial 14 with value: 0.11312625017928453.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:08<00:00, 107.52it/s]
100%|██████████| 7357/7357 [11:15<00:00, 10.89it/s]
[32m[I 2023-04-29 12:47:00,249][0m Trial 24 finished with value: 0.10333602826948025 and parameters: {'group_size_days': 246, 'within_decay_rate': 0.7000000000000001, 'group_decay_rate': 0.30000000000000004, 'nearest_neighbors_num': 450, 'alpha': 0.30000000000000004, 'use_log': True}. Best is trial 14 with value: 0.11312625017928453.[0m


Test:

In [11]:
# Fresh trainer for the final run, configured exactly like the one used for tuning.
trainer = NBRTrainer(corpus=corpus, max_epochs=None, topk=10, early_stop_num=None)

# The Optuna parameter names match the model's keyword arguments one-to-one
# (group_size_days, within_decay_rate, group_decay_rate, nearest_neighbors_num,
# alpha, use_log), so the best trial's values are forwarded unchanged.
params = {
    "model": TIFUKNNTimeDays(
        item_num=corpus.n_items,
        user_num=corpus.n_users,
        corpus=corpus,
        **study.best_params,
    )
}

trainer.init_hyperparams(**params)

train dataset preparing...


100%|██████████| 7358/7358 [00:15<00:00, 470.28it/s]


dev dataset preparing...


100%|██████████| 7357/7357 [00:02<00:00, 2539.10it/s]


test dataset preparing...


100%|██████████| 7357/7357 [00:02<00:00, 3034.50it/s]


TIFUKNNTimeDays fitting...


100%|██████████| 7358/7358 [01:07<00:00, 108.31it/s]


In [12]:
# Evaluate the tuned model on the test split; the returned metrics dict is shown as the cell output.
trainer.evaluate(mode="test")

100%|██████████| 7357/7357 [08:30<00:00, 14.40it/s]


{'precision': 0.061519641158080736,
 'recall': 0.1503349546760062,
 'ndcg': 0.1248435198538412}

# TaoBao

Fix seed:

In [4]:
# Single seed shared by every global RNG in this notebook and by the Optuna sampler below.
seed = 10
# Seed Python's, PyTorch's and NumPy's global generators. NumPy stays last so the
# cell's final expression evaluates to None and nothing is echoed as cell output.
random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

Read the interaction data and filter out users with fewer than 10 transactions, users with a high purchase frequency, one-day users, and items with fewer than 10 transactions. Train set: all baskets except the last two; validation set: the second-to-last basket; test set: the last basket:

In [5]:
# Data root and dataset identifier; both are passed to Preprocess, save_split and Corpus.
corpus_path = "./data/"
dataset_name = "taobao"

preprocessor = Preprocess(corpus_path, dataset_name)
# Per the notebook text above, the first 10 is presumably the minimum number of
# transactions per user and the second 10 the minimum per item; filt=True presumably
# switches on the remaining user filters -- TODO confirm against nbr.preparation.
preprocessor.load_data(10, 10, filt=True)
# Persist the preprocessed dataset; the recorded output shows it being saved under corpus_path.
save_split(corpus_path, dataset_name, preprocessor)

Before preprocessing: #users = 672404, #items = 638962, #clicks = 2015807 (#illegal records = 0)
After preprocessing: #users = 10092, #items = 22286, #clicks = 67991
Saving dataset in ./data//data_taobao/...


In [6]:
# Build the corpus object for this dataset and load its data.
# Presumably this reads the files written by save_split -- TODO confirm.
corpus = Corpus(corpus_path, dataset_name)
corpus.load_data()

Tune hyperparams on validation dataset:

In [7]:
# Trainer used for hyperparameter tuning; constructing it prepares the
# train/dev/test datasets (see the progress output of this cell).
# topk=10 is presumably the cutoff for the ranking metrics, and max_epochs /
# early_stop_num are left as None -- presumably unused by this model; TODO confirm.
trainer = NBRTrainer(
    corpus=corpus,
    max_epochs=None,
    topk=10,
    early_stop_num=None
)

train dataset preparing...


100%|██████████| 10092/10092 [00:51<00:00, 196.96it/s]


dev dataset preparing...


100%|██████████| 9307/9307 [00:00<00:00, 21531.86it/s]


test dataset preparing...


100%|██████████| 9307/9307 [00:00<00:00, 22219.24it/s]


In [8]:
def objective(trial):
    """Score one Optuna trial with the validation NDCG of a TIFUKNNTimeDays model.

    Relies on the notebook-level ``corpus`` and ``trainer`` objects.

    Args:
        trial: Optuna trial used to sample the model hyperparameters.

    Returns:
        The ``"ndcg"`` entry of the metrics from ``trainer.evaluate(mode="dev")``.
    """
    # Sampling order is fixed: group size, within/group decay, neighbours, alpha, log flag.
    sampled = {
        "group_size_days": trial.suggest_int("group_size_days", 1, 365),
        "within_decay_rate": trial.suggest_float("within_decay_rate", 0.1, 1.0, step=0.1),
        "group_decay_rate": trial.suggest_float("group_decay_rate", 0.1, 1.0, step=0.1),
        "nearest_neighbors_num": trial.suggest_int("nearest_neighbors_num", low=25, high=500, step=25),
        "alpha": trial.suggest_float("alpha", 0.0, 1.0, step=0.1),
        "use_log": trial.suggest_categorical("use_log", [True, False]),
    }
    candidate = TIFUKNNTimeDays(
        item_num=corpus.n_items,
        user_num=corpus.n_users,
        corpus=corpus,
        **sampled,
    )

    # Hand the candidate model to the shared trainer, then score it on the dev split.
    trainer.init_hyperparams(model=candidate)
    return trainer.evaluate(mode="dev")["ndcg"]

In [None]:
# TPE sampler seeded with the notebook seed so the suggested hyperparameters are repeatable.
sampler = optuna.samplers.TPESampler(seed=seed)
# objective returns the dev-set NDCG, so the study maximises it.
study = optuna.create_study(direction="maximize", sampler=sampler)
# 25 trials; each trial configures a new model and evaluates it on the dev split.
# NOTE(review): the saved output of this cell stops partway through a trial and the cell
# has no execution count, so the recorded run did not finish -- re-run to completion
# before relying on study.best_params.
study.optimize(objective, n_trials=25)

[32m[I 2023-04-29 19:20:27,224][0m A new study created in memory with name: no-name-c16507f0-75d6-4cd0-b752-d83b141dd5c3[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:11<00:00, 141.87it/s]
100%|██████████| 9307/9307 [34:45<00:00,  4.46it/s]
[32m[I 2023-04-29 19:56:40,036][0m Trial 0 finished with value: 0.07412356489993777 and parameters: {'group_size_days': 282, 'within_decay_rate': 0.1, 'group_decay_rate': 0.7000000000000001, 'nearest_neighbors_num': 375, 'alpha': 0.5, 'use_log': True}. Best is trial 0 with value: 0.07412356489993777.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:31<00:00, 110.17it/s]
100%|██████████| 9307/9307 [37:57<00:00,  4.09it/s]
[32m[I 2023-04-29 20:36:39,638][0m Trial 1 finished with value: 0.07461211446161903 and parameters: {'group_size_days': 278, 'within_decay_rate': 0.2, 'group_decay_rate': 0.1, 'nearest_neighbors_num': 350, 'alpha': 1.0, 'use_log': False}. Best is trial 1 with value: 0.07461211446161903.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:21<00:00, 123.38it/s]
100%|██████████| 9307/9307 [27:00<00:00,  5.74it/s]
[32m[I 2023-04-29 21:05:22,368][0m Trial 2 finished with value: 0.07467950451300674 and parameters: {'group_size_days': 297, 'within_decay_rate': 0.7000000000000001, 'group_decay_rate': 0.8, 'nearest_neighbors_num': 150, 'alpha': 1.0, 'use_log': True}. Best is trial 2 with value: 0.07467950451300674.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:25<00:00, 118.73it/s]
100%|██████████| 9307/9307 [31:59<00:00,  4.85it/s]
[32m[I 2023-04-29 21:39:07,242][0m Trial 3 finished with value: 0.075011472143333 and parameters: {'group_size_days': 52, 'within_decay_rate': 0.4, 'group_decay_rate': 0.7000000000000001, 'nearest_neighbors_num': 225, 'alpha': 0.4, 'use_log': True}. Best is trial 3 with value: 0.075011472143333.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:15<00:00, 134.11it/s]
100%|██████████| 9307/9307 [31:29<00:00,  4.93it/s]
[32m[I 2023-04-29 22:12:10,883][0m Trial 4 finished with value: 0.0749721199516413 and parameters: {'group_size_days': 238, 'within_decay_rate': 0.7000000000000001, 'group_decay_rate': 0.9, 'nearest_neighbors_num': 275, 'alpha': 0.9, 'use_log': True}. Best is trial 3 with value: 0.075011472143333.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:09<00:00, 144.98it/s]
100%|██████████| 9307/9307 [23:42<00:00,  6.54it/s]
[32m[I 2023-04-29 22:37:22,288][0m Trial 5 finished with value: 0.06914656447053072 and parameters: {'group_size_days': 110, 'within_decay_rate': 0.2, 'group_decay_rate': 0.9, 'nearest_neighbors_num': 25, 'alpha': 0.6000000000000001, 'use_log': False}. Best is trial 3 with value: 0.075011472143333.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:09<00:00, 144.98it/s]
100%|██████████| 9307/9307 [32:51<00:00,  4.72it/s]
[32m[I 2023-04-29 23:11:42,634][0m Trial 6 finished with value: 0.0741416679567004 and parameters: {'group_size_days': 73, 'within_decay_rate': 0.9, 'group_decay_rate': 0.4, 'nearest_neighbors_num': 400, 'alpha': 0.30000000000000004, 'use_log': True}. Best is trial 3 with value: 0.075011472143333.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:09<00:00, 144.90it/s]
100%|██████████| 9307/9307 [31:46<00:00,  4.88it/s]
[32m[I 2023-04-29 23:44:57,773][0m Trial 7 finished with value: 0.0696559918157874 and parameters: {'group_size_days': 61, 'within_decay_rate': 0.4, 'group_decay_rate': 0.1, 'nearest_neighbors_num': 425, 'alpha': 0.1, 'use_log': False}. Best is trial 3 with value: 0.075011472143333.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:08<00:00, 147.52it/s]
100%|██████████| 9307/9307 [20:59<00:00,  7.39it/s]
[32m[I 2023-04-30 00:07:24,646][0m Trial 8 finished with value: 0.0750488417336101 and parameters: {'group_size_days': 361, 'within_decay_rate': 0.5, 'group_decay_rate': 0.9, 'nearest_neighbors_num': 150, 'alpha': 0.6000000000000001, 'use_log': True}. Best is trial 8 with value: 0.0750488417336101.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:10<00:00, 142.54it/s]
100%|██████████| 9307/9307 [23:19<00:00,  6.65it/s]
[32m[I 2023-04-30 00:32:13,392][0m Trial 9 finished with value: 0.063607298142758 and parameters: {'group_size_days': 216, 'within_decay_rate': 0.1, 'group_decay_rate': 0.4, 'nearest_neighbors_num': 50, 'alpha': 0.30000000000000004, 'use_log': False}. Best is trial 8 with value: 0.0750488417336101.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:11<00:00, 141.55it/s]
100%|██████████| 9307/9307 [22:36<00:00,  6.86it/s]
[32m[I 2023-04-30 00:56:20,794][0m Trial 10 finished with value: 0.06843755632142837 and parameters: {'group_size_days': 346, 'within_decay_rate': 1.0, 'group_decay_rate': 1.0, 'nearest_neighbors_num': 175, 'alpha': 0.7000000000000001, 'use_log': True}. Best is trial 8 with value: 0.0750488417336101.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:09<00:00, 145.33it/s]
100%|██████████| 9307/9307 [25:08<00:00,  6.17it/s]
[32m[I 2023-04-30 01:22:57,734][0m Trial 11 finished with value: 0.0747768699628456 and parameters: {'group_size_days': 9, 'within_decay_rate': 0.5, 'group_decay_rate': 0.6, 'nearest_neighbors_num': 200, 'alpha': 0.4, 'use_log': True}. Best is trial 8 with value: 0.0750488417336101.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:10<00:00, 142.71it/s]
100%|██████████| 9307/9307 [27:18<00:00,  5.68it/s]
[32m[I 2023-04-30 01:51:46,447][0m Trial 12 finished with value: 0.07522178650053729 and parameters: {'group_size_days': 150, 'within_decay_rate': 0.4, 'group_decay_rate': 0.7000000000000001, 'nearest_neighbors_num': 275, 'alpha': 0.7000000000000001, 'use_log': True}. Best is trial 12 with value: 0.07522178650053729.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:10<00:00, 143.91it/s]
100%|██████████| 9307/9307 [27:15<00:00,  5.69it/s]
[32m[I 2023-04-30 02:20:31,376][0m Trial 13 finished with value: 0.07542939761002994 and parameters: {'group_size_days': 151, 'within_decay_rate': 0.6, 'group_decay_rate': 0.5, 'nearest_neighbors_num': 300, 'alpha': 0.8, 'use_log': True}. Best is trial 13 with value: 0.07542939761002994.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:11<00:00, 141.79it/s]
100%|██████████| 9307/9307 [27:17<00:00,  5.68it/s]
[32m[I 2023-04-30 02:49:21,358][0m Trial 14 finished with value: 0.07494594173969953 and parameters: {'group_size_days': 141, 'within_decay_rate': 0.7000000000000001, 'group_decay_rate': 0.4, 'nearest_neighbors_num': 300, 'alpha': 0.8, 'use_log': True}. Best is trial 13 with value: 0.07542939761002994.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:11<00:00, 141.47it/s]
100%|██████████| 9307/9307 [32:22<00:00,  4.79it/s]
[32m[I 2023-04-30 03:23:14,732][0m Trial 15 finished with value: 0.07562455779004285 and parameters: {'group_size_days': 160, 'within_decay_rate': 0.30000000000000004, 'group_decay_rate': 0.5, 'nearest_neighbors_num': 500, 'alpha': 0.8, 'use_log': True}. Best is trial 15 with value: 0.07562455779004285.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:09<00:00, 144.41it/s]
100%|██████████| 9307/9307 [32:31<00:00,  4.77it/s]
[32m[I 2023-04-30 03:57:14,599][0m Trial 16 finished with value: 0.07562455779004285 and parameters: {'group_size_days': 176, 'within_decay_rate': 0.30000000000000004, 'group_decay_rate': 0.30000000000000004, 'nearest_neighbors_num': 500, 'alpha': 0.8, 'use_log': True}. Best is trial 15 with value: 0.07562455779004285.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:08<00:00, 148.35it/s]
100%|██████████| 9307/9307 [32:33<00:00,  4.77it/s]
[32m[I 2023-04-30 04:31:14,827][0m Trial 17 finished with value: 0.07570213983766064 and parameters: {'group_size_days': 198, 'within_decay_rate': 0.30000000000000004, 'group_decay_rate': 0.2, 'nearest_neighbors_num': 475, 'alpha': 0.9, 'use_log': True}. Best is trial 17 with value: 0.07570213983766064.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:09<00:00, 144.60it/s]
100%|██████████| 9307/9307 [36:56<00:00,  4.20it/s]
[32m[I 2023-04-30 05:09:40,330][0m Trial 18 finished with value: 0.07444575656979122 and parameters: {'group_size_days': 213, 'within_decay_rate': 0.30000000000000004, 'group_decay_rate': 0.2, 'nearest_neighbors_num': 500, 'alpha': 0.9, 'use_log': False}. Best is trial 17 with value: 0.07570213983766064.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:18<00:00, 127.79it/s]
100%|██████████| 9307/9307 [34:53<00:00,  4.44it/s]
[32m[I 2023-04-30 05:46:15,949][0m Trial 19 finished with value: 0.015318695366841066 and parameters: {'group_size_days': 115, 'within_decay_rate': 0.30000000000000004, 'group_decay_rate': 0.2, 'nearest_neighbors_num': 450, 'alpha': 0.0, 'use_log': True}. Best is trial 17 with value: 0.07570213983766064.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:18<00:00, 128.52it/s]
100%|██████████| 9307/9307 [34:30<00:00,  4.49it/s]
[32m[I 2023-04-30 06:22:27,641][0m Trial 20 finished with value: 0.07510070831765085 and parameters: {'group_size_days': 241, 'within_decay_rate': 0.2, 'group_decay_rate': 0.5, 'nearest_neighbors_num': 450, 'alpha': 1.0, 'use_log': True}. Best is trial 17 with value: 0.07570213983766064.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:18<00:00, 128.43it/s]
100%|██████████| 9307/9307 [36:49<00:00,  4.21it/s]
[32m[I 2023-04-30 07:00:55,832][0m Trial 21 finished with value: 0.07562455779004285 and parameters: {'group_size_days': 180, 'within_decay_rate': 0.30000000000000004, 'group_decay_rate': 0.30000000000000004, 'nearest_neighbors_num': 500, 'alpha': 0.8, 'use_log': True}. Best is trial 17 with value: 0.07570213983766064.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [01:22<00:00, 122.69it/s]
 39%|███▉      | 3609/9307 [14:04<26:20,  3.60it/s]

Test:

In [8]:
# Fresh trainer for the final run, configured exactly like the one used for tuning.
# NOTE(review): this cell needs a finished `study`; the tuning cell's saved output stops
# partway through a trial, so re-run the tuning to completion before executing this cell.
trainer = NBRTrainer(corpus=corpus, max_epochs=None, topk=10, early_stop_num=None)

# The Optuna parameter names match the model's keyword arguments one-to-one
# (group_size_days, within_decay_rate, group_decay_rate, nearest_neighbors_num,
# alpha, use_log), so the best trial's values are forwarded unchanged.
params = {
    "model": TIFUKNNTimeDays(
        item_num=corpus.n_items,
        user_num=corpus.n_users,
        corpus=corpus,
        **study.best_params,
    )
}

trainer.init_hyperparams(**params)

train dataset preparing...


100%|██████████| 10092/10092 [00:30<00:00, 326.20it/s]


dev dataset preparing...


100%|██████████| 9307/9307 [00:00<00:00, 39460.79it/s]


test dataset preparing...


100%|██████████| 9307/9307 [00:00<00:00, 32211.14it/s]


TIFUKNNTimeDays fitting...


100%|██████████| 10092/10092 [00:44<00:00, 225.52it/s]


In [9]:
# Evaluate the tuned model on the test split; the returned metrics dict is shown as the cell output.
trainer.evaluate(mode="test")

100%|██████████| 9307/9307 [17:22<00:00,  8.93it/s]


{'precision': 0.007832814010959494,
 'recall': 0.07634038895455032,
 'ndcg': 0.05425719248855343}

# Dunnhumby

Fix seed:

In [3]:
# Single seed shared by every global RNG in this notebook and by the Optuna sampler below.
seed = 10
# Seed Python's, PyTorch's and NumPy's global generators. NumPy stays last so the
# cell's final expression evaluates to None and nothing is echoed as cell output.
random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

Read the interaction data and filter out users with fewer than 5 transactions, users with a high purchase frequency, one-day users, and items with fewer than 10 transactions. Train set: all baskets except the last two; validation set: the second-to-last basket; test set: the last basket:

In [4]:
# Data root and dataset identifier; both are passed to Preprocess, save_split and Corpus.
corpus_path = "./data/"
dataset_name = "dunnhumby"

preprocessor = Preprocess(corpus_path, dataset_name)
# Per the notebook text above, 5 is presumably the minimum number of transactions per user
# and 10 the minimum per item; filt=True presumably switches on the remaining user
# filters -- TODO confirm against nbr.preparation.
preprocessor.load_data(5, 10, filt=True)
# Persist the preprocessed dataset; the recorded output shows it being saved under corpus_path.
save_split(corpus_path, dataset_name, preprocessor)

Before preprocessing: #users = 2500, #items = 92339, #clicks = 2595370 (#illegal records = 0)
After preprocessing: #users = 2358, #items = 26756, #clicks = 1976796
Saving dataset in ./data//data_dunnhumby/...


In [5]:
# Build the corpus object for this dataset and load its data.
# Presumably this reads the files written by save_split -- TODO confirm.
corpus = Corpus(corpus_path, dataset_name)
corpus.load_data()

Tune hyperparams on validation dataset:

In [6]:
# Trainer used for hyperparameter tuning; constructing it prepares the
# train/dev/test datasets (see the progress output of this cell).
# topk=10 is presumably the cutoff for the ranking metrics, and max_epochs /
# early_stop_num are left as None -- presumably unused by this model; TODO confirm.
trainer = NBRTrainer(
    corpus=corpus,
    max_epochs=None,
    topk=10,
    early_stop_num=None
)

train dataset preparing...


100%|██████████| 2358/2358 [00:14<00:00, 167.99it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:13<00:00, 169.00it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:13<00:00, 168.47it/s]


In [7]:
def objective(trial):
    """Optuna objective: validation NDCG of one sampled TIFUKNNTimeDays setup.

    Uses the notebook-level ``corpus`` and ``trainer``.

    Args:
        trial: Optuna trial object used to draw the hyperparameters.

    Returns:
        The ``"ndcg"`` entry of the metrics returned by
        ``trainer.evaluate(mode="dev")``.
    """
    # Dict literals evaluate in order, so the hyperparameters are drawn in a
    # fixed sequence; keep it stable so a seeded sampler replays the same trials.
    sampled = {
        "group_size_days": trial.suggest_int("group_size_days", 1, 365),
        "within_decay_rate": trial.suggest_float("within_decay_rate", 0.1, 1.0, step=0.1),
        "group_decay_rate": trial.suggest_float("group_decay_rate", 0.1, 1.0, step=0.1),
        "nearest_neighbors_num": trial.suggest_int("nearest_neighbors_num", low=25, high=500, step=25),
        "alpha": trial.suggest_float("alpha", 0.0, 1.0, step=0.1),
        "use_log": trial.suggest_categorical("use_log", [True, False]),
    }

    candidate = TIFUKNNTimeDays(
        item_num=corpus.n_items,
        user_num=corpus.n_users,
        **sampled,
        corpus=corpus,
    )

    # Hand the candidate model to the shared trainer, then score it on the dev split.
    trainer.init_hyperparams(model=candidate)
    return trainer.evaluate(mode="dev")["ndcg"]

In [8]:
# TPE sampler seeded with the notebook-wide seed.
sampler = optuna.samplers.TPESampler(seed=seed)

# In-memory study; the objective returns dev NDCG, so higher is better.
study = optuna.create_study(sampler=sampler, direction="maximize")

# 25 sequential trials. In the logged run each trial took roughly 11-15 minutes,
# so the whole search ran for about five and a half hours.
study.optimize(func=objective, n_trials=25)

[32m[I 2023-04-29 13:00:14,299][0m A new study created in memory with name: no-name-979a2065-9e76-4da3-a9ea-e10f803b01c2[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [07:46<00:00,  5.06it/s]
100%|██████████| 2357/2357 [05:06<00:00,  7.68it/s]
[32m[I 2023-04-29 13:13:15,930][0m Trial 0 finished with value: 0.10244781767325224 and parameters: {'group_size_days': 282, 'within_decay_rate': 0.1, 'group_decay_rate': 0.7000000000000001, 'nearest_neighbors_num': 375, 'alpha': 0.5, 'use_log': True}. Best is trial 0 with value: 0.10244781767325224.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [07:45<00:00,  5.06it/s]
100%|██████████| 2357/2357 [05:04<00:00,  7.73it/s]
[32m[I 2023-04-29 13:26:14,502][0m Trial 1 finished with value: 0.09752797557074781 and parameters: {'group_size_days': 278, 'within_decay_rate': 0.2, 'group_decay_rate': 0.1, 'nearest_neighbors_num': 350, 'alpha': 1.0, 'use_log': False}. Best is trial 0 with value: 0.10244781767325224.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:01<00:00,  4.89it/s]
100%|██████████| 2357/2357 [03:07<00:00, 12.54it/s]
[32m[I 2023-04-29 13:37:33,719][0m Trial 2 finished with value: 0.12786320823521036 and parameters: {'group_size_days': 297, 'within_decay_rate': 0.7000000000000001, 'group_decay_rate': 0.8, 'nearest_neighbors_num': 150, 'alpha': 1.0, 'use_log': True}. Best is trial 2 with value: 0.12786320823521036.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [07:55<00:00,  4.96it/s]
100%|██████████| 2357/2357 [03:44<00:00, 10.52it/s]
[32m[I 2023-04-29 13:49:19,292][0m Trial 3 finished with value: 0.1450757981540146 and parameters: {'group_size_days': 52, 'within_decay_rate': 0.4, 'group_decay_rate': 0.7000000000000001, 'nearest_neighbors_num': 225, 'alpha': 0.4, 'use_log': True}. Best is trial 3 with value: 0.1450757981540146.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:13<00:00,  4.78it/s]
100%|██████████| 2357/2357 [04:27<00:00,  8.82it/s]
[32m[I 2023-04-29 14:02:06,884][0m Trial 4 finished with value: 0.14793432737615742 and parameters: {'group_size_days': 238, 'within_decay_rate': 0.7000000000000001, 'group_decay_rate': 0.9, 'nearest_neighbors_num': 275, 'alpha': 0.9, 'use_log': True}. Best is trial 4 with value: 0.14793432737615742.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [07:46<00:00,  5.05it/s]
100%|██████████| 2357/2357 [02:35<00:00, 15.15it/s]
[32m[I 2023-04-29 14:12:37,430][0m Trial 5 finished with value: 0.09183445530510459 and parameters: {'group_size_days': 110, 'within_decay_rate': 0.2, 'group_decay_rate': 0.9, 'nearest_neighbors_num': 25, 'alpha': 0.6000000000000001, 'use_log': False}. Best is trial 4 with value: 0.14793432737615742.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:13<00:00,  4.78it/s]
100%|██████████| 2357/2357 [05:42<00:00,  6.88it/s]
[32m[I 2023-04-29 14:26:41,761][0m Trial 6 finished with value: 0.16284213604050088 and parameters: {'group_size_days': 73, 'within_decay_rate': 0.9, 'group_decay_rate': 0.4, 'nearest_neighbors_num': 400, 'alpha': 0.30000000000000004, 'use_log': True}. Best is trial 6 with value: 0.16284213604050088.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [07:50<00:00,  5.02it/s]
100%|██████████| 2357/2357 [05:55<00:00,  6.64it/s]
[32m[I 2023-04-29 14:40:34,558][0m Trial 7 finished with value: 0.09626758524106753 and parameters: {'group_size_days': 61, 'within_decay_rate': 0.4, 'group_decay_rate': 0.1, 'nearest_neighbors_num': 425, 'alpha': 0.1, 'use_log': False}. Best is trial 6 with value: 0.16284213604050088.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:07<00:00,  4.83it/s]
100%|██████████| 2357/2357 [03:06<00:00, 12.64it/s]
[32m[I 2023-04-29 14:51:56,029][0m Trial 8 finished with value: 0.14648294405977244 and parameters: {'group_size_days': 361, 'within_decay_rate': 0.5, 'group_decay_rate': 0.9, 'nearest_neighbors_num': 150, 'alpha': 0.6000000000000001, 'use_log': True}. Best is trial 6 with value: 0.16284213604050088.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [07:49<00:00,  5.03it/s]
100%|██████████| 2357/2357 [02:41<00:00, 14.61it/s]
[32m[I 2023-04-29 15:02:34,748][0m Trial 9 finished with value: 0.0851127889571306 and parameters: {'group_size_days': 216, 'within_decay_rate': 0.1, 'group_decay_rate': 0.4, 'nearest_neighbors_num': 50, 'alpha': 0.30000000000000004, 'use_log': False}. Best is trial 6 with value: 0.16284213604050088.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:16<00:00,  4.75it/s]
100%|██████████| 2357/2357 [06:07<00:00,  6.41it/s]
[32m[I 2023-04-29 15:17:13,526][0m Trial 10 finished with value: 0.11285031365714701 and parameters: {'group_size_days': 139, 'within_decay_rate': 1.0, 'group_decay_rate': 0.4, 'nearest_neighbors_num': 450, 'alpha': 0.0, 'use_log': True}. Best is trial 6 with value: 0.16284213604050088.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:15<00:00,  4.76it/s]
100%|██████████| 2357/2357 [04:48<00:00,  8.16it/s]
[32m[I 2023-04-29 15:30:23,791][0m Trial 11 finished with value: 0.12878159788913204 and parameters: {'group_size_days': 9, 'within_decay_rate': 0.8, 'group_decay_rate': 0.5, 'nearest_neighbors_num': 300, 'alpha': 0.8, 'use_log': True}. Best is trial 6 with value: 0.16284213604050088.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:17<00:00,  4.74it/s]
100%|██████████| 2357/2357 [06:34<00:00,  5.97it/s]
[32m[I 2023-04-29 15:45:22,003][0m Trial 12 finished with value: 0.14978445501781887 and parameters: {'group_size_days': 197, 'within_decay_rate': 1.0, 'group_decay_rate': 0.30000000000000004, 'nearest_neighbors_num': 500, 'alpha': 0.2, 'use_log': True}. Best is trial 6 with value: 0.16284213604050088.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:01<00:00,  4.89it/s]
100%|██████████| 2357/2357 [06:38<00:00,  5.91it/s]
[32m[I 2023-04-29 16:00:08,604][0m Trial 13 finished with value: 0.15098328833174163 and parameters: {'group_size_days': 156, 'within_decay_rate': 1.0, 'group_decay_rate': 0.30000000000000004, 'nearest_neighbors_num': 500, 'alpha': 0.2, 'use_log': True}. Best is trial 6 with value: 0.16284213604050088.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:25<00:00,  4.66it/s]
100%|██████████| 2357/2357 [06:29<00:00,  6.05it/s]
[32m[I 2023-04-29 16:15:11,123][0m Trial 14 finished with value: 0.15477669048356776 and parameters: {'group_size_days': 141, 'within_decay_rate': 0.9, 'group_decay_rate': 0.30000000000000004, 'nearest_neighbors_num': 475, 'alpha': 0.2, 'use_log': True}. Best is trial 6 with value: 0.16284213604050088.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:06<00:00,  4.84it/s]
100%|██████████| 2357/2357 [05:47<00:00,  6.78it/s]
[32m[I 2023-04-29 16:29:11,706][0m Trial 15 finished with value: 0.11627823716458316 and parameters: {'group_size_days': 95, 'within_decay_rate': 0.8, 'group_decay_rate': 0.2, 'nearest_neighbors_num': 400, 'alpha': 0.0, 'use_log': True}. Best is trial 6 with value: 0.16284213604050088.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:14<00:00,  4.77it/s]
100%|██████████| 2357/2357 [05:17<00:00,  7.42it/s]
[32m[I 2023-04-29 16:42:50,205][0m Trial 16 finished with value: 0.1269226552447571 and parameters: {'group_size_days': 6, 'within_decay_rate': 0.8, 'group_decay_rate': 0.6, 'nearest_neighbors_num': 325, 'alpha': 0.30000000000000004, 'use_log': True}. Best is trial 6 with value: 0.16284213604050088.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:04<00:00,  4.87it/s]
100%|██████████| 2357/2357 [06:08<00:00,  6.40it/s]
[32m[I 2023-04-29 16:57:08,733][0m Trial 17 finished with value: 0.16478425144345862 and parameters: {'group_size_days': 158, 'within_decay_rate': 0.9, 'group_decay_rate': 0.5, 'nearest_neighbors_num': 450, 'alpha': 0.4, 'use_log': True}. Best is trial 17 with value: 0.16478425144345862.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:14<00:00,  4.77it/s]
100%|██████████| 2357/2357 [04:11<00:00,  9.36it/s]
[32m[I 2023-04-29 17:09:42,264][0m Trial 18 finished with value: 0.1089153268398021 and parameters: {'group_size_days': 73, 'within_decay_rate': 0.6, 'group_decay_rate': 0.5, 'nearest_neighbors_num': 225, 'alpha': 0.7000000000000001, 'use_log': False}. Best is trial 17 with value: 0.16478425144345862.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:05<00:00,  4.86it/s]
100%|██████████| 2357/2357 [05:46<00:00,  6.80it/s]
[32m[I 2023-04-29 17:23:45,356][0m Trial 19 finished with value: 0.16205686377039183 and parameters: {'group_size_days': 173, 'within_decay_rate': 0.9, 'group_decay_rate': 0.6, 'nearest_neighbors_num': 400, 'alpha': 0.4, 'use_log': True}. Best is trial 17 with value: 0.16478425144345862.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:19<00:00,  4.72it/s]
100%|██████████| 2357/2357 [06:10<00:00,  6.37it/s]
[32m[I 2023-04-29 17:38:24,225][0m Trial 20 finished with value: 0.15954296490209074 and parameters: {'group_size_days': 120, 'within_decay_rate': 0.6, 'group_decay_rate': 0.4, 'nearest_neighbors_num': 450, 'alpha': 0.5, 'use_log': True}. Best is trial 17 with value: 0.16478425144345862.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:03<00:00,  4.87it/s]
100%|██████████| 2357/2357 [05:47<00:00,  6.78it/s]
[32m[I 2023-04-29 17:52:23,711][0m Trial 21 finished with value: 0.16205686377039183 and parameters: {'group_size_days': 173, 'within_decay_rate': 0.9, 'group_decay_rate': 0.6, 'nearest_neighbors_num': 400, 'alpha': 0.4, 'use_log': True}. Best is trial 17 with value: 0.16478425144345862.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:08<00:00,  4.83it/s]
100%|██████████| 2357/2357 [05:29<00:00,  7.16it/s]
[32m[I 2023-04-29 18:06:09,611][0m Trial 22 finished with value: 0.15809701774436274 and parameters: {'group_size_days': 234, 'within_decay_rate': 0.9, 'group_decay_rate': 0.7000000000000001, 'nearest_neighbors_num': 350, 'alpha': 0.4, 'use_log': True}. Best is trial 17 with value: 0.16478425144345862.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:08<00:00,  4.83it/s]
100%|██████████| 2357/2357 [05:56<00:00,  6.61it/s]
[32m[I 2023-04-29 18:20:22,641][0m Trial 23 finished with value: 0.15990072490388244 and parameters: {'group_size_days': 186, 'within_decay_rate': 0.7000000000000001, 'group_decay_rate': 0.5, 'nearest_neighbors_num': 425, 'alpha': 0.30000000000000004, 'use_log': True}. Best is trial 17 with value: 0.16478425144345862.[0m


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [08:12<00:00,  4.79it/s]
100%|██████████| 2357/2357 [05:35<00:00,  7.02it/s]
[32m[I 2023-04-29 18:34:17,588][0m Trial 24 finished with value: 0.16399709518491712 and parameters: {'group_size_days': 36, 'within_decay_rate': 0.9, 'group_decay_rate': 0.6, 'nearest_neighbors_num': 375, 'alpha': 0.6000000000000001, 'use_log': True}. Best is trial 17 with value: 0.16478425144345862.[0m


Test:

In [9]:
# A new trainer is built for the final run, with the same settings that were
# used for tuning.
trainer = NBRTrainer(corpus=corpus, max_epochs=None, topk=10, early_stop_num=None)

# Read the winning trial's hyperparameters once and forward exactly the six
# tuned values to the model constructor.
best_params = study.best_params
params = {
    "model": TIFUKNNTimeDays(
        item_num=corpus.n_items,
        user_num=corpus.n_users,
        **{
            name: best_params[name]
            for name in (
                "group_size_days",
                "within_decay_rate",
                "group_decay_rate",
                "nearest_neighbors_num",
                "alpha",
                "use_log",
            )
        },
        corpus=corpus,
    )
}

# Passing the model to the trainer triggers the fit (see the
# "TIFUKNNTimeDays fitting..." line in the cell log).
trainer.init_hyperparams(**params)

train dataset preparing...


100%|██████████| 2358/2358 [00:14<00:00, 159.27it/s]


dev dataset preparing...


100%|██████████| 2357/2357 [00:13<00:00, 174.77it/s]


test dataset preparing...


100%|██████████| 2357/2357 [00:15<00:00, 149.19it/s]


TIFUKNNTimeDays fitting...


100%|██████████| 2358/2358 [07:50<00:00,  5.01it/s]


In [10]:
# Final score: evaluate the model refitted with the best hyperparameters on the
# test split. The returned metrics dict (precision / recall / ndcg) is the cell output.
# The trainer was built with topk=10 — presumably these are metrics @10; confirm in NBRTrainer.
trainer.evaluate(mode="test")

100%|██████████| 2357/2357 [05:50<00:00,  6.73it/s]


{'precision': 0.1161646160373356,
 'recall': 0.17054997430939312,
 'ndcg': 0.1592542980590923}