In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity="all"

import os
import json
import argparse
import pandas as pd
import numpy as np
import time, datetime
from tqdm import tqdm
from logging import getLogger
import torch

from recbole.quick_start import run_recbole
import wandb

In [2]:
# train load
train = pd.read_csv("/opt/ml/input/data/train/train_ratings.csv")

# indexing save
user2idx = {v:k for k,v in enumerate(sorted(set(train.user)))}
item2idx = {v:k for k,v in enumerate(sorted(set(train.item)))}
uidx2user = {k:v for k,v in enumerate(sorted(set(train.user)))}
iidx2item = {k:v for k,v in enumerate(sorted(set(train.item)))}

In [3]:
train.user = train.user.map(user2idx)
train.item = train.item.map(item2idx)

train.columns=['user_id:token','item_id:token','timestamp:float']

outpath = f"dataset/train_data"
os.makedirs(outpath, exist_ok=True)
# sub_train=train.groupby("user").sample(n=10, random_state=SEED)
# sub_train.shape
train.to_csv(os.path.join(outpath,"train_data.inter"),sep='\t',index=False)

In [4]:
yamldata="""
USER_ID_FIELD: user_id
ITEM_ID_FIELD: item_id
TIME_FIELD: timestamp

load_col:
    inter: [user_id, item_id, timestamp]

show_progress : False
epochs : 30
device : torch.device("cuda" if torch.cuda.is_available() else "cpu")
eval_args:
    split: {'RS': [8, 1, 1]}
    group_by: user
    order: RO
    mode: full
metrics: ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision', 'MAP']
topk: 10
valid_metric: Recall@10

log_wandb : True
wandb_project : Recbole

mlp_hidden_size : [600,400,200]
"""
with open("general.yaml", "w") as f:
    f.write(yamldata)

484

In [5]:
def run(model_name):
    if model_name in [
        "MultiVAE",
        "MultiDAE",
        "MacridVAE",
        "RecVAE",
        "GRU4Rec",
        "NARM",
        "STAMP",
        "NextItNet",
        "TransRec",
        "SASRec",
        "BERT4Rec",
        "SRGNN",
        "GCSAN",
        "GRU4RecF",
        "FOSSIL",
        "SHAN",
        "RepeatNet",
        "HRM",
        "NPE",
    ]:
        parameter_dict = {
            "neg_sampling": None,
        }
        return run_recbole(
            model=model_name,
            dataset='train_data',
            config_file_list=['general.yaml'],
            config_dict=parameter_dict,
        )
    else:
        return run_recbole(
            model=model_name,
            dataset='train_data',
            config_file_list=['general.yaml'],
        )

In [8]:
model_list = ['MultiVAE','NeuMF','MultiDAE','CDAE','RecVAE','EASE','SLIMElastic','ADMMSLIM']
for model_name in model_list:
    print(f"running {model_name}...")
    start = time.time()
    result = run(model_name)
    t = time.time() - start
    print(f"It took {t/60:.2f} mins")
    print(result)

running MultiVAE...


26 Dec 15:50    INFO  ['/opt/conda/lib/python3.8/site-packages/ipykernel_launcher.py', '--ip=127.0.0.1', '--stdin=9018', '--control=9016', '--hb=9015', '--Session.signature_scheme="hmac-sha256"', '--Session.key=b"499b6bdb-a389-4f27-ae92-17c23ffbdec9"', '--shell=9017', '--transport="tcp"', '--iopub=9019', '--f=/opt/ml/.local/share/jupyter/runtime/kernel-v2-34931OI93FR56AwQR.json']
26 Dec 15:50    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = dataset/train_data
checkpoint_dir = saved
show_progress = False
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = True

Training Hyper Parameters:
epochs = 30
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_

26 Dec 15:51    INFO  epoch 0 training [time: 0.21s, train loss: 17059.0038]
26 Dec 15:52    INFO  epoch 0 evaluating [time: 47.75s, valid_score: 0.077600]
26 Dec 15:52    INFO  valid result: 
recall@10 : 0.0776    mrr@10 : 0.2652    ndcg@10 : 0.1186    hit@10 : 0.5677    precision@10 : 0.0993    map@10 : 0.0525
26 Dec 15:52    INFO  Saving current: saved/MultiVAE-Dec-26-2022_15-51-27.pth
26 Dec 15:52    INFO  epoch 1 training [time: 0.21s, train loss: 16411.2237]
26 Dec 15:53    INFO  epoch 1 evaluating [time: 48.52s, valid_score: 0.077500]
26 Dec 15:53    INFO  valid result: 
recall@10 : 0.0775    mrr@10 : 0.2671    ndcg@10 : 0.1193    hit@10 : 0.5676    precision@10 : 0.0998    map@10 : 0.053
26 Dec 15:53    INFO  epoch 2 training [time: 0.20s, train loss: 16398.4192]
26 Dec 15:53    INFO  epoch 2 evaluating [time: 48.10s, valid_score: 0.080500]
26 Dec 15:53    INFO  valid result: 
recall@10 : 0.0805    mrr@10 : 0.2739    ndcg@10 : 0.1239    hit@10 : 0.5743    precision@10 : 0.1034 

It took 26.35 mins
{'best_valid_score': 0.1223, 'valid_score_bigger': True, 'best_valid_result': OrderedDict([('recall@10', 0.1223), ('mrr@10', 0.3717), ('ndcg@10', 0.1819), ('hit@10', 0.7391), ('precision@10', 0.1537), ('map@10', 0.0866)]), 'test_result': OrderedDict([('recall@10', 0.1365), ('mrr@10', 0.4322), ('ndcg@10', 0.2182), ('hit@10', 0.7603), ('precision@10', 0.1806), ('map@10', 0.1145)])}
running NeuMF...


26 Dec 16:16    INFO  ['/opt/conda/lib/python3.8/site-packages/ipykernel_launcher.py', '--ip=127.0.0.1', '--stdin=9018', '--control=9016', '--hb=9015', '--Session.signature_scheme="hmac-sha256"', '--Session.key=b"499b6bdb-a389-4f27-ae92-17c23ffbdec9"', '--shell=9017', '--transport="tcp"', '--iopub=9019', '--f=/opt/ml/.local/share/jupyter/runtime/kernel-v2-34931OI93FR56AwQR.json']
26 Dec 16:16    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = dataset/train_data
checkpoint_dir = saved
show_progress = False
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = True

Training Hyper Parameters:
epochs = 30
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_

It took 23.56 mins
{'best_valid_score': 0.1119, 'valid_score_bigger': True, 'best_valid_result': OrderedDict([('recall@10', 0.1119), ('mrr@10', 0.3491), ('ndcg@10', 0.169), ('hit@10', 0.7149), ('precision@10', 0.1448), ('map@10', 0.0787)]), 'test_result': OrderedDict([('recall@10', 0.1245), ('mrr@10', 0.4052), ('ndcg@10', 0.2019), ('hit@10', 0.7391), ('precision@10', 0.1697), ('map@10', 0.1032)])}
running MultiDAE...


26 Dec 16:40    INFO  ['/opt/conda/lib/python3.8/site-packages/ipykernel_launcher.py', '--ip=127.0.0.1', '--stdin=9018', '--control=9016', '--hb=9015', '--Session.signature_scheme="hmac-sha256"', '--Session.key=b"499b6bdb-a389-4f27-ae92-17c23ffbdec9"', '--shell=9017', '--transport="tcp"', '--iopub=9019', '--f=/opt/ml/.local/share/jupyter/runtime/kernel-v2-34931OI93FR56AwQR.json']
26 Dec 16:40    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = dataset/train_data
checkpoint_dir = saved
show_progress = False
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = True

Training Hyper Parameters:
epochs = 30
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_

It took 27.11 mins
{'best_valid_score': 0.1207, 'valid_score_bigger': True, 'best_valid_result': OrderedDict([('recall@10', 0.1207), ('mrr@10', 0.3699), ('ndcg@10', 0.1803), ('hit@10', 0.7344), ('precision@10', 0.1523), ('map@10', 0.0858)]), 'test_result': OrderedDict([('recall@10', 0.1349), ('mrr@10', 0.4294), ('ndcg@10', 0.2154), ('hit@10', 0.7584), ('precision@10', 0.1778), ('map@10', 0.1125)])}
running CDAE...


26 Dec 17:07    INFO  ['/opt/conda/lib/python3.8/site-packages/ipykernel_launcher.py', '--ip=127.0.0.1', '--stdin=9018', '--control=9016', '--hb=9015', '--Session.signature_scheme="hmac-sha256"', '--Session.key=b"499b6bdb-a389-4f27-ae92-17c23ffbdec9"', '--shell=9017', '--transport="tcp"', '--iopub=9019', '--f=/opt/ml/.local/share/jupyter/runtime/kernel-v2-34931OI93FR56AwQR.json']
26 Dec 17:07    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = dataset/train_data
checkpoint_dir = saved
show_progress = False
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = True

Training Hyper Parameters:
epochs = 30
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_

It took 21.32 mins
{'best_valid_score': 0.1159, 'valid_score_bigger': True, 'best_valid_result': OrderedDict([('recall@10', 0.1159), ('mrr@10', 0.3736), ('ndcg@10', 0.1789), ('hit@10', 0.7169), ('precision@10', 0.1495), ('map@10', 0.0858)]), 'test_result': OrderedDict([('recall@10', 0.1296), ('mrr@10', 0.4379), ('ndcg@10', 0.2148), ('hit@10', 0.7431), ('precision@10', 0.1751), ('map@10', 0.1133)])}
running RecVAE...


26 Dec 17:28    INFO  ['/opt/conda/lib/python3.8/site-packages/ipykernel_launcher.py', '--ip=127.0.0.1', '--stdin=9018', '--control=9016', '--hb=9015', '--Session.signature_scheme="hmac-sha256"', '--Session.key=b"499b6bdb-a389-4f27-ae92-17c23ffbdec9"', '--shell=9017', '--transport="tcp"', '--iopub=9019', '--f=/opt/ml/.local/share/jupyter/runtime/kernel-v2-34931OI93FR56AwQR.json']
26 Dec 17:28    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = dataset/train_data
checkpoint_dir = saved
show_progress = False
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = True

Training Hyper Parameters:
epochs = 30
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_

It took 32.12 mins
{'best_valid_score': 0.1457, 'valid_score_bigger': True, 'best_valid_result': OrderedDict([('recall@10', 0.1457), ('mrr@10', 0.4278), ('ndcg@10', 0.2174), ('hit@10', 0.7997), ('precision@10', 0.1822), ('map@10', 0.1084)]), 'test_result': OrderedDict([('recall@10', 0.166), ('mrr@10', 0.5118), ('ndcg@10', 0.2683), ('hit@10', 0.8247), ('precision@10', 0.2192), ('map@10', 0.1496)])}
running EASE...


26 Dec 18:00    INFO  ['/opt/conda/lib/python3.8/site-packages/ipykernel_launcher.py', '--ip=127.0.0.1', '--stdin=9018', '--control=9016', '--hb=9015', '--Session.signature_scheme="hmac-sha256"', '--Session.key=b"499b6bdb-a389-4f27-ae92-17c23ffbdec9"', '--shell=9017', '--transport="tcp"', '--iopub=9019', '--f=/opt/ml/.local/share/jupyter/runtime/kernel-v2-34931OI93FR56AwQR.json']
26 Dec 18:00    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = dataset/train_data
checkpoint_dir = saved
show_progress = False
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = True

Training Hyper Parameters:
epochs = 30
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_

It took 2.99 mins
{'best_valid_score': 0.1644, 'valid_score_bigger': True, 'best_valid_result': OrderedDict([('recall@10', 0.1644), ('mrr@10', 0.4709), ('ndcg@10', 0.2437), ('hit@10', 0.8396), ('precision@10', 0.2025), ('map@10', 0.1248)]), 'test_result': OrderedDict([('recall@10', 0.1886), ('mrr@10', 0.5723), ('ndcg@10', 0.3056), ('hit@10', 0.8628), ('precision@10', 0.2459), ('map@10', 0.1764)])}
running SLIMElastic...


26 Dec 18:03    INFO  ['/opt/conda/lib/python3.8/site-packages/ipykernel_launcher.py', '--ip=127.0.0.1', '--stdin=9018', '--control=9016', '--hb=9015', '--Session.signature_scheme="hmac-sha256"', '--Session.key=b"499b6bdb-a389-4f27-ae92-17c23ffbdec9"', '--shell=9017', '--transport="tcp"', '--iopub=9019', '--f=/opt/ml/.local/share/jupyter/runtime/kernel-v2-34931OI93FR56AwQR.json']
26 Dec 18:03    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = dataset/train_data
checkpoint_dir = saved
show_progress = False
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = True

Training Hyper Parameters:
epochs = 30
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_

It took 112.28 mins
{'best_valid_score': 0.1493, 'valid_score_bigger': True, 'best_valid_result': OrderedDict([('recall@10', 0.1493), ('mrr@10', 0.4546), ('ndcg@10', 0.2263), ('hit@10', 0.8078), ('precision@10', 0.1852), ('map@10', 0.1145)]), 'test_result': OrderedDict([('recall@10', 0.1699), ('mrr@10', 0.5464), ('ndcg@10', 0.2797), ('hit@10', 0.833), ('precision@10', 0.2215), ('map@10', 0.1579)])}
running ADMMSLIM...


26 Dec 19:56    INFO  ['/opt/conda/lib/python3.8/site-packages/ipykernel_launcher.py', '--ip=127.0.0.1', '--stdin=9018', '--control=9016', '--hb=9015', '--Session.signature_scheme="hmac-sha256"', '--Session.key=b"499b6bdb-a389-4f27-ae92-17c23ffbdec9"', '--shell=9017', '--transport="tcp"', '--iopub=9019', '--f=/opt/ml/.local/share/jupyter/runtime/kernel-v2-34931OI93FR56AwQR.json']
26 Dec 19:56    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = dataset/train_data
checkpoint_dir = saved
show_progress = False
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = True

Training Hyper Parameters:
epochs = 30
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_

It took 19.62 mins
{'best_valid_score': 0.1649, 'valid_score_bigger': True, 'best_valid_result': OrderedDict([('recall@10', 0.1649), ('mrr@10', 0.4762), ('ndcg@10', 0.2473), ('hit@10', 0.8431), ('precision@10', 0.2058), ('map@10', 0.1274)]), 'test_result': OrderedDict([('recall@10', 0.1896), ('mrr@10', 0.5797), ('ndcg@10', 0.3124), ('hit@10', 0.8618), ('precision@10', 0.2517), ('map@10', 0.1833)])}
