# Эксперименты на датасете TTRS

Все эксперименты были запущены на NVIDIA Tesla V100 GPU

In [1]:
from typing import Dict
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
import os
import sys
import pickle
import logging
import argparse
import numpy as np
import torch

from helpers import *
from models.general import *
from models.sequential import *
from models.developing import *
from utils import utils

def parse_global_args(parser):
    """Register the run-wide command-line options on ``parser``.

    :param parser: an ``argparse.ArgumentParser`` to extend in place
    :return: the same parser object, so calls can be chained
    """
    # (flag, type, default, help) for every global option, in display order.
    option_table = (
        ('--gpu', str, '0', 'Set CUDA_VISIBLE_DEVICES'),
        ('--verbose', int, logging.INFO, 'Logging Level, 0, 10, ..., 50'),
        ('--log_file', str, '', 'Logging file path'),
        ('--random_seed', int, 0, 'Random seed of numpy and pytorch.'),
        ('--load', int, 0, 'Whether load model and continue to train'),
        ('--train', int, 1, 'To train the model or not.'),
        ('--regenerate', int, 0, 'Whether to regenerate intermediate files.'),
    )
    for flag, value_type, fallback, description in option_table:
        parser.add_argument(flag, type=value_type, default=fallback,
                            help=description)
    return parser

In [3]:
def _build_args(with_rnn=False):
    """Shared implementation behind parse_args() and parse_args_kda().

    Builds the argument namespace from the global, reader, runner and model
    parsers, derives default log/checkpoint paths and configures logging.

    Reads the notebook-level constants MODEL_NAME, DATASET, RANDOM_SEED and
    POOLING (plus RNN when ``with_rnn`` is true).

    NOTE: the log/checkpoint file name is built from the parser defaults that
    are current at call time, so values assigned to ``args`` afterwards
    (e.g. ``args.emb_size``) are not reflected in the name.

    :param with_rnn: also accept ``--rnn_model`` and put it into the file name
    :return: the parsed ``argparse.Namespace``
    """
    init_parser = argparse.ArgumentParser(description='Model')
    init_parser.add_argument('--model_name',
                             type=str,
                             default=MODEL_NAME,
                             help='Choose a model to run.')
    init_parser.add_argument('--dataset',
                             type=str,
                             default=DATASET,
                             help='Choose a dataset to run.')
    if with_rnn:
        init_parser.add_argument('--rnn_model',
                                 type=str,
                                 default=RNN,  # 'lstm'
                                 help='Choose a rnn to run.')
    init_args, _ = init_parser.parse_known_args()

    # The star-imports expose one module per class, each holding a class of
    # the same name; eval() resolves "<Name>.<Name>" among the globals.
    # NOTE(review): only safe while the names come from notebook constants.
    model_name = eval('{0}.{0}'.format(MODEL_NAME))
    reader_name = eval('{0}.{0}'.format(model_name.reader))
    runner_name = eval('{0}.{0}'.format(model_name.runner))

    # Args: every component registers its own options on one parser.
    parser = argparse.ArgumentParser(description='')
    parser = parse_global_args(parser)
    parser = reader_name.parse_data_args(parser)
    parser = runner_name.parse_runner_args(parser)
    parser = model_name.parse_model_args(parser)
    args, _ = parser.parse_known_args()

    # Logging configuration
    log_args = [init_args.model_name, init_args.dataset]
    if with_rnn:
        log_args.append(init_args.rnn_model)
    log_args += [str(RANDOM_SEED), str(POOLING)]
    for arg in ['lr', 'l2'] + model_name.extra_log_args:
        # getattr replaces eval('args.' + arg): same value, no code execution.
        log_args.append(arg + '=' + str(getattr(args, arg)))
    log_file_name = '__'.join(log_args).replace(' ', '__')
    if args.log_file == '':
        args.log_file = '../log/{}/{}.txt'.format(init_args.model_name, log_file_name)
    if args.model_path == '':
        args.model_path = '../model/{}/{}.pt'.format(init_args.model_name, log_file_name)

    utils.check_dir(args.log_file)
    # NOTE: logging.basicConfig() does nothing when the root logger already
    # has handlers, so a repeated call in one kernel keeps the first log file.
    logging.basicConfig(filename=args.log_file, level=args.verbose)
    root_logger = logging.getLogger()
    # Attach the stdout echo only once; adding it on every call would print
    # each log record several times.
    stdout_attached = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, 'stream', None) is sys.stdout
        for handler in root_logger.handlers
    )
    if not stdout_attached:
        root_logger.addHandler(logging.StreamHandler(sys.stdout))
    logging.info(init_args)

    return args


def parse_args():
    """Parse arguments for the model named by MODEL_NAME.

    The default log/checkpoint name is
    ``<model>__<dataset>__<seed>__<pooling>__lr=..__l2=..[__extra=..]``.

    :return: the parsed ``argparse.Namespace``
    """
    return _build_args(with_rnn=False)


def parse_args_kda():
    """Parse arguments like parse_args(), with the RNN type in the file name.

    Accepts ``--rnn_model`` (default: the notebook constant RNN) and inserts
    it after the dataset in the default log/checkpoint name.

    :return: the parsed ``argparse.Namespace``
    """
    return _build_args(with_rnn=True)

In [4]:
def run_model():
    """Train (optionally) and evaluate the currently configured model.

    Works on the notebook globals ``args``, ``model_name``, ``reader_name``
    and ``runner_name`` prepared by the setup cell. The corpus is loaded from
    a pickle cache when it exists and ``args.regenerate`` is 0; otherwise it
    is rebuilt by the reader and written to the cache. Test results are
    logged at the end.
    """
    logging.info('-' * 45 + ' BEGIN: ' + utils.get_time() + ' ' + '-' * 45)
    # Bookkeeping options that are left out of the printed argument table.
    exclude = ['check_epoch', 'log_file', 'model_path', 'path', 'pin_memory', 'load',
               'regenerate', 'sep', 'train', 'verbose', 'metric', 'test_epoch', 'buffer']
    logging.info(utils.format_arg_str(args, exclude_lst=exclude))

    # Random seed
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    torch.backends.cudnn.deterministic = True

    # GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    logging.info('GPU available: {}'.format(torch.cuda.is_available()))

    # Read data
    corpus_path = os.path.join(args.path, args.dataset, model_name.reader + '.pkl')
    if not args.regenerate and os.path.exists(corpus_path):
        logging.info('Load corpus from {}'.format(corpus_path))
        # NOTE: unpickling executes code from the file; the cache must be a
        # locally produced, trusted artifact.
        with open(corpus_path, 'rb') as corpus_file:
            corpus = pickle.load(corpus_file)
    else:
        corpus = reader_name(args)
        logging.info('Save corpus to {}'.format(corpus_path))
        # The context manager guarantees the cache is flushed and closed.
        with open(corpus_path, 'wb') as corpus_file:
            pickle.dump(corpus, corpus_file)

    # Define model
    model = model_name(args, corpus)
    logging.info(model)
    model.apply(model.init_weights)
    model.actions_before_train()
    model.to(model.device)

    # Run model
    data_dict = dict()
    for phase in ['train', 'dev', 'test']:
        data_dict[phase] = model_name.Dataset(model, corpus, phase)
    runner = runner_name(args)
    if args.load > 0:
        model.load_model()
    if args.train > 0:
        runner.train(data_dict)
    logging.info(os.linesep + 'Test After Training: ' + runner.print_res(data_dict['test']))

    model.actions_after_train()
    logging.info(os.linesep + '-' * 45 + ' END: ' + utils.get_time() + ' ' + '-' * 45)

### KDA by default

In [None]:
# Run configuration for the baseline KDA model. The setup cell copies most of
# these constants onto `args`.
MODEL_NAME  = 'KDA'      # resolved to the class KDA.KDA via eval()
EMB_SIZE    = 40
LR          = 1e-3       # NOTE(review): never copied onto args in this notebook
L2          = 1e-6       # NOTE(review): never copied onto args; the run log shows l2 = 0
DATASET     = 'ttrs'
BATCH_SIZE  = 64         # used for both batch_size and eval_batch_size
EPOCH       = 10         # matches the epoch value reported in the recorded run log
TOP_K       = '5,10,15'
VERBOSE     = 20         # numerically equal to logging.INFO
NUM_WORKERS = 4
RANDOM_SEED = 3500       # also part of the log/checkpoint file name
TIME_SCALAR = (60 * 60 * 24) * 14  # 14 days per interval, in seconds
CAT_COL     = 'i_category'
POOLING     = 'average'  # also part of the log/checkpoint file name

In [None]:
# Start from the parser defaults, then override them with the notebook
# constants from the configuration cell.
args = parse_args()
args.batch_size      = BATCH_SIZE
args.dataset         = DATASET
args.emb_size        = EMB_SIZE
args.epoch           = EPOCH
args.topk            = TOP_K
args.verbose         = VERBOSE
args.eval_batch_size = BATCH_SIZE
args.num_workers     = NUM_WORKERS
args.random_seed     = RANDOM_SEED
# NOTE(review): the recorded KDA argument table also lists a separate
# `t_scalar` (value 60); confirm which of the two the model actually reads.
args.time_scalar     = TIME_SCALAR
args.category_col    = CAT_COL
args.test_all        = 1
args.pooling         = POOLING

# Resolve the model, reader and runner classes; run_model() reads these
# three names (and `args`) as globals.
model_name = eval('{0}.{0}'.format(MODEL_NAME))
reader_name = eval('{0}.{0}'.format(model_name.reader))
runner_name = eval('{0}.{0}'.format(model_name.runner))

In [8]:
run_model()



--------------------------------------------- BEGIN: 2022-05-29 19:02:01 ---------------------------------------------

 Arguments       | Values     
 attention_size  | 10        
 batch_size      | 64        
 category_col    | i_category
 dataset         | ttrs      
 dropout         | 0         
 early_stop      | 10        
 emb_size        | 40        
 epoch           | 10        
 eval_batch_size | 64        
 freq_rand       | 0         
 gamma           | -1        
 gpu             | 0         
 history_max     | 20        
 include_attr    | 0         
 include_val     | 1         
 l2              | 0         
 lr              | 0.001     
 n_dft           | 64        
 neg_head_p      | 0.5       
 num_heads       | 1         
 num_layers      | 1         
 num_neg         | 1         
 num_workers     | 4         
 optimizer       | Adam      
 pooling         | average   
 random_seed     | 3500      
 t_scalar        | 60        
 test_all        | 1         
 time_sca

### KDA+LSTM

In [6]:
# Run configuration for KDA_RNN with an LSTM encoder and average pooling.
MODEL_NAME  = 'KDA_RNN'
EMB_SIZE    = 40
LR          = 1e-3  # NOTE(review): never copied onto args in this notebook
L2          = 1e-6  # NOTE(review): never copied onto args in this notebook
DATASET     = 'ttrs'
BATCH_SIZE  = 64    # used for both batch_size and eval_batch_size
EPOCH       = 10
TOP_K       = '5,10,15'
VERBOSE     = 20    # numerically equal to logging.INFO
NUM_WORKERS = 4
RANDOM_SEED = 3500
TIME_SCALAR = (60 * 60 * 24) * 14  # 14 days, in seconds
CAT_COL     = 'i_category'
POOLING     = 'average'
RNN         = 'lstm'

In [7]:
# parse_args_kda() puts the RNN type into the default log/checkpoint name.
# With plain parse_args() the LSTM and GRU runs (same model, dataset, seed
# and pooling) would share one checkpoint path and overwrite each other.
args = parse_args_kda()
args.batch_size      = BATCH_SIZE
args.dataset         = DATASET
args.emb_size        = EMB_SIZE
args.epoch           = EPOCH
args.topk            = TOP_K
args.verbose         = VERBOSE
args.eval_batch_size = BATCH_SIZE
args.num_workers     = NUM_WORKERS
args.random_seed     = RANDOM_SEED
args.model_name      = MODEL_NAME
args.time_scalar     = TIME_SCALAR
args.category_col    = CAT_COL
args.test_all        = 1
args.pooling         = POOLING
args.time_max        = 20
# Use the configuration constant instead of a second hard-coded 'lstm'.
args.rnn_model       = RNN

# Resolve the model, reader and runner classes; run_model() reads these
# three names (and `args`) as globals.
model_name = eval('{0}.{0}'.format(MODEL_NAME))
reader_name = eval('{0}.{0}'.format(model_name.reader))
runner_name = eval('{0}.{0}'.format(model_name.runner))

Namespace(dataset='ttrs', model_name='KDA_RNN')


In [8]:
run_model()



--------------------------------------------- BEGIN: 2022-05-29 22:21:31 ---------------------------------------------

 Arguments       | Values     
 attention_size  | 10        
 batch_size      | 64        
 category_col    | i_category
 dataset         | ttrs      
 dropout         | 0         
 early_stop      | 10        
 emb_size        | 40        
 epoch           | 10        
 eval_batch_size | 64        
 freq_rand       | 0         
 gamma           | -1        
 gpu             | 0         
 history_max     | 20        
 include_attr    | 0         
 include_val     | 1         
 l2              | 0         
 lr              | 0.001     
 model_name      | KDA_RNN   
 n_dft           | 64        
 neg_head_p      | 0.5       
 num_heads       | 1         
 num_layers      | 1         
 num_neg         | 1         
 num_workers     | 4         
 optimizer       | Adam      
 pooling         | average   
 random_seed     | 3500      
 rnn_model       | lstm      
 t_scalar

### KDA+GRU

In [5]:
# Run configuration for KDA_RNN with a GRU encoder and average pooling.
MODEL_NAME  = 'KDA_RNN'
EMB_SIZE    = 40
LR          = 1e-3  # NOTE(review): never copied onto args in this notebook
L2          = 1e-6  # NOTE(review): never copied onto args in this notebook
DATASET     = 'ttrs'
BATCH_SIZE  = 64    # used for both batch_size and eval_batch_size
EPOCH       = 10
TOP_K       = '5,10,15'
VERBOSE     = 20    # numerically equal to logging.INFO
NUM_WORKERS = 4
RANDOM_SEED = 3500
TIME_SCALAR = (60 * 60 * 24) * 14  # 14 days, in seconds
CAT_COL     = 'i_category'
POOLING     = 'average'
RNN         = 'gru'

In [6]:
# parse_args_kda() puts the RNN type into the default log/checkpoint name.
# With plain parse_args() this GRU run and the LSTM run (same model, dataset,
# seed and pooling) would share one checkpoint path and overwrite each other.
args = parse_args_kda()
args.batch_size      = BATCH_SIZE
args.dataset         = DATASET
args.emb_size        = EMB_SIZE
args.epoch           = EPOCH
args.topk            = TOP_K
args.verbose         = VERBOSE
args.eval_batch_size = BATCH_SIZE
args.num_workers     = NUM_WORKERS
args.random_seed     = RANDOM_SEED
args.model_name      = MODEL_NAME
args.time_scalar     = TIME_SCALAR
args.category_col    = CAT_COL
args.test_all        = 1
args.pooling         = POOLING
args.time_max        = 20
args.rnn_model       = RNN

# Resolve the model, reader and runner classes; run_model() reads these
# three names (and `args`) as globals.
model_name = eval('{0}.{0}'.format(MODEL_NAME))
reader_name = eval('{0}.{0}'.format(model_name.reader))
runner_name = eval('{0}.{0}'.format(model_name.runner))

Namespace(dataset='ttrs', model_name='KDA_RNN')


In [7]:
run_model()



--------------------------------------------- BEGIN: 2022-05-30 02:15:46 ---------------------------------------------

 Arguments       | Values     
 attention_size  | 10        
 batch_size      | 64        
 category_col    | i_category
 dataset         | ttrs      
 dropout         | 0         
 early_stop      | 10        
 emb_size        | 40        
 epoch           | 10        
 eval_batch_size | 64        
 freq_rand       | 0         
 gamma           | -1        
 gpu             | 0         
 history_max     | 20        
 include_attr    | 0         
 include_val     | 1         
 l2              | 0         
 lr              | 0.001     
 model_name      | KDA_RNN   
 n_dft           | 64        
 neg_head_p      | 0.5       
 num_heads       | 1         
 num_layers      | 1         
 num_neg         | 1         
 num_workers     | 4         
 optimizer       | Adam      
 pooling         | average   
 random_seed     | 3500      
 rnn_model       | gru       
 t_scalar

### KDA+GRU+MAX

In [5]:
# Run configuration for KDA_RNN with a GRU encoder and max pooling.
MODEL_NAME  = 'KDA_RNN'
EMB_SIZE    = 40
LR          = 1e-3  # NOTE(review): never copied onto args in this notebook
L2          = 1e-6  # NOTE(review): never copied onto args in this notebook
DATASET     = 'ttrs'
BATCH_SIZE  = 64    # used for both batch_size and eval_batch_size
EPOCH       = 10
TOP_K       = '5,10,15'
VERBOSE     = 20    # numerically equal to logging.INFO
NUM_WORKERS = 4
RANDOM_SEED = 3500
TIME_SCALAR = (60 * 60 * 24) * 14  # 14 days, in seconds
CAT_COL     = 'i_category'
POOLING     = 'max'
RNN         = 'gru'

In [6]:
# Start from the parser defaults, then override them with the notebook
# constants from the configuration cell.
# NOTE(review): parse_args() does not put the RNN type into the default
# log/checkpoint name (parse_args_kda() does); confirm this is intended.
args = parse_args()
args.batch_size      = BATCH_SIZE
args.dataset         = DATASET
args.emb_size        = EMB_SIZE
args.epoch           = EPOCH
args.topk            = TOP_K
args.verbose         = VERBOSE
args.eval_batch_size = BATCH_SIZE
args.num_workers     = NUM_WORKERS
args.random_seed     = RANDOM_SEED
args.model_name      = MODEL_NAME
args.time_scalar     = TIME_SCALAR
args.category_col    = CAT_COL
args.test_all        = 1
args.pooling         = POOLING
args.time_max        = 20
args.rnn_model       = RNN

# Resolve the model, reader and runner classes; run_model() reads these
# three names (and `args`) as globals.
model_name = eval('{0}.{0}'.format(MODEL_NAME))
reader_name = eval('{0}.{0}'.format(model_name.reader))
runner_name = eval('{0}.{0}'.format(model_name.runner))

Namespace(dataset='ttrs', model_name='KDA_RNN')


In [7]:
run_model()



--------------------------------------------- BEGIN: 2022-05-30 12:33:40 ---------------------------------------------

 Arguments       | Values     
 attention_size  | 10        
 batch_size      | 64        
 category_col    | i_category
 dataset         | ttrs      
 dropout         | 0         
 early_stop      | 10        
 emb_size        | 40        
 epoch           | 10        
 eval_batch_size | 64        
 freq_rand       | 0         
 gamma           | -1        
 gpu             | 0         
 history_max     | 20        
 include_attr    | 0         
 include_val     | 1         
 l2              | 0         
 lr              | 0.001     
 model_name      | KDA_RNN   
 n_dft           | 64        
 neg_head_p      | 0.5       
 num_heads       | 1         
 num_layers      | 1         
 num_neg         | 1         
 num_workers     | 4         
 optimizer       | Adam      
 pooling         | max       
 random_seed     | 3500      
 rnn_model       | gru       
 t_scalar

In [9]:
# For Blending: rebuild the model, load its saved weights and export the
# test-set predictions to '<MODEL_NAME>_prob.csv' (tab-separated).
# Random seed
np.random.seed(args.random_seed)
torch.manual_seed(args.random_seed)
torch.cuda.manual_seed(args.random_seed)
torch.backends.cudnn.deterministic = True

# GPU
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
logging.info('GPU available: {}'.format(torch.cuda.is_available()))

# Read data (context managers close the cache file instead of leaking it)
corpus_path = os.path.join(args.path, args.dataset, model_name.reader + '.pkl')
if not args.regenerate and os.path.exists(corpus_path):
    logging.info('Load corpus from {}'.format(corpus_path))
    # NOTE: unpickling executes code from the file; the cache must be trusted.
    with open(corpus_path, 'rb') as corpus_file:
        corpus = pickle.load(corpus_file)
else:
    corpus = reader_name(args)
    logging.info('Save corpus to {}'.format(corpus_path))
    with open(corpus_path, 'wb') as corpus_file:
        pickle.dump(corpus, corpus_file)

# Define model
model = model_name(args, corpus)
logging.info(model)
model.apply(model.init_weights)
model.actions_before_train()
model.to(model.device)

# Only the test split is needed to export predictions.
data_dict = dict()
for phase in ['test']:
    data_dict[phase] = model_name.Dataset(model, corpus, phase)
logging.info(os.linesep + 'Загрузка модели')
model.load_model()

# A single runner is enough; the earlier duplicate instance was never used.
runner = runner_name(args)
logging.info(os.linesep + 'Получение предсказаний')
predictions = runner.predict(data_dict['test'])
logging.info(os.linesep + 'Сохранение предсказаний')
pd.DataFrame.from_records(predictions).to_csv(f'{MODEL_NAME}_prob.csv', sep='\t', index=False)

GPU available: True
Load corpus from ../data/ttrs/DFTReader.pkl
#params: 953073
KDA_RNN(
  (user_embeddings): Embedding(20465, 40, padding_idx=0)
  (entity_embeddings): Embedding(1355, 40)
  (relation_embeddings): Embedding(3, 40)
  (relational_dynamic_aggregation): RelationalDynamicAggregation(
    (relation_embeddings): Embedding(3, 40)
    (freq_real): Embedding(3, 33)
    (freq_imag): Embedding(3, 33)
  )
  (attn_head): MultiHeadAttention(
    (q_linear): Linear(in_features=40, out_features=40, bias=False)
    (k_linear): Linear(in_features=40, out_features=40, bias=False)
    (v_linear): Linear(in_features=40, out_features=40, bias=False)
  )
  (W1): Linear(in_features=40, out_features=40, bias=True)
  (W2): Linear(in_features=40, out_features=40, bias=True)
  (dropout_layer): Dropout(p=0, inplace=False)
  (layer_norm): LayerNorm((40,), eps=1e-05, elementwise_affine=True)
  (item_bias): Embedding(1355, 1)
  (emb_dropout): Dropout(p=0.1, inplace=False)
  (rnn): GRU(40, 80, num_laye



### KDA+GRU+SUM

In [5]:
# Run configuration for KDA_RNN with a GRU encoder and sum pooling.
MODEL_NAME  = 'KDA_RNN'
EMB_SIZE    = 40
LR          = 1e-3  # NOTE(review): never copied onto args in this notebook
L2          = 1e-6  # NOTE(review): never copied onto args in this notebook
DATASET     = 'ttrs'
BATCH_SIZE  = 64    # used for both batch_size and eval_batch_size
EPOCH       = 10
TOP_K       = '5,10,15'
VERBOSE     = 20    # numerically equal to logging.INFO
NUM_WORKERS = 4
RANDOM_SEED = 3500
TIME_SCALAR = (60 * 60 * 24) * 14  # 14 days, in seconds
CAT_COL     = 'i_category'
POOLING     = 'sum'
RNN         = 'gru' # default of --rnn_model in parse_args_kda()

In [6]:
# parse_args_kda() includes the RNN type in the default log/checkpoint name;
# the remaining fields are overridden with the notebook constants.
args = parse_args_kda()
args.batch_size      = BATCH_SIZE
args.dataset         = DATASET
args.emb_size        = EMB_SIZE
args.epoch           = EPOCH
args.topk            = TOP_K
args.verbose         = VERBOSE
args.eval_batch_size = BATCH_SIZE
args.num_workers     = NUM_WORKERS
args.random_seed     = RANDOM_SEED
args.model_name      = MODEL_NAME
args.time_scalar     = TIME_SCALAR
args.category_col    = CAT_COL
args.test_all        = 1
args.pooling         = POOLING
args.time_max        = 20
args.rnn_model       = RNN

# Resolve the model, reader and runner classes; run_model() reads these
# three names (and `args`) as globals.
model_name = eval('{0}.{0}'.format(MODEL_NAME))
reader_name = eval('{0}.{0}'.format(model_name.reader))
runner_name = eval('{0}.{0}'.format(model_name.runner))

Namespace(dataset='ttrs', model_name='KDA_RNN', rnn_model='gru')


In [7]:
run_model()



--------------------------------------------- BEGIN: 2022-05-30 13:57:14 ---------------------------------------------

 Arguments       | Values     
 attention_size  | 10        
 batch_size      | 64        
 category_col    | i_category
 dataset         | ttrs      
 dropout         | 0         
 early_stop      | 10        
 emb_size        | 40        
 epoch           | 10        
 eval_batch_size | 64        
 freq_rand       | 0         
 gamma           | -1        
 gpu             | 0         
 history_max     | 20        
 include_attr    | 0         
 include_val     | 1         
 l2              | 0         
 lr              | 0.001     
 model_name      | KDA_RNN   
 n_dft           | 64        
 neg_head_p      | 0.5       
 num_heads       | 1         
 num_layers      | 1         
 num_neg         | 1         
 num_workers     | 4         
 optimizer       | Adam      
 pooling         | sum       
 random_seed     | 3500      
 rnn_model       | gru       
 t_scalar

### Blending

In [5]:
# Load the per-model score tables written by the "For Blending" cells
# (tab-separated, one file per model).
slrc_prob, kda_prob, tsr_prob = (
    pd.read_csv(prob_path, sep='\t')
    for prob_path in ('SLRCPlus_prob.csv', 'KDA_RNN_prob.csv', 'TiSASRec_prob.csv')
)

In [6]:
slrc_prob.shape, kda_prob.shape, tsr_prob.shape

((20464, 1355), (20464, 1355), (20464, 1355))

In [52]:
def evaluate_method(predictions: np.ndarray, topk: list, metrics: list) -> Dict[str, float]:
    """Compute ranking metrics for the ground-truth item of every row.

    :param predictions: 2-D score matrix; column 0 holds the score of the
        ground-truth item, the other columns are competing candidates
    :param topk: cut-off values K
    :param metrics: metric names, each either 'HR' or 'NDCG'
    :return: dict mapping '<metric>@<K>' to the metric rounded to 4 decimals
    :raises ValueError: when an unsupported metric name is encountered
    """
    # Candidates ordered by descending score; the 1-based rank of the
    # ground truth is the position at which column index 0 appears.
    order = np.argsort(-predictions, axis=1)
    gt_rank = np.argwhere(order == 0)[:, 1] + 1
    # DCG discount of the ground-truth position, shared by all cut-offs.
    log_rank = np.log2(gt_rank + 1)

    results = {}
    for cutoff in topk:
        hit = gt_rank <= cutoff
        for metric in metrics:
            if metric == 'HR':
                value = hit.mean()
            elif metric == 'NDCG':
                value = (hit / log_rank).mean()
            else:
                raise ValueError('Undefined evaluation metric: {}.'.format(metric))
            results['{}@{}'.format(metric, cutoff)] = round(value, 4)
    return results

In [53]:
# Blend the three models by averaging their score matrices element-wise.
# np.mean over a new leading axis replaces stack([(…)]).squeeze().mean(0):
# squeeze() would also drop a size-1 user or item axis and make mean(0)
# average over the wrong dimension.
mean_prob = np.mean([slrc_prob.values, kda_prob.values, tsr_prob.values], axis=0)
mean_prob.shape

(20464, 1355)

### SLRC+

In [5]:
# Run configuration for the SLRCPlus baseline.
MODEL_NAME  = 'SLRCPlus'
EMB_SIZE    = 40
LR          = 1e-3  # NOTE(review): never copied onto args in this notebook
L2          = 1e-6  # NOTE(review): never copied onto args; the recorded args show l2=0
DATASET     = 'ttrs'
BATCH_SIZE  = 64    # used for both batch_size and eval_batch_size
EPOCH       = 10
TOP_K       = '5,10,15'
VERBOSE     = 20    # numerically equal to logging.INFO
NUM_WORKERS = 4
RANDOM_SEED = 3500
TIME_SCALAR = (60 * 60 * 24) * 14 # 14 days per interval
CAT_COL     = 'i_category'
POOLING     = 'average'

In [6]:
# Start from the parser defaults, then override them with the notebook
# constants. The default log/checkpoint name was already fixed inside
# parse_args(), so it still carries the parser's default emb_size (the
# recorded paths show emb_size=64 while args.emb_size is 40).
args = parse_args()
args.batch_size      = BATCH_SIZE
args.dataset         = DATASET
args.emb_size        = EMB_SIZE
args.epoch           = EPOCH
args.topk            = TOP_K
args.verbose         = VERBOSE
args.eval_batch_size = BATCH_SIZE
args.num_workers     = NUM_WORKERS
args.random_seed     = RANDOM_SEED
args.time_scalar     = TIME_SCALAR
args.category_col    = CAT_COL
args.test_all        = 1
args.pooling         = POOLING

Namespace(dataset='ttrs', model_name='SLRCPlus')


In [7]:
args

Namespace(batch_size=64, buffer=1, category_col='i_category', check_epoch=1, dataset='ttrs', dropout=0, early_stop=10, emb_size=40, epoch=10, eval_batch_size=64, gpu='0', history_max=20, include_attr=0, l2=0, load=0, log_file='../log/SLRCPlus/SLRCPlus__ttrs__3500__average__lr=0.001__l2=0__emb_size=64.txt', lr=0.001, metric='NDCG,HR', model_path='../model/SLRCPlus/SLRCPlus__ttrs__3500__average__lr=0.001__l2=0__emb_size=64.pt', num_neg=1, num_workers=4, optimizer='Adam', path='../data/', pin_memory=0, pooling='average', random_seed=3500, regenerate=0, sep='\t', test_all=1, test_epoch=-1, time_scalar=1209600, topk='5,10,15', train=1, verbose=20)

In [8]:
# Resolve the model, reader and runner classes ("<Name>.<Name>" among the
# star-imported modules); run_model() reads these three names as globals.
model_name = eval('{0}.{0}'.format(MODEL_NAME))
reader_name = eval('{0}.{0}'.format(model_name.reader))
runner_name = eval('{0}.{0}'.format(model_name.runner))

In [17]:
run_model()



--------------------------------------------- BEGIN: 2022-05-29 19:34:47 ---------------------------------------------

 Arguments       | Values     
 batch_size      | 64        
 category_col    | i_category
 dataset         | ttrs      
 dropout         | 0         
 early_stop      | 10        
 emb_size        | 40        
 epoch           | 10        
 eval_batch_size | 64        
 gpu             | 0         
 history_max     | 20        
 include_attr    | 0         
 l2              | 0         
 lr              | 0.001     
 num_neg         | 1         
 num_workers     | 4         
 optimizer       | Adam      
 pooling         | average   
 random_seed     | 3500      
 test_all        | 1         
 time_scalar     | 1209600   
 topk            | 5,10,15   
GPU available: True
Reading data from "../data/", dataset = "ttrs" 
Counting dataset statistics...
"# user": 20464, "# item": 1354, "# entry": 730393
Appending history info...
Constructing relation triplets...
Item-item

In [9]:
# For Blending: rebuild the model, load its saved weights and export the
# test-set predictions to '<MODEL_NAME>_prob.csv' (tab-separated).
# Random seed
np.random.seed(args.random_seed)
torch.manual_seed(args.random_seed)
torch.cuda.manual_seed(args.random_seed)
torch.backends.cudnn.deterministic = True

# GPU
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
logging.info('GPU available: {}'.format(torch.cuda.is_available()))

# Read data (context managers close the cache file instead of leaking it)
corpus_path = os.path.join(args.path, args.dataset, model_name.reader + '.pkl')
if not args.regenerate and os.path.exists(corpus_path):
    logging.info('Load corpus from {}'.format(corpus_path))
    # NOTE: unpickling executes code from the file; the cache must be trusted.
    with open(corpus_path, 'rb') as corpus_file:
        corpus = pickle.load(corpus_file)
else:
    corpus = reader_name(args)
    logging.info('Save corpus to {}'.format(corpus_path))
    with open(corpus_path, 'wb') as corpus_file:
        pickle.dump(corpus, corpus_file)

# Define model
model = model_name(args, corpus)
logging.info(model)
model.apply(model.init_weights)
model.actions_before_train()
model.to(model.device)

# Only the test split is needed to export predictions.
data_dict = dict()
for phase in ['test']:
    data_dict[phase] = model_name.Dataset(model, corpus, phase)
logging.info(os.linesep + 'Загрузка модели')
model.load_model()

# A single runner is enough; the earlier duplicate instance was never used.
runner = runner_name(args)
logging.info(os.linesep + 'Получение предсказаний')
predictions = runner.predict(data_dict['test'])
logging.info(os.linesep + 'Сохранение предсказаний')
pd.DataFrame.from_records(predictions).to_csv(f'{MODEL_NAME}_prob.csv', sep='\t', index=False)

GPU available: True
Load corpus from ../data/ttrs/KGReader.pkl
#params: 914946
SLRCPlus(
  (u_embeddings): Embedding(20465, 40)
  (i_embeddings): Embedding(1355, 40)
  (user_bias): Embedding(20465, 1)
  (item_bias): Embedding(1355, 1)
  (alphas): Embedding(1355, 3)
  (pis): Embedding(1355, 3)
  (betas): Embedding(1355, 3)
  (sigmas): Embedding(1355, 3)
  (mus): Embedding(1355, 3)
)

Загрузка модели
Load model from ../model/SLRCPlus/SLRCPlus__ttrs__3500__average__lr=0.001__l2=0__emb_size=64.pt

Получение предсказаний

Сохранение предсказаний




#### TiSASRec

In [5]:
# Run configuration for the TiSASRec baseline.
MODEL_NAME  = 'TiSASRec'
EMB_SIZE    = 40
LR          = 1e-3  # NOTE(review): never copied onto args in this notebook
L2          = 1e-6  # NOTE(review): never copied onto args; the recorded args show l2=0
DATASET     = 'ttrs'
BATCH_SIZE  = 64    # used for both batch_size and eval_batch_size
EPOCH       = 10
TOP_K       = '5,10,15'
VERBOSE     = 20    # numerically equal to logging.INFO
NUM_WORKERS = 4
RANDOM_SEED = 3500
TIME_SCALAR = (60 * 60 * 24) * 14  # 14 days, in seconds
CAT_COL     = 'i_category'
POOLING     = 'average'

In [6]:
args = parse_args()

Namespace(dataset='ttrs', model_name='TiSASRec')


In [7]:
# Override the parsed defaults with the notebook constants. The default
# log/checkpoint name was already fixed inside parse_args(), so it keeps the
# parser defaults (the recorded paths show emb_size=64 and time_max=512).
args.batch_size      = BATCH_SIZE
args.dataset         = DATASET
args.emb_size        = EMB_SIZE
args.epoch           = EPOCH
args.topk            = TOP_K
args.verbose         = VERBOSE
args.eval_batch_size = BATCH_SIZE
args.num_workers     = NUM_WORKERS
args.random_seed     = RANDOM_SEED
args.model_name      = MODEL_NAME
args.time_scalar     = TIME_SCALAR
args.category_col    = CAT_COL
args.test_all        = 1
args.pooling         = POOLING
args.time_max        = 20
# TODO(review): decide whether args.t_scalar also has to be set for this model.

In [8]:
args

Namespace(batch_size=64, buffer=1, category_col='i_category', check_epoch=1, dataset='ttrs', dropout=0, early_stop=10, emb_size=40, epoch=10, eval_batch_size=64, gpu='0', history_max=20, l2=0, load=0, log_file='../log/TiSASRec/TiSASRec__ttrs__3500__average__lr=0.001__l2=0__emb_size=64__num_layers=1__num_heads=4__time_max=512.txt', lr=0.001, metric='NDCG,HR', model_name='TiSASRec', model_path='../model/TiSASRec/TiSASRec__ttrs__3500__average__lr=0.001__l2=0__emb_size=64__num_layers=1__num_heads=4__time_max=512.pt', num_heads=4, num_layers=1, num_neg=1, num_workers=4, optimizer='Adam', path='../data/', pin_memory=0, pooling='average', random_seed=3500, regenerate=0, sep='\t', test_all=1, test_epoch=-1, time_max=20, time_scalar=1209600, topk='5,10,15', train=1, verbose=20)

In [9]:
# Resolve the model, reader and runner classes ("<Name>.<Name>" among the
# star-imported modules); run_model() reads these three names as globals.
model_name = eval('{0}.{0}'.format(MODEL_NAME))
reader_name = eval('{0}.{0}'.format(model_name.reader))
runner_name = eval('{0}.{0}'.format(model_name.runner))

In [11]:
run_model()



--------------------------------------------- BEGIN: 2022-05-29 20:09:26 ---------------------------------------------

 Arguments       | Values     
 batch_size      | 64        
 category_col    | i_category
 dataset         | ttrs      
 dropout         | 0         
 early_stop      | 10        
 emb_size        | 40        
 epoch           | 10        
 eval_batch_size | 64        
 gpu             | 0         
 history_max     | 20        
 l2              | 0         
 lr              | 0.001     
 num_heads       | 4         
 num_layers      | 1         
 num_neg         | 1         
 num_workers     | 4         
 optimizer       | Adam      
 pooling         | average   
 random_seed     | 3500      
 test_all        | 1         
 time_max        | 20        
 time_scalar     | 1209600   
 topk            | 5,10,15   
GPU available: True
Reading data from "../data/", dataset = "ttrs" 
Counting dataset statistics...
"# user": 20464, "# item": 1354, "# entry": 730393
Appending

In [10]:
# For Blending: rebuild the model, load its saved weights and export the
# test-set predictions to '<MODEL_NAME>_prob.csv' (tab-separated).
# Random seed
np.random.seed(args.random_seed)
torch.manual_seed(args.random_seed)
torch.cuda.manual_seed(args.random_seed)
torch.backends.cudnn.deterministic = True

# GPU
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
logging.info('GPU available: {}'.format(torch.cuda.is_available()))

# Read data (context managers close the cache file instead of leaking it)
corpus_path = os.path.join(args.path, args.dataset, model_name.reader + '.pkl')
if not args.regenerate and os.path.exists(corpus_path):
    logging.info('Load corpus from {}'.format(corpus_path))
    # NOTE: unpickling executes code from the file; the cache must be trusted.
    with open(corpus_path, 'rb') as corpus_file:
        corpus = pickle.load(corpus_file)
else:
    corpus = reader_name(args)
    logging.info('Save corpus to {}'.format(corpus_path))
    with open(corpus_path, 'wb') as corpus_file:
        pickle.dump(corpus, corpus_file)

# Define model
model = model_name(args, corpus)
logging.info(model)
model.apply(model.init_weights)
model.actions_before_train()
model.to(model.device)

# Only the test split is needed to export predictions.
data_dict = dict()
for phase in ['test']:
    data_dict[phase] = model_name.Dataset(model, corpus, phase)
logging.info(os.linesep + 'Загрузка модели')
model.load_model()

# A single runner is enough; the earlier duplicate instance was never used.
runner = runner_name(args)
logging.info(os.linesep + 'Получение предсказаний')
predictions = runner.predict(data_dict['test'])
logging.info(os.linesep + 'Сохранение предсказаний')
pd.DataFrame.from_records(predictions).to_csv(f'{MODEL_NAME}_prob.csv', sep='\t', index=False)

GPU available: True
Load corpus from ../data/ttrs/BaseReader.pkl
#params: 65920
TiSASRec(
  (i_embeddings): Embedding(1355, 40)
  (p_k_embeddings): Embedding(21, 40)
  (p_v_embeddings): Embedding(21, 40)
  (t_k_embeddings): Embedding(21, 40)
  (t_v_embeddings): Embedding(21, 40)
  (transformer_block): ModuleList(
    (0): TimeIntervalTransformerLayer(
      (masked_attn_head): TimeIntervalMultiHeadAttention(
        (v_linear): Linear(in_features=40, out_features=40, bias=True)
        (k_linear): Linear(in_features=40, out_features=40, bias=True)
        (q_linear): Linear(in_features=40, out_features=40, bias=True)
      )
      (layer_norm1): LayerNorm((40,), eps=1e-05, elementwise_affine=True)
      (dropout1): Dropout(p=0, inplace=False)
      (linear1): Linear(in_features=40, out_features=40, bias=True)
      (linear2): Linear(in_features=40, out_features=40, bias=True)
      (layer_norm2): LayerNorm((40,), eps=1e-05, elementwise_affine=True)
      (dropout2): Dropout(p=0, inplac



#### SASRec

In [5]:
# Run configuration for the SASRec baseline.
MODEL_NAME  = 'SASRec'
EMB_SIZE    = 40
LR          = 1e-3  # NOTE(review): never copied onto args in this notebook
L2          = 1e-6  # NOTE(review): never copied onto args; the run log shows l2 = 0
DATASET     = 'ttrs'
BATCH_SIZE  = 64    # used for both batch_size and eval_batch_size
EPOCH       = 10
TOP_K       = '5,10,15'
VERBOSE     = 20    # numerically equal to logging.INFO
NUM_WORKERS = 4
RANDOM_SEED = 3500
TIME_SCALAR = (60 * 60 * 24) * 14  # 14 days, in seconds
CAT_COL     = 'i_category'
POOLING     = 'average'

In [6]:
# Start from the parser defaults, then override them with the notebook
# constants from the configuration cell.
args = parse_args()
args.batch_size      = BATCH_SIZE
args.dataset         = DATASET
args.emb_size        = EMB_SIZE
args.epoch           = EPOCH
args.topk            = TOP_K
args.verbose         = VERBOSE
args.eval_batch_size = BATCH_SIZE
args.num_workers     = NUM_WORKERS
args.random_seed     = RANDOM_SEED
args.model_name      = MODEL_NAME
args.time_scalar     = TIME_SCALAR
args.category_col    = CAT_COL
args.test_all        = 1
args.pooling         = POOLING
args.time_max        = 20

# Resolve the model, reader and runner classes; run_model() reads these
# three names (and `args`) as globals.
model_name = eval('{0}.{0}'.format(MODEL_NAME))
reader_name = eval('{0}.{0}'.format(model_name.reader))
runner_name = eval('{0}.{0}'.format(model_name.runner))

Namespace(dataset='ttrs', model_name='SASRec')


In [7]:
run_model()



--------------------------------------------- BEGIN: 2022-05-29 20:31:08 ---------------------------------------------

 Arguments       | Values     
 batch_size      | 64        
 category_col    | i_category
 dataset         | ttrs      
 dropout         | 0         
 early_stop      | 10        
 emb_size        | 40        
 epoch           | 10        
 eval_batch_size | 64        
 gpu             | 0         
 history_max     | 20        
 l2              | 0         
 lr              | 0.001     
 model_name      | SASRec    
 num_heads       | 4         
 num_layers      | 1         
 num_neg         | 1         
 num_workers     | 4         
 optimizer       | Adam      
 pooling         | average   
 random_seed     | 3500      
 test_all        | 1         
 time_max        | 20        
 time_scalar     | 1209600   
 topk            | 5,10,15   
GPU available: True
Load corpus from ../data/ttrs/BaseReader.pkl
#params: 63400
SASRec(
  (i_embeddings): Embedding(1355, 40)
  (

#### Caser

In [None]:
# Experiment configuration: Caser on the TTRS dataset.

# Model / data selection.
MODEL_NAME = 'Caser'
DATASET = 'ttrs'
CAT_COL = 'i_category'

# Model and optimiser settings.
EMB_SIZE = 40
POOLING = 'average'
LR = 1e-3
L2 = 1e-6
# NOTE(review): LR and L2 are declared here but the following cell never
# copies them onto `args`, so they appear to have no effect on the run.
# Confirm whether that is intended.

# Training / evaluation loop settings.
EPOCH = 10
BATCH_SIZE = 64
TOP_K = '5,10,15'  # comma-separated string, forwarded as `args.topk`
NUM_WORKERS = 4
RANDOM_SEED = 3500
VERBOSE = 20  # numeric logging level; 20 is logging.INFO

# 14 days expressed in seconds (60 s * 60 min * 24 h * 14 days).
TIME_SCALAR = 14 * (24 * 60 * 60)

In [None]:
# Start from the parser defaults, then overwrite them in one step with the
# notebook-level configuration constants from the previous cell.
args = parse_args()
vars(args).update(
    batch_size=BATCH_SIZE,
    dataset=DATASET,
    emb_size=EMB_SIZE,
    epoch=EPOCH,
    topk=TOP_K,
    verbose=VERBOSE,
    eval_batch_size=BATCH_SIZE,  # evaluation reuses the training batch size
    num_workers=NUM_WORKERS,
    random_seed=RANDOM_SEED,
    model_name=MODEL_NAME,
    time_scalar=TIME_SCALAR,
    category_col=CAT_COL,
    test_all=1,
    pooling=POOLING,
    time_max=20,
)
# NOTE(review): LR and L2 from the configuration cell are not forwarded to
# `args` here, so `args.lr` / `args.l2` presumably keep their parser
# defaults. Confirm this is intended.

# Resolve the model, reader and runner objects by evaluating `<name>.<name>`;
# the reader/runner names are read from attributes of the resolved model.
# eval is only ever fed names defined in this notebook, never external input.
model_name = eval('{0}.{0}'.format(MODEL_NAME))
reader_name = eval('{0}.{0}'.format(model_name.reader))
runner_name = eval('{0}.{0}'.format(model_name.runner))

In [None]:
# Launch the Caser experiment. run_model() is defined outside this cell and
# is called without arguments, so it presumably picks up the module-level
# `args`, `model_name`, `reader_name` and `runner_name` set in the previous
# cell (TODO confirm against its definition).
run_model()

### Chorus (for Ta_Feng)

_STAGE 1: Learn KG embeddings_

In [5]:
# Experiment configuration: Chorus on the Ta_Feng dataset.

# Model / data selection.
MODEL_NAME = 'Chorus'
DATASET = 'ta_feng'
CAT_COL = 'i_category'

# Model and optimiser settings.
EMB_SIZE = 40
POOLING = 'average'
LR = 1e-3
L2 = 1e-6
# NOTE(review): LR and L2 are declared here but the following cell never
# copies them onto `args`; the recorded run log reports `l2 | 0`, so L2
# appears to have no effect on the run. Confirm whether that is intended.

# Training / evaluation loop settings.
EPOCH = 10
BATCH_SIZE = 64
TOP_K = '5,10,15'  # comma-separated string, forwarded as `args.topk`
NUM_WORKERS = 4
RANDOM_SEED = 3500
VERBOSE = 20  # numeric logging level; 20 is logging.INFO

# 14 days expressed in seconds (60 s * 60 min * 24 h * 14 days).
TIME_SCALAR = 14 * (24 * 60 * 60)

In [6]:
# Start from the parser defaults, then overwrite them in one step with the
# notebook-level configuration constants from the previous cell.
args = parse_args()
vars(args).update(
    batch_size=BATCH_SIZE,
    dataset=DATASET,
    emb_size=EMB_SIZE,
    epoch=EPOCH,
    topk=TOP_K,
    verbose=VERBOSE,
    eval_batch_size=BATCH_SIZE,  # evaluation reuses the training batch size
    num_workers=NUM_WORKERS,
    random_seed=RANDOM_SEED,
    model_name=MODEL_NAME,
    time_scalar=TIME_SCALAR,
    category_col=CAT_COL,
    test_all=1,
    pooling=POOLING,
    time_max=20,
    stage=1,  # first Chorus stage; the heading above labels it "Learn KG embeddings"
)
# NOTE(review): LR and L2 from the configuration cell are not forwarded to
# `args` here, so `args.lr` / `args.l2` keep their parser defaults (the
# recorded log shows `lr | 0.001` and `l2 | 0`). Confirm this is intended.

# Resolve the model, reader and runner objects by evaluating `<name>.<name>`;
# the reader/runner names are read from attributes of the resolved model.
# eval is only ever fed names defined in this notebook, never external input.
model_name = eval('{0}.{0}'.format(MODEL_NAME))
reader_name = eval('{0}.{0}'.format(model_name.reader))
runner_name = eval('{0}.{0}'.format(model_name.runner))

Namespace(dataset='ta_feng', model_name='Chorus')


In [7]:
# Launch stage 1 of the Chorus experiment. run_model() is defined outside
# this cell and is called without arguments, so it presumably picks up the
# module-level `args`, `model_name`, `reader_name` and `runner_name` set in
# the previous cell (TODO confirm against its definition).
run_model()



--------------------------------------------- BEGIN: 2022-05-30 17:43:29 ---------------------------------------------

 Arguments       | Values     
 base_method     | BPR       
 batch_size      | 64        
 category_col    | i_category
 dataset         | ta_feng   
 dropout         | 0         
 early_stop      | 10        
 emb_size        | 40        
 epoch           | 10        
 eval_batch_size | 64        
 gpu             | 0         
 history_max     | 20        
 include_attr    | 0         
 l2              | 0         
 lr              | 0.001     
 lr_scale        | 0.1       
 margin          | 1         
 model_name      | Chorus    
 num_neg         | 1         
 num_workers     | 4         
 optimizer       | Adam      
 pooling         | average   
 random_seed     | 3500      
 stage           | 1         
 test_all        | 1         
 time_max        | 20        
 time_scalar     | 1209600   
 topk            | 5,10,15   
GPU available: True
Load corpus from ..

_STAGE 2: Predict_

См. в ноутбуке **exp_ta_feng.ipynb**