## Package Imports and Global Variables

In [12]:
import os
import shutil

from recommenders.utils.timer import Timer
from recommenders.models.deeprec.deeprec_utils import prepare_hparams
from recommenders.models.deeprec.io.sequential_iterator import SequentialIterator
from recommenders.models.deeprec.models.sequential.sli_rec import SLI_RECModel as SeqModel
from recommenders.datasets.amazon_reviews import download_and_extract, data_preprocessing, _create_vocab

In [8]:
# Training hyperparameters shared by the cells below.
RANDOM_SEED = 42   # seed handed to the model constructor
EPOCHS = 10        # forwarded to prepare_hparams as `epochs`
BATCH_SIZE = 400   # forwarded to prepare_hparams as `batch_size`

# Filesystem locations (relative to the notebook's working directory).
# DATA_PATH holds the dataset files, vocab pickles, model checkpoints and summaries.
DATA_PATH = "../data/amazon"
# YAML file with the base SLI_REC configuration read by prepare_hparams.
YAML_PATH = "../../recommenders/recommenders/models/deeprec/config/sli_rec.yaml"

## Data Loading and Processing

In [3]:
# Start every run from an empty DATA_PATH so files left by a previous run
# cannot be picked up by the cells below.
#
# The removal is done with shutil/os instead of shelling out to `rm -r`:
# it works on every platform, is not affected by spaces or shell
# metacharacters in the path, and raises immediately if the delete fails
# rather than silently continuing to a confusing FileExistsError.
if os.path.isdir(DATA_PATH) and not os.path.islink(DATA_PATH):
    shutil.rmtree(DATA_PATH)
elif os.path.lexists(DATA_PATH):
    # A plain file or a symlink is occupying the path: remove just that entry.
    os.remove(DATA_PATH)

# makedirs (unlike mkdir) also creates missing parent directories.
os.makedirs(DATA_PATH)

In [4]:
# Sampling configuration.
sample_rate = 0.01  # keep only a small item set so this example trains and evaluates quickly
train_num_ngs = 4   # negatives paired with each positive instance for training
valid_num_ngs = 4   # negatives paired with each positive instance for validation
test_num_ngs = 9    # negatives paired with each positive instance for testing

# Raw Amazon files to download.
reviews_name = "reviews_Movies_and_TV_5.json"
meta_name = "meta_Movies_and_TV.json"
reviews_file = os.path.join(DATA_PATH, reviews_name)
meta_file = os.path.join(DATA_PATH, meta_name)

# Processed splits, vocabularies and prediction output, all stored under DATA_PATH.
train_file = os.path.join(DATA_PATH, "train_data")
valid_file = os.path.join(DATA_PATH, "valid_data")
test_file = os.path.join(DATA_PATH, "test_data")
user_vocab = os.path.join(DATA_PATH, "user_vocab.pkl")
item_vocab = os.path.join(DATA_PATH, "item_vocab.pkl")
cate_vocab = os.path.join(DATA_PATH, "category_vocab.pkl")
output_file = os.path.join(DATA_PATH, "output.txt")

# Positional arguments for data_preprocessing, in the order it is called with below.
input_files = [
    reviews_file,
    meta_file,
    train_file,
    valid_file,
    test_file,
    user_vocab,
    item_vocab,
    cate_vocab,
]

# Download and preprocess only when the training split is not on disk yet.
if not os.path.exists(train_file):
    download_and_extract(reviews_name, reviews_file)
    download_and_extract(meta_name, meta_file)
    # For the NextItNet model, which does not need the user history unfolded,
    # additionally pass `is_history_expanding=False` to this call.
    data_preprocessing(
        *input_files,
        sample_rate=sample_rate,
        valid_num_ngs=valid_num_ngs,
        test_num_ngs=test_num_ngs,
    )


100%|██████████| 692k/692k [00:32<00:00, 21.2kKB/s] 
100%|██████████| 97.5k/97.5k [00:07<00:00, 12.7kKB/s]


In [5]:
# Intentionally disabled. Enable the call below only when working with your own
# dataset (rather than the demo Amazon dataset) to generate the user, item and
# category vocab files.
# _create_vocab(train_file, user_vocab, item_vocab, cate_vocab)

In [6]:
# Iterator class (the class itself, not an instance) that is handed to the
# model constructor as its input creator.
input_creator = SequentialIterator

## Model Definition

In [9]:
# NOTE: when you use your own dataset instead of the demo Amazon dataset, first call
# `_create_vocab(train_file, user_vocab, item_vocab, cate_vocab)` to generate the
# user_vocab, item_vocab and cate_vocab files referenced below.
hparams = prepare_hparams(
    YAML_PATH,
    # Optimisation and regularisation.
    learning_rate=0.001,  # set to 0.01 if batch normalization is disabled
    embed_l2=0.0,
    layer_l2=0.0,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    show_step=20,
    # Output directories for checkpoints and summaries.
    MODEL_DIR=os.path.join(DATA_PATH, "model/"),
    SUMMARIES_DIR=os.path.join(DATA_PATH, "summary/"),
    # Vocabulary files.
    user_vocab=user_vocab,
    item_vocab=item_vocab,
    cate_vocab=cate_vocab,
    # Negative sampling: number of negative instances per positive instance
    # used for loss computation.
    need_sample=True,
    train_num_ngs=train_num_ngs,
)

In [10]:
# Instantiate the SLI_REC model (imported as SeqModel) from hparams, with
# input_creator as its iterator class and RANDOM_SEED as the seed.
model = SeqModel(hparams, input_creator, seed=RANDOM_SEED)

Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Colocations handled automatically by placer.


2022-06-30 14:37:23.966885: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-06-30 14:37:23.966993: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2022-06-30 14:37:23.967065: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2022-06-30 14:37:23.967136: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcufft.so.10'; dlerror: libcufft.so.10: cannot open shared object file: No such file or directory
2022-06-30 14:37:23.967207: W tensorflow/stream_executor/platform/default/dso_loader.cc:64

## Training and Validation

In [13]:
# Fit on train_file and time the whole run.
# valid_num_ngs is the number of negative lines that follow each positive line in
# valid_file; the model is evaluated on valid_file after every epoch.
with Timer() as train_time:
    model = model.fit(train_file, valid_file, valid_num_ngs=valid_num_ngs)

print(f"Time cost for training is {train_time.interval / 60.0:.2f} mins")

step 20 , total_loss: 1.6102, data_loss: 1.6102
step 40 , total_loss: 1.6100, data_loss: 1.6100
eval valid at epoch 1: auc:0.5108,logloss:0.6953,mean_mrr:0.4616,ndcg@2:0.3334,ndcg@4:0.5197,ndcg@6:0.5935,group_auc:0.5083
INFO:tensorflow:../data/amazon/model/epoch_1.index
INFO:tensorflow:0
INFO:tensorflow:../data/amazon/model/epoch_1.meta
INFO:tensorflow:1900
INFO:tensorflow:../data/amazon/model/epoch_1.data-00000-of-00001
INFO:tensorflow:2500
INFO:tensorflow:../data/amazon/model/best_model.meta
INFO:tensorflow:1900
INFO:tensorflow:../data/amazon/model/best_model.data-00000-of-00001
INFO:tensorflow:2500
INFO:tensorflow:../data/amazon/model/best_model.index
INFO:tensorflow:2500
step 20 , total_loss: 1.5652, data_loss: 1.5652
step 40 , total_loss: 1.5011, data_loss: 1.5011
eval valid at epoch 2: auc:0.5589,logloss:0.6955,mean_mrr:0.4976,ndcg@2:0.3841,ndcg@4:0.555,ndcg@6:0.621,group_auc:0.5484
INFO:tensorflow:../data/amazon/model/epoch_2.index
INFO:tensorflow:0
INFO:tensorflow:../data/amazo