In [None]:
!pip install -q recommenders
!pip install -U -q "jupyter_server>=1.11.0"
!pip install -q fastparquet
!pip install -q git+https://github.com/beomso0/custom_SASRec.git

In [2]:
# Colab only: make Google Drive available under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [44]:
# Directory passed to save_sasrec_model / load_sasrec_model later in the notebook.
# NOTE(review): this is relative to the current working directory, not to the
# Drive mount point ('/content/gdrive') — confirm this is the intended location.
path = './custom_SASRec_example/'

In [4]:
# Development convenience: re-import edited modules automatically before each
# cell executes, so changes to local packages are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [45]:
# Imports, display/logging configuration and environment report for the notebook.
import re
import sys
import os
from tempfile import TemporaryDirectory
import numpy as np
import pandas as pd 
# Show every column when a DataFrame is displayed (no truncation).
pd.set_option('display.max_columns',None)

from collections import defaultdict
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from recommenders.utils.timer import Timer
from recommenders.datasets.split_utils import filter_k_core

# Customized SASRec helpers: training / evaluation / prediction and model save / load
from custom_SASRec.custom_model import sas_train, sas_evaluate, sas_predict
from custom_SASRec.custom_util import save_sasrec_model,load_sasrec_model

# Transformer Based Models
from recommenders.models.sasrec.model import SASREC
from recommenders.models.sasrec.ssept import SSEPT

# Sampler for sequential prediction
from recommenders.models.sasrec.sampler import WarpSampler
from recommenders.models.sasrec.util import SASRecDataSet

# Record the interpreter and TensorFlow versions next to the notebook outputs.
print("System version: {}".format(sys.version))
print("Tensorflow version: {}".format(tf.__version__))

System version: 3.7.13 (default, Apr 24 2022, 01:04:09) 
[GCC 7.5.0]
Tensorflow version: 2.8.0


# Train test_model

In [50]:
# Wrap the tab-separated interaction file in a SASRecDataSet.
# The file name is relative to the current working directory.
# NOTE(review): the expected column layout of 'new_sas_df.txt' is not visible
# here — confirm it against SASRecDataSet's documentation.
data = SASRecDataSet(filename='new_sas_df.txt', col_sep="\t")

In [51]:
# Build the dataset splits; `data.user_train` is read by the cells that follow.
# NOTE(review): presumably this also prepares validation/test splits used by
# sas_train / sas_evaluate — confirm.
data.split()

In [64]:
# --- Training configuration -------------------------------------------------
num_epochs = 1
batch_size = 256
RANDOM_SEED = 100  # Set None for non-deterministic result

# --- Model hyper-parameters -------------------------------------------------
lr = 0.001             # learning rate
maxlen = 50            # maximum sequence length for each user
num_blocks = 6         # number of transformer blocks
hidden_units = 256     # number of units in the attention calculation
num_heads = 2          # number of attention heads
dropout_rate = 0.5     # dropout rate
l2_emb = 0.0           # L2 regularization coefficient

# Actually apply the seed. Previously RANDOM_SEED was defined but never used,
# so runs were non-deterministic regardless of its value.
# NOTE(review): the batch sampler is created with several workers, so batch
# ordering may still vary between runs — confirm if exact reproducibility is needed.
if RANDOM_SEED is not None:
    np.random.seed(RANDOM_SEED)
    tf.random.set_seed(RANDOM_SEED)

In [53]:
# Summary of the training split: user/item counts, average sequence length
# per user, and the number of whole batches that fit into the user set.
num_steps = len(data.user_train) // batch_size
cc = float(sum(len(data.user_train[uid]) for uid in data.user_train))
avg_seq_len = cc / len(data.user_train)
print('%g Users and %g items' % (data.usernum, data.itemnum))
print('average sequence length: %.2f' % avg_seq_len)
print('num_steps: ', num_steps)

365160 Users and 1735 items
average sequence length: 11.06
num_steps:  1426


In [66]:
# Build the SASRec model from the hyper-parameters defined in the config cell.
# The feed-forward (conv) widths are derived from `hidden_units` instead of
# being hard-coded to 256, so changing `hidden_units` keeps all dimensions in
# sync (with hidden_units = 256 this is identical to the previous [256, 256]).
# NOTE(review): presumably the last conv dim has to equal embedding_dim for the
# residual connection inside each block — confirm against the SASREC source.
test_model = SASREC(
    item_num=data.itemnum,
    seq_max_len=maxlen,
    num_blocks=num_blocks,
    embedding_dim=hidden_units,
    attention_dim=hidden_units,
    attention_num_heads=num_heads,
    dropout_rate=dropout_rate,
    conv_dims=[hidden_units, hidden_units],
    l2_reg=l2_emb,
)

In [67]:
# Batch sampler over the training sequences, configured with 5 workers.
# NOTE(review): presumably the workers are background processes that keep
# running after training — check whether the sampler should be closed
# explicitly once training is finished.
sampler = WarpSampler(data.user_train, data.usernum, data.itemnum, batch_size=batch_size, maxlen=maxlen, n_workers=5)

In [68]:
# Train the model for `num_epochs` epochs; the recorded output shows NDCG@10 / HR@10
# being reported after the epoch.
# NOTE(review): presumably val_epoch=1 evaluates after every epoch, target_user_n=1000
# limits evaluation to 1000 users and target_item_n=-1 means "all items" — confirm
# against custom_SASRec.custom_model.sas_train.
sas_train(test_model,data,sampler,num_epochs=num_epochs, batch_size=batch_size, learning_rate=lr, val_epoch=1,target_user_n=1000, target_item_n=-1)

epoch 1 / 1 started---------------------




Evaluating...




epoch: 1, time: 0.0,  test (NDCG@10: 0.1181148505546366, HR@10: 0.205)


In [71]:
# Evaluate the trained model; the call returns a pair that is unpacked as (NDCG, HR).
# NOTE(review): presumably rank_threshold=5 yields metrics @5 and target_item_n=100
# ranks the true item among 100 candidates, unlike the -1 setting used during
# training, so these numbers are not comparable to the @10 values above — confirm.
ndcg_at5, hr_at5 = sas_evaluate(test_model,data, target_user_n=1000, target_item_n=100,rank_threshold=5)



In [72]:
# Report the metrics returned by the evaluation cell above.
print(f'NDCG@5: {ndcg_at5}, HR@5: {hr_at5}')

NDCG@5: 0.3479047102577054, HR@5: 0.474


# Save and load test_model

In [73]:
# Persist the trained model under `path`, tagged with the experiment name 'save_test'.
# NOTE(review): whether the target directory is created automatically is not
# visible here — confirm against custom_SASRec.custom_util.save_sasrec_model.
save_sasrec_model(test_model, path, exp_name='save_test')

In [74]:
# Restore the model that was just saved, using the same directory and experiment name.
loaded_model = load_sasrec_model(path, exp_name='save_test')

In [75]:
# Sanity check: evaluate the reloaded model with the same settings as the original.
# The result is the cell's last expression, so the (NDCG, HR) pair is displayed.
# NOTE(review): the recorded values differ slightly from the pre-save evaluation
# (0.358 / 0.486 vs 0.348 / 0.474) — presumably because evaluation samples users
# and candidate items randomly; confirm, and seed it if an exact match is expected.
sas_evaluate(loaded_model,data, target_user_n=1000, target_item_n=100,rank_threshold=5)



(0.3580831323142538, 0.486)

**done!** 🏖