In [1]:
import sys
import os
import argparse, json, math
from pathlib import Path
import pandas as pd
import numpy as np
import tensorflow as tf, os
from tqdm import tqdm
tf.get_logger().setLevel('ERROR') # only show error messages


from recommenders.models.sasrec.model import SASREC
from recommenders.models.sasrec.ssept import SSEPT
from recommenders.models.sasrec.sampler import WarpSampler
from recommenders.models.sasrec.util import SASRecDataSet
from recommenders.utils.notebook_utils import store_metadata
from recommenders.utils.timer import Timer


print(f"System version: {sys.version}")
print(f"Tensorflow version: {tf.__version__}")

2025-04-29 18:27:54.614562: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-29 18:27:54.917232: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-29 18:27:54.917257: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-29 18:27:54.918490: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-29 18:27:55.037617: I tensorflow/core/platform/cpu_feature_g

System version: 3.9.18 | packaged by conda-forge | (main, Dec 23 2023, 16:33:10) 
[GCC 12.3.0]
Tensorflow version: 2.14.0


In [2]:
gpus = tf.config.list_physical_devices('GPU')
assert gpus, "❌ No GPU visible"
print("✅ GPU device:", gpus[0].name)       # should contain “A100”


# faster allocator (TensorFlow >= 2.10)
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"

✅ GPU device: /physical_device:GPU:0


2025-04-29 18:27:57.231526: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-04-29 18:27:57.272974: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-04-29 18:27:57.273084: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [3]:
num_epochs = 5
batch_size = 512
seed = 100  # Set None for non-deterministic result

# data_dir = os.path.join("tests", "recsys_data", "RecSys", "SASRec-tf2", "data")
DATA_DIR = Path("data")
SEQ_FILE = DATA_DIR / "SASRec_data.txt"    # final tab–separated file
MAP_JSON = DATA_DIR / "id_maps.json"      # save raw→int mappings here
CKPT_DIR = DATA_DIR / "ckpt"

lr = 0.001             # learning rate
maxlen = 50            # maximum sequence length for each user
num_blocks = 2         # number of transformer blocks
hidden_units = 100     # number of units in the attention calculation
num_heads = 1          # number of attention heads
dropout_rate = 0.1     # dropout rate
l2_emb = 0.0           # L2 regularization coefficient
num_neg_test = 100     # number of negative examples per positive example
model_name = 'sasrec'  # 'sasrec' or 'ssept'

In [4]:
dataset = SASRecDataSet(filename=str(SEQ_FILE), col_sep="\t")
dataset.split()                                       # train / valid / test dicts

num_steps = int(len(dataset.user_train) / batch_size)
cc = 0.0
for u in dataset.user_train:
    cc += len(dataset.user_train[u])
print('%g Users and %g items' % (dataset.usernum, dataset.itemnum))
print('average sequence length: %.2f' % (cc / len(dataset.user_train)))

26003 Users and 3738 items
average sequence length: 16.75


In [5]:
sampler = WarpSampler(dataset.user_train,
                      dataset.usernum, dataset.itemnum,
                      batch_size=batch_size,
                      maxlen=maxlen, n_workers=3)

In [6]:

if model_name == 'sasrec':
    model = SASREC(item_num=dataset.itemnum,
                   seq_max_len=maxlen,
                   num_blocks=num_blocks,
                   embedding_dim=hidden_units,
                   attention_dim=hidden_units,
                   attention_num_heads=num_heads,
                   dropout_rate=dropout_rate,
                   conv_dims = [100, 100],
                   l2_reg=l2_emb,
                   num_neg_test=num_neg_test
    )
elif model_name == "ssept":
    model = SSEPT(item_num=dataset.itemnum,
                  user_num=dataset.usernum,
                  seq_max_len=maxlen,
                  num_blocks=num_blocks,
                  embedding_dim=hidden_units,
                  user_embedding_dim=10,
                  item_embedding_dim=hidden_units,
                  attention_dim=hidden_units,
                  attention_num_heads=num_heads,
                  dropout_rate=dropout_rate,
                  conv_dims = [110, 110],
                  l2_reg=l2_emb,
                  num_neg_test=num_neg_test
    )
else:
    print(f"Model-{model_name} not found")

2025-04-29 18:27:57.672763: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-04-29 18:27:57.672910: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-04-29 18:27:57.672975: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [7]:

with Timer() as train_time:
    t_test = model.train(dataset, sampler,
                         num_epochs=num_epochs,
                         batch_size=batch_size,
                         lr=lr, val_epoch=6,
                         model_path = str(CKPT_DIR))

print('Time cost for training is {0:.2f} mins'.format(train_time.interval/60.0))

  0%|                                           | 0/50 [00:00<?, ?b/s]2025-04-29 18:28:00.443083: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8700
2025-04-29 18:28:00.905528: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7478b0196970 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-04-29 18:28:00.905549: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3070 Ti Laptop GPU, Compute Capability 8.6
2025-04-29 18:28:00.911015: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-04-29 18:28:00.972121: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
                                                                      


epoch: 5, test (NDCG@10: 0.5737525615761228, HR@10: 0.7513)
Time cost for training is 2.85 mins




In [8]:
store_metadata("ndcg@10", t_test[0])
store_metadata("Hit@10", t_test[1])

In [9]:
# after training:
ckpt = tf.train.Checkpoint(model=model)
ckpt.write(str(CKPT_DIR / "sasrec.ckpt"))


'data/ckpt/sasrec.ckpt'

In [10]:
import json

config = {
    "item_num": dataset.itemnum,
    "seq_max_len": maxlen,
    "num_blocks": num_blocks,
    "embedding_dim": hidden_units,
    "attention_dim": hidden_units,
    "attention_num_heads": num_heads,
    "dropout_rate": dropout_rate,
    "conv_dims": [100, 100],
    "l2_reg": l2_emb,
    "num_neg_test": num_neg_test
}
with open(CKPT_DIR / "sasrec_config.json", "w") as f:
    json.dump(config, f)