# Sequence modeling for ranking task

# Set up

In [1]:
%load_ext autoreload
%autoreload 2
%load_ext tensorboard

In [2]:
import os
import sys

import lightning as L
import numpy as np
import pandas as pd
import torch
from dotenv import load_dotenv
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.loggers import MLFlowLogger
from loguru import logger
from mlflow.exceptions import MlflowException
from mlflow.models.signature import infer_signature
from pydantic import BaseModel
from torch.utils.data import DataLoader

import mlflow

sys.path.insert(0, "..")

from src.dataset import UserItemBinaryDFDataset as UserItemRatingDFDataset
from src.id_mapper import IDMapper
from src.sequence.inference import SequenceRatingPredictionInferenceWrapper
from src.eval.compare_runs import ModelMetricsComparisonVisualizer
from src.sequence.model import TwoTowerSequenceModel
from src.sequence.trainer import LitSequenceRatingPrediction
from src.sequence.utils import generate_item_sequences
from src.viz import custom_style_plotly

load_dotenv()
custom_style_plotly()



# Controller

In [3]:
# This is a parameter cell used by papermill
# `max_epochs` is read below as the default value of `Args.max_epochs`.
max_epochs = 100

In [4]:
class Args(BaseModel):
    """Run configuration for this notebook.

    Field defaults describe the experiment (MLflow names, column names,
    model and optimisation hyper-parameters). Values that depend on the
    runtime environment are left as ``None`` and resolved by :meth:`init`.
    """

    testing: bool = False
    author: str = "quy.dinh"
    log_to_mlflow: bool = True
    experiment_name: str = "Retrieve - Binary"
    run_name: str = "003-two-tower-sequence-modeling"
    # Resolved by init() to the absolute path of data/<run_name>.
    notebook_persist_dp: str | None = None
    random_seed: int = 41
    # Resolved by init() to "cuda", "mps" or "cpu" when left as None.
    device: str | None = None

    # Default comes from the module-level `max_epochs` parameter.
    max_epochs: int = max_epochs
    batch_size: int = 128

    user_col: str = "user_id"
    item_col: str = "parent_asin"
    rating_col: str = "rating"
    timestamp_col: str = "timestamp"

    top_k_retrieve: int = 100
    top_k_rerank: int = 10

    embedding_dim: int = 128
    dropout: float = 0.3
    early_stopping_patience: int = 5
    learning_rate: float = 0.001
    l2_reg: float = 1e-5

    mlf_model_name: str = "two_tower_sequence"
    min_roc_auc: float = 0.7

    # Not set by init(); left as None until assigned from outside the class.
    best_checkpoint_path: str | None = None

    def init(self) -> "Args":
        """Resolve environment-dependent settings and return ``self``.

        * Creates ``data/<run_name>`` (relative to the current working
          directory) and stores its absolute path in ``notebook_persist_dp``.
        * Turns ``log_to_mlflow`` off when ``MLFLOW_TRACKING_URI`` is unset or
          empty; otherwise, if logging is enabled, creates an ``MLFlowLogger``
          stored on ``self._mlf_logger``. The attribute is only set in that
          case, so readers must check ``log_to_mlflow`` first.
        * Picks ``device`` ("cuda", then "mps", then "cpu") when it is ``None``.
        """
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        os.makedirs(self.notebook_persist_dp, exist_ok=True)

        if not (mlflow_uri := os.environ.get("MLFLOW_TRACKING_URI")):
            logger.warning(
                "Environment variable MLFLOW_TRACKING_URI is not set. Setting self.log_to_mlflow to false."
            )
            self.log_to_mlflow = False

        if self.log_to_mlflow:
            logger.info(
                f"Setting up MLflow experiment {self.experiment_name} - run {self.run_name}..."
            )
            self._mlf_logger = MLFlowLogger(
                experiment_name=self.experiment_name,
                run_name=self.run_name,
                tracking_uri=mlflow_uri,
                log_model=True,
            )

        if self.device is None:
            self.device = (
                "cuda"
                if torch.cuda.is_available()
                else "mps" if torch.backends.mps.is_available() else "cpu"
            )

        return self


# Build the configuration from the field defaults and resolve the
# environment-dependent settings (output directory, MLflow logger, device).
args = Args().init()

# Show the resolved configuration as indented JSON.
print(args.model_dump_json(indent=2))

[32m2025-03-08 21:39:08.921[0m | [1mINFO    [0m | [36m__main__[0m:[36minit[0m:[36m46[0m - [1mSetting up MLflow experiment Retrieve - Binary - run 003-two-tower-sequence-modeling...[0m


{
  "testing": false,
  "author": "quy.dinh",
  "log_to_mlflow": true,
  "experiment_name": "Retrieve - Binary",
  "run_name": "003-two-tower-sequence-modeling",
  "notebook_persist_dp": "/home/dvq/frostmourne/recsys-blog/1-seq-model/notebooks/data/003-two-tower-sequence-modeling",
  "random_seed": 41,
  "device": "cuda",
  "max_epochs": 100,
  "batch_size": 128,
  "user_col": "user_id",
  "item_col": "parent_asin",
  "rating_col": "rating",
  "timestamp_col": "timestamp",
  "top_k_retrieve": 100,
  "top_k_rerank": 10,
  "embedding_dim": 128,
  "dropout": 0.3,
  "early_stopping_patience": 5,
  "learning_rate": 0.001,
  "l2_reg": 0.00001,
  "mlf_model_name": "two_tower_sequence",
  "min_roc_auc": 0.7,
  "best_checkpoint_path": null
}


# Implement

In [5]:
def init_model(n_users, n_items, embedding_dim, dropout):
    """Create a new ``TwoTowerSequenceModel``.

    ``n_users``, ``n_items`` and ``embedding_dim`` are forwarded positionally;
    ``dropout`` is forwarded as a keyword argument.
    """
    return TwoTowerSequenceModel(
        n_users,
        n_items,
        embedding_dim,
        dropout=dropout,
    )

# Test implementation

In [6]:
# Smoke test: build a tiny hand-made dataset and run one forward pass.
# Small sizes keep the test cheap.
embedding_dim = 8
batch_size = 2

# Mock data
user_indices = [0, 0, 1, 2, 2]
item_indices = [0, 1, 2, 3, 4]
timestamps = [0, 1, 2, 3, 4]
ratings = [0, 4, 5, 3, 0]
# One sequence per row above; -1 is presumably the padding value
# (create_predict_df below pads with -1) - TODO confirm against the model.
item_sequences = [
    [-1, -1, 2, 3],
    [-1, -1, 2, 3],
    [-1, -1, 1, 3],
    [-1, -1, 2, 1],
    [-1, -1, 2, 1],
]

# Number of distinct users / items in the mock data.
n_users = len(set(user_indices))
n_items = len(set(item_indices))

train_df = pd.DataFrame(
    {
        "user_indice": user_indices,
        "item_indice": item_indices,
        args.timestamp_col: timestamps,
        args.rating_col: ratings,
        "item_sequence": item_sequences,
    }
)

model = init_model(n_users, n_items, embedding_dim, args.dropout)

# Example forward pass
model.eval()
user = torch.tensor([0])
item_sequence = torch.tensor([[-1, -1, -1, 0, 1]])
target_item = torch.tensor([2])
predictions = model.predict(user, item_sequence, target_item)
print(predictions)
# Switch back to training mode; as the last expression this also displays the module.
model.train()

tensor([0.8122], grad_fn=<DivBackward0>)


TwoTowerSequenceModel(
  (item_embedding): Embedding(6, 8, padding_idx=5)
  (user_embedding): Embedding(3, 8)
  (query_fc): Sequential(
    (0): Linear(in_features=16, out_features=8, bias=True)
    (1): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
  )
  (candidate_fc): Sequential(
    (0): Linear(in_features=8, out_features=8, bias=True)
    (1): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
  )
)

In [7]:
# Wrap the mock frame in the project dataset, telling it which columns hold
# the user index, item index, rating and timestamp.
rating_dataset = UserItemRatingDFDataset(
    train_df, "user_indice", "item_indice", args.rating_col, args.timestamp_col
)

# Fixed order, and the incomplete last batch is dropped (5 rows, batch_size=2).
train_loader = DataLoader(
    rating_dataset, batch_size=batch_size, shuffle=False, drop_last=True
)

In [8]:
# Print every batch to inspect the keys and tensors the dataset produces.
for batch_input in train_loader:
    print(batch_input)

{'user': tensor([0, 0]), 'item': tensor([0, 1]), 'rating': tensor([0., 1.]), 'item_sequence': tensor([[-1, -1,  2,  3],
        [-1, -1,  2,  3]]), 'item_sequence_ts_bucket': tensor([], size=(2, 0), dtype=torch.int64), 'item_feature': tensor([], size=(2, 0))}
{'user': tensor([1, 2]), 'item': tensor([2, 3]), 'rating': tensor([1., 1.]), 'item_sequence': tensor([[-1, -1,  1,  3],
        [-1, -1,  2,  1]]), 'item_sequence_ts_bucket': tensor([], size=(2, 0), dtype=torch.int64), 'item_feature': tensor([], size=(2, 0))}


In [9]:
# Smoke-test the Lightning training loop on the mock data.
# model
lit_model = LitSequenceRatingPrediction(model, log_dir=args.notebook_persist_dp)

# train model
# Two epochs only; falls back to Lightning's "auto" accelerator if no device is set.
trainer = L.Trainer(
    default_root_dir=f"{args.notebook_persist_dp}/test",
    max_epochs=2,
    accelerator=args.device if args.device else "auto",
)
# The same mock loader is used for training and validation.
trainer.fit(
    model=lit_model, train_dataloaders=train_loader, val_dataloaders=train_loader
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 SUPER') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name               | Type                   | Params | Mode 
----------------------------------------------------------------------
0 | model              | TwoTowerSequenceModel  | 312    | train
1 | val_roc_auc_metric | BinaryAUROC            | 0      | train
2 | val_pr_auc_metric  | BinaryAveragePrecision | 0      | train
----------------------------------------------------------------------
312       Trainable params
0         Non-trainable params
312       T

Sanity Checking: |                                                                          | 0/? [00:00<?, ?i…

/home/dvq/frostmourne/recsys-blog/1-seq-model/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
/home/dvq/frostmourne/recsys-blog/1-seq-model/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
/home/dvq/frostmourne/recsys-blog/1-seq-model/.venv/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                 | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

`Trainer.fit` stopped: `max_epochs=2` reached.
[32m2025-03-08 21:39:09.460[0m | [1mINFO    [0m | [36msrc.sequence.trainer[0m:[36mon_fit_end[0m:[36m172[0m - [1mLogging classification metrics...[0m
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
# Score four (user, sequence, item) triples in one call: the same user 0
# against items 0..3, each with its own item sequence.
users = torch.tensor([0, 0, 0, 0])
item_sequences = torch.tensor(
    [[-1, -1, 2, 3], [-1, -1, 2, 3], [-1, -1, 1, 3], [-1, -1, 2, 1]]
)
items = torch.tensor([0, 1, 2, 3])
predictions = model.predict(users, item_sequences, items)
print(predictions)

tensor([0.7589, 0.7723, 0.8796, 0.6960], grad_fn=<DivBackward0>)


In [11]:
def create_predict_df(
    train_df,
    val_user_indices,
    val_timestamp,
    rating_col,
    timestamp_col,
    sequence_length=10,
):
    """Build one prediction row per entry of ``val_user_indices``.

    Each prediction row carries an ``item_sequence`` produced by
    ``generate_item_sequences`` from that user's positively-rated rows in
    ``train_df`` combined with a placeholder row stamped ``val_timestamp``.

    Args:
        train_df: Frame with ``user_indice``, ``item_indice``, ``rating_col``
            and ``timestamp_col`` columns.
        val_user_indices: User indices to create prediction rows for.
        val_timestamp: Timestamp assigned to every prediction row.
        rating_col: Name of the rating column; only rows with a value > 0
            are used as history.
        timestamp_col: Name of the timestamp column.
        sequence_length: Forwarded to ``generate_item_sequences``.

    Returns:
        The rows whose ``source`` is ``"predict"``, with ``item_sequence``
        converted to ``np.ndarray``.
    """
    predict_df = pd.DataFrame(
        {
            "user_indice": val_user_indices,
            "item_indice": -1,  # placeholder
            # Use the caller-supplied column name so these rows line up with
            # the training rows when the two frames are concatenated.
            timestamp_col: val_timestamp,
            "source": "predict",
        }
    )

    # Only positively-rated interactions are used as history.
    history_df = train_df.loc[lambda df: df[rating_col].gt(0)][
        ["user_indice", "item_indice", timestamp_col]
    ].assign(source="train")

    predict_df = (
        pd.concat([history_df, predict_df], axis=0)
        .pipe(
            generate_item_sequences,
            "user_indice",
            "item_indice",
            timestamp_col,
            sequence_length=sequence_length,
            padding=True,
            padding_value=-1,
        )
        # Drop the history rows; they were only needed to build the sequences.
        .loc[lambda df: df["source"].eq("predict")]
        .assign(item_sequence=lambda df: df["item_sequence"].apply(np.array))
    )

    return predict_df


# Build prediction rows for the mock users, all stamped with the last mock timestamp.
predict_df = create_predict_df(
    train_df,
    user_indices,
    timestamps[-1],
    args.rating_col,
    args.timestamp_col,
    sequence_length=10,
)

predict_df

Unnamed: 0,user_indice,item_indice,timestamp,source,item_sequence
0,0,-1,4,predict,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1]"
1,0,-1,4,predict,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1]"
2,1,-1,4,predict,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 2]"
3,2,-1,4,predict,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 3]"
4,2,-1,4,predict,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 3]"


In [12]:
# Convert the per-row sequence arrays into one 2-D ndarray before creating the
# tensor: building a tensor directly from a list of ndarrays is slow and makes
# PyTorch emit a warning.
recommendations = model.recommend(
    torch.tensor(predict_df["user_indice"].values),
    torch.tensor(np.array(predict_df["item_sequence"].tolist())),
    k=2,
    batch_size=4,
)
recommendations


Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at /pytorch/torch/csrc/utils/tensor_new.cpp:254.)



Generating recommendations:   0%|          | 0/2 [00:00<?, ?it/s]

{'user_indice': [0, 0, 0, 0, 1, 1, 2, 2, 2, 2],
 'recommendation': [1, 3, 1, 3, 3, 1, 3, 1, 3, 1],
 'score': [0.8522368669509888,
  0.6080646514892578,
  0.8522368669509888,
  0.6080646514892578,
  0.6066123247146606,
  0.5357617735862732,
  1.086543321609497,
  0.3857460618019104,
  1.0865432024002075,
  0.385746031999588]}

# Prep data

In [13]:
# Load the train/validation frames and the ID mapper saved as JSON.
train_df = pd.read_parquet("../data/train_features_neg_df.parquet")
val_df = pd.read_parquet("../data/val_features_neg_df.parquet")
idm_fp = "../data/idm.json"
idm = IDMapper().load(idm_fp)

# Sanity check: the stored `user_indice` column must match what the loaded
# mapper returns for each user id, in both frames.
assert (
    train_df[args.user_col].map(lambda s: idm.get_user_index(s))
    != train_df["user_indice"]
).sum() == 0, "Mismatch IDM"
assert (
    val_df[args.user_col].map(lambda s: idm.get_user_index(s)) != val_df["user_indice"]
).sum() == 0, "Mismatch IDM"

In [14]:
# Distinct user and item indices present in the training frame.
user_indices = train_df["user_indice"].unique()
item_indices = train_df["item_indice"].unique()

logger.info(f"{len(user_indices)=:,.0f}, {len(item_indices)=:,.0f}")

[32m2025-03-08 21:39:09.933[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mlen(user_indices)=19,734, len(item_indices)=7,388[0m


In [15]:
# Preview the training frame.
train_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,item_sequence
0,AH377TAQBGVIUD75XIOSNRRBUMSA,B000OIZSLE,0.0,1603811722410,3520,5441,"[-1.0, -1.0, -1.0, -1.0, 4238.0, 3266.0, 4316...."
1,AGPAMWVXLFFWCBO3J4JNOYBB5CJQ,B01CXE9Q8C,4.0,1520074315327,6133,2684,"[-1.0, -1.0, -1.0, -1.0, -1.0, 2863.0, 5665.0,..."
2,AGSXDUEY3XZJVJSRBQCTAPOSY2NA,1451681755,0.0,1446855213000,15471,3848,"[-1.0, -1.0, -1.0, 2423.0, 5224.0, 6745.0, 360..."
3,AECGNMOCHNIEKROWI6NCZQE7QV3A,B001MSMULG,0.0,1412051625000,1717,4696,"[241.0, 7334.0, 3433.0, 7247.0, 1840.0, 5215.0..."
4,AHJ4X46OBBFQFQEGKO6CYQSL7A6Q,0441016995,0.0,1540233594357,13004,4750,"[3132.0, 4886.0, 3905.0, 2187.0, 3951.0, 6240...."
...,...,...,...,...,...,...,...
388843,AEEAI7QJ6HFCN43V543MOTKNBQOA,B0043M6L22,0.0,1528288422975,5335,7093,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 4528.0, 5..."
388844,AGMWE3EQOAKN467EMLZFXS5FD7FQ,0525577947,0.0,1364849772000,1895,5383,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 5802.0, 6..."
388845,AGBD2QKG2VULRDA4OKNJZUII44JA,B00UXX5BAS,0.0,1313415114000,12423,4075,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 287..."
388846,AGQ3G5TPEQV5AF4UPHKKCPK4C27Q,B00AEDDSZW,4.0,1381858732000,10563,1139,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."


# Train

In [16]:
# Datasets over the real train / validation frames.
rating_dataset = UserItemRatingDFDataset(
    train_df, "user_indice", "item_indice", args.rating_col, args.timestamp_col
)
val_rating_dataset = UserItemRatingDFDataset(
    val_df, "user_indice", "item_indice", args.rating_col, args.timestamp_col
)

# Training batches are shuffled and the incomplete last batch is dropped.
train_loader = DataLoader(
    rating_dataset, batch_size=args.batch_size, shuffle=True, drop_last=True
)
# Validation keeps the original order and every row.
val_loader = DataLoader(
    val_rating_dataset, batch_size=args.batch_size, shuffle=False, drop_last=False
)

In [17]:
# Model sizes are the number of distinct indices seen in train_df.
# NOTE(review): assumes indices are contiguous from 0 and that val_df contains
# no index outside the training set - confirm against the ID mapper.
n_items = len(item_indices)
n_users = len(user_indices)

model = init_model(n_users, n_items, args.embedding_dim, args.dropout)

#### Predict before train

In [18]:
# Inspect the item embedding layer of the freshly initialised model.
model.item_embedding

Embedding(7389, 128, padding_idx=7388)

In [19]:
# Use the frame held by the validation dataset from here on, and show ten random rows.
val_df = val_rating_dataset.df
val_df.sample(10)

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,item_sequence
1786,AFZTF5FPSVCDSGP26JOH4YEQNCEA,B00413QAAQ,0.0,1654453216826,14276,335,"[-1, -1, 2325, 5456, 4211, 5519, 2377, 949, 62..."
3980,AHLTOYNQMZTHCMH7CQOF7NMLOLZQ,0061537934,0.0,1653774386933,9613,6222,"[3498, 6801, 4053, 6365, 2512, 4793, 230, 6366..."
5929,AFTMTH6CJABHUGAIJIH63XQNHW6Q,B00GS8IRBC,0.0,1629825965677,18857,2127,"[6966, 4566, 4896, 479, 5118, 1286, 1547, 856,..."
4363,AFBTAIIHMEFJ4HMM2SJME53EESXQ,B01FRSZAUO,0.0,1635228280407,12989,4943,"[-1, -1, -1, 6282, 443, 5468, 112, 3338, 4193,..."
4471,AGJ6UBMA4ZOJBZC3YO7YLA3B4QSA,B07XSB35SS,0.0,1640356332345,17496,7211,"[7324, 2138, 3112, 3028, 6484, 3912, 5240, 144..."
7018,AHUSHU2Q4CBYSCW4PVHJIQ2OMY2A,B075LK442Z,1.0,1633619630976,14187,1686,"[219, 2659, 7100, 5629, 4812, 1762, 5291, 244,..."
2812,AGKH2ABQSQB5IEM7E3BFF2PSTH2Q,B00FH1IBYE,1.0,1642118555178,12887,6229,"[5033, 5638, 4391, 1475, 3200, 5680, 6289, 267..."
669,AECDOYKVV7RQUD5BVDQGSYPNGOVQ,B000T0G2DA,1.0,1647129480117,11515,3394,"[2385, 3636, 3169, 3943, 3291, 2812, 1816, 14,..."
1796,AFVGJBPRCY3YGHTDUFXA6EIUNPGQ,B08KH8YT2S,1.0,1629850034499,7451,506,"[1777, 4131, 1788, 3629, 3889, 4528, 512, 2395..."
3398,AGAW6UMSRL2JDTYR5VA4GMSRHU2A,B089GSGGRL,1.0,1657857178162,18194,6039,"[5964, 2551, 1191, 3912, 3061, 1949, 1171, 370..."


In [20]:
# Pick a random validation user for a before/after-training sanity check.
# Sample only among rows with a positive rating so the chosen user is
# guaranteed to have at least one positive row; the following cell takes the
# first positive row of this user and would fail on an empty selection.
user_id = (
    val_df.loc[lambda df: df[args.rating_col].gt(0)]
    .sample(1)[args.user_col]
    .values[0]
)
# user_id = "AH4AOFTTDPHPAFAAVFMAF25H2LIQ"
test_df = val_df.loc[lambda df: df[args.user_col].eq(user_id)]
# Show full cell contents so the whole item sequence is visible.
with pd.option_context("display.max_colwidth", None):
    display(test_df)

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,item_sequence
2413,AE6F6I77VA3ZOVOFHS3WEL5CXSJA,B00395ZYVS,0.0,1646616227324,11699,4579,"[-1, -1, 7208, 3943, 4223, 6669, 585, 5870, 2671, 3193]"
2676,AE6F6I77VA3ZOVOFHS3WEL5CXSJA,B08JKC299M,1.0,1646616227324,11699,6575,"[-1, -1, 7208, 3943, 4223, 6669, 585, 5870, 2671, 3193]"


In [21]:
# Take the first positively-rated row of the chosen user as the test case.
test_row = test_df.loc[lambda df: df[args.rating_col].gt(0)].iloc[0]
item_id = test_row[args.item_col]
item_sequence = test_row["item_sequence"]
logger.info(
    f"Test predicting before training with {args.user_col} = {user_id} and {args.item_col} = {item_id}"
)
# Translate the raw ids to the integer indices the model works with.
user_indice = idm.get_user_index(user_id)
item_indice = idm.get_item_index(item_id)
# Wrap each input in a list to form a batch of one.
user = torch.tensor([user_indice])
item_sequence = torch.tensor([item_sequence])
item = torch.tensor([item_indice])

# Score in eval mode, then switch back to training mode.
model.eval()
model.predict(user, item_sequence, item)
model.train()

[32m2025-03-08 21:39:10.136[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mTest predicting before training with user_id = AE6F6I77VA3ZOVOFHS3WEL5CXSJA and parent_asin = B08JKC299M[0m


TwoTowerSequenceModel(
  (item_embedding): Embedding(7389, 128, padding_idx=7388)
  (user_embedding): Embedding(19734, 128)
  (query_fc): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
  )
  (candidate_fc): Sequential(
    (0): Linear(in_features=128, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
  )
)

#### Training loop

##### Overfit 1 batch

In [22]:
# Overfit a single batch as a sanity check of the training loop.
# Stop when val_roc_auc has not improved for 10 validation checks.
early_stopping = EarlyStopping(
    monitor="val_roc_auc", patience=10, mode="max", verbose=False
)

# Regularisation is switched off (dropout=0, l2_reg=0.0) for this run.
model = init_model(n_users, n_items, args.embedding_dim, dropout=0)
lit_model = LitSequenceRatingPrediction(
    model,
    learning_rate=args.learning_rate,
    l2_reg=0.0,
    log_dir=args.notebook_persist_dp,
    accelerator=args.device,
)

log_dir = f"{args.notebook_persist_dp}/logs/overfit"

# train model
trainer = L.Trainer(
    default_root_dir=log_dir,
    accelerator=args.device if args.device else "auto",
    max_epochs=100,
    overfit_batches=1,
    callbacks=[early_stopping],
)
# The training loader is also passed as the validation loader.
trainer.fit(
    model=lit_model,
    train_dataloaders=train_loader,
    val_dataloaders=train_loader,
)
logger.info(f"Logs available at {trainer.log_dir}")

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(overfit_batches=1)` was configured so 1 batch will be used.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name               | Type                   | Params | Mode 
----------------------------------------------------------------------
0 | model              | TwoTowerSequenceModel  | 3.5 M  | train
1 | val_roc_auc_metric | BinaryAUROC            | 0      | train
2 | val_pr_auc_metric  | BinaryAveragePrecision | 0      | train
----------------------------------------------------------------------
3.5 M     Trainable params
0         Non-trainable params
3.5 M     Total params
14.087    Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                                                          | 0/? [00:00<?, ?i…


You requested to overfit but enabled val dataloader shuffling. We are turning off the val dataloader shuffling for you.


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


You requested to overfit but enabled train dataloader shuffling. We are turning off the train dataloader shuffling for you.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.



Training: |                                                                                 | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

[32m2025-03-08 21:39:11.233[0m | [1mINFO    [0m | [36msrc.sequence.trainer[0m:[36mon_fit_end[0m:[36m172[0m - [1mLogging classification metrics...[0m

Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m2025-03-08 21:39:27.374[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m29[0m - [1mLogs available at /home/dvq/frostmourne/recsys-blog/1-seq-model/notebooks/data/003-two-tower-sequence-modeling/logs/overfit/lightning_logs/version_1[0m


In [23]:
# Need to make sure port 6006 at local is accessible
%tensorboard --logdir $trainer.log_dir

##### Fit on all data

In [24]:
# papermill_description=fit-model
# Full training run on the real train / validation loaders.
# Stop once val_roc_auc has not improved for `early_stopping_patience` checks.
early_stopping = EarlyStopping(
    monitor="val_roc_auc", patience=args.early_stopping_patience, mode="max", verbose=False
)

# Keep only the single best checkpoint, ranked by val_roc_auc.
checkpoint_callback = ModelCheckpoint(
    dirpath=f"{args.notebook_persist_dp}/checkpoints",
    filename="best-checkpoint",
    save_top_k=1,
    monitor="val_roc_auc",
    mode="max",
)

# Fresh model using the configured dropout.
model = init_model(
    n_users,
    n_items,
    args.embedding_dim,
    dropout=args.dropout
)
lit_model = LitSequenceRatingPrediction(
    model,
    learning_rate=args.learning_rate,
    l2_reg=args.l2_reg,
    log_dir=args.notebook_persist_dp,
    evaluate_ranking=True,
    idm=idm,
    args=args,
    accelerator=args.device,
    checkpoint_callback=checkpoint_callback,
)

log_dir = f"{args.notebook_persist_dp}/logs/run"

# train model
# The MLflow logger is only passed when MLflow logging is enabled;
# `args._mlf_logger` is not set otherwise.
trainer = L.Trainer(
    default_root_dir=log_dir,
    max_epochs=args.max_epochs,
    callbacks=[early_stopping, checkpoint_callback],
    accelerator=args.device if args.device else "auto",
    logger=args._mlf_logger if args.log_to_mlflow else None,
)
trainer.fit(
    model=lit_model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name               | Type                   | Params | Mode 
----------------------------------------------------------------------
0 | model              | TwoTowerSequenceModel  | 3.5 M  | train
1 | val_roc_auc_metric | BinaryAUROC            | 0      | train
2 | val_pr_auc_metric  | BinaryAveragePrecision | 0      | train
----------------------------------------------------------------------
3.5 M     Trainable params
0         Non-trainable params
3.5 M     Total params
14.087    Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                                                          | 0/? [00:00<?, ?i…


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.



Training: |                                                                                 | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

Validation: |                                                                               | 0/? [00:00<?, ?i…

[32m2025-03-08 21:43:08.670[0m | [1mINFO    [0m | [36msrc.sequence.trainer[0m:[36mon_fit_end[0m:[36m165[0m - [1mLoading best model from /home/dvq/frostmourne/recsys-blog/1-seq-model/notebooks/data/003-two-tower-sequence-modeling/checkpoints/best-checkpoint.ckpt...[0m
[32m2025-03-08 21:43:08.835[0m | [1mINFO    [0m | [36msrc.sequence.trainer[0m:[36mon_fit_end[0m:[36m172[0m - [1mLogging classification metrics...[0m

Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m2025-03-08 21:43:09.958[0m | [1mINFO    [0m | [36msrc.sequence.trainer[0m:[36mon_fit_end[0m:[36m175[0m - [1mLogging ranki

Generating recommendations:   0%|          | 0/434 [00:00<?, ?it/s]

🏃 View run 003-two-tower-sequence-modeling at: http://localhost:5002/#/experiments/3/runs/e71802367e2b45d0889b384c6243d00e
🧪 View experiment at: http://localhost:5002/#/experiments/3


In [25]:
# Re-score the same (user, sequence, item) test case after training.
logger.info(
    f"Test predicting after training with {args.user_col} = {user_id} and {args.item_col} = {item_id}"
)
model.eval()
# Move the model to the device of the input tensors before predicting.
model = model.to(user.device)
model.predict(user, item_sequence, item)
model.train()

[32m2025-03-08 21:43:14.137[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mTest predicting after training with user_id = AE6F6I77VA3ZOVOFHS3WEL5CXSJA and parent_asin = B08JKC299M[0m


TwoTowerSequenceModel(
  (item_embedding): Embedding(7389, 128, padding_idx=7388)
  (user_embedding): Embedding(19734, 128)
  (query_fc): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
  )
  (candidate_fc): Sequential(
    (0): Linear(in_features=128, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
  )
)

# Load best checkpoint

In [26]:
# Record the best checkpoint path on the config and reload it.
logger.info(f"Loading best checkpoint from {checkpoint_callback.best_model_path}...")
args.best_checkpoint_path = checkpoint_callback.best_model_path

# A fresh model instance (dropout=0) is supplied to receive the checkpoint.
best_trainer = LitSequenceRatingPrediction.load_from_checkpoint(
    checkpoint_callback.best_model_path,
    model=init_model(n_users, n_items, args.embedding_dim, dropout=0),
)

[32m2025-03-08 21:43:14.171[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mLoading best checkpoint from /home/dvq/frostmourne/recsys-blog/1-seq-model/notebooks/data/003-two-tower-sequence-modeling/checkpoints/best-checkpoint.ckpt...[0m


In [27]:
# Take the model from the restored module and move it to lit_model's device.
best_model = best_trainer.model.to(lit_model.device)

In [28]:
# Score the same test case with the restored best model, in eval mode.
best_model.eval()
best_model.predict(user, item_sequence, item)

tensor([0.5831], grad_fn=<DivBackward0>)

### Persist id mapping

In [29]:
if args.log_to_mlflow:
    # Persist id_mapping so that at inference we can predict based on item_ids (string) instead of item_index
    # Upload the mapper JSON as an artifact of the training run.
    run_id = trainer.logger.run_id
    mlf_client = trainer.logger.experiment
    mlf_client.log_artifact(run_id, idm_fp)

### Wrap inference function and register best checkpoint as MLflow model

In [30]:
# Wrap the best model in the project's inference wrapper.
inferrer = SequenceRatingPredictionInferenceWrapper(best_model)

In [31]:
# Example request expressed in raw ids (what the registered model will accept);
# the ids are looked up from indices 0 and 1 of the mapper.
sample_input = {
    "user_ids": [idm.get_user_id(0)],
    "item_sequences": [[idm.get_item_id(0), idm.get_item_id(1)]],
    "item_ids": [idm.get_item_id(0)],
}
# Matching output, produced by calling the wrapper with the same indices directly.
sample_output = inferrer.infer([0], [[0, 1]], [0])
sample_output

array([0.9204059], dtype=float32)

In [32]:
# Register the inference wrapper as an MLflow pyfunc model under the training run.
if args.log_to_mlflow:
    run_id = trainer.logger.run_id
    sample_output_np = sample_output
    # Derive the model signature from the example input/output pair.
    signature = infer_signature(sample_input, sample_output_np)
    # File name of the mapper, used to look up its artifact URI in the run.
    idm_filename = idm_fp.split("/")[-1]
    # Re-open the training run so the model is logged to it.
    with mlflow.start_run(run_id=run_id):
        mlflow.pyfunc.log_model(
            python_model=inferrer,
            artifact_path="inferrer",
            # We log the id_mapping to the predict function so that it can accept item_id and automatically convert to item_indice for PyTorch model to use
            artifacts={"idm": mlflow.get_artifact_uri(idm_filename)},
            signature=signature,
            input_example=sample_input,
            registered_model_name=args.mlf_model_name,
        )

2025/03/08 21:43:14 INFO mlflow.pyfunc: Validating input example against model signature


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Successfully registered model 'two_tower_sequence'.
2025/03/08 21:43:17 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: two_tower_sequence, version 1


🏃 View run 003-two-tower-sequence-modeling at: http://localhost:5002/#/experiments/3/runs/e71802367e2b45d0889b384c6243d00e
🧪 View experiment at: http://localhost:5002/#/experiments/3


Created version '1' of model 'two_tower_sequence'.


# Set the newly trained model as champion

In [33]:
if args.log_to_mlflow:
    # Promote the newly registered model version to the deploy alias when its
    # ROC-AUC is not below the deploy baseline. The baseline starts at
    # args.min_roc_auc and is raised to the current champion's ROC-AUC when
    # that is higher.
    deploy_alias = "champion"
    curr_model_run_id = None

    min_roc_auc = args.min_roc_auc

    # Get current champion
    try:
        curr_champion_model = mlf_client.get_model_version_by_alias(
            args.mlf_model_name, deploy_alias
        )
    except MlflowException as e:
        # Only a "not found" error means there is no champion yet. Any other
        # failure must not be mistaken for that, otherwise the new model would
        # be promoted without being compared against the existing champion.
        if "not found" not in str(e).lower():
            raise
        logger.info(
            f"There is no {deploy_alias} alias for model {args.mlf_model_name}"
        )
    else:
        curr_model_run_id = curr_champion_model.run_id

    # Compare new vs curr models
    new_mlf_run = mlf_client.get_run(trainer.logger.run_id)
    new_metrics = new_mlf_run.data.metrics
    roc_auc = new_metrics["roc_auc"]
    if curr_model_run_id:
        curr_model_run_info = mlf_client.get_run(curr_model_run_id)
        curr_metrics = curr_model_run_info.data.metrics
        if (curr_roc_auc := curr_metrics["roc_auc"]) > min_roc_auc:
            logger.info(
                f"Current {deploy_alias} model has {curr_roc_auc:,.4f} ROC-AUC. Setting it to the deploy baseline..."
            )
            min_roc_auc = curr_roc_auc

        top_metrics = ["roc_auc"]
        vizer = ModelMetricsComparisonVisualizer(curr_metrics, new_metrics, top_metrics)
        print("Comparing metrics between new run and current champion:")
        display(vizer.compare_metrics_df())
        vizer.create_metrics_comparison_plot(n_cols=5)
        vizer.plot_diff()

    # Register new champion
    if roc_auc < min_roc_auc:
        logger.info(
            f"Current run has ROC-AUC = {roc_auc:,.4f}, smaller than {min_roc_auc:,.4f}. Skip aliasing this model as the new {deploy_alias}.."
        )
    else:
        logger.info("Aliasing the new model as champion...")
        # Get the model version for current run by assuming it's the most
        # recently registered version. latest_versions can hold several
        # entries (per the MLflow docs, one per stage), so pick the highest
        # version number instead of relying on the order of the list.
        registered_model = mlf_client.get_registered_model(args.mlf_model_name)
        model_version = max(
            registered_model.latest_versions, key=lambda mv: int(mv.version)
        ).version

        mlf_client.set_registered_model_alias(
            name=args.mlf_model_name, alias=deploy_alias, version=model_version
        )

        mlf_client.set_model_version_tag(
            name=args.mlf_model_name,
            version=model_version,
            key="author",
            value=args.author,
        )

[32m2025-03-08 21:43:17.160[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m15[0m - [1mThere is no champion alias for model two_tower_sequence[0m
[32m2025-03-08 21:43:17.165[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m45[0m - [1mAliasing the new model as champion...[0m


# Clean up

In [34]:
# Parameter objects whose fields are recorded on the MLflow run.
all_params = [args]

if args.log_to_mlflow:
    with mlflow.start_run(run_id=run_id):
        for params in all_params:
            # Prefix each field with the object's repr name so that keys stay
            # distinguishable if more parameter objects are added to all_params.
            prefix = params.__repr_name__()
            mlflow.log_params(
                {
                    f"{prefix}.{field}": value
                    for field, value in params.model_dump().items()
                }
            )

🏃 View run 003-two-tower-sequence-modeling at: http://localhost:5002/#/experiments/3/runs/e71802367e2b45d0889b384c6243d00e
🧪 View experiment at: http://localhost:5002/#/experiments/3
