# Ranker that can takes into accound different features

# Set up

In [1]:
%load_ext autoreload
%autoreload 2
%load_ext tensorboard

In [2]:
import os
import sys
from typing import List

import dill
import lightning as L
import numpy as np
import pandas as pd
import torch
from tqdm.auto import tqdm
from dotenv import load_dotenv
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.loggers import MLFlowLogger
from loguru import logger
from mlflow.models.signature import infer_signature
from pydantic import BaseModel
from torch.utils.data import DataLoader
from qdrant_client import QdrantClient

import mlflow

load_dotenv()

sys.path.insert(0, "..")

from src.data_prep_utils import chunk_transform
from src.dataset import UserItemBinaryDFDataset
from src.id_mapper import IDMapper
from src.ranker.inference import RankerInferenceWrapper
from src.ranker.model import Ranker
from src.ranker.trainer import LitRanker
from src.ann import AnnIndex
from src.viz import blueq_colors

# Controller

In [3]:
max_epochs = 100

In [46]:
class Args(BaseModel):
    testing: bool = False
    log_to_mlflow: bool = True
    experiment_name: str = "RecSys MVP - Ranker"
    run_name: str = "017-item-popularity"
    notebook_persist_dp: str = None
    random_seed: int = 41
    device: str = None

    item_metadata_pipeline_fp: str = "../data/item_metadata_pipeline.dill"
    qdrant_url: str = None
    qdrant_collection_name: str = "item_desc_sbert"

    max_epochs: int = max_epochs
    batch_size: int = 128
    tfm_chunk_size: int = 10000
    neg_to_pos_ratio: int = 3

    user_col: str = "user_id"
    item_col: str = "parent_asin"
    rating_col: str = "rating"
    timestamp_col: str = "timestamp"
    item_feature_cols: List[str] = [
        "main_category",
        "categories",
        "price",
        "parent_asin_rating_cnt_365d",
        "parent_asin_rating_avg_prev_rating_365d",
        "parent_asin_rating_cnt_90d",
        "parent_asin_rating_avg_prev_rating_90d",
        "parent_asin_rating_cnt_30d",
        "parent_asin_rating_avg_prev_rating_30d",
        "parent_asin_rating_cnt_7d",
        "parent_asin_rating_avg_prev_rating_7d",
    ]

    top_K: int = 100
    top_k: int = 10

    embedding_dim: int = 128
    dropout: float = 0.3
    early_stopping_patience: int = 5
    learning_rate: float = 0.0003
    l2_reg: float = 1e-4

    mlf_item2vec_model_name: str = "item2vec"
    mlf_model_name: str = "ranker"
    min_roc_auc: float = 0.7

    best_checkpoint_path: str = None

    def init(self):
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        os.makedirs(self.notebook_persist_dp, exist_ok=True)

        if not (qdrant_host := os.getenv("QDRANT_HOST")):
            raise Exception(f"Environment variable QDRANT_HOST is not set.")

        qdrant_port = os.getenv("QDRANT_PORT")
        self.qdrant_url = f"{qdrant_host}:{qdrant_port}"
        
        if not (mlflow_uri := os.environ.get("MLFLOW_TRACKING_URI")):
            logger.warning(
                f"Environment variable MLFLOW_TRACKING_URI is not set. Setting self.log_to_mlflow to false."
            )
            self.log_to_mlflow = False

        if self.log_to_mlflow:
            logger.info(
                f"Setting up MLflow experiment {self.experiment_name} - run {self.run_name}..."
            )
            self._mlf_logger = MLFlowLogger(
                experiment_name=self.experiment_name,
                run_name=self.run_name,
                tracking_uri=mlflow_uri,
                log_model=True,
            )

        return self


args = Args().init()

print(args.model_dump_json(indent=2))

[32m2024-10-27 23:18:09.018[0m | [1mINFO    [0m | [36m__main__[0m:[36minit[0m:[36m69[0m - [1mSetting up MLflow experiment RecSys MVP - Ranker - run 017-item-popularity...[0m


{
  "testing": false,
  "log_to_mlflow": true,
  "experiment_name": "RecSys MVP - Ranker",
  "run_name": "017-item-popularity",
  "notebook_persist_dp": "/Users/dvq/frostmourne/recsys-mvp/notebooks/data/017-item-popularity",
  "random_seed": 41,
  "device": null,
  "item_metadata_pipeline_fp": "../data/item_metadata_pipeline.dill",
  "qdrant_url": "localhost:6333",
  "qdrant_collection_name": "item_desc_sbert",
  "max_epochs": 100,
  "batch_size": 128,
  "tfm_chunk_size": 10000,
  "neg_to_pos_ratio": 3,
  "user_col": "user_id",
  "item_col": "parent_asin",
  "rating_col": "rating",
  "timestamp_col": "timestamp",
  "item_feature_cols": [
    "main_category",
    "categories",
    "price",
    "parent_asin_rating_cnt_365d",
    "parent_asin_rating_avg_prev_rating_365d",
    "parent_asin_rating_cnt_90d",
    "parent_asin_rating_avg_prev_rating_90d",
    "parent_asin_rating_cnt_30d",
    "parent_asin_rating_avg_prev_rating_30d",
    "parent_asin_rating_cnt_7d",
    "parent_asin_rating_avg

# Implement

In [5]:
def init_model(
    n_users, n_items, embedding_dim, item_feature_size, dropout, item_embedding=None
):
    model = Ranker(
        n_users,
        n_items,
        embedding_dim,
        item_feature_size=item_feature_size,
        dropout=dropout,
        item_embedding=item_embedding,
    )
    return model

## Load pretrained Item2Vec embeddings

In [6]:
mlf_client = mlflow.MlflowClient()
model = mlflow.pyfunc.load_model(
    model_uri=f"models:/{args.mlf_item2vec_model_name}@champion"
)
skipgram_model = model.unwrap_python_model().model
embedding_0 = skipgram_model.embeddings(torch.tensor(0))
embedding_dim = embedding_0.size()[0]
id_mapping = model.unwrap_python_model().id_mapping
pretrained_item_embedding = skipgram_model.embeddings

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

In [7]:
assert (
    pretrained_item_embedding.embedding_dim == args.embedding_dim
), "Mismatch pretrained item_embedding dimension"

## Load vectorized item features

In [8]:
with open(args.item_metadata_pipeline_fp, "rb") as f:
    item_metadata_pipeline = dill.load(f)

## Load ANN Index

In [9]:
ann_index = AnnIndex(args.qdrant_url, args.qdrant_collection_name)

In [10]:
vector = ann_index.get_vector_by_ids([0])[0]
embedding_dim = vector.shape[0]

  0%|          | 0/1 [00:00<?, ?it/s]

In [11]:
sbert_embedding_dim = vector.shape[0]
neighbors = ann_index.get_neighbors_by_ids([0])
neighbors

  0%|          | 0/1 [00:00<?, ?it/s]

[ScoredPoint(id=0, version=0, score=1.0, payload={'parent_asin': '0375869026', 'title': 'Wonder'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=1916, version=59, score=0.9272537, payload={'parent_asin': 'B005GFPZYK', 'title': 'American Sniper: The Autobiography of the Most Lethal Sniper in U.S. Military History'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=1968, version=61, score=0.9080587, payload={'parent_asin': 'B005ZBO4VA', 'title': 'Tekken Hybrid - Playstation 3'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=166, version=5, score=0.899632, payload={'parent_asin': 'B00005OARM', 'title': 'Golden Sun'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=3896, version=121, score=0.89472985, payload={'parent_asin': 'B07CD6F5PX', 'title': 'Dragon Quest Xi: Echoes of An Elusive Age - PlayStation 4'}, vector=None, shard_key=None, order_value=None)]

# Test implementation

In [12]:
embedding_dim = 8
batch_size = 2

# Mock data
user_indices = [0, 0, 1, 2, 2]
item_indices = [0, 1, 2, 3, 4]
timestamps = [0, 1, 2, 3, 4]
ratings = [0, 4, 5, 3, 0]
item_sequences = [
    [-1, -1, 2, 3],
    [-1, -1, 2, 3],
    [-1, -1, 1, 3],
    [-1, -1, 2, 1],
    [-1, -1, 2, 1],
]
main_category = [
    "All Electronics",
    "Video Games",
    "All Electronics",
    "Video Games",
    "Unknown",
]
categories = [[], ["Headsets"], ["Video Games"], [], ["blah blah"]]
title = ["World of Warcraft", "DotA 2", "Diablo IV", "Football Manager 2024", "Unknown"]
description = [[], [], ["Video games blah blah"], [], ["blah blah"]]
price = ["from 14.99", "14.99", "price: 9.99", "20 dollars", "None"]
parent_asin_rating_cnt_365d = [0,1,2,3,4]
parent_asin_rating_avg_prev_rating_365d = [4.0,3.5,4.5,5.0,2.0]
parent_asin_rating_cnt_90d = [0,1,2,3,4]
parent_asin_rating_avg_prev_rating_90d = [4.0,3.5,4.5,5.0,2.0]
parent_asin_rating_cnt_30d = [0,1,2,3,4]
parent_asin_rating_avg_prev_rating_30d = [4.0,3.5,4.5,5.0,2.0]
parent_asin_rating_cnt_7d = [0,1,2,3,4]
parent_asin_rating_avg_prev_rating_7d = [4.0,3.5,4.5,5.0,2.0]

train_df = pd.DataFrame(
    {
        "user_indice": user_indices,
        "item_indice": item_indices,
        args.timestamp_col: timestamps,
        args.rating_col: ratings,
        "item_sequence": item_sequences,
        "main_category": main_category,
        "title": title,
        "description": description,
        "categories": categories,
        "price": price,
        "parent_asin_rating_cnt_365d": parent_asin_rating_cnt_365d,
        "parent_asin_rating_avg_prev_rating_365d": parent_asin_rating_avg_prev_rating_365d,
        "parent_asin_rating_cnt_90d": parent_asin_rating_cnt_90d,
        "parent_asin_rating_avg_prev_rating_90d": parent_asin_rating_avg_prev_rating_90d,
        "parent_asin_rating_cnt_30d": parent_asin_rating_cnt_30d,
        "parent_asin_rating_avg_prev_rating_30d": parent_asin_rating_avg_prev_rating_30d,
        "parent_asin_rating_cnt_7d": parent_asin_rating_cnt_7d,
        "parent_asin_rating_avg_prev_rating_7d": parent_asin_rating_avg_prev_rating_7d,
    }
)
train_item_features = item_metadata_pipeline.transform(train_df).astype(np.float32)
sbert_vectors = ann_index.get_vector_by_ids(train_df['item_indice'].values.tolist()).astype(np.float32)
train_item_features = np.hstack([train_item_features, sbert_vectors])

n_users = len(set(user_indices))
n_items = len(set(item_indices))
item_feature_size = train_item_features.shape[1]

model = init_model(n_users, n_items, embedding_dim, item_feature_size, args.dropout)

# Example forward pass
model.eval()
users = torch.tensor(user_indices)
items = torch.tensor(item_indices)
item_sequences = torch.tensor(item_sequences)
item_features = torch.tensor(train_item_features)
predictions = model.predict(users, item_sequences, item_features, items)
print(predictions)

  0%|          | 0/1 [00:00<?, ?it/s]

tensor([[0.4374],
        [0.4554],
        [0.3982],
        [0.4192],
        [0.4361]], grad_fn=<SigmoidBackward0>)


In [13]:
rating_dataset = UserItemBinaryDFDataset(
    train_df,
    "user_indice",
    "item_indice",
    args.rating_col,
    args.timestamp_col,
    item_feature=train_item_features,
)

train_loader = DataLoader(rating_dataset, batch_size=batch_size, shuffle=False)

In [14]:
for batch_input in train_loader:
    print(batch_input)

{'user': tensor([0, 0]), 'item': tensor([0, 1]), 'rating': tensor([0., 1.]), 'item_sequence': tensor([[-1, -1,  2,  3],
        [-1, -1,  2,  3]]), 'item_feature': tensor([[-1.4698e-02,  5.6424e+00, -1.4698e-02,  ...,  2.9136e-02,
         -3.0617e-02,  1.4856e-03],
        [-1.4698e-02, -1.7723e-01, -1.4698e-02,  ..., -2.6627e-03,
         -3.4459e-02,  1.1402e-02]])}
{'user': tensor([1, 2]), 'item': tensor([2, 3]), 'rating': tensor([1., 1.]), 'item_sequence': tensor([[-1, -1,  1,  3],
        [-1, -1,  2,  1]]), 'item_feature': tensor([[-1.4698e-02,  5.6424e+00, -1.4698e-02,  ...,  2.4071e-03,
         -4.1083e-02,  4.6736e-04],
        [-1.4698e-02, -1.7723e-01, -1.4698e-02,  ..., -5.8320e-03,
         -6.7804e-02,  4.7192e-03]])}
{'user': tensor([2]), 'item': tensor([4]), 'rating': tensor([0.]), 'item_sequence': tensor([[-1, -1,  2,  1]]), 'item_feature': tensor([[-1.4698e-02, -1.7723e-01, -1.4698e-02, -2.5463e-02, -1.4698e-02,
         -1.4698e-02, -2.0788e-02, -8.2101e-02, -2.872

In [15]:
# Prepare all item features for recommendation
all_items_df = train_df.drop_duplicates(subset=["item_indice"])
all_items_indices = all_items_df["item_indice"].values
all_items_features = item_metadata_pipeline.transform(all_items_df).astype(np.float32)
all_sbert_vectors = ann_index.get_vector_by_ids(all_items_indices.tolist()).astype(np.float32)
all_items_features = np.hstack([all_items_features, all_sbert_vectors])

lit_model = LitRanker(
    model,
    log_dir=args.notebook_persist_dp,
    all_items_indices=all_items_indices,
    all_items_features=all_items_features
)

# train model
trainer = L.Trainer(
    default_root_dir=f"{args.notebook_persist_dp}/test",
    max_epochs=2,
    accelerator=args.device if args.device else "auto",
)
trainer.fit(
    model=lit_model, train_dataloaders=train_loader, val_dataloaders=train_loader
)

  0%|          | 0/1 [00:00<?, ?it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name  | Type   | Params | Mode
----------------------------------------
0 | model | Ranker | 8.2 K  | eval
----------------------------------------
8.2 K     Trainable params
0         Non-trainable params
8.2 K     Total params
0.033     Total estimated model params size (MB)
0         Modules in train mode
14        Modules in eval mode


Sanity Checking: |                                                                                            …

/Users/dvq/frostmourne/recsys-mvp/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=10` in the `DataLoader` to improve performance.
/Users/dvq/frostmourne/recsys-mvp/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=10` in the `DataLoader` to improve performance.
/Users/dvq/frostmourne/recsys-mvp/.venv/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (3) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=2` reached.
[32m2024-10-27 22:42:39.368[0m | [1mINFO    [0m | [36msrc.ranker.trainer[0m:[36mon_fit_end[0m:[36m134[0m - [1mLogging classification metrics...[0m
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
# After fitting
model.eval()
predictions = model.predict(users, item_sequences, item_features, items)
print(predictions)

tensor([[0.4297],
        [0.4749],
        [0.4110],
        [0.4287],
        [0.4394]], grad_fn=<SigmoidBackward0>)


In [17]:
# Get the last row of each item as input for recommendations (containing the most updated item_sequence)
to_rec_df = train_df.sort_values(args.timestamp_col, ascending=False).drop_duplicates(
    subset=["user_indice"]
)
recommendations = model.recommend(
    torch.tensor(to_rec_df["user_indice"].values.tolist()),
    torch.tensor(to_rec_df["item_sequence"].values.tolist()),
    torch.tensor(lit_model.all_items_features),
    torch.tensor(lit_model.all_items_indices),
    k=2,
    batch_size=4,
)
recommendations

Generating recommendations:   0%|          | 0/1 [00:00<?, ?it/s]

{'user_indice': [2, 2, 1, 1, 0, 0],
 'recommendation': [2, 4, 0, 2, 1, 4],
 'score': [0.44280707836151123,
  0.43935778737068176,
  0.4133801758289337,
  0.41103968024253845,
  0.47488585114479065,
  0.474662721157074]}

# Prep data

In [18]:
train_df = pd.read_parquet("../data/train_features_neg_df.parquet")
val_df = pd.read_parquet("../data/val_features_neg_df.parquet")
idm_fp = "../data/idm.json"
idm = IDMapper().load(idm_fp)

assert (
    train_df[args.user_col].map(lambda s: idm.get_user_index(s))
    != train_df["user_indice"]
).sum() == 0, "Mismatch IDM"
assert (
    val_df[args.user_col].map(lambda s: idm.get_user_index(s)) != val_df["user_indice"]
).sum() == 0, "Mismatch IDM"

In [19]:
train_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,parent_asin_rating_avg_prev_rating_30d,...,item_indice,main_category,title,description,categories,price,user_rating_cnt_90d,user_rating_avg_prev_rating_90d,user_rating_list_10_recent_asin,item_sequence
0,AEKSUPM7CH53J3G5PA3JLWLJXUMQ,B00QXJFDZO,0.0,2017-10-30 14:23:22.389,6.0,3.666667,0.0,,0.0,,...,2919,Video Games,Godzilla - PlayStation 4,"[Godzilla, the King of the Monsters, first app...","[Video Games, PlayStation 4, Games]",,2,5.000000,"B005FVBYV8,B003FMTZSI,B01MS6WG9S,B073W2T5F6","[-1, -1, -1, -1, -1, -1, 1912, 1470, 3498, 3723]"
1,AHSNMFN6DUFTNEZAXBVPIYMXWIFQ,B075MYT126,0.0,2017-11-27 22:01:33.258,,,,,,,...,3777,Video Games,Nintendo Switch Pro Controller - Xenoblade Chr...,[Kick your game sessions up a notch with the N...,"[Video Games, Nintendo Switch, Accessories, Co...",94.98,2,4.000000,"B00CJ9OTNE,B0118YZG0A,B008M502H6,B003Y70W4U,B0...","[2391, 3100, 2176, 1588, 3161, 2133, 2906, 166..."
2,AFE47G5MX35LSHZHZXRYEJFMYPUA,B007VYW5K6,0.0,2017-03-23 21:41:18.000,12.0,4.166667,2.0,4.000000,0.0,,...,2086,Video Games,Crysis 3 [Instant Access],[The award-winning developer crytek is back wi...,"[Video Games, PC]",,1,,"B07YBX8RNF,B0166QDJDQ,B01CHU4IY4,B00Z9LUDX4,B0...","[4278, 3183, 3288, 3038, 4508, 3391, 3403, 368..."
3,AFJDWGBE3MGULXTO3FUZ5YB6FKDA,B07L5FKGQH,0.0,2017-01-18 15:50:12.000,,,,,,,...,4048,Video Games,Far Cry New Dawn - Deluxe Edition - Xbox [Digi...,"[Dive into a transformed, post-apocalyptic Hop...","[Video Games, Xbox One, Games]",49.88,25,4.416667,"B00I6E6SH6,B00O65I2VY,B005GISQQG,B00008KTNW,B0...","[2632, 2859, 1920, 253, 1428, 1053, 584, 732, ..."
4,AHFDYGJR3SM2D463ZWKGHJPNBKDA,B002BSA2LQ,0.0,2014-01-29 22:50:20.000,2.0,4.500000,1.0,5.000000,0.0,,...,1215,Video Games,Forza Motorsport 3 - Xbox 360,"[Product Description, Whether it’s an exotic s...","[Video Games, Legacy Systems, Xbox Systems, Xb...",31.49,4,5.000000,"B002I0K956,B008CZN458,B0050SXVK8","[-1, -1, -1, -1, -1, -1, -1, 1328, 2146, 1843]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
657187,AE5TQ7DBEX2L5T665M6ZDPGYZ32Q,B01LDUYTYS,0.0,2013-10-05 20:20:52.000,,,,,,,...,3442,Video Games,Poochy & Yoshi's Woolly World + Yarn Poochy am...,[Discover a handicraft world bursting at the s...,"[Video Games, Legacy Systems, Nintendo Systems...",249.99,1,,B07X1HF3V6,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 4237]"
657188,AFDG3CXM4DP7X436YNOKTJHVKJQA,B087NNPYP3,5.0,2018-07-10 21:22:10.594,127.0,4.708661,20.0,4.350000,9.0,4.0,...,4342,Video Games,The Legend of Zelda: Breath of the Wild Master...,"[Woken from a 100-year slumber, Link must expl...","[Video Games, Nintendo Switch, Consoles]",,3,5.000000,"B002I0H79C,B00503E9FY,B00KVOVBGM,B00SHXKC8M,B0...","[1292, 1807, 2734, 2940, 3402, 2759, 2702, 104..."
657189,AFOUC3S3RH7AXMPZBZHLO4WMLLVA,B004AM65C6,0.0,2018-12-16 13:39:37.174,1.0,5.000000,0.0,,0.0,,...,1651,Video Games,Minute to Win It (Kinect) - Xbox 360,"[Product Description, Tackle unique challenges...","[Video Games, Legacy Systems, Xbox Systems, Xb...",12.48,2,5.000000,"B002BSA388,B00PIEI1DG,B08MBHYJP4,B071GPJVTQ,B0...","[1216, 2898, 4397, 3643, 3642, 3527, 3423, 367..."
657190,AEPOGF2QMAXO4W3TYP27DCQRITGA,B07X1HF3V6,0.0,2013-05-30 22:53:17.000,24.0,4.875000,12.0,4.833333,1.0,5.0,...,4237,Video Games,WB Games Mortal Kombat: Komplete Edition - Pla...,[Note:The extra downloadable content is a bonu...,"[Video Games, Legacy Systems, PlayStation Syst...",34.43,4,3.666667,"B0013OL0BK,B002D2Y3IS,B0044R8X9U,B07VLCRZ21,B0...","[-1, -1, -1, -1, -1, 652, 1240, 1629, 4207, 1707]"


In [20]:
user_indices = train_df["user_indice"].unique()
item_indices = train_df["item_indice"].unique()
all_sbert_vectors = ann_index.get_vector_by_ids(item_indices.tolist(), chunk_size=1000).astype(np.float32)

train_item_features = chunk_transform(
    train_df, item_metadata_pipeline, chunk_size=args.tfm_chunk_size
)
train_item_features = train_item_features.astype(np.float32)
train_sbert_vectors = all_sbert_vectors[train_df['item_indice'].values]
train_item_features = np.hstack([train_item_features, train_sbert_vectors])

val_item_features = chunk_transform(
    val_df, item_metadata_pipeline, chunk_size=args.tfm_chunk_size
)
val_item_features = val_item_features.astype(np.float32)
val_sbert_vectors = all_sbert_vectors[val_df['item_indice'].values]
val_item_features = np.hstack([val_item_features, val_sbert_vectors])

logger.info(f"{len(user_indices)=:,.0f}, {len(item_indices)=:,.0f}")

  0%|          | 0/5 [00:00<?, ?it/s]

Transforming chunks:   0%|          | 0/66 [00:00<?, ?it/s]

Transforming chunks:   0%|          | 0/1 [00:00<?, ?it/s]

[32m2024-10-27 22:42:50.271[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mlen(user_indices)=19,578, len(item_indices)=4,630[0m


# Train

In [21]:
rating_dataset = UserItemBinaryDFDataset(
    train_df,
    "user_indice",
    "item_indice",
    args.rating_col,
    args.timestamp_col,
    item_feature=train_item_features,
)
val_rating_dataset = UserItemBinaryDFDataset(
    val_df,
    "user_indice",
    "item_indice",
    args.rating_col,
    args.timestamp_col,
    item_feature=val_item_features,
)

train_loader = DataLoader(
    rating_dataset, batch_size=args.batch_size, shuffle=True, drop_last=True
)
val_loader = DataLoader(
    val_rating_dataset, batch_size=args.batch_size, shuffle=False, drop_last=False
)

In [22]:
n_items = len(item_indices)
n_users = len(user_indices)

model = init_model(
    n_users, n_items, args.embedding_dim, item_feature_size, args.dropout
)
model

Ranker(
  (item_embedding): Embedding(4631, 128, padding_idx=4630)
  (user_embedding): Embedding(19578, 128)
  (gru): GRU(128, 128, batch_first=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.3, inplace=False)
  (item_feature_tower): Sequential(
    (0): Linear(in_features=929, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
  )
  (fc_rating): Sequential(
    (0): Linear(in_features=512, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=128, out_features=1, bias=True)
    (5): Sigmoid()
  )
)

#### Predict before train

In [23]:
val_df = val_rating_dataset.df
val_df.sample(10)

Unnamed: 0,user_id,parent_asin,rating,timestamp,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,parent_asin_rating_avg_prev_rating_30d,...,item_indice,main_category,title,description,categories,price,user_rating_cnt_90d,user_rating_avg_prev_rating_90d,user_rating_list_10_recent_asin,item_sequence
3503,AF3GQJW534GEP3I4WKRGKXHBKE3Q,B00DJRLDMU,0.0,2021-12-27 03:58:04.746,0.0,,0.0,,0.0,,...,2448,Video Games,Tom Clancy’s The Division Underground - Xbox O...,"[In Expansion I, Underground, the factions of ...","[Video Games, Xbox One, Downloadable Content]",,1,,"B00DDILSBG,B01GY35HKE,B072C3VM5F,B01M6C7YE8,B0...","[-1, -1, -1, -1, -1, 2441, 3373, 3691, 3479, 4..."
777,AH2NODYUKDSKO36GWENCKJCA26CQ,B0002XL3BA,0.0,2022-04-05 18:58:50.338,1.0,5.0,1.0,5.0,0.0,,...,342,Video Games,God of War - PlayStation 2,[Unleash the power of the Gods and embark on a...,"[Video Games, Legacy Systems, PlayStation Syst...",17.98,1,,"B003DOSRSW,B002JTX78M,B01GY35YXO,B01GKFPFZS,B0...","[-1, -1, -1, -1, 1459, 1346, 3378, 3345, 2581,..."
2429,AGIUOPGPDP4YXZFQPSUPH4WDSP2A,B08SBWDBDB,1.0,2021-10-29 18:03:04.248,1.0,5.0,0.0,,0.0,,...,4431,Video Games,eXtremeRate Green Purple Chameleon Replacement...,[],"[Video Games, Nintendo Switch, Accessories]",13.99,1,,"B07L3D7C21,B08D3XL1KF,B0719W8QL3,B07CV6LH3V,B0...","[-1, -1, -1, 4043, 4368, 3632, 3908, 3729, 417..."
3558,AHEAWXZ7P75OLPT25RKZYPW4HKSQ,B001EYUSJ4,0.0,2021-12-07 16:23:39.578,1.0,4.0,0.0,,0.0,,...,955,Video Games,Final Fantasy XIII - Playstation 3,"[Product Description, Final Fantasy XIII is th...","[Video Games, Legacy Systems, PlayStation Syst...",17.97,1,,"B001EYUS7G,B00BQVXUOA,B00KIFM28A,B001SGZL2W,B0...","[949, 2356, 2705, 1149, 2721, 2448, 4284, 3357..."
3013,AGULGCFB6EZHH6LDKKEYR4QQ477Q,B08XD54VJY,1.0,2021-08-16 01:54:55.683,12.0,4.083333,12.0,4.083333,11.0,4.181818,...,4440,Video Games,The Legend of Zelda: Skyward Sword HD - Ninten...,"[Solve puzzles, explore dungeons, and soar the...","[Video Games, Nintendo Switch, Games]",48.49,1,,"B002BSH3K4,B0BWD2QV4N,B001ELJF8M,B07J2KVXY3,B0...","[-1, -1, -1, -1, -1, 1224, 4586, 796, 4016, 531]"
3774,AF5SJBTOYDT5LR7HPQ6OZ77CO3NA,B008HPAXZ2,0.0,2022-03-28 21:57:29.641,0.0,,0.0,,0.0,,...,2157,Video Games,HORI Nintendo 3DS XL Screen Protective Filter,[Officially licensed by Nintendo. This is the ...,"[Video Games, Legacy Systems, Nintendo Systems...",,1,,"B00000K1VA,B00001XDUB,B00004WMVL,B00000K1VE,B0...","[27, 37, 113, 28, 231, 427, 551, 2321, 4013, 4..."
487,AHCFZPQLIV7236UBXSGU3I5SMXBQ,B00QO4NAOO,1.0,2021-12-07 21:14:02.259,2.0,5.0,0.0,,0.0,,...,2916,Video Games,MLB 15: The Show (10th Anniversary Edition),[The 10th edition of “The Show” and the best w...,"[Video Games, PlayStation 4]",41.99,3,4.5,"B072V478NR,B076GXJNDZ,B0C5HQ6L7F,B072C3VM5F,B0...","[3705, 3798, 4612, 3691, 3087, 3479, 3723, 391..."
3834,AGEWEMFAC5HSC2DQF4PRJU3UISVA,B00K308KF4,0.0,2022-01-16 20:31:55.061,4.0,3.5,0.0,,0.0,,...,2694,Video Games,Call of Duty Advanced Warfare - Day Zero Edition,[Order and get access to the Call of Duty: Adv...,"[Video Games, Xbox One, Games]",29.47,1,,"B001EYUU2O,B00K5HTPR2,B07K3KHFSY,B00GN67PJ4,B0...","[-1, 977, 2697, 4028, 2578, 2391, 3007, 3077, ..."
2401,AGJFKYY6UEHVJNRG2IBKLWYHOMUQ,B0080CAO9C,0.0,2022-06-06 03:26:47.178,0.0,,0.0,,0.0,,...,2101,Video Games,Pro Evolution Soccer 2013 - Xbox 360,"[Product Description, This fall Pro Evolution ...","[Video Games, Legacy Systems, Xbox Systems, Xb...",20.25,1,,"B00267S2A0,B001G7PRLS,B003O6E620,B078FBVJMB,B0...","[-1, -1, -1, -1, -1, 1178, 1070, 1511, 3829, 3..."
1059,AEEFFIJ2GOSCLNBGCIDOF7HKXB7Q,B000E36ONK,0.0,2022-06-22 10:20:48.475,0.0,,0.0,,0.0,,...,434,Video Games,Harvest Moon Magical Melody - Gamecube,[Harvest Moon: Magical Melody is a unique new ...,"[Video Games, Legacy Systems, Nintendo Systems...",59.95,1,,"B00HM3QQVK,B0072A4GQK,B0144K8KQW,B0BN2FNKLM,B0...","[2609, 2033, 3138, 4569, 3527, 3687, 3729, 366..."


In [24]:
user_id = val_df.sample(1)[args.user_col].values[0]
# user_id = "AH4AOFTTDPHPAFAAVFMAF25H2LIQ"
test_df = val_df.loc[lambda df: df[args.user_col].eq(user_id)]
with pd.option_context("display.max_colwidth", None):
    display(test_df)

Unnamed: 0,user_id,parent_asin,rating,timestamp,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,parent_asin_rating_avg_prev_rating_30d,...,item_indice,main_category,title,description,categories,price,user_rating_cnt_90d,user_rating_avg_prev_rating_90d,user_rating_list_10_recent_asin,item_sequence
1883,AFNGUKYWUWCH6HAANP4DCND6FETA,B001EYUQ8M,0.0,2022-03-09 13:29:26.011,1.0,5.0,1.0,5.0,0.0,,...,902,Video Games,BioShock 2 Special Edition -Xbox 360,"[Product Description, The BioShock 2 Special Edition will celebrate the game’s unique music and art with contents inspired by the time period and the fantastic undersea world of Rapture., Amazon.com, Follow-up to, BioShock, , 2K Games' critically acclaimed and commercially successful 2007 release,, BioShock 2, is a first-person shooter set in the fictional underwater city of Rapture. As in the original game,, BioShock 2, features a blend of fast-paced action, exploration and puzzle-solving as players follow varying paths through the overarching storyline based on the decisions that they are forced to make at points in the game. In addition to a further fleshing out of the franchise's popular storyline, players can look forward to new characters, game mechanics, weapons, locations and a series first, multiplayer game options. Finally, bonus items included with the, BioShock 2, Special Edition (see contents list below), provide the most complete, BioShock 2, experience available and are destined to become collectors items. .caption { font-family: Verdana, Helvetica neue, Arial, serif; font-size: 10px; font-weight: bold; font-style: italic; } ul.indent { list-style: inside disc; text-indent: -15px; } table.callout { font-family: verdana; font-size: 11px; line-height: 1. 3em; } td.vgoverview { height: 125px; background: #9DC4D8 url(https://images-na.ssl-images-amazon.com/images/G/01/electronics/detail-page/callout-bg.png) repeat-x; border-left: 1px solid #999999; border-right: 1px solid #999999; padding-left: 20px; padding-right: 20px; padding-bottom: 10px; width: 250px; font-family: verdana; font-size: 12px; }, The new power in Rapture., View larger, ., Duel wield plasmids & weapons., View larger, ., New choices as Mr. B., View larger, ., Franchise first multiplayer options., View larger, ., Dig into the crazed history of Rapture., View larger, ., The Story, Set approximately 10 years after the events of the original, BioShock, , the halls of Rapture once again echo with sins of the past. Along the Atlantic coastline, a monster — somehow familiar, yet still quite different from anything ever seen — has been snatching little girls and bringing them back to the undersea city. It is a Big Sister, new denizens of Rapture who were once one of the forgotten little girls known as Little Sisters, known to inhabit the city's dank halls. No longer a pawn used to harvest ADAM, the dangerously powerful gene-altering lifeblood of Rapture, from the bodies of others and in turn run the risk of being harvested herself, the Big Sister is now the fastest and most powerful thing in Rapture. You, on the other hand are the very first Big Daddy, in fact the prototype, that for some reason has reactivated. You are similar to the Big Daddies familiar from the original, BioShock, , but also very different in that you possess free will and no memory of the events of the past ten years. The question is, as you travel through the decrepit and beautiful fallen city beneath the waves, hunting for answers and the solution to your own survival, are you really the hunter, or the hunted?, Gameplay and Multiplayer, In, BioShock 2, players will take on the role of the original Big Daddy, not that of game one protagonist, Jack. As a Big Daddy you will have access to all the strengths and weapons of a standard Big Daddy, including the drill and rivet gun. More importantly you also possess free will and the ability to use plasmids and gene tonics — genetic modifications allowed for through ADAM, a stem cell harvested from conquered enemies, or sea slugs outside the Rapture air lock, and powered by the in-game injectable serum known as EVE, which can be found, captured or purchased. Plasmids and gene tonics provide a wide range of aggressive and passive abilities which can be upgraded and arranged for quick use. The ability to use plasmids and tonics gives you a decided edge over other Big Daddies and most other denizens of Rapture, excluding the powerful Big Sisters. In addition, due to their role as a Big Daddy, players will experience a new relation to the Little Sisters. Upon defeating standard Big Daddys you are given the familiar choice as to whether to harvest or adopt them. Harvesting gains you ADAM immediately, but could alter your path through the game, while adopting makes you responsible for Little Sisters, who then accompany you through Rapture, but also provide aid and warning in times of danger. Additional gameplay features include: new plasmids, weapons and the ability to combine these two., The game also features the anticipated multiplayer modes. Several of these are team-based, allowing up to 10 players. Within these players are provided with a rich prequel experience that expands the origins of the, BioShock, fiction, and allows you to play as one of several characters pulled from Rapture's history before the events of the first game., Key Features, The Big Sister - No longer just something to be harvested or not, the Big Sister is the most powerful resident in Rapture., The Big Sister, - No longer just something to be harvested or not, the Big Sister is the most powerful resident in Rapture., You Are the Big Daddy - Take control with the original prototype Big Daddy, and experience the power and raw strength of Rapture’s most feared denizens as you battle powerful new enemies., You Are the Big Daddy, - Take control with the original prototype Big Daddy, and experience the power and raw strength of Rapture’s most feared denizens as you battle powerful new enemies., New Plasmids - New plasmids such as ""Aero Dash"" allowing for bursts of speed over short distances, and ""Geyser Trap"" a stream of water used as a jump pad and electrical conductor, join the ample list of Plasmids from the original game., New Plasmids, - New plasmids such as ""Aero Dash"" allowing for bursts of speed over short distances, and ""Geyser Trap"" a stream of water used as a jump pad and electrical conductor, join the ample list of Plasmids from the original game., New Game Mechanics - BioShock 2 contains many new gameplay mechanics. Just a few of these are: the ability to wield plasmids and weapons simultaneously; flashback missions detailing how you became the Big Daddy; the ability to walk outside the airlocks of Rapture to discover new play areas, and many more., New Game Mechanics, -, BioShock 2, contains many new gameplay mechanics. Just a few of these are: the ability to wield plasmids and weapons simultaneously; flashback missions detailing how you became the Big Daddy; the ability to walk outside the airlocks of Rapture to discover new play areas, and many more., New Locations - Just a few of the locations and environments debuting in BioShock 2 are Fontaine Futuristics, headquarters of Fontaine's business empire and the Kashmir Restaurant., New Locations, - Just a few of the locations and environments debuting in, BioShock 2, are Fontaine Futuristics, headquarters of Fontaine's business empire and the Kashmir Restaurant., Evolution of the Genetically Enhanced Shooter - Innovative advances bring new depth and dimension to each encounter, allowing players to create exciting combinations to fit their style of gameplay., Evolution of the Genetically Enhanced Shooter, - Innovative advances bring new depth and dimension to each encounter, allowing players to create exciting combinations to fit their style of gameplay., Return to Rapture - Set approximately 10 years after the events of the original BioShock, the story continues with an epic, more intense journey through one of the most captivating and terrifying fictional worlds ever created., Return to Rapture, - Set approximately 10 years after the events of the original BioShock, the story continues with an epic, more intense journey through one of the most captivating and terrifying fictional worlds ever created., Genetically Enhanced Multiplayer - Earn experience points during gameplay to earn access to new weapons, plasmids and tonics that can be used to create hundreds of different combinations., Genetically Enhanced Multiplayer, - Earn experience points during gameplay to earn access to new weapons, plasmids and tonics that can be used to create hundreds of different combinations., Experience Rapture’s Civil War - Players will step into the shoes of Rapture's citizens and take direct part in the civil war that tore Rapture apart., Experience Rapture’s Civil War, - Players will step into the shoes of Rapture's citizens and take direct part in the civil war that tore Rapture apart., See Rapture Before the Fall - Experience Rapture before it was reclaimed by the ocean and engage in combat over iconic environments in locations such as Kashmir Restaurant and Mercury Suites, all of which have been reworked from the ground up for multiplayer., See Rapture Before the Fall, - Experience Rapture before it was reclaimed by the ocean and engage in combat over iconic environments in locations such as Kashmir Restaurant and Mercury Suites, all of which have been reworked from the ground up for multiplayer., The, BioShock 2, Special Edition for Xbox 360 Includes:, View larger, ., BioShock 2 for Xbox 360, BioShock 2, for Xbox 360, Vinyl 180g LP with BioShock orchestral score, Vinyl 180g LP with, BioShock, orchestral score, Audio CD with BioShock 2 orchestral score, Audio CD with, BioShock 2, orchestral score, Three vintage Rapture advertisement posters (rolled), Three vintage Rapture advertisement posters (rolled), BioShock 2 Art Book, 164 pages and hardcover, BioShock 2 Art Book, 164 pages and hardcover]","[Video Games, Legacy Systems, Xbox Systems, Xbox 360, Games]",184.91,1,,"B0B2RQXT92,B08Y5DN2FX,B087SHFL9B,B087NNPYP3,B01GH9MVWW,B07P6MD9B7,B094WQR3H3","[-1, -1, -1, 4523, 4445, 4344, 4342, 3341, 4124, 4453]"
2965,AFNGUKYWUWCH6HAANP4DCND6FETA,B001E27DLM,0.0,2022-03-09 13:29:26.011,4.0,5.0,0.0,,0.0,,...,714,,Chrono Trigger,"[Product Description, Error, Amazon.com, After 13 long years, the role playing game of the ages finally returns with Chrono Trigger for the Nintendo DS. This chapter begins when a newly developed teleportation device malfunctions, and young Crono must journey through time to rescue a mysterious girl from an intricate web of past and present perils. Enhanced with Nintendo DS's dual-screen presentation, stylus controls, and a host of great new features, this classic tale returns to a modern, portable platform., And so the Story Goes. . ., Through a chance encounter amid the festivities of Guardia's Millenial Fair in Leene Square, the young hero, Crono, meets an adventurous girl named Marle. The two decide to explore the fair together and soon find themselves at an exhibition of the Telepod -- the latest invention by Crono's long-time friend, Lucca., Marle, fearless and brimming with curiosity, volunteers to assist in a demo. However, an unanticipated malfunction sends her hurtling through a rift in the dimensions. Taking hold of the girls pendant just before she's whisked away,, Crono bravely follows in pursuit, but the world into which he emerges is one of four centuries ago. In Chrono Trigger prepare yourself to journey into the forgotten past, distant future, and even to the very End of Time., The Worlds of Past, Present and Future, In Chrono Trigger you'll journey back to Prehistory (65,000,000 B.C.) where humans and reptiles battle to wipe each other from existence. Antiquity (12,000 B.C.) is an age where the world is divided between people whose continent is buried in snow, and the magical kingdom of Zeal, a highly advanced civilization. The Middle Ages (600 A.D.) is an era of swords and sorcery, a dark time when the armies of Fiendlord rule over the land., The Present (1000 A.D.) is the time period in which Crono, Lucca and Marle live. It is a bright and peaceful age. However, in the Future (2300 A.D.), an era of despair has taken hold with rogue machines ruling the world. After the day of the apocalypse in 1999 A.D., the prosperous civilization of humanity crumbled and the remaining people struggle to stay alive. And, finally, there remains the End of Time -- a place with no era to call its own. This confluence of time's streams transcends spatiotemporal boundaries. It is here at the gravitational center for all temporal flotsam that Spekkio -- the Master of War -- serves as your guide on time's treacherous roads., Battle Systems and gameplay, This game utilizes a revised version of the groundbreaking Active Time Battle (ATB) System. Chrono Trigger features exhilarating combat in which the clock is constantly ticking. Characters must first wait as the ATB gauges charge, and then perform an action. This makes strategic timing a crucial element in your battle plan. In addition to standard attacks, each character has an array of special Tech skills and powerful combos known as Dual and Triple Techs. Cooperate with other characters to unleash over 50 unique and devastating moves!, Chrono Trigger utilizes great story-telling, interesting characters, action-packed gameplay, and the unique controls of the Nintendo DS to deliver a fun and well-rounded role-playing game on a portable platform.]","[Video Games, Legacy Systems, Nintendo Systems, Nintendo DS]",264.7,1,,"B0B2RQXT92,B08Y5DN2FX,B087SHFL9B,B087NNPYP3,B01GH9MVWW,B07P6MD9B7,B094WQR3H3","[-1, -1, -1, 4523, 4445, 4344, 4342, 3341, 4124, 4453]"
3420,AFNGUKYWUWCH6HAANP4DCND6FETA,B004WMM2NA,0.0,2022-03-09 13:29:26.011,1.0,5.0,1.0,5.0,1.0,5.0,...,1792,Video Games,Animal Crossing: City Folk (Nintendo Selects),"[Product Description, If life were an endless vacation, what would you do? Go fishing, collect shells, or watch fireworks with friends? Build a snowman, exchange presents with family, or decorate your house for the holidays? Take a trip to the city, go on a shopping spree, or visit friends from all over the globe? In Animal Crossing, life moves at a relaxed pace, but the world brims with endless possibilities. There's always something new to do! In the living, breathing world of Animal Crossing, days and seasons pass in real time, so there's always something to discover. Catch fireflies in the summer, go trick-or-treating on Halloween, or hunt for eggs on Bunny Day. If you're in the mood for something a little faster paced, take a trip into the all-new city and catch a show at the theater, check out the sales at Gracie's boutique, or change up your look at the salon! But if you don't show your face for too long, your neighbors will miss you! The perfect place to hang out with friends! The heart of Animal Crossing is building relationships with other players as well as the animals in your town. Live with up to three other people from your household and build the perfect town together! Design clothes and patterns, write letters, and post messages on the bulletin board for each other, or invite up to three friends to visit your town using Nintendo Wi-Fi Connection-with the new Wii Speak, it's like you're all in the same room! Get to know your neighbors! Befriend your animal neighbors by exchanging letters, gifts, and favors. Animals can also move from town to town, and when they do, they bring their memories of life in your friends' towns with them. And since animals are notoriously loose-lipped, they spill all the juicy details! Express your personal style! Customize your town, your house, and yourself by collecting bugs, fish, fossils, art, furniture, clothes, and accessories., Amazon.com, If you were given the keys to your own community, what would you do? Go fishing, collect shells or watch fireworks with friends? Build a snowman, exchange presents with family or decorate your house for the holidays? Take a trip to the city, go on a shopping spree or visit friends from all over the globe? In, Animal Crossing: City Folk, , life moves at a relaxed pace, but the world brims with endless possibilities., Gameplay, You make the whole story, as you and up to three other players move into a town and just live life. Befriend your animal neighbors, decorate your house with cool furnishings, fill up your wardrobe, get to know the local wildlife, hop on a bus to visit the new city and just explore the world. There are a million different ways to play. Every charming animal character has a personality: some are grouches while others are chatterboxes. And there's no final goal or high score to hit. The game keeps going for as long as you want to play, and your town will always be there when you return. Move into town, buy a house and then do whatever you want. Time and seasons pass as they do in the real world, so there's always something different happening. Collect more than 2,400 items, go fishing for rare and interesting fish, catch all kind of cool bugs, dig up dinosaur fossils and buried treasure, hang out with other players or spend the day in the city. There's so much to do, and you have all the time in the world to explore it all., DS Suitcase Mode, The DS Suitcase lets you carry your character from your Wii console to a friend's, thus giving people without an Internet connection the ability to experience multiplayer modes. Additionally, you can move your character from, Animal Crossing: Wild World, on Nintendo DS and play as him/her in, Animal Crossing: City Folk, ., Key Game Features, There's Always Something New To Do: In the living, breathing world of Animal Crossing: City Folk, days and seasons pass in real time, so there's always something to discover. Catch fireflies in the summer, go trick-or-treating on Halloween or hunt for eggs on Bunny Day. If you're in the mood for something a little faster paced, take a bus to a new urban city area that's unique to Animal Crossing: City Folk. There you can catch a show at the theater or check out the sales at Gracie's boutique. But if you don't show your face back home for too long, your neighbors will miss you., There's Always Something New To Do, : In the living, breathing world of, Animal Crossing: City Folk, , days and seasons pass in real time, so there's always something to discover. Catch fireflies in the summer, go trick-or-treating on Halloween or hunt for eggs on Bunny Day. If you're in the mood for something a little faster paced, take a bus to a new urban city area that's unique to, Animal Crossing: City Folk, . There you can catch a show at the theater or check out the sales at Gracie's boutique. But if you don't show your face back home for too long, your neighbors will miss you., Play With and Hear Up to Four Friends: Up to four people from your household can live and work together to build the perfect town. Design clothes and patterns, write letters and post messages on the bulletin board for each other, or play online using your broadband connection and invite up to three friends to visit your town using Nintendo Wi-Fi Connection. With the new optional Wii Speak microphone (sold separately), it's like you're all in the same room. The microphone sits atop the sensor bar and picks up the conversation of everyone in the room to encourage a more inclusive experience., Play With and Hear Up to Four Friends, : Up to four people from your household can live and work together to build the perfect town. Design clothes and patterns, write letters and post messages on the bulletin board for each other, or play online using your broadband connection and invite up to three friends to visit your town using Nintendo Wi-Fi Connection. With the new optional Wii Speak microphone (sold separately), it's like you're all in the same room. The microphone sits atop the sensor bar and picks up the conversation of everyone in the room to encourage a more inclusive experience., Get to Know Your Neighbors: The heart of Animal Crossing: City Folk is building relationships with the animals in your town as well as with other players. Befriend your animal neighbors by exchanging letters, gifts and favors. Animals can also move from town to town, bringing their memories and stories from their old towns with them. And since animals are notoriously loose-lipped, they spill all the juicy details., Get to Know Your Neighbors, : The heart of, Animal Crossing: City Folk, is building relationships with the animals in your town as well as with other players. Befriend your animal neighbors by exchanging letters, gifts and favors. Animals can also move from town to town, bringing their memories and stories from their old towns with them. And since animals are notoriously loose-lipped, they spill all the juicy details., Express Your Personal Style: Customize your town, your house and yourself by collecting bugs, fish, fossils, art, furniture, clothes and accessories. You can also go to the salon in the city to change your hairstyle and get a Mii makeover. Plus, if you design clothes in the tailor's shop, animals will wear them and maybe even bring them to other towns., Express Your Personal Style, : Customize your town, your house and yourself by collecting bugs, fish, fossils, art, furniture, clothes and accessories. You can also go to the salon in the city to change your hairstyle and get a Mii makeover. Plus, if you design clothes in the tailor's shop, animals will wear them and maybe even bring them to other towns., Your Neighbors, Familiar faces such as K.K. Slider, Tom Nook, Blathers and Mr. Resetti all appear, as well as a bunch of new characters like Festivale host Pavé and Bug-Off judge Bud. Many characters who occasionally visited your town in previous Animal Crossing games have now set up permanent shop in the city, so you can see them anytime., Special Powers, Weapons, Moves & Features, : Use the Wii Remote pointer to type letters, use items, draw designs for clothing or wallpaper, drag clothing or items onto your characters, interact with animals or objects, or lead your character around the world. Use Nintendo Wi-Fi Connection to hang out in real time with up to three of your friends. You can also send them e-mails and text messages from the game. Play at different times of the year to experience different activities, holidays and seasons. And when visiting a friend in another country, experience the holidays native to their culture., Nintendo Wi-Fi Connection, Up to four people can play together in real time via Nintendo Wi-Fi Connection. The host opens his or her gate to allow friends into the town, where they can perform all sorts of activities: fish, write letters to townsfolk, shop at the store, swap items, play hide-and-seek ... anything. Up to four players can interact in real-time, communicating via text chat, mic chat and emoticons., WiiConnect24, : Using WiiConnect24, you can buy and sell items to friends by participating in silent auctions, view actual players' homes in the Happy Room Academy office or send letters to other players' towns.]","[Video Games, Legacy Systems, Nintendo Systems, Wii, Games]",87.79,1,,"B0B2RQXT92,B08Y5DN2FX,B087SHFL9B,B087NNPYP3,B01GH9MVWW,B07P6MD9B7,B094WQR3H3","[-1, -1, -1, 4523, 4445, 4344, 4342, 3341, 4124, 4453]"
3638,AFNGUKYWUWCH6HAANP4DCND6FETA,B079FPFV3X,1.0,2022-03-09 13:29:26.011,7.0,4.285714,1.0,5.0,1.0,5.0,...,3851,All Electronics,"OIVO PS4 Stand Cooling Fan Station for Playstation 4/PS4 Slim/PS4 Pro, PS4 Pro Vertical Stand with Dual Controller EXT Port Charger Dock Station and 12 Game Slots",[],"[Video Games, PlayStation 4, Accessories, Cooling Systems]",29.99,1,,"B0B2RQXT92,B08Y5DN2FX,B087SHFL9B,B087NNPYP3,B01GH9MVWW,B07P6MD9B7,B094WQR3H3","[-1, -1, -1, 4523, 4445, 4344, 4342, 3341, 4124, 4453]"


In [25]:
test_row = test_df.loc[lambda df: df[args.rating_col].gt(0)].iloc[0]
item_id = test_row[args.item_col]
item_sequence = test_row["item_sequence"]
row_idx = test_row.name
item_feature = val_item_features[row_idx]
logger.info(
    f"Test predicting before training with {args.user_col} = {user_id} and {args.item_col} = {item_id}"
)
user_indice = idm.get_user_index(user_id)
item_indice = idm.get_item_index(item_id)
user = torch.tensor([user_indice])
item_sequence = torch.tensor([item_sequence])
item_feature = torch.tensor([item_feature])
item = torch.tensor([item_indice])

model.eval()
model.predict(user, item_sequence, item_feature, item)

[32m2024-10-27 22:42:52.873[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mTest predicting before training with user_id = AFNGUKYWUWCH6HAANP4DCND6FETA and parent_asin = B079FPFV3X[0m

Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/utils/tensor_new.cpp:281.)



tensor([[0.4902]], grad_fn=<SigmoidBackward0>)

#### Training loop

##### Overfit 1 batch

In [26]:
early_stopping = EarlyStopping(
    monitor="val_loss", patience=10, mode="min", verbose=False
)

model = init_model(n_users, n_items, args.embedding_dim, item_feature_size, dropout=0)
lit_model = LitRanker(
    model,
    learning_rate=args.learning_rate,
    l2_reg=0.0,
    log_dir=args.notebook_persist_dp,
)

log_dir = f"{args.notebook_persist_dp}/logs/overfit"

# train model
trainer = L.Trainer(
    default_root_dir=log_dir,
    accelerator=args.device if args.device else "auto",
    max_epochs=100,
    overfit_batches=1,
    callbacks=[early_stopping],
)
trainer.fit(
    model=lit_model,
    train_dataloaders=train_loader,
    val_dataloaders=train_loader,
)
logger.info(f"Logs available at {trainer.log_dir}")

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(overfit_batches=1)` was configured so 1 batch will be used.

  | Name  | Type   | Params | Mode 
-----------------------------------------
0 | model | Ranker | 3.4 M  | train
-----------------------------------------
3.4 M     Trainable params
0         Non-trainable params
3.4 M     Total params
13.533    Total estimated model params size (MB)
14        Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                                                                            …


You requested to overfit but enabled val dataloader shuffling. We are turning off the val dataloader shuffling for you.


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=10` in the `DataLoader` to improve performance.


You requested to overfit but enabled train dataloader shuffling. We are turning off the train dataloader shuffling for you.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=10` in the `DataLoader` to improve performance.


The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.



Training: |                                                                                                   …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=100` reached.
[32m2024-10-27 22:43:10.183[0m | [1mINFO    [0m | [36msrc.ranker.trainer[0m:[36mon_fit_end[0m:[36m134[0m - [1mLogging classification metrics...[0m
[32m2024-10-27 22:44:00.625[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m28[0m - [1mLogs available at /Users/dvq/frostmourne/recsys-mvp/notebooks/data/017-item-popularity/logs/overfit/lightning_logs/version_5[0m


In [27]:
%tensorboard --logdir $trainer.log_dir

##### Fit on all data

In [28]:
all_items_df = train_df.drop_duplicates(subset=["item_indice"])
all_items_indices = all_items_df["item_indice"].values
all_items_features = item_metadata_pipeline.transform(all_items_df).astype(np.float32)
all_sbert_vectors = ann_index.get_vector_by_ids(all_items_indices.tolist()).astype(np.float32)
all_items_features = np.hstack([all_items_features, all_sbert_vectors])

  0%|          | 0/47 [00:00<?, ?it/s]

In [29]:
len(all_items_indices)

4630

In [30]:
all_items_features.shape

(4630, 929)

In [31]:
# papermill_description=fit-model
early_stopping = EarlyStopping(
    monitor="val_loss", patience=args.early_stopping_patience, mode="min", verbose=False
)

checkpoint_callback = ModelCheckpoint(
    dirpath=f"{args.notebook_persist_dp}/checkpoints",
    filename="best-checkpoint",
    save_top_k=1,
    monitor="val_loss",
    mode="min",
)

model = init_model(
    n_users,
    n_items,
    args.embedding_dim,
    item_feature_size,
    dropout=args.dropout,
    item_embedding=pretrained_item_embedding,
)
lit_model = LitRanker(
    model,
    learning_rate=args.learning_rate,
    l2_reg=args.l2_reg,
    log_dir=args.notebook_persist_dp,
    evaluate_ranking=True,
    idm=idm,
    all_items_indices=all_items_indices,
    all_items_features=all_items_features,
    args=args,
    neg_to_pos_ratio=args.neg_to_pos_ratio,
)

log_dir = f"{args.notebook_persist_dp}/logs/run"

# train model
trainer = L.Trainer(
    default_root_dir=log_dir,
    max_epochs=args.max_epochs,
    callbacks=[early_stopping, checkpoint_callback],
    accelerator=args.device if args.device else "auto",
    logger=args._mlf_logger if args.log_to_mlflow else None,
)
trainer.fit(
    model=lit_model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

Checkpoint directory /Users/dvq/frostmourne/recsys-mvp/notebooks/data/017-item-popularity/checkpoints exists and is not empty.


  | Name  | Type   | Params | Mode 
-----------------------------------------
0 | model | Ranker | 3.4 M  | train
-----------------------------------------
3.4 M     Trainable params
0         Non-trainable params
3.4 M     Total params
13.533    Total estimated model params size (MB)
14        Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                                                                            …


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=10` in the `DataLoader` to improve performance.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=10` in the `DataLoader` to improve performance.



Training: |                                                                                                   …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

[32m2024-10-27 23:08:39.430[0m | [1mINFO    [0m | [36msrc.ranker.trainer[0m:[36mon_fit_end[0m:[36m134[0m - [1mLogging classification metrics...[0m
[32m2024-10-27 23:08:40.540[0m | [1mINFO    [0m | [36msrc.ranker.trainer[0m:[36mon_fit_end[0m:[36m137[0m - [1mLogging ranking metrics...[0m


Generating recommendations:   0%|          | 0/177 [00:00<?, ?it/s]

2024/10/27 23:09:23 INFO mlflow.tracking._tracking_service.client: 🏃 View run 017-item-popularity at: http://localhost:5002/#/experiments/3/runs/c5a5c02ddd594693b90788abae264d36.
2024/10/27 23:09:23 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5002/#/experiments/3.


In [32]:
logger.info(
    f"Test predicting after training with {args.user_col} = {user_id} and {args.item_col} = {item_id}"
)
model.eval()
model.predict(user, item_sequence, item_feature, item)

[32m2024-10-27 23:09:24.361[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mTest predicting after training with user_id = AFNGUKYWUWCH6HAANP4DCND6FETA and parent_asin = B079FPFV3X[0m


tensor([[0.8907]], grad_fn=<SigmoidBackward0>)

# Load best checkpoint

In [33]:
logger.info(f"Loading best checkpoint from {checkpoint_callback.best_model_path}...")
args.best_checkpoint_path = checkpoint_callback.best_model_path

best_trainer = LitRanker.load_from_checkpoint(
    checkpoint_callback.best_model_path,
    model=init_model(
        n_users, n_items, args.embedding_dim, item_feature_size, dropout=0
    ),
)

[32m2024-10-27 23:09:24.421[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mLoading best checkpoint from /Users/dvq/frostmourne/recsys-mvp/notebooks/data/017-item-popularity/checkpoints/best-checkpoint-v5.ckpt...[0m


In [34]:
best_model = best_trainer.model.to(lit_model.device)

In [35]:
best_model.eval()
best_model.predict(user, item_sequence, item_feature, item)

tensor([[0.7696]], grad_fn=<SigmoidBackward0>)

### Persist id mapping

In [36]:
if args.log_to_mlflow:
    # Persist id_mapping so that at inference we can predict based on item_ids (string) instead of item_index
    run_id = trainer.logger.run_id
    mlf_client = trainer.logger.experiment
    mlf_client.log_artifact(run_id, idm_fp)
    # Persist item_feature_metadata pipeline
    mlf_client.log_artifact(run_id, args.item_metadata_pipeline_fp)

### Wrap inference function and register best checkpoint as MLflow model

In [37]:
inferrer = RankerInferenceWrapper(best_model)

In [47]:
sample_input = {
    "user_ids": [idm.get_user_id(0)],
    "item_sequences": [[idm.get_item_id(0), idm.get_item_id(1)]],
    **{col: [train_df.iloc[0].fillna(0)[col]] for col in args.item_feature_cols},
    "item_ids": [idm.get_item_id(0)],
}
sample_output = inferrer.infer([0], [[0, 1]], [train_item_features[0]], [0])
sample_output

array([0.63961154], dtype=float32)

In [48]:
sample_input

{'user_ids': ['AE225O22SA7DLBOGOEIFL7FT5VYQ'],
 'item_sequences': [['0375869026', '9625990674']],
 'main_category': ['Video Games'],
 'categories': [array(['Video Games', 'PlayStation 4', 'Games'], dtype=object)],
 'price': ['None'],
 'parent_asin_rating_cnt_365d': [6.0],
 'parent_asin_rating_avg_prev_rating_365d': [3.6666666666666665],
 'parent_asin_rating_cnt_90d': [0.0],
 'parent_asin_rating_avg_prev_rating_90d': [0],
 'parent_asin_rating_cnt_30d': [0.0],
 'parent_asin_rating_avg_prev_rating_30d': [0],
 'parent_asin_rating_cnt_7d': [0.0],
 'parent_asin_rating_avg_prev_rating_7d': [0],
 'item_ids': ['0375869026']}

In [49]:
if args.log_to_mlflow:
    run_id = trainer.logger.run_id
    sample_output_np = sample_output
    signature = infer_signature(sample_input, sample_output_np)
    idm_filename = idm_fp.split("/")[-1]
    item_metadata_pipeline_filename = args.item_metadata_pipeline_fp.split("/")[-1]
    with mlflow.start_run(run_id=run_id):
        mlflow.pyfunc.log_model(
            python_model=inferrer,
            artifact_path="inferrer",
            # We log the id_mapping to the predict function so that it can accept item_id and automatically convert ot item_indice for PyTorch model to use
            artifacts={
                "idm": mlflow.get_artifact_uri(idm_filename),
                "item_metadata_pipeline": mlflow.get_artifact_uri(
                    item_metadata_pipeline_filename
                ),
            },
            signature=signature,
            input_example=sample_input,
            registered_model_name=args.mlf_model_name,
        )


Since MLflow 2.16.0, we no longer convert dictionary input example to pandas Dataframe, and directly save it as a json object. If the model expects a pandas DataFrame input instead, please pass the pandas DataFrame as input example directly.



Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Registered model 'ranker' already exists. Creating a new version of this model...
2024/10/27 23:18:21 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ranker, version 15
Created version '15' of model 'ranker'.


Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

  "inputs": {
    "user_ids": [
      "AE225O22SA7DLBOGOEIFL7FT5VYQ"
    ],
    "item_sequences": [
      [
        "0375869026",
        "9625990674"
      ]
    ],
    "main_category": [
      "Video Games"
    ],
    "categories": [
      [
        "Video Games",
        "PlayStation 4",
        "Games"
      ]
    ],
    "price": [
      "None"
    ],
    "parent_asin_rating_cnt_365d": [
      6.0
    ],
    "parent_asin_rating_avg_prev_rating_365d": [
      3.6666666666666665
    ],
    "parent_asin_rating_cnt_90d": [
      0.0
    ],
    "parent_asin_rating_avg_prev_rating_90d": [
      0
    ],
    "parent_asin_rating_cnt_30d": [
      0.0
    ],
    "parent_asin_rating_avg_prev_rating_30d": [
      0
    ],
    "parent_asin_rating_cnt_7d": [
      0.0
    ],
    "parent_asin_rating_avg_prev_rating_7d": [
      0
    ],
    "item_ids": [
      "0375869026"
    ]
  }
}. Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To 

# Set the newly trained model as champion

In [50]:
if args.log_to_mlflow:
    val_roc_auc = trainer.logger.experiment.get_run(trainer.logger.run_id).data.metrics[
        "val_roc_auc"
    ]

    if val_roc_auc > args.min_roc_auc:
        logger.info(f"Aliasing the new model as champion...")
        model_version = (
            mlf_client.get_registered_model(args.mlf_model_name)
            .latest_versions[0]
            .version
        )

        mlf_client.set_registered_model_alias(
            name=args.mlf_model_name, alias="champion", version=model_version
        )

        mlf_client.set_model_version_tag(
            name=args.mlf_model_name,
            version=model_version,
            key="author",
            value="quy.dinh",
        )

[32m2024-10-27 23:18:27.661[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mAliasing the new model as champion...[0m


# Clean up

In [51]:
all_params = [args]

if args.log_to_mlflow:
    with mlflow.start_run(run_id=run_id):
        for params in all_params:
            params_dict = params.dict()
            params_ = dict()
            for k, v in params_dict.items():
                if k == "top_K":
                    k = "top_big_K"
                if k == "top_k":
                    k = "top_small_k"
                params_[f"{params.__repr_name__()}.{k}"] = v
            mlflow.log_params(params_)

2024/10/27 23:18:29 INFO mlflow.tracking._tracking_service.client: 🏃 View run 017-item-popularity at: http://localhost:5002/#/experiments/3/runs/c5a5c02ddd594693b90788abae264d36.
2024/10/27 23:18:29 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5002/#/experiments/3.
