In [None]:
import os
import pickle
from datetime import datetime

import numpy as np
import pandas as pd

from main.data.session_dataset import SessionDataset
from main.eval.evaluation import Evaluation, EvaluationReport, metrics
from main.eval.metrics.ndcg import NormalizedDiscountedCumulativeGain
from main.grurec.grurec import GRURec
from main.grurec.grurec_with_embeddings import GRURecWithEmbeddings
from main.llm_based.similarity_model.llm_seq_sim import LLMSeqSim
from main.popularity.session_popular import SessionBasedPopular
from main.sknn.sknn import SessionBasedCF
from main.transformer.bert.bert import BERT
from main.transformer.bert.bert_with_embeddings import BERTWithEmbeddings
from main.transformer.sasrec.sasrec import SASRec
from main.transformer.sasrec.sasrec_with_embeddings import SASRecWithEmbeddings
from main.utils.config_util import extract_config

In [None]:
# All input artifacts of this notebook live below one dataset directory.
_DATASET_DIR = "beauty"

# Pickled datasets, read with SessionDataset.from_pickle.
DATASET_FILENAME = f"{_DATASET_DIR}/dataset.pickle"
OPENAI_DATASET_FILENAME = f"{_DATASET_DIR}/openai_augmented_dataset.pickle"
PALM_DATASET_FILENAME = f"{_DATASET_DIR}/palm_augmented_dataset.pickle"

# Embedding files, handed to the LLM2* models as "product_embeddings_location".
OPENAI_EMBEDDINGS_PATH = f"{_DATASET_DIR}/product_embeddings_openai.csv.gz"
PALM_EMBEDDINGS_PATH = f"{_DATASET_DIR}/product_embeddings_palm.csv.gz"

# Recommendations and config dumps are written to (and later read from) here.
EXPERIMENTS_FOLDER = f"results/{_DATASET_DIR}/main"

# Model configuration
# Settings shared by every model config.
CORES = 15
IS_VERBOSE = True

# Settings shared by the GRU4Rec, BERT4Rec and SASRec configs (plain and LLM2*).
EARLY_STOPPING_PATIENCE = 2
PRED_BATCH_SIZE = 5000
PRED_SEEN = False
TRAIN_VAL_FRACTION = 0.1

# Settings shared by the LLMSeqSim and SKNN configs.
FILTER_PROMPT_ITEMS = True
# Only used by the LLMSeqSim and SKNN_EMB configs.
MAX_SESSION_LENGTH_FOR_DECAY_PRECOMPUTATION = 500

# Evaluation cut-offs; the largest one is also the number of items predicted.
TOP_Ks = [10, 20]

In [None]:
# Names of the models to run; every training cell below checks membership
# in this set before doing any work. Remove a name to skip that model.
INCLUDE = {
    # Non-neural models.
    "Popular",
    "V-SKNN",
    "SKNN_EMB",
    "LLMSeqSim",
    # Neural models, each followed by its variant that takes a product-embeddings file.
    "GRU4Rec",
    "LLM2GRU4Rec",
    "BERT4Rec",
    "LLM2BERT4Rec",
    "SASRec",
    "LLM2SASRec",
}

## Load datasets

In [None]:
# Load the three pickled datasets in a fixed order; the target names mirror
# the filename constants they are read from.
dataset, openai_dataset, palm_dataset = [
    SessionDataset.from_pickle(pickle_path)
    for pickle_path in (
        DATASET_FILENAME,
        OPENAI_DATASET_FILENAME,
        PALM_DATASET_FILENAME,
    )
]

## Initialize, train and store all models

In [None]:
def train_and_predict_n(model_class, model_config, dataset, with_item_data, n_trials=5):
    """Train a model `n_trials` times and store the predictions of the best trial.

    Every trial builds a fresh `model_class(**model_config)`, trains it on
    `dataset.get_train_data()`, predicts the top `max(TOP_Ks)` items for
    `dataset.get_test_prompts()` and scores those predictions with NDCG
    against `dataset.get_test_ground_truths()`. The predictions of the trial
    with the highest NDCG are pickled into `EXPERIMENTS_FOLDER`, together
    with a text file holding the extracted model configuration.

    NOTE(review): the best trial is selected on the test ground truths, so
    the stored predictions are a best-of-n on the test set — confirm this is
    the intended protocol.

    Args:
        model_class: Class of the model; instantiated as `model_class(**model_config)`.
        model_config: Keyword arguments for the model constructor.
        dataset: Dataset providing train data, test prompts, ground truths and
            (if requested) item data.
        with_item_data: If true, `dataset.get_item_data()` is passed to
            `model.train` as a second argument.
        n_trials: Number of independent train/predict runs; must be >= 1.

    Raises:
        ValueError: If `n_trials` is smaller than 1 (there would be no
            predictions to store).
    """
    if n_trials < 1:
        raise ValueError(f"n_trials must be at least 1, got {n_trials}")

    # Derive the metric column from the cut-off actually used for evaluation
    # instead of hard-coding "NDCG@20", so changing TOP_Ks cannot desync them.
    eval_top_k = max(TOP_Ks)
    ndcg_column = f"NDCG@{eval_top_k}"

    # This instance is only used for its name and for the config dump.
    model_for_info = model_class(**model_config)
    model_name = model_for_info.name()

    best_metric = None
    best_predictions = None
    for _ in range(n_trials):
        model = model_class(**model_config)
        if with_item_data:
            model.train(dataset.get_train_data(), dataset.get_item_data())
        else:
            model.train(dataset.get_train_data())
        model_predictions = model.predict(dataset.get_test_prompts(), top_k=eval_top_k)

        report = Evaluation.eval(
            predictions=model_predictions,
            ground_truths=dataset.get_test_ground_truths(),
            model_name=model_name,
            top_k=eval_top_k,
            metrics=[
                NormalizedDiscountedCumulativeGain(),
            ],
            dependencies={
                metrics.MetricDependency.NUM_ITEMS: dataset.get_unique_item_count(),
            },
            metrics_per_sample=False,
        )
        ndcg = report.to_df().at[model_name, ndcg_column]

        # Always keep the first trial, so that a run in which every trial
        # scores 0 (or NaN) still stores real predictions instead of None.
        # A NaN incumbent is replaced by any later trial because comparisons
        # against NaN are always false.
        if best_predictions is None or np.isnan(best_metric) or ndcg > best_metric:
            best_metric = ndcg
            best_predictions = model_predictions

    # Generate a unique name for each model.
    cur_timestamp = datetime.today().strftime("%Y%m%d%H%M%S")
    base_filename = f"{EXPERIMENTS_FOLDER}/recs_{model_name}_{cur_timestamp}"
    recs_filename = f"{base_filename}.pickle"

    # Make sure the directory for the experiments exists.
    os.makedirs(os.path.dirname(recs_filename), exist_ok=True)

    # Store recommendations as pickle.
    with open(recs_filename, mode="wb") as write_file:
        pickle.dump(best_predictions, write_file)

    # Store the model configuration next to the recommendations.
    with open(f"{base_filename}_config.txt", "w") as f:
        f.write(str(extract_config(model_for_info)))

In [None]:
if "GRU4Rec" in INCLUDE:
    grurec_config = {
        # Model hyperparameters.
        "N": 20,
        "activation": "relu",
        "emb_dim": 80,
        "fit_batch_size": 32,
        "hidden_dim": 32,
        "optimizer_kwargs": {
            "learning_rate": 0.0072,
            "weight_decay": 0.08929,
        },
        # Notebook-wide settings from the configuration cell.
        "cores": CORES,
        "early_stopping_patience": EARLY_STOPPING_PATIENCE,
        "is_verbose": IS_VERBOSE,
        "pred_batch_size": PRED_BATCH_SIZE,
        "pred_seen": PRED_SEEN,
        "train_val_fraction": TRAIN_VAL_FRACTION,
    }
    train_and_predict_n(
        model_class=GRURec,
        model_config=grurec_config,
        dataset=dataset,
        with_item_data=False,
    )

In [None]:
if "LLM2GRU4Rec" in INCLUDE:
    llm2grurec_config = {
        # Model hyperparameters.
        "N": 20,
        "activation": "relu",
        "emb_dim": 192,
        "fit_batch_size": 96,
        "hidden_dim": 496,
        "optimizer_kwargs": {
            "learning_rate": 0.0013,
            "weight_decay": 0.05719,
        },
        # Notebook-wide settings from the configuration cell.
        "cores": CORES,
        "early_stopping_patience": EARLY_STOPPING_PATIENCE,
        "is_verbose": IS_VERBOSE,
        "pred_batch_size": PRED_BATCH_SIZE,
        "pred_seen": PRED_SEEN,
        "train_val_fraction": TRAIN_VAL_FRACTION,
        # Product embeddings file and how its dimensionality is reduced.
        "product_embeddings_location": OPENAI_EMBEDDINGS_PATH,
        "red_method": "RANDOM",
        "red_params": {},
    }
    train_and_predict_n(
        model_class=GRURecWithEmbeddings,
        model_config=llm2grurec_config,
        dataset=dataset,
        with_item_data=False,
    )

In [None]:
if "LLMSeqSim" in INCLUDE:
    llmseqsim_config = {
        # Model hyperparameters.
        "batch_size": 500,
        "combination_decay": "harmonic",
        "dim_reduction_config": {
            "normalize": True,
            "reduced_dim_size": 512,
            "reduction_config": {
                "reduction_technique": "pca",
                "config": {},
            },
        },
        "embedding_combination_strategy": "mean",
        "similarity_measure": "cosine",
        # Notebook-wide settings from the configuration cell.
        "cores": CORES,
        "filter_prompt_items": FILTER_PROMPT_ITEMS,
        "is_verbose": IS_VERBOSE,
        "max_session_length_for_decay_precomputation": MAX_SESSION_LENGTH_FOR_DECAY_PRECOMPUTATION,
    }
    # Trained on the OpenAI dataset, with its item data passed to `train`.
    train_and_predict_n(
        model_class=LLMSeqSim,
        model_config=llmseqsim_config,
        dataset=openai_dataset,
        with_item_data=True,
    )

In [None]:
if "Popular" in INCLUDE:
    # A single trial only (presumably because this baseline is deterministic — confirm).
    train_and_predict_n(
        model_class=SessionBasedPopular,
        model_config={"cores": CORES, "is_verbose": IS_VERBOSE},
        dataset=dataset,
        with_item_data=False,
        n_trials=1,
    )

In [None]:
if "V-SKNN" in INCLUDE:
    v_sknn_config = {
        # Model hyperparameters.
        "decay": "harmonic",
        "idf_weighting": False,
        "k": 320,
        "sample_size": 1530,
        "sampling": "random",
        "similarity_measure": "dot",
        # Notebook-wide settings from the configuration cell.
        "cores": CORES,
        "filter_prompt_items": FILTER_PROMPT_ITEMS,
        "is_verbose": IS_VERBOSE,
    }
    train_and_predict_n(
        model_class=SessionBasedCF,
        model_config=v_sknn_config,
        dataset=dataset,
        with_item_data=False,
    )

In [None]:
if "SKNN_EMB" in INCLUDE:
    sknn_emb_config = {
        # Model hyperparameters.
        "decay": "harmonic",
        "dim_reduction_config": {
            "normalize": True,
            "reduced_dim_size": 512,
            "reduction_config": {
                "reduction_technique": "lda",
                "config": {},
            },
        },
        "idf_weighting": False,
        "k": 60,
        "last_n_items": 50,
        "prompt_session_emb_comb_strategy": "mean",
        "sample_size": 1370,
        "sampling": "recent",
        "similarity_measure": "dot",
        "training_session_decay": None,
        "training_session_emb_comb_strategy": "mean",
        "use_item_embeddings": True,
        # Notebook-wide settings from the configuration cell.
        "cores": CORES,
        "filter_prompt_items": FILTER_PROMPT_ITEMS,
        "is_verbose": IS_VERBOSE,
        "max_session_length_for_decay_precomputation": MAX_SESSION_LENGTH_FOR_DECAY_PRECOMPUTATION,
    }
    # Trained on the PaLM dataset, with its item data passed to `train`.
    train_and_predict_n(
        model_class=SessionBasedCF,
        model_config=sknn_emb_config,
        dataset=palm_dataset,
        with_item_data=True,
    )

In [None]:
if "BERT4Rec" in INCLUDE:
    bert_config = {
        # Model hyperparameters.
        "L": 1,
        "N": 20,
        "activation": "gelu",
        "drop_rate": 0.4,
        "emb_dim": 192,
        "fit_batch_size": 256,
        "h": 2,
        "mask_prob": 0.9,
        "optimizer_kwargs": {
            "clipnorm": 92,
            "learning_rate": 0.0023,
            "weight_decay": 0.04088,
        },
        "transformer_layer_kwargs": {"layout": "FDRN"},
        # Notebook-wide settings from the configuration cell.
        "cores": CORES,
        "early_stopping_patience": EARLY_STOPPING_PATIENCE,
        "is_verbose": IS_VERBOSE,
        "pred_batch_size": PRED_BATCH_SIZE,
        "pred_seen": PRED_SEEN,
        "train_val_fraction": TRAIN_VAL_FRACTION,
    }
    train_and_predict_n(
        model_class=BERT,
        model_config=bert_config,
        dataset=dataset,
        with_item_data=False,
    )

In [None]:
if "LLM2BERT4Rec" in INCLUDE:
    llm2bert_config = {
        # Model hyperparameters.
        "L": 2,
        "N": 20,
        "activation": "gelu",
        "drop_rate": 0.45,
        "emb_dim": 448,
        "fit_batch_size": 224,
        "h": 1,
        "mask_prob": 0.65,
        "optimizer_kwargs": {
            "clipnorm": 5,
            "learning_rate": 0.0012,
            "weight_decay": 0.02354,
        },
        "transformer_layer_kwargs": {"layout": "FDRN"},
        # Notebook-wide settings from the configuration cell.
        "cores": CORES,
        "early_stopping_patience": EARLY_STOPPING_PATIENCE,
        "is_verbose": IS_VERBOSE,
        "pred_batch_size": PRED_BATCH_SIZE,
        "pred_seen": PRED_SEEN,
        "train_val_fraction": TRAIN_VAL_FRACTION,
        # Product embeddings file and how its dimensionality is reduced.
        "product_embeddings_location": PALM_EMBEDDINGS_PATH,
        "red_method": "LDA",
        "red_params": {},
    }
    train_and_predict_n(
        model_class=BERTWithEmbeddings,
        model_config=llm2bert_config,
        dataset=dataset,
        with_item_data=False,
    )

In [None]:
if "SASRec" in INCLUDE:
    sasrec_config = {
        # Model hyperparameters.
        "L": 2,
        "N": 20,
        "activation": "relu",
        "drop_rate": 0.25,
        "emb_dim": 48,
        "fit_batch_size": 32,
        "h": 3,
        "optimizer_kwargs": {
            "learning_rate": 0.0048,
            "weight_decay": 0.09622,
        },
        "transformer_layer_kwargs": {"layout": "NFDR"},
        # Notebook-wide settings from the configuration cell.
        "cores": CORES,
        "early_stopping_patience": EARLY_STOPPING_PATIENCE,
        "is_verbose": IS_VERBOSE,
        "pred_batch_size": PRED_BATCH_SIZE,
        "pred_seen": PRED_SEEN,
        "train_val_fraction": TRAIN_VAL_FRACTION,
    }
    train_and_predict_n(
        model_class=SASRec,
        model_config=sasrec_config,
        dataset=dataset,
        with_item_data=False,
    )

In [None]:
if "LLM2SASRec" in INCLUDE:
    llm2sasrec_config = {
        # Model hyperparameters.
        "L": 1,
        "N": 20,
        "activation": "relu",
        "drop_rate": 0.0,
        "emb_dim": 352,
        "fit_batch_size": 416,
        "h": 2,
        "optimizer_kwargs": {
            "learning_rate": 0.0003,
            "weight_decay": 0.057,
        },
        "transformer_layer_kwargs": {"layout": "NFDR"},
        # Notebook-wide settings from the configuration cell.
        "cores": CORES,
        "early_stopping_patience": EARLY_STOPPING_PATIENCE,
        "is_verbose": IS_VERBOSE,
        "pred_batch_size": PRED_BATCH_SIZE,
        "pred_seen": PRED_SEEN,
        "train_val_fraction": TRAIN_VAL_FRACTION,
        # Product embeddings file and how its dimensionality is reduced.
        "product_embeddings_location": PALM_EMBEDDINGS_PATH,
        "red_method": "PCA",
        "red_params": {},
    }
    train_and_predict_n(
        model_class=SASRecWithEmbeddings,
        model_config=llm2sasrec_config,
        dataset=dataset,
        with_item_data=False,
    )

## Evaluation

In [None]:
# (model name, recommendations) pairs, one per stored recommendation file.
all_model_recommendations: list[tuple[str, dict[int, np.ndarray]]] = []

# Get all model recommendation results from the experiments folder.
# os.listdir returns entries in arbitrary order, so sort them to make the
# order of the loaded models (and of the reports built from them) reproducible.
for rec_file in sorted(os.listdir(EXPERIMENTS_FOLDER)):
    # Skip unrelated files (e.g. the "*_config.txt" dumps stored alongside).
    if not (rec_file.startswith("recs_") and rec_file.endswith(".pickle")):
        continue

    # Parse the model name: strip only the leading "recs_" and the trailing
    # ".pickle". A plain str.replace would also remove these substrings from
    # the middle of a model name.
    model_name: str = rec_file.removeprefix("recs_").removesuffix(".pickle")

    # Load the recommendations.
    # NOTE: pickle.load can execute arbitrary code; only point
    # EXPERIMENTS_FOLDER at files produced by this notebook.
    with open(f"{EXPERIMENTS_FOLDER}/{rec_file}", mode="rb") as read_file:
        recommendations: dict[int, np.ndarray] = pickle.load(read_file)

    all_model_recommendations.append((model_name, recommendations))

In [None]:
# External inputs that some metrics need in addition to the predictions.
dependencies = {
    metrics.MetricDependency.NUM_ITEMS: dataset.get_unique_item_count(),
    metrics.MetricDependency.ITEM_COUNT: dataset.get_item_counts(),
    metrics.MetricDependency.SAMPLE_COUNT: dataset.get_sample_counts(),
}

# Build one report per model that holds the results of every cut-off in TOP_Ks.
eval_reports: list[EvaluationReport] = []
for model_name, recommendations in all_model_recommendations:
    model_report = None
    for top_k in TOP_Ks:
        report: EvaluationReport = Evaluation.eval(
            predictions=recommendations,
            ground_truths=dataset.get_test_ground_truths(),
            model_name=model_name,
            top_k=top_k,
            dependencies=dependencies,
            metrics_per_sample=False,
            cores=1,  # Using a single core is the fastest for evaluation.
        )

        if model_report is not None:
            # Fold the results of this cut-off into the model's first report.
            model_report.results.update(report.results)
        else:
            model_report = report
    eval_reports.append(model_report)

In [None]:
# Combine the per-model reports into a single results object and render it
# as a captioned table.
all_results: Evaluation = Evaluation.from_results(eval_reports)
all_results_df: pd.DataFrame = all_results.results_as_table(
    caption=f"Experimental results: Improving Sequential Recommendations with LLMs ({DATASET_FILENAME})",
    max_color="darkgreen"
)
# NOTE(review): `.data` is accessed below, so the returned object looks like a
# pandas Styler wrapping a DataFrame rather than a plain DataFrame as annotated
# — confirm against `results_as_table`.
# Sort the underlying table in place, best NDCG@20 first. The column name is
# hard-coded; presumably it only exists while 20 is part of TOP_Ks — verify.
all_results_df.data.sort_values(by="NDCG@20", inplace=True, ascending=False)
# Bare last expression so the notebook displays the table.
all_results_df