In [1]:
import os
import numpy as np
import pandas as pd
import pickle
from datetime import datetime

from main.data.session_dataset import SessionDataset
from main.utils.config_util import extract_config
from main.eval.evaluation import Evaluation, EvaluationReport, metrics
from main.grurec.grurec import GRURec
from main.popularity.session_popular import SessionBasedPopular
from main.sknn.sknn import SessionBasedCF
from main.transformer.bert.bert import BERT
from main.transformer.bert.bert_with_embeddings import BERTWithEmbeddings
from main.transformer.sasrec.sasrec import SASRec

In [2]:
# Input data and output location.
DATASET_FILENAME = "beauty/dataset.pickle"
EXPERIMENTS_FOLDER = "results/beauty/main"

# Settings shared by the model cells below.
# TOP_Ks: evaluation cut-offs; models are asked for max(TOP_Ks) recommendations.
TOP_Ks = [10, 20]
# Passed to the models as `is_verbose` and `cores` respectively.
IS_VERBOSE = True
CORES = 1

In [3]:
# Names of the models to run; each training cell below is guarded by a
# membership test against this set.
INCLUDE = {
    "Popular",
    "SKNN",
    "GRU4Rec",
    "SASRec",
    "BERT4Rec",
    "LLMTOBERT4Rec",
}

## Load dataset

In [4]:
# Load the session dataset from the pickle file at DATASET_FILENAME. Later cells
# read the train data, test prompts and test ground truths from this object.
dataset = SessionDataset.from_pickle(DATASET_FILENAME)

## Initialize, train and store all models

In [5]:
def train_and_predict(model):
    """Train `model`, predict for the test prompts and persist the results.

    The model is trained on `dataset.get_train_data()` and then asked for
    `max(TOP_Ks)` recommendations for `dataset.get_test_prompts()`. Two files
    are written below `EXPERIMENTS_FOLDER`, both named after `model.name()`
    and the current timestamp (second resolution):

    * ``recs_<name>_<timestamp>.pickle``     - the pickled recommendations.
    * ``recs_<name>_<timestamp>_config.txt`` - ``str(extract_config(model))``.

    Args:
        model: Object exposing `train(data)`, `predict(prompts, top_k=...)`
            and `name()`.
    """
    # `dataset`, `TOP_Ks` and `EXPERIMENTS_FOLDER` are only read here, so no
    # `global` declaration is required.
    model.train(dataset.get_train_data())
    recs = model.predict(dataset.get_test_prompts(), top_k=max(TOP_Ks))

    # Generate a unique name for each model run.
    cur_timestamp = datetime.today().strftime("%Y%m%d%H%M%S")
    base_filename = f"{EXPERIMENTS_FOLDER}/recs_{model.name()}_{cur_timestamp}"
    recs_filename = f"{base_filename}.pickle"

    # Make sure the directory for the experiments exists.
    os.makedirs(os.path.dirname(recs_filename), exist_ok=True)

    # Store recommendations as pickle.
    with open(recs_filename, mode="wb") as write_file:
        pickle.dump(recs, write_file)

    # Store the model configuration next to the recommendations. The encoding
    # is explicit so the file does not depend on the platform default.
    with open(f"{base_filename}_config.txt", "w", encoding="utf-8") as config_file:
        config_file.write(str(extract_config(model)))
    

In [6]:
if "SKNN" in INCLUDE:
    # Model-specific settings for SessionBasedCF.
    sknn_config = dict(
        decay="harmonic",
        idf_weighting=True,
        k=70,
        sample_size=1090,
        sampling="random",
        sequential_filter=False,
        sequential_weighting=False,
        session_time_discount=None,
        similarity_measure="dot",
    )
    # Notebook-wide settings and prediction options are merged in last.
    sknn_config |= dict(cores=CORES, is_verbose=IS_VERBOSE, filter_prompt_items=True)

    model_sknn = SessionBasedCF(**sknn_config)
    train_and_predict(model_sknn)

In [7]:
if "BERT4Rec" in INCLUDE:
    # Model-specific settings for BERT.
    bert_config = dict(
        N=20,
        transformer_layer_kwargs=dict(layout="FDRN"),
        activation="gelu",
        L=2,
        h=2,
        emb_dim=512,
        drop_rate=0.25,
        mask_prob=0.65,
        fit_batch_size=256,
        optimizer_kwargs=dict(learning_rate=0.0001, clipnorm=5, weight_decay=0.004),
        filepath_weights=f"{EXPERIMENTS_FOLDER}/bert_weights/weights",
    )
    # Notebook-wide settings and prediction options are merged in last.
    bert_config |= dict(
        cores=CORES,
        is_verbose=IS_VERBOSE,
        pred_seen=False,
        train_val_fraction=0.1,
        pred_batch_size=5000,
    )

    model_bert = BERT(**bert_config)
    train_and_predict(model_bert)
    # Save the weights to the same path that was configured above.
    model_bert.model.save_weights(bert_config["filepath_weights"])

In [8]:
if "LLMTOBERT4Rec" in INCLUDE:
    # Model-specific settings for BERTWithEmbeddings.
    llmtobert_config = dict(
        N=20,
        pred_batch_size=5000,
        transformer_layer_kwargs=dict(layout="FDRN"),
        activation="gelu",
        L=3,
        h=3,
        emb_dim=208,
        drop_rate=0.0,
        mask_prob=0.5,
        fit_batch_size=256,
        optimizer_kwargs=dict(learning_rate=0.0001, clipnorm=5, weight_decay=0.002),
        product_embeddings_location="beauty/product_embeddings_openai.csv.gzip",
        filepath_weights=f"{EXPERIMENTS_FOLDER}/bert_with_embeddings_weights/weights",
    )
    # Notebook-wide settings and prediction options are merged in last.
    # `pred_batch_size` is set again here with the same value (5000).
    llmtobert_config |= dict(
        cores=CORES,
        is_verbose=IS_VERBOSE,
        pred_seen=False,
        train_val_fraction=0.1,
        pred_batch_size=5000,
    )

    model_llmtobert = BERTWithEmbeddings(**llmtobert_config)
    train_and_predict(model_llmtobert)
    # Save the weights to the same path that was configured above.
    model_llmtobert.model.save_weights(llmtobert_config["filepath_weights"])
    

In [9]:
if "GRU4Rec" in INCLUDE:
    grurec_config = {
        # Model-specific settings for GRURec.
        "N": 20,
        "activation": "relu",
        "drop_rate": 0.0,
        "emb_dim": 208,
        "fit_batch_size": 128,
        "hidden_dim": 320,
        "optimizer_kwargs": {"learning_rate": 0.001, "weight_decay": 0},
        "filepath_weights": f"{EXPERIMENTS_FOLDER}/grurec_weights/weights",
        # Notebook-wide settings and prediction options.
        "cores": CORES,
        "is_verbose": IS_VERBOSE,
        "pred_seen": False,
        "train_val_fraction": 0.1,
        "pred_batch_size": 5000,
    }

    model_grurec = GRURec(**grurec_config)
    train_and_predict(model_grurec)
    # Save the weights to the same path that was configured above.
    model_grurec.model.save_weights(grurec_config["filepath_weights"])


In [21]:
if "SASRec" in INCLUDE:
    sasrec_config = {
        # Model-specific settings for SASRec.
        "N": 20,
        "transformer_layer_kwargs": {"layout": "NFDR"},
        "activation": "relu",
        "L": 1,
        "h": 1,
        "emb_dim": 320,
        "drop_rate": 0.3,
        "fit_batch_size": 128,
        "optimizer_kwargs": {"learning_rate": 0.001, "clipnorm": 100, "weight_decay": 0},
        "filepath_weights": f"{EXPERIMENTS_FOLDER}/sasrec_weights/weights",
        # Notebook-wide settings and prediction options.
        "cores": CORES,
        "is_verbose": IS_VERBOSE,
        "pred_seen": False,
        "train_val_fraction": 0.1,
        "pred_batch_size": 5000,
    }

    model_sasrec = SASRec(**sasrec_config)
    train_and_predict(model_sasrec)
    # Save the weights to the same path that was configured above.
    model_sasrec.model.save_weights(sasrec_config["filepath_weights"])


In [22]:
if "Popular" in INCLUDE:
    # This model takes only the notebook-wide settings.
    model_popular = SessionBasedPopular(
        cores=CORES,
        is_verbose=IS_VERBOSE,
    )
    train_and_predict(model_popular)

## Evaluation

In [29]:
# (model name, recommendations) pairs, one per stored recommendation file.
all_model_recommendations: list[tuple[str, dict[int, np.ndarray]]] = []

# Get all model recommendation results from the experiments folder.
# `os.listdir` returns entries in arbitrary order, so sort them to make the
# load order reproducible across runs and machines.
for rec_file in sorted(os.listdir(EXPERIMENTS_FOLDER)):
    # Skip unrelated files (e.g. the `*_config.txt` files and weight folders).
    if not (rec_file.startswith("recs_") and rec_file.endswith(".pickle")):
        continue

    # Parse the model name. Only the leading prefix and the trailing extension
    # are stripped; `str.replace` would also remove these substrings if they
    # occurred inside the model name itself.
    model_name: str = rec_file.removeprefix("recs_").removesuffix(".pickle")

    # Load the recommendations.
    # NOTE: `pickle.load` can execute arbitrary code; only load files that were
    # produced by this notebook or another trusted source.
    with open(f"{EXPERIMENTS_FOLDER}/{rec_file}", mode="rb") as read_file:
        recommendations: dict[int, np.ndarray] = pickle.load(read_file)

    all_model_recommendations.append((model_name, recommendations))

In [30]:
# External inputs that some metrics need, keyed by the dependency they satisfy.
dependencies = {
    metrics.MetricDependency.NUM_ITEMS: dataset.get_unique_item_count(),
    metrics.MetricDependency.ITEM_COUNT: dataset.get_item_counts(),
    metrics.MetricDependency.SAMPLE_COUNT: dataset.get_sample_counts(),
}

# Build one merged report per model: the report of the first cut-off absorbs
# the results of every later cut-off.
eval_reports: list[EvaluationReport] = []
for model_name, recommendations in all_model_recommendations:
    model_report = None
    for top_k in TOP_Ks:
        report: EvaluationReport = Evaluation.eval(
            recommendations,
            dataset.get_test_ground_truths(),
            top_k=top_k,
            metrics_per_sample=False,
            dependencies=dependencies,
            cores=1,  # Using a single core is the fastest for evaluation.
            model_name=model_name,
        )

        if model_report is not None:
            # Fold this cut-off's results into the model's existing report.
            model_report.results.update(report.results)
            continue
        model_report = report
    eval_reports.append(model_report)

  intersect_mask_all = np.vstack(
                ground-truth samples there were less than 20 predictions.
                ground-truth samples there were less than 20 predictions.


In [31]:
# Combine the per-model reports into a single evaluation and render it as a table.
all_results: Evaluation = Evaluation.from_results(eval_reports)
# NOTE(review): the returned object is used through `.data` below, so it is
# presumably a pandas Styler wrapping the results frame rather than a plain
# DataFrame - confirm against `Evaluation.results_as_table`.
all_results_df = all_results.results_as_table(
    caption=f"Experimental results: Leveraging Large Language Models for Sequential Recommendation ({DATASET_FILENAME})",
    max_color="darkgreen",
)
# Rank the models by NDCG at the largest configured cut-off. The column name is
# derived from TOP_Ks so the cell keeps working when the cut-offs change
# (it is "NDCG@20" for TOP_Ks = [10, 20]).
all_results_df.data.sort_values(by=f"NDCG@{max(TOP_Ks)}", inplace=True, ascending=False)
all_results_df

Unnamed: 0,NDCG@10,HitRate@10,MRR@10,Catalog coverage@10,Serendipity@10,Novelty@10,NDCG@20,HitRate@20,MRR@20,Catalog coverage@20,Serendipity@20,Novelty@20
LLM2BERT4Rec_20230704134941,0.040789,0.07608,0.030123,0.18015,0.072052,11.68753,0.051216,0.1177,0.032933,0.259731,0.109868,11.888101
LLMSeqSim_LAST_ONLY_DOT,0.043951,0.063101,0.038004,0.762664,0.062878,13.819031,0.047974,0.079212,0.039081,0.88877,0.078989,13.857696
V-SKNN_20230705153306,0.041488,0.070933,0.032601,0.672672,0.068919,12.24072,0.047453,0.094652,0.034223,0.889017,0.090624,12.492262
BERT4REC_20230704134841,0.033911,0.066682,0.024114,0.231138,0.064444,12.293144,0.043007,0.102708,0.026603,0.311792,0.097561,12.422634
SASRec_20230707111058,0.027618,0.053032,0.019911,0.185687,0.050571,11.758696,0.034563,0.080779,0.02178,0.265763,0.07429,11.930004
GRU4Rec_20230704134951,0.025971,0.050347,0.018714,0.164862,0.047214,11.482907,0.033782,0.081226,0.020859,0.24279,0.07429,11.635967
LLMSeqPrompt_temp_05_top_p_1,0.023717,0.045872,0.017009,0.425006,0.044977,12.504886,0.029545,0.069143,0.01858,0.661102,0.066458,13.006879
Popularity_20230704135000,0.004515,0.010293,0.00281,0.000826,0.001119,9.187262,0.006359,0.017677,0.003304,0.001653,0.001343,9.408061
