In [None]:
import sys
import os

# add the parent of the current working directory to the import path so that
# the `src.*` packages imported below can be found
sys.path.append(os.path.abspath('..'))

import yaml
import pandas as pd
import optuna

from src.data.load import load_data
from src.data.prepare import prepare_data
from src.models.co_visit import CoVisit
from src.models.baseline import popular_items
from src.features.features import feature_engineering
from src.features.utils import build_rank_input
from src.models.tuner import BayesianSearch
from src.models.ranker import Ranker
from src.models.evaluator import Evaluation
from src.models.tracker import launch_mlflow, log_run
from src.models.utils import leave_last_k, set_global_seed

In [2]:
# load the experiment configuration (safe loader: plain YAML types only)
with open('config.yml', 'r') as file:
    config = yaml.safe_load(file)
del file

# seed the run from the configured value so results can be reproduced
set_global_seed(seed=config["general"]["seed"])

# start experiment tracking
launch_mlflow()

# ranking algorithm tuned and evaluated in the cells below
ALGORITHM = "XGBRanker"

**Data Preparation & Train/Test Split**

- Load and transform the 3 datasets (user, item and interaction data)
- Split the whole set into train, validation and test sets by segmenting it temporally

In [3]:
# read the raw datasets and run them through the configured preparation step
dfs = prepare_data(
    dataframes=load_data(config=config['data_loader']),
    config=config["data_preparation"],
)

In [4]:
# hold out the test set first, then split what remains into train / validation;
# both splits use the same "optimization" settings
split_config = config['optimization']
df_train, df_test = leave_last_k(df=dfs['data'], config=split_config)
df_train, df_valid = leave_last_k(df=df_train, config=split_config)

**Feature Engineering**

- Add negative samples, i.e., items likely to be disliked by the user
- Feature engineering: create cross user-item features for the ranking model

In [5]:
# Negative samples built from the current training interactions, pooled from
# two sources: the top-15 popular items and CoVisit's "negative" method.
neg_sample = pd.concat(
    [
        popular_items(ui_matrix=df_train, top_k=15),
        CoVisit(methods=["negative"]).fit(ui_matrix=df_train),
    ],
    ignore_index=True,
)

# Keep only the (user, item) pair and label every row with the first key of the
# rating-conversion mapping (presumably the "dislike" rating -- TODO confirm
# against config.yml).
# `.assign` returns a new frame rather than writing a column into the result of
# a column selection, which can raise pandas' SettingWithCopyWarning.
neg_sample = neg_sample[["user_id", "item_id"]].assign(
    rating=next(iter(config["data_preparation"]["rating_conversion"]))
)

In [6]:
# compute ranking features from the user, item and training-interaction frames
user_item_features = feature_engineering(
    dataframes={'user': dfs['user'], 'item': dfs['item'], 'data': df_train}
)

# append the negative samples to the training interactions
df_train = pd.concat([df_train, neg_sample], ignore_index=True)

# build the ranker input for each split from its first three columns
# (presumably user, item and rating -- the selection is positional)
df_train = build_rank_input(ratings=df_train.iloc[:, :3], features=user_item_features)
df_valid = build_rank_input(ratings=df_valid.iloc[:, :3], features=user_item_features)

del neg_sample, user_item_features

**Optimization & Evaluation**

- Hyper-parameters: search for the hyper-parameters that optimize the scoring metric for the given algorithm on the validation set
- Evaluation: retrieve the best hyper-parameters and rebuild the full training set (train + validation) to evaluate results on the test set

In [7]:
# tuner that searches the ranker's hyper-parameters for the chosen algorithm
tuner = BayesianSearch(config["optimization"], method="ranker", algorithm=ALGORITHM)


def objective(trial) -> float:
    """Return the tuner's result for one Optuna trial.

    Reads the notebook-level `df_train` and `df_valid` at call time.
    """
    return tuner.fit(df_train, df_valid, trial)


# the study maximizes the objective; the TPE sampler is seeded from the config
tpe_sampler = optuna.samplers.TPESampler(seed=config["general"]["seed"])
study = optuna.create_study(direction="maximize", sampler=tpe_sampler)
study.optimize(objective, n_trials=config["optimization"]["n_trials"])

# log the finished study and the tuner under the "Ranker" experiment
log_run(experiment_name="Ranker", study=study, tuner=tuner)

[I 2025-07-06 19:33:59,140] A new study created in memory with name: no-name-e230d7e3-5504-4f89-b3e6-a310313fb090
[I 2025-07-06 19:34:03,100] Trial 0 finished with value: 0.947485747613998 and parameters: {'learning_rate': 0.1878955193048339, 'gamma': 9.50714306409916, 'max_depth': 12, 'subsample': 0.7993292420985183, 'n_estimators': 104}. Best is trial 0 with value: 0.947485747613998.
[I 2025-07-06 19:34:18,133] Trial 1 finished with value: 0.9497204453870626 and parameters: {'learning_rate': 0.07884126564776513, 'gamma': 0.5808361216819946, 'max_depth': 14, 'subsample': 0.8005575058716043, 'n_estimators': 298}. Best is trial 1 with value: 0.9497204453870626.
[I 2025-07-06 19:34:22,598] Trial 2 finished with value: 0.9476756415694592 and parameters: {'learning_rate': 0.011271662653605422, 'gamma': 9.699098521619943, 'max_depth': 13, 'subsample': 0.6061695553391381, 'n_estimators': 113}. Best is trial 1 with value: 0.9497204453870626.
[I 2025-07-06 19:34:27,828] Trial 3 finished with v

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

🏃 View run 06JUL2025 at: http://127.0.0.1:5000/#/experiments/859922637182404151/runs/1415c217df5b48848b0f5ebb2391eee6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/859922637182404151
🏃 View run XGBRanker at: http://127.0.0.1:5000/#/experiments/859922637182404151/runs/9ae3d7ebb12e4e87ae922894db06c52e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/859922637182404151


In [8]:
# anti-join against the test set: left-merge the full interaction data with
# df_test on every test column and keep the rows found only on the left side,
# i.e. train and validation together
df_train = (
    dfs["data"]
    .merge(df_test, on=df_test.columns.tolist(), how="left", indicator=True)
    .loc[lambda merged: merged["_merge"] == "left_only"]
    .drop(columns=["_merge"])
)

In [9]:
# Negative samples built from the current df_train, pooled from two sources:
# the top-15 popular items and CoVisit's "negative" method.
neg_sample = pd.concat(
    [
        popular_items(ui_matrix=df_train, top_k=15),
        CoVisit(methods=["negative"]).fit(ui_matrix=df_train),
    ],
    ignore_index=True,
)

# Keep only the (user, item) pair and label every row with the first key of the
# rating-conversion mapping (presumably the "dislike" rating -- TODO confirm
# against config.yml).
# `.assign` returns a new frame rather than writing a column into the result of
# a column selection, which can raise pandas' SettingWithCopyWarning.
neg_sample = neg_sample[["user_id", "item_id"]].assign(
    rating=next(iter(config["data_preparation"]["rating_conversion"]))
)

In [10]:
# compute ranking features from the user, item and current df_train frames
user_item_features = feature_engineering(
    dataframes={'user': dfs['user'], 'item': dfs['item'], 'data': df_train}
)

# append the negative samples to the training interactions
df_train = pd.concat([df_train, neg_sample], ignore_index=True)

# build the ranker input for each split from its first three columns
# (presumably user, item and rating -- the selection is positional)
df_train = build_rank_input(ratings=df_train.iloc[:, :3], features=user_item_features)
df_test = build_rank_input(ratings=df_test.iloc[:, :3], features=user_item_features)

del neg_sample, user_item_features

In [11]:
# final hyper-parameters: the algorithm's fixed settings, overridden by the
# values of the best trial wherever both define the same key
hyperparams = {
    **config["optimization"]["ranker"][ALGORITHM]["fixed"],
    **study.best_trial.params,
}

# fit the ranker on the rebuilt training input ("X", "y" and "group" entries)
clf = Ranker(algorithm=ALGORITHM, params=hyperparams)
clf.fit(X=df_train["X"], y=df_train["y"], group=df_train["group"])

# score train and test; each split is passed as a tuple of its values in
# stored order. Left as the last expression so the result is displayed.
scorer = Evaluation(clf=clf)
scorer.fit(train=(*df_train.values(),), test=(*df_test.values(),))

Unnamed: 0_level_0,ndcg
dataset,Unnamed: 1_level_1
train,0.992149
test,0.952127


In [12]:
# NOTE: intentionally left disabled -- this shouldn't be done with the test set
# recs_score(df_test.iloc[:, :2], df_train.iloc[:, :3])