In [5]:
import sys
import os 
sys.path.append(os.path.dirname(os.getcwd()))
%load_ext autoreload
%autoreload 2
import mlflow
import pandas as pd
import numpy as np
from catboost import CatBoostRanker
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import GroupKFold
import numpy as np
from metrics import bootstrap_mrr_at_k, mrr_at_k, hit_rate_at_k, mrr_at_k_per_experiment, hit_rate_at_k_per_experiment
from models import get_catboost_ranker, get_pooled_dataset
from settings import DATA_FOLDER
from notebooks.experiment_data import get_experiment_data, split_experiment_train_test_val_data
from features import FEATURES

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
data = get_experiment_data()

users_df size before removing small experiments: 501008 rows
users_df size after removing small experiments: 500953 rows


### Baseline Ranking Experiment: Model Training


In [None]:
mlflow.set_experiment("Ranking Candidate Model")

model_params = {
    'iterations': 400,
    'depth': 3,
    'learning_rate': 0.03,
    'l2_leaf_reg': 3,
    'random_seed': 42,
    'subsample': 1.0,
    'rsm': 1.0, 
    'random_strength': 0.5,
    'bagging_temperature': 0.25
}
feature_set = FEATURES.get_subset("cpu_best_feats")

with mlflow.start_run(run_name=f"ranking_baseline_experiment"):
    train_data, _, test_data = split_experiment_train_test_val_data(data, n_last_test=4, n_last_val=0)
    mlflow.log_param("feature_set", feature_set.name)

    n_splits = 5
    # Use "EXPERIMENT_ID" to group
    group_kfold = GroupKFold(n_splits=n_splits)
    groups = train_data[["EXPERIMENT_ID", "RECIPIENT_ID"]].apply(lambda x: f"{x['EXPERIMENT_ID']}_{x['RECIPIENT_ID']}", axis=1)

    cv_results = []
    for fold, (train_idx, val_idx) in enumerate(group_kfold.split(train_data, groups=groups)):
        fold_train_data = train_data.iloc[train_idx]
        fold_val_data = train_data.iloc[val_idx]

        # Prepare pools and datasets per fold
        train_df, train_pool, train_group_ids, X_train, y_train = get_pooled_dataset(
            fold_train_data, pos_neg_ratio=1, cols=feature_set.all, cat_cols=feature_set.categorical
        )
        val_df, val_pool, val_group_ids, X_val, y_val = get_pooled_dataset(
            fold_val_data, pos_neg_ratio=0, cols=feature_set.all, cat_cols=feature_set.categorical
        )
        cat_features = train_pool.get_cat_feature_indices()

        # Fit the model
        ranker = get_catboost_ranker(cat_features, model_params)
        ranker.fit(train_pool, eval_set=val_pool, use_best_model=True, plot=True)

        # Validation scoring
        scores = ranker.predict(X_val)
        preds = val_df.assign(
            PRED=scores
        )[["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "PRED"]]
        y_true = val_df[
            ["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "CLICK"]
        ].query("CLICK==1")

        mrr_at_5_catboost = ranker.eval_metrics(val_pool, metrics=['MRR'])
        mrr_at_5 = mrr_at_k(preds, y_true, 5, prefix=f"cvfold{fold}_")
        hit_rate_1 = hit_rate_at_k(preds, y_true, 1, prefix=f"cvfold{fold}_")
        cv_results.append(
            {
                "fold": fold,
                "mrr_at_5": mrr_at_5,
                "mrr_at_5_catboost": mrr_at_5_catboost,
                "hit_rate_1": hit_rate_1,
            }
        )
        print(cv_results)
    mrr_at_5_values = [fold_result["mrr_at_5"] for fold_result in cv_results]
    hit_rate_1_values = [fold_result["hit_rate_1"] for fold_result in cv_results]

    mean_mrr_at_5 = np.mean(mrr_at_5_values)
    std_mrr_at_5 = np.std(mrr_at_5_values)
    mean_hit_rate_1 = np.mean(hit_rate_1_values)
    std_hit_rate_1 = np.std(hit_rate_1_values)

    mlflow.log_metric("cv_mean_mrr_at_5", mean_mrr_at_5)
    mlflow.log_metric("cv_std_mrr_at_5", std_mrr_at_5)
    mlflow.log_metric("cv_mean_hit_rate_1", mean_hit_rate_1)
    mlflow.log_metric("cv_std_hit_rate_1", std_hit_rate_1)
    print("CV results:", cv_results)

    # Train on all data and predict on test data
    train_df, train_pool, train_group_ids, X_train, y_train = get_pooled_dataset(train_data, pos_neg_ratio=1, cols=feature_set.all, cat_cols=feature_set.categorical) 
    test_df, test_pool, test_group_ids, X_test, y_test = get_pooled_dataset(test_data, cols=feature_set.all, cat_cols=feature_set.categorical)
    
    cat_features = train_pool.get_cat_feature_indices()
    ranker = get_model("ranker", cat_features, model_params)
    ranker.fit(train_pool, eval_set=test_pool, use_best_model=True)

    scores = ranker.predict(X_test)
    preds = test_df.assign(PRED=scores)[["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "PRED"]]
    y_true = test_df[["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "CLICK"]].query("CLICK==1") 

    mrr_at_k_per_experiment(preds, y_true, 5, prefix="test_")
    hit_rate_at_k_per_experiment(preds, y_true, 1, prefix="test_")
    bootstrap_mrr_at_k(preds, y_true, 5, bootstrap_samples=100, random_state=42, prefix="test_")


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Groupwise loss function. OneHotMaxSize set to 10
0:	learn: 0.1313823	test: 0.2598104	best: 0.2598104 (0)	total: 118ms	remaining: 47.1s
1:	learn: 0.1520087	test: 0.2985597	best: 0.2985597 (1)	total: 177ms	remaining: 35.3s
2:	learn: 0.1522025	test: 0.2988600	best: 0.2988600 (2)	total: 265ms	remaining: 35.1s
3:	learn: 0.1517329	test: 0.2977098	best: 0.2988600 (2)	total: 337ms	remaining: 33.4s
4:	learn: 0.1525387	test: 0.2995117	best: 0.2995117 (4)	total: 401ms	remaining: 31.7s
5:	learn: 0.1523699	test: 0.2992094	best: 0.2995117 (4)	total: 499ms	remaining: 32.7s
6:	learn: 0.1527525	test: 0.2999224	best: 0.2999224 (6)	total: 557ms	remaining: 31.3s
7:	learn: 0.1825624	test: 0.3581270	best: 0.3581270 (7)	total: 609ms	remaining: 29.9s
8:	learn: 0.1955941	test: 0.3846000	best: 0.3846000 (8)	total: 664ms	remaining: 28.8s
9:	learn: 0.1953375	test: 0.3839503	best: 0.3846000 (8)	total: 717ms	remaining: 28s
10:	learn: 0.1956101	test: 0.3847164	best: 0.3847164 (10)	total: 767ms	remaining: 27.1s
11:	l

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Groupwise loss function. OneHotMaxSize set to 10
0:	learn: 0.1204331	test: 0.2405099	best: 0.2405099 (0)	total: 54.3ms	remaining: 21.7s
1:	learn: 0.1235642	test: 0.2470217	best: 0.2470217 (1)	total: 106ms	remaining: 21.2s
2:	learn: 0.1243747	test: 0.2498177	best: 0.2498177 (2)	total: 167ms	remaining: 22.2s
3:	learn: 0.1244056	test: 0.2498177	best: 0.2498177 (2)	total: 225ms	remaining: 22.2s
4:	learn: 0.1262954	test: 0.2516652	best: 0.2516652 (4)	total: 296ms	remaining: 23.4s
5:	learn: 0.1243840	test: 0.2498177	best: 0.2516652 (4)	total: 352ms	remaining: 23.1s
6:	learn: 0.1243600	test: 0.2498177	best: 0.2516652 (4)	total: 405ms	remaining: 22.7s
7:	learn: 0.1243624	test: 0.2498177	best: 0.2516652 (4)	total: 464ms	remaining: 22.7s
8:	learn: 0.1243745	test: 0.2498095	best: 0.2516652 (4)	total: 521ms	remaining: 22.6s
9:	learn: 0.1243822	test: 0.2497059	best: 0.2516652 (4)	total: 581ms	remaining: 22.7s
10:	learn: 0.1243611	test: 0.2497597	best: 0.2516652 (4)	total: 635ms	remaining: 22.4s
11:

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Groupwise loss function. OneHotMaxSize set to 10
0:	learn: 0.1261408	test: 0.2489877	best: 0.2489877 (0)	total: 61.2ms	remaining: 24.4s
1:	learn: 0.1305941	test: 0.2575370	best: 0.2575370 (1)	total: 116ms	remaining: 23.1s
2:	learn: 0.1539941	test: 0.3040432	best: 0.3040432 (2)	total: 179ms	remaining: 23.7s
3:	learn: 0.1539941	test: 0.3040432	best: 0.3040432 (2)	total: 238ms	remaining: 23.6s
4:	learn: 0.1935174	test: 0.3855988	best: 0.3855988 (4)	total: 297ms	remaining: 23.4s
5:	learn: 0.1935174	test: 0.3855988	best: 0.3855988 (4)	total: 351ms	remaining: 23s
6:	learn: 0.1938669	test: 0.3864938	best: 0.3864938 (6)	total: 423ms	remaining: 23.8s
7:	learn: 0.1938669	test: 0.3864938	best: 0.3864938 (6)	total: 507ms	remaining: 24.8s
8:	learn: 0.1913063	test: 0.3795720	best: 0.3864938 (6)	total: 566ms	remaining: 24.6s
9:	learn: 0.1910563	test: 0.3793868	best: 0.3864938 (6)	total: 626ms	remaining: 24.4s
10:	learn: 0.1910563	test: 0.3793868	best: 0.3864938 (6)	total: 693ms	remaining: 24.5s
11:	l

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Groupwise loss function. OneHotMaxSize set to 10
0:	learn: 0.1260586	test: 0.2496561	best: 0.2496561 (0)	total: 51.8ms	remaining: 20.7s
1:	learn: 0.1392590	test: 0.2764361	best: 0.2764361 (1)	total: 103ms	remaining: 20.5s
2:	learn: 0.1396886	test: 0.2767043	best: 0.2767043 (2)	total: 155ms	remaining: 20.5s
3:	learn: 0.1396886	test: 0.2767043	best: 0.2767043 (2)	total: 208ms	remaining: 20.6s
4:	learn: 0.1699526	test: 0.3379847	best: 0.3379847 (4)	total: 269ms	remaining: 21.2s
5:	learn: 0.1801540	test: 0.3579904	best: 0.3579904 (5)	total: 326ms	remaining: 21.4s
6:	learn: 0.1801540	test: 0.3579904	best: 0.3579904 (5)	total: 378ms	remaining: 21.2s
7:	learn: 0.1801540	test: 0.3579904	best: 0.3579904 (5)	total: 431ms	remaining: 21.1s
8:	learn: 0.1801540	test: 0.3579904	best: 0.3579904 (5)	total: 497ms	remaining: 21.6s
9:	learn: 0.1801579	test: 0.3579495	best: 0.3579904 (5)	total: 558ms	remaining: 21.8s
10:	learn: 0.1801540	test: 0.3579904	best: 0.3579904 (5)	total: 612ms	remaining: 21.6s
11:

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Groupwise loss function. OneHotMaxSize set to 10
0:	learn: 0.1398966	test: 0.2784459	best: 0.2784459 (0)	total: 56.6ms	remaining: 22.6s
1:	learn: 0.1460187	test: 0.2910870	best: 0.2910870 (1)	total: 109ms	remaining: 21.7s
2:	learn: 0.1563374	test: 0.3129158	best: 0.3129158 (2)	total: 166ms	remaining: 21.9s
3:	learn: 0.1564507	test: 0.3128408	best: 0.3129158 (2)	total: 221ms	remaining: 21.9s
4:	learn: 0.1560319	test: 0.3120540	best: 0.3129158 (2)	total: 288ms	remaining: 22.7s
5:	learn: 0.1625099	test: 0.3247013	best: 0.3247013 (5)	total: 353ms	remaining: 23.2s
6:	learn: 0.1625382	test: 0.3247637	best: 0.3247637 (6)	total: 407ms	remaining: 22.8s
7:	learn: 0.1627925	test: 0.3252925	best: 0.3252925 (7)	total: 592ms	remaining: 29s
8:	learn: 0.1672766	test: 0.3346280	best: 0.3346280 (8)	total: 677ms	remaining: 29.4s
9:	learn: 0.1673434	test: 0.3347009	best: 0.3347009 (9)	total: 761ms	remaining: 29.7s
10:	learn: 0.1673422	test: 0.3347009	best: 0.3347009 (9)	total: 822ms	remaining: 29.1s
11:	l

In [4]:
ranker.save_model("./cpu_ranking_candidate_model.cbm")

In [6]:
# Load the CatBoost ranker from the saved model file
ranker = CatBoostRanker()
ranker.load_model("./cpu_ranking_candidate_model.cbm")


<catboost.core.CatBoostRanker at 0x156c97bd0>

In [4]:
from metrics import hit_rate_at_k_per_experiment, mrr_at_k_per_experiment
scores = ranker.predict(test_pool)
preds = test_df.assign(
    PRED=scores
)[["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "PRED"]]
y_true = test_df[
    ["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "CLICK"]
].query("CLICK==1")

print("MRR@5 per experiment:", mrr_at_k_per_experiment(preds, y_true, 5)[0], ", the mean uplift is", mrr_at_k_per_experiment(preds, y_true, 5)[2])
print("Hit rate@1 per experiment:", hit_rate_at_k_per_experiment(preds, y_true, 1)[0], ", the mean uplift is", hit_rate_at_k_per_experiment(preds, y_true, 1)[1])

MRR@5 per experiment: 0.504505198191288 , the mean uplift is 3.0758106495702853
Hit rate@1 per experiment: 0.25018247986475955 , the mean uplift is 10.186593653741571
