In [1]:
import sys
import os 
sys.path.append(os.path.dirname(os.getcwd()))
%load_ext autoreload
%autoreload 2
import mlflow
import pandas as pd
import numpy as np
from catboost import CatBoostRanker
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import GroupKFold
import numpy as np
from metrics import bootstrap_mrr_at_k, mrr_at_k, hit_rate_at_k, mrr_at_k_per_experiment, hit_rate_at_k_per_experiment
from models import get_catboost_ranker, get_pooled_dataset
from settings import DATA_FOLDER
from notebooks.experiment_data import get_experiment_data, split_experiment_train_test_val_data
from features import FEATURES

In [2]:
data = get_experiment_data()

users_df size before removing small experiments: 501008 rows
users_df size after removing small experiments: 500953 rows


### Baseline Ranking Experiment: Model Training


In [3]:
mlflow.set_experiment("Ranking Candidate Model")

model_params = {
    'iterations': 400,
    'depth': 3,
    'learning_rate': 0.05,
    'l2_leaf_reg': 3,
    'random_seed': 42,
    'subsample': 0.6,
    'rsm': 1.0, 
    'random_strength': 1,
    'bagging_temperature': 0.25
}
feature_set = FEATURES.get_subset("creative_14")

with mlflow.start_run(run_name=f"ranking_baseline_experiment"):
    train_data, _, test_data = split_experiment_train_test_val_data(data, n_last_test=4, n_last_val=0)
    mlflow.log_param("feature_set", feature_set.name)

    n_splits = 5
    # Use "EXPERIMENT_ID" to group
    group_kfold = GroupKFold(n_splits=n_splits)
    groups = train_data[["EXPERIMENT_ID", "RECIPIENT_ID"]].apply(lambda x: f"{x['EXPERIMENT_ID']}_{x['RECIPIENT_ID']}", axis=1)

    cv_results = []
    for fold, (train_idx, val_idx) in enumerate(group_kfold.split(train_data, groups=groups)):
        fold_train_data = train_data.iloc[train_idx]
        fold_val_data = train_data.iloc[val_idx]

        # Prepare pools and datasets per fold
        train_df, train_pool, train_group_ids, X_train, y_train = get_pooled_dataset(
            fold_train_data, pos_neg_ratio=1, cols=feature_set.all, cat_cols=feature_set.categorical
        )
        val_df, val_pool, val_group_ids, X_val, y_val = get_pooled_dataset(
            fold_val_data, pos_neg_ratio=0, cols=feature_set.all, cat_cols=feature_set.categorical
        )
        cat_features = train_pool.get_cat_feature_indices()

        # Fit the model
        ranker = get_catboost_ranker(cat_features, model_params)
        ranker.fit(train_pool, eval_set=val_pool, use_best_model=True, plot=True)

        # Validation scoring
        scores = ranker.predict(X_val)
        preds = val_df.assign(
            PRED=scores
        )[["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "PRED"]]
        y_true = val_df[
            ["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "CLICK"]
        ].query("CLICK==1")

        mrr_at_5_catboost = ranker.eval_metrics(val_pool, metrics=['MRR'])
        mrr_at_5 = mrr_at_k(preds, y_true, 5, prefix=f"cvfold{fold}_")
        hit_rate_1 = hit_rate_at_k(preds, y_true, 1, prefix=f"cvfold{fold}_")
        cv_results.append(
            {
                "fold": fold,
                "mrr_at_5": mrr_at_5,
                "mrr_at_5_catboost": mrr_at_5_catboost,
                "hit_rate_1": hit_rate_1,
            }
        )
        print(cv_results)
    mrr_at_5_values = [fold_result["mrr_at_5"] for fold_result in cv_results]
    hit_rate_1_values = [fold_result["hit_rate_1"] for fold_result in cv_results]

    mean_mrr_at_5 = np.mean(mrr_at_5_values)
    std_mrr_at_5 = np.std(mrr_at_5_values)
    mean_hit_rate_1 = np.mean(hit_rate_1_values)
    std_hit_rate_1 = np.std(hit_rate_1_values)

    mlflow.log_metric("cv_mean_mrr_at_5", mean_mrr_at_5)
    mlflow.log_metric("cv_std_mrr_at_5", std_mrr_at_5)
    mlflow.log_metric("cv_mean_hit_rate_1", mean_hit_rate_1)
    mlflow.log_metric("cv_std_hit_rate_1", std_hit_rate_1)
    print("CV results:", cv_results)

    # Train on all data and predict on test data
    train_df, train_pool, train_group_ids, X_train, y_train = get_pooled_dataset(train_data, pos_neg_ratio=1, cols=feature_set.all, cat_cols=feature_set.categorical) 
    test_df, test_pool, test_group_ids, X_test, y_test = get_pooled_dataset(test_data, cols=feature_set.all, cat_cols=feature_set.categorical)
    
    cat_features = train_pool.get_cat_feature_indices()
    ranker = get_catboost_ranker(cat_features, model_params)
    ranker.fit(train_pool, eval_set=test_pool, use_best_model=True)

    scores = ranker.predict(X_test)
    preds = test_df.assign(PRED=scores)[["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "PRED"]]
    y_true = test_df[["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "CLICK"]].query("CLICK==1") 

    mrr_at_k_per_experiment(preds, y_true, 5, prefix="test_")
    hit_rate_at_k_per_experiment(preds, y_true, 1, prefix="test_")
    bootstrap_mrr_at_k(preds, y_true, 5, bootstrap_samples=100, random_state=42, prefix="test_")


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Groupwise loss function. OneHotMaxSize set to 10
0:	learn: 0.1541221	test: 0.3063925	best: 0.3063925 (0)	total: 115ms	remaining: 45.8s
1:	learn: 0.1560198	test: 0.3102660	best: 0.3102660 (1)	total: 170ms	remaining: 33.8s
2:	learn: 0.1554621	test: 0.3092895	best: 0.3102660 (1)	total: 227ms	remaining: 30s
3:	learn: 0.1895795	test: 0.3752390	best: 0.3752390 (3)	total: 293ms	remaining: 29s
4:	learn: 0.1908206	test: 0.3765915	best: 0.3765915 (4)	total: 347ms	remaining: 27.4s
5:	learn: 0.1910961	test: 0.3772432	best: 0.3772432 (5)	total: 399ms	remaining: 26.2s
6:	learn: 0.1910411	test: 0.3771390	best: 0.3772432 (5)	total: 451ms	remaining: 25.3s
7:	learn: 0.1910403	test: 0.3771390	best: 0.3772432 (5)	total: 558ms	remaining: 27.3s
8:	learn: 0.1995273	test: 0.3930559	best: 0.3930559 (8)	total: 612ms	remaining: 26.6s
9:	learn: 0.1995273	test: 0.3930559	best: 0.3930559 (8)	total: 664ms	remaining: 25.9s
10:	learn: 0.1994723	test: 0.3929517	best: 0.3930559 (8)	total: 716ms	remaining: 25.3s
11:	lear

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Groupwise loss function. OneHotMaxSize set to 10
0:	learn: 0.1151909	test: 0.2295514	best: 0.2295514 (0)	total: 47.7ms	remaining: 19s
1:	learn: 0.1753512	test: 0.3484487	best: 0.3484487 (1)	total: 95.3ms	remaining: 19s
2:	learn: 0.1729942	test: 0.3434841	best: 0.3484487 (1)	total: 142ms	remaining: 18.8s
3:	learn: 0.1944993	test: 0.3904374	best: 0.3904374 (3)	total: 189ms	remaining: 18.7s
4:	learn: 0.2002334	test: 0.4016880	best: 0.4016880 (4)	total: 236ms	remaining: 18.7s
5:	learn: 0.2003327	test: 0.4004888	best: 0.4016880 (4)	total: 284ms	remaining: 18.6s
6:	learn: 0.2107465	test: 0.4198935	best: 0.4198935 (6)	total: 330ms	remaining: 18.5s
7:	learn: 0.2086725	test: 0.4145727	best: 0.4198935 (6)	total: 378ms	remaining: 18.5s
8:	learn: 0.2095268	test: 0.4162545	best: 0.4198935 (6)	total: 426ms	remaining: 18.5s
9:	learn: 0.2110295	test: 0.4192370	best: 0.4198935 (6)	total: 486ms	remaining: 19s
10:	learn: 0.2110705	test: 0.4193178	best: 0.4198935 (6)	total: 572ms	remaining: 20.2s
11:	lear

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Groupwise loss function. OneHotMaxSize set to 10
0:	learn: 0.1285182	test: 0.2534424	best: 0.2534424 (0)	total: 47.8ms	remaining: 19.1s
1:	learn: 0.1380222	test: 0.2720597	best: 0.2720597 (1)	total: 92.3ms	remaining: 18.4s
2:	learn: 0.1380222	test: 0.2720597	best: 0.2720597 (1)	total: 139ms	remaining: 18.4s
3:	learn: 0.1454382	test: 0.2862984	best: 0.2862984 (3)	total: 188ms	remaining: 18.6s
4:	learn: 0.1745927	test: 0.3419259	best: 0.3419259 (4)	total: 257ms	remaining: 20.3s
5:	learn: 0.1866269	test: 0.3647572	best: 0.3647572 (5)	total: 302ms	remaining: 19.8s
6:	learn: 0.1949377	test: 0.3801749	best: 0.3801749 (6)	total: 349ms	remaining: 19.6s
7:	learn: 0.1948230	test: 0.3798765	best: 0.3801749 (6)	total: 396ms	remaining: 19.4s
8:	learn: 0.1948230	test: 0.3798765	best: 0.3801749 (6)	total: 448ms	remaining: 19.5s
9:	learn: 0.1948230	test: 0.3798765	best: 0.3801749 (6)	total: 498ms	remaining: 19.4s
10:	learn: 0.2058684	test: 0.4005761	best: 0.4005761 (10)	total: 545ms	remaining: 19.3s
1

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Groupwise loss function. OneHotMaxSize set to 10
0:	learn: 0.1486819	test: 0.2954531	best: 0.2954531 (0)	total: 47.2ms	remaining: 18.8s
1:	learn: 0.1850511	test: 0.3688306	best: 0.3688306 (1)	total: 93.3ms	remaining: 18.6s
2:	learn: 0.1855904	test: 0.3698235	best: 0.3698235 (2)	total: 143ms	remaining: 19s
3:	learn: 0.1934264	test: 0.3855157	best: 0.3855157 (3)	total: 190ms	remaining: 18.8s
4:	learn: 0.2015534	test: 0.3991811	best: 0.3991811 (4)	total: 275ms	remaining: 21.8s
5:	learn: 0.2005169	test: 0.3985997	best: 0.3991811 (4)	total: 323ms	remaining: 21.2s
6:	learn: 0.2021384	test: 0.4010584	best: 0.4010584 (6)	total: 371ms	remaining: 20.8s
7:	learn: 0.2019740	test: 0.4018016	best: 0.4018016 (7)	total: 418ms	remaining: 20.5s
8:	learn: 0.2049459	test: 0.4067703	best: 0.4067703 (8)	total: 465ms	remaining: 20.2s
9:	learn: 0.2057133	test: 0.4077407	best: 0.4077407 (9)	total: 524ms	remaining: 20.4s
10:	learn: 0.2071480	test: 0.4108934	best: 0.4108934 (10)	total: 586ms	remaining: 20.7s
11:

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Groupwise loss function. OneHotMaxSize set to 10
0:	learn: 0.1686667	test: 0.3349361	best: 0.3349361 (0)	total: 47.2ms	remaining: 18.8s
1:	learn: 0.1876880	test: 0.3712751	best: 0.3712751 (1)	total: 93.2ms	remaining: 18.6s
2:	learn: 0.1993597	test: 0.3958495	best: 0.3958495 (2)	total: 140ms	remaining: 18.5s
3:	learn: 0.2103711	test: 0.4184422	best: 0.4184422 (3)	total: 188ms	remaining: 18.6s
4:	learn: 0.2103583	test: 0.4184318	best: 0.4184422 (3)	total: 241ms	remaining: 19.1s
5:	learn: 0.2124215	test: 0.4218621	best: 0.4218621 (5)	total: 292ms	remaining: 19.2s
6:	learn: 0.2199113	test: 0.4361829	best: 0.4361829 (6)	total: 339ms	remaining: 19s
7:	learn: 0.2204858	test: 0.4385017	best: 0.4385017 (7)	total: 387ms	remaining: 19s
8:	learn: 0.2225370	test: 0.4413014	best: 0.4413014 (8)	total: 435ms	remaining: 18.9s
9:	learn: 0.2239972	test: 0.4444153	best: 0.4444153 (9)	total: 484ms	remaining: 18.9s
10:	learn: 0.2241151	test: 0.4461679	best: 0.4461679 (10)	total: 574ms	remaining: 20.3s
11:	l

In [4]:
ranker.save_model("./candidate_production_model.cbm")

In [5]:
from metrics import hit_rate_at_k_per_experiment, mrr_at_k_per_experiment
scores = ranker.predict(test_pool)
preds = test_df.assign(
    PRED=scores
)[["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "PRED"]]
y_true = test_df[
    ["EXPERIMENT_ID", "RECIPIENT_ID", "VARIATION_ID", "CLICK"]
].query("CLICK==1")

print("MRR@5 per experiment:", mrr_at_k_per_experiment(preds, y_true, 5)[0], ", the mean uplift is", mrr_at_k_per_experiment(preds, y_true, 5)[2])
print("Hit rate@1 per experiment:", hit_rate_at_k_per_experiment(preds, y_true, 1)[0], ", the mean uplift is", hit_rate_at_k_per_experiment(preds, y_true, 1)[1])

MRR@5 per experiment: 0.5407051009000517 , the mean uplift is 11.138961250373587
Hit rate@1 per experiment: 0.28869274803295786 , the mean uplift is 31.500204970892053
