In [4]:
import polars as pl
import optuna
from catboost import CatBoostRegressor
import numpy as np

from src.util.constants import DATA_PATH, META_MODEL_PERFORMANCE, FIXED_CB_PARAMETERS
from src.util.common import load_from_pickle, mean_grouped_spearman_correlation, save_as_pickle

In [5]:
# Load the feature selection produced earlier and derive the column set that
# every frame in this notebook is restricted to.
selected_features = load_from_pickle(DATA_PATH / 'results/selected_features.pkl')
required_columns = ['era', 'target'] + selected_features


def _read_columns(path, columns: list[str]) -> pl.DataFrame:
    """Read only ``columns`` from the parquet file at ``path``.

    The selection is expressed on a lazy scan so that Polars can push the
    projection down into the parquet reader; columns that are not requested
    are then never materialised in memory (the previous eager read loaded the
    full file first and dropped the unused columns afterwards).

    Args:
        path: Location of the parquet file (``str`` or path-like).
        columns: Names of the columns to load, in the desired output order.

    Returns:
        An eager ``pl.DataFrame`` containing exactly ``columns``.
    """
    return pl.scan_parquet(path).select(columns).collect()


# One train / validate frame per fold; list index == fold index.
df_train_list = [
    _read_columns(DATA_PATH / f'folds/df_train_{fold}.parquet', required_columns)
    for fold in range(2)
]
df_validate_list = [
    _read_columns(DATA_PATH / f'folds/df_validate_{fold}.parquet', required_columns)
    for fold in range(2)
]

# Frame that additionally carries the 'numerai_meta_model' column, used to
# correlate model predictions with the meta model.
df_meta_model = _read_columns(
    DATA_PATH / 'folds/df_meta_model.parquet',
    required_columns + ['numerai_meta_model'],
)

In [6]:
def performance_approximation(model: CatBoostRegressor, fold: int) -> tuple[float, float, float, float]:
    """Score a fitted model on one validation fold and against the meta model.

    Args:
        model: Fitted regressor; it is used to predict both the validation
            frame of ``fold`` and the module-level ``df_meta_model`` frame.
        fold: Index into ``df_validate_list`` and ``META_MODEL_PERFORMANCE``.

    Returns:
        A tuple ``(performance, corr, corr_w_mm, mmc_approximation)``:

        * ``corr`` - result of ``mean_grouped_spearman_correlation`` for the
          validation predictions against ``target``, grouped by ``era``.
        * ``corr_w_mm`` - mean over eras of the Spearman correlation between
          the predictions on ``df_meta_model`` and ``numerai_meta_model``.
        * ``mmc_approximation`` - ``corr - corr_w_mm * META_MODEL_PERFORMANCE[fold]``.
        * ``performance`` - ``.75 * corr + 2.25 * mmc_approximation``.
    """
    # --- correlation with the target on the validation fold ---------------
    validation = df_validate_list[fold]
    validation_predictions = pl.Series(
        model.predict(validation[selected_features].to_pandas())
    )
    corr = mean_grouped_spearman_correlation(
        validation_predictions,
        validation['target'],
        validation['era']
    )

    # --- correlation with the meta model ----------------------------------
    meta_predictions = pl.Series(
        model.predict(df_meta_model[selected_features].to_pandas())
    )
    meta_frame = pl.DataFrame({
        'prediction': meta_predictions,
        'numerai_meta_model': df_meta_model['numerai_meta_model'],
        'era': df_meta_model['era']
    })
    # The window over 'era' with the 'explode' mapping strategy yields one
    # correlation value per era; those values are then averaged.
    per_era_correlation = meta_frame.select(
        pl.corr("prediction", "numerai_meta_model", method="spearman")
        .over('era', mapping_strategy='explode')
    )
    corr_w_mm = per_era_correlation.mean().item()

    # --- combined score ----------------------------------------------------
    mmc_approximation = corr - corr_w_mm * META_MODEL_PERFORMANCE[fold]
    performance = .75 * corr + 2.25 * mmc_approximation

    return performance, corr, corr_w_mm, mmc_approximation

In [7]:
def objective(trial: optuna.trial.Trial) -> float:
    """Optuna objective: fit CatBoost on each fold and return the mean score.

    A hyperparameter set is sampled from ``trial`` and merged over
    ``FIXED_CB_PARAMETERS`` (sampled values win on key clashes). One model is
    then trained per fold and scored with ``performance_approximation``. The
    per-fold metrics are stored as user attributes on the trial and printed.

    Args:
        trial: Optuna trial used for sampling, reporting and pruning.

    Returns:
        Mean of the per-fold ``performance`` values.

    Raises:
        optuna.exceptions.TrialPruned: After fold 0, when the trial number is
            greater than 10 and ``trial.should_prune()`` is true.
    """
    # The exponent gives a log-like scale for l2_leaf_reg that includes zero:
    # exponent 0 maps to 0, exponent 10 to just under 1e5.
    l2_exponent = trial.suggest_int("l2_leaf_reg_exponent", 0, 10)
    learning_rate = trial.suggest_float("learning_rate", .001, .2, log=True)
    iterations = trial.suggest_int("iterations", 10, 5000)
    border_count = trial.suggest_int('border_count', 2 ** 3 - 1, 2 ** 10 - 1, log=True)
    depth = trial.suggest_int("depth", 2, 12)
    subsample = trial.suggest_float("subsample", .1, 1.0)
    rsm = trial.suggest_float("rsm", .1, 1.0)
    min_data_in_leaf = trial.suggest_int("min_data_in_leaf", 10, 500000, log=True)

    sampled = {
        "learning_rate": learning_rate,
        "iterations": iterations,
        'border_count': border_count,
        "depth": depth,
        "subsample": subsample,
        "rsm": rsm,
        "min_data_in_leaf": min_data_in_leaf,
        "l2_leaf_reg": (10 ** l2_exponent - 1) / 10 ** 5,
    }

    # Start from the fixed settings, then overlay the sampled ones.
    parameters = dict(FIXED_CB_PARAMETERS)
    parameters.update(sampled)

    fold_scores = []

    for fold in range(2):
        train_frame = df_train_list[fold]
        model = CatBoostRegressor(**parameters)
        model.fit(
            X=train_frame[selected_features].to_pandas(),
            y=train_frame['target'].to_pandas()
        )

        performance, corr, corr_w_mm, mmc_approximation = performance_approximation(model, fold)

        # Keep every per-fold metric on the trial for later inspection.
        for attr_name, attr_value in (
            (f"performance_{fold}", performance),
            (f"correlation_{fold}", corr),
            (f"correlation_with_meta_model_{fold}", corr_w_mm),
            (f"mmc_approximation_{fold}", mmc_approximation),
        ):
            trial.set_user_attr(attr_name, attr_value)
        print(f'Results for fold {fold}: Performance {performance:.5f}, correlation: {corr:.5f}, correlation with meta model: {corr_w_mm:.5f}, MMC (approx.): {mmc_approximation:.5f}')

        # Report the fold score, then give up on unpromising trials right
        # after the first fold (by default, median pruner is used). Trials
        # numbered 10 or lower are never pruned.
        trial.report(performance, step=fold)
        past_first_trials = trial.number > 10
        if fold == 0 and past_first_trials and trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        fold_scores.append(performance)

    return np.mean(fold_scores)

In [8]:
# Seed the sampler so the sequence of suggested hyperparameters is the same
# after a kernel restart. TPESampler and MedianPruner are Optuna's defaults;
# they are spelled out only to make the configuration explicit.
# NOTE(review): full reproducibility also requires a fixed CatBoost seed -
# confirm that FIXED_CB_PARAMETERS sets one.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(),
)

[I 2026-01-28 00:25:12,058] A new study created in memory with name: no-name-bc357e79-8d4b-4bfb-8b99-15d8c4a2a4bd


In [None]:
# Cumulative results after 50 runs:
# best performance: 0.0479
# total runtime: 27:25 (presumably hh:mm - confirm)

In [15]:
# Run another batch of trials. The study keeps the trials from earlier
# executions of this cell, so results accumulate across runs.
study.optimize(
    func=objective,
    n_trials=19,
    show_progress_bar=True,
)

Best trial: 17. Best value: 0.0479147:   5%|▌         | 1/19 [22:25<6:43:41, 1345.63s/it]

Results for fold 0: Performance 0.04005, correlation: 0.02732, correlation with meta model: 0.42335, MMC (approx.): 0.00869
[I 2026-01-29 00:56:48,543] Trial 26 pruned. 


Best trial: 17. Best value: 0.0479147:  11%|█         | 2/19 [47:32<6:48:07, 1440.41s/it]

Results for fold 0: Performance 0.04398, correlation: 0.03411, correlation with meta model: 0.58943, MMC (approx.): 0.00818
[I 2026-01-29 01:21:55,306] Trial 27 pruned. 
Results for fold 0: Performance 0.05115, correlation: 0.03719, correlation with meta model: 0.61023, MMC (approx.): 0.01034


Best trial: 17. Best value: 0.0479147:  16%|█▌        | 3/19 [1:21:02<7:33:26, 1700.42s/it]

Results for fold 1: Performance 0.04003, correlation: 0.03077, correlation with meta model: 0.62809, MMC (approx.): 0.00753
[I 2026-01-29 01:55:25,140] Trial 28 finished with value: 0.04559124166054573 and parameters: {'l2_leaf_reg_exponent': 9, 'learning_rate': 0.035660559579216974, 'iterations': 4597, 'border_count': 176, 'depth': 7, 'subsample': 0.17793741041169583, 'rsm': 0.8501561275956984, 'min_data_in_leaf': 35}. Best is trial 17 with value: 0.04791472124516886.


Best trial: 17. Best value: 0.0479147:  21%|██        | 4/19 [1:30:44<5:14:43, 1258.91s/it]

Results for fold 0: Performance 0.03732, correlation: 0.02965, correlation with meta model: 0.52153, MMC (approx.): 0.00670
[I 2026-01-29 02:05:07,214] Trial 29 pruned. 


Best trial: 17. Best value: 0.0479147:  26%|██▋       | 5/19 [1:39:44<3:53:17, 999.83s/it] 

Results for fold 0: Performance 0.03282, correlation: 0.02967, correlation with meta model: 0.56757, MMC (approx.): 0.00470
[I 2026-01-29 02:14:07,657] Trial 30 pruned. 
Results for fold 0: Performance 0.04842, correlation: 0.03372, correlation with meta model: 0.53260, MMC (approx.): 0.01028


Best trial: 17. Best value: 0.0479147:  32%|███▏      | 6/19 [2:33:33<6:20:48, 1757.59s/it]

Results for fold 1: Performance 0.04090, correlation: 0.02945, correlation with meta model: 0.56980, MMC (approx.): 0.00836
[I 2026-01-29 03:07:56,194] Trial 31 finished with value: 0.04466163136206133 and parameters: {'l2_leaf_reg_exponent': 6, 'learning_rate': 0.03567987351619347, 'iterations': 4635, 'border_count': 345, 'depth': 7, 'subsample': 0.32036498874105857, 'rsm': 0.8138740536972658, 'min_data_in_leaf': 4344}. Best is trial 17 with value: 0.04791472124516886.
Results for fold 0: Performance 0.05237, correlation: 0.03562, correlation with meta model: 0.55032, MMC (approx.): 0.01140


Best trial: 17. Best value: 0.0479147:  37%|███▋      | 7/19 [2:50:34<5:03:22, 1516.91s/it]

Results for fold 1: Performance 0.03227, correlation: 0.02704, correlation with meta model: 0.58683, MMC (approx.): 0.00533
[I 2026-01-29 03:24:57,584] Trial 32 finished with value: 0.04232378554214124 and parameters: {'l2_leaf_reg_exponent': 3, 'learning_rate': 0.06718056135864918, 'iterations': 4018, 'border_count': 266, 'depth': 4, 'subsample': 0.1633336006978105, 'rsm': 0.6682414734903689, 'min_data_in_leaf': 105}. Best is trial 17 with value: 0.04791472124516886.
Results for fold 0: Performance 0.04957, correlation: 0.03557, correlation with meta model: 0.57713, MMC (approx.): 0.01017


Best trial: 17. Best value: 0.0479147:  42%|████▏     | 8/19 [3:47:40<6:29:29, 2124.51s/it]

Results for fold 1: Performance 0.04010, correlation: 0.03005, correlation with meta model: 0.60121, MMC (approx.): 0.00781
[I 2026-01-29 04:22:03,072] Trial 33 finished with value: 0.044835211951202 and parameters: {'l2_leaf_reg_exponent': 7, 'learning_rate': 0.024184631929954972, 'iterations': 4546, 'border_count': 181, 'depth': 5, 'subsample': 0.4636529321786548, 'rsm': 0.879172391084791, 'min_data_in_leaf': 21}. Best is trial 17 with value: 0.04791472124516886.
Results for fold 0: Performance 0.05100, correlation: 0.03585, correlation with meta model: 0.57113, MMC (approx.): 0.01072


Best trial: 17. Best value: 0.0479147:  47%|████▋     | 9/19 [4:13:45<5:24:57, 1949.79s/it]

Results for fold 1: Performance 0.04114, correlation: 0.03041, correlation with meta model: 0.60182, MMC (approx.): 0.00815
[I 2026-01-29 04:48:08,673] Trial 34 finished with value: 0.04606756232966254 and parameters: {'l2_leaf_reg_exponent': 9, 'learning_rate': 0.06333588050059274, 'iterations': 4248, 'border_count': 81, 'depth': 4, 'subsample': 0.24934556223840393, 'rsm': 0.7570803288013078, 'min_data_in_leaf': 82245}. Best is trial 17 with value: 0.04791472124516886.


Best trial: 17. Best value: 0.0479147:  53%|█████▎    | 10/19 [4:33:24<4:16:45, 1711.69s/it]

Results for fold 0: Performance 0.04763, correlation: 0.03398, correlation with meta model: 0.54852, MMC (approx.): 0.00984
[I 2026-01-29 05:07:47,219] Trial 35 pruned. 


Best trial: 17. Best value: 0.0479147:  58%|█████▊    | 11/19 [4:43:22<3:02:47, 1370.93s/it]

Results for fold 0: Performance 0.04629, correlation: 0.03571, correlation with meta model: 0.61458, MMC (approx.): 0.00867
[I 2026-01-29 05:17:45,512] Trial 36 pruned. 


Best trial: 17. Best value: 0.0479147:  63%|██████▎   | 12/19 [7:13:03<7:10:00, 3685.82s/it]

Results for fold 0: Performance 0.03579, correlation: 0.03085, correlation with meta model: 0.57319, MMC (approx.): 0.00562
[I 2026-01-29 07:47:25,950] Trial 37 pruned. 
Results for fold 0: Performance 0.05087, correlation: 0.03485, correlation with meta model: 0.54212, MMC (approx.): 0.01099


Best trial: 17. Best value: 0.0479147:  68%|██████▊   | 13/19 [7:51:38<5:27:04, 3270.83s/it]

Results for fold 1: Performance 0.03678, correlation: 0.02814, correlation with meta model: 0.57209, MMC (approx.): 0.00697
[I 2026-01-29 08:26:01,849] Trial 38 finished with value: 0.04382518164541431 and parameters: {'l2_leaf_reg_exponent': 4, 'learning_rate': 0.060635864887195114, 'iterations': 3599, 'border_count': 38, 'depth': 3, 'subsample': 0.5588913273325035, 'rsm': 0.9319608035509533, 'min_data_in_leaf': 201239}. Best is trial 17 with value: 0.04791472124516886.


Best trial: 17. Best value: 0.0479147:  74%|███████▎  | 14/19 [7:53:17<3:12:43, 2312.62s/it]

Results for fold 0: Performance 0.03529, correlation: 0.02959, correlation with meta model: 0.54034, MMC (approx.): 0.00582
[I 2026-01-29 08:27:40,331] Trial 39 pruned. 


Best trial: 17. Best value: 0.0479147:  79%|███████▉  | 15/19 [8:14:34<2:13:21, 2000.47s/it]

Results for fold 0: Performance 0.04341, correlation: 0.02957, correlation with meta model: 0.45739, MMC (approx.): 0.00944
[I 2026-01-29 08:48:57,373] Trial 40 pruned. 
Results for fold 0: Performance 0.05088, correlation: 0.03711, correlation with meta model: 0.61061, MMC (approx.): 0.01024


Best trial: 17. Best value: 0.0479147:  84%|████████▍ | 16/19 [8:52:25<1:44:05, 2081.92s/it]

Results for fold 1: Performance 0.03980, correlation: 0.03050, correlation with meta model: 0.62110, MMC (approx.): 0.00752
[I 2026-01-29 09:26:48,451] Trial 41 finished with value: 0.04534111737174789 and parameters: {'l2_leaf_reg_exponent': 10, 'learning_rate': 0.04469412876106178, 'iterations': 4829, 'border_count': 448, 'depth': 5, 'subsample': 0.29504541057174627, 'rsm': 0.6321199296234274, 'min_data_in_leaf': 15130}. Best is trial 17 with value: 0.04791472124516886.
Results for fold 0: Performance 0.05255, correlation: 0.03695, correlation with meta model: 0.58889, MMC (approx.): 0.01104


Best trial: 17. Best value: 0.0479147:  89%|████████▉ | 17/19 [9:11:08<59:46, 1793.43s/it]  

Results for fold 1: Performance 0.03678, correlation: 0.02902, correlation with meta model: 0.60400, MMC (approx.): 0.00667
[I 2026-01-29 09:45:30,971] Trial 42 finished with value: 0.04466589803117406 and parameters: {'l2_leaf_reg_exponent': 6, 'learning_rate': 0.02718706838146236, 'iterations': 4698, 'border_count': 275, 'depth': 4, 'subsample': 0.18572227113299933, 'rsm': 0.47729305680195605, 'min_data_in_leaf': 211562}. Best is trial 17 with value: 0.04791472124516886.


Best trial: 17. Best value: 0.0479147:  95%|█████████▍| 18/19 [9:23:14<24:32, 1472.79s/it]

Results for fold 0: Performance 0.04849, correlation: 0.03322, correlation with meta model: 0.51689, MMC (approx.): 0.01048
[I 2026-01-29 09:57:37,330] Trial 43 pruned. 


Best trial: 17. Best value: 0.0479147: 100%|██████████| 19/19 [9:36:21<00:00, 1820.08s/it]

Results for fold 0: Performance 0.04715, correlation: 0.03348, correlation with meta model: 0.53822, MMC (approx.): 0.00980
[I 2026-01-29 10:10:44,451] Trial 44 pruned. 





In [16]:
# Visualise the result: one slice plot per hyperparameter of the best trial.
for param_name in study.best_params:
    slice_figure = optuna.visualization.plot_slice(study, params=[param_name])
    slice_figure.show()

In [17]:
# Store the study object under the results directory so it can be reloaded
# later (save_as_pickle presumably pickles it to the given path - confirm).
save_as_pickle(
    study,
    DATA_PATH / 'results/study_cb.pkl',
)