# ðŸ§  Hull Tactical Market Prediction â€” AutoGluon Baseline

This notebook builds a baseline model for the [**Hull Tactical Market Prediction**](https://www.kaggle.com/competitions/hull-tactical-market-prediction) competition using **AutoGluon Tabular**. The goal is to predict trading positions that maximize a Sharpe-like performance metric.  

## Overview
- **Task:** Predict next-period trading positions (long / flat) using engineered financial features.
- **Approach:** Train an AutoGluon model on historical data to predict *forward returns*, then post-process those predictions into positions for scoring and submission.
- **Metric:** Custom approximation of the competitionâ€™s adjusted Sharpe ratio, which penalizes volatility and underperformance.
- **Post-processing:** A unified `post_process_signal()` function ensures parity between local validation and leaderboard logic by converting model predictions into bounded investment positions.

---

## Install Autogluon

In [1]:
from pathlib import Path
WHEELS = Path("/kaggle/input/autogluon-1-4-0-offline")  # <- your dataset

!pip install --no-index --quiet --find-links="{WHEELS}" \
  "torch==2.5.1" "torchvision==0.20.1" "torchaudio==2.5.1" "bitsandbytes>=0.46.1" "mlforecast==0.14.0" "optuna==4.3.0"

!pip install --no-index --quiet --find-links="{WHEELS}" \
    "autogluon.tabular"

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
libcugraph-cu12 25.6.0 requires libraft-cu12==25.6.*, but you have libraft-cu12 25.2.0 which is incompatible.
pylibcugraph-cu12 25.6.0 requires pylibraft-cu12==25.6.*, but you have pylibraft-cu12 25.2.0 which is incompatible.
pylibcugraph-cu12 25.6.0 requires rmm-cu12==25.6.*, but you have rmm-cu12 25.2.0 which is incompatible.[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.5.2 which is incompatible.[0m[31m
[0m

## Parameters and Config

In [2]:
# ============================================================
# Hull Tactical Kaggle â€” AutoGluon train/infer + organizer metric selection
# Copy/paste notebook cell(s)
# ============================================================

import os
from pathlib import Path
import numpy as np
import pandas as pd
import polars as pl

from autogluon.tabular import TabularPredictor

# -------------------------
# USER CONTROLS
# -------------------------
# notebook_mode:
#   "training"  -> fit model (and optionally tune postprocess on holdout), save under /kaggle/working
#   "inference" -> load model from Kaggle dataset input and only predict
notebook_mode = "training"
assert notebook_mode in ("training", "inference")

# approach:
#   "rmse_forward"  -> predict forward_returns (classic regression)
#   "rmse_excess"   -> predict excess returns: forward_returns - risk_free_rate (often aligns better with scorer)
#   "metric_tune"   -> still trains RMSE, but selects tau/alpha by maximizing organizer score on a holdout split
approach = "metric_tune"
assert approach in ("rmse_forward", "rmse_excess", "metric_tune")


In [3]:

# Where the competition data is
DATA_PATH = "/kaggle/input/hull-tactical-market-prediction/"

# Where a pre-trained AutoGluon model is stored (input dataset)
PRETRAINED_MODEL_DIR = Path("/kaggle/input/hull-tactical-autogluon-train-and-infer-tabular/AutogluonModels")

# Where to write models when training in this notebook
WORKING_MODEL_DIR = Path("/kaggle/working/AutogluonModels")

# Train settings (adjust)
AG_PRESET = "best_quality" 
TIME_LIMIT_SECS = 60 * 60 * 9

# Holdout split for metric_tune
HOLDOUT_FRAC = 0.2

# Postprocess (defaults; may be overwritten by metric tuning)
MIN_INVESTMENT = 0.0
MAX_INVESTMENT = 2.0
TAU_ABS_FOR_SCORER = 9.43717e-05
ALPHA_FOR_SCORER = 0.600132




In [4]:
# =========================
# ORGANIZER SCORER (as provided)
# =========================
import pandas.api.types

class ParticipantVisibleError(Exception):
    pass

def organizer_score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    if not pandas.api.types.is_numeric_dtype(submission['prediction']):
        raise ParticipantVisibleError('Predictions must be numeric')

    solution = solution.copy()
    solution['position'] = submission['prediction']

    if solution['position'].max() > MAX_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].max()} exceeds maximum of {MAX_INVESTMENT}')
    if solution['position'].min() < MIN_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].min()} below minimum of {MIN_INVESTMENT}')

    solution['strategy_returns'] = solution['risk_free_rate'] * (1 - solution['position']) + solution['position'] * solution['forward_returns']

    # Calculate strategy's Sharpe ratio
    strategy_excess_returns = solution['strategy_returns'] - solution['risk_free_rate']
    strategy_excess_cumulative = (1 + strategy_excess_returns).prod()
    strategy_mean_excess_return = (strategy_excess_cumulative) ** (1 / len(solution)) - 1
    strategy_std = solution['strategy_returns'].std()

    trading_days_per_yr = 252
    if strategy_std == 0:
        raise ParticipantVisibleError('Division by zero, strategy std is zero')
    sharpe = strategy_mean_excess_return / strategy_std * np.sqrt(trading_days_per_yr)
    strategy_volatility = float(strategy_std * np.sqrt(trading_days_per_yr) * 100)

    # Calculate market return and volatility
    market_excess_returns = solution['forward_returns'] - solution['risk_free_rate']
    market_excess_cumulative = (1 + market_excess_returns).prod()
    market_mean_excess_return = (market_excess_cumulative) ** (1 / len(solution)) - 1
    market_std = solution['forward_returns'].std()

    market_volatility = float(market_std * np.sqrt(trading_days_per_yr) * 100)

    if market_volatility == 0:
        raise ParticipantVisibleError('Division by zero, market std is zero')

    # Calculate the volatility penalty
    excess_vol = max(0, strategy_volatility / market_volatility - 1.2) if market_volatility > 0 else 0
    vol_penalty = 1 + excess_vol

    # Calculate the return penalty
    return_gap = max(
        0,
        (market_mean_excess_return - strategy_mean_excess_return) * 100 * trading_days_per_yr,
    )
    return_penalty = 1 + (return_gap**2) / 100

    adjusted_sharpe = sharpe / (vol_penalty * return_penalty)
    return min(float(adjusted_sharpe), 1_000_000)


In [5]:

# =========================
# POST-PROCESS: raw prediction -> position in [0,2]
# =========================
def post_process_signal(y_pred,
                        *,
                        tau: float = TAU_ABS_FOR_SCORER,
                        alpha: float = ALPHA_FOR_SCORER,
                        min_investment: float = MIN_INVESTMENT,
                        max_investment: float = MAX_INVESTMENT):
    sig = np.asarray(y_pred, dtype=float).ravel()
    pos = np.where(sig > tau, alpha, 0.0)
    return np.clip(pos, min_investment, max_investment)

# =========================
# COLUMNS
# =========================
# Keep these for scorer / sanity.
NEEDED_FOR_SCORER = ["risk_free_rate", "forward_returns"]

# Non-feature columns to drop at inference & (optionally) training.
# NOTE: do NOT drop risk_free_rate in rmse_excess / metric_tune (it can be a useful feature).
DROP_ALWAYS = ["row_id", "id", "market_forward_excess_returns"]



In [6]:
# =========================
# LOAD TRAIN (only if training)
# =========================
train = None
target_col = None

if notebook_mode == "training":
    train = pd.read_csv(f"{DATA_PATH}train.csv")

    # Choose target based on approach
    if approach == "rmse_forward":
        target_col = "forward_returns"
    else:
        # rmse_excess or metric_tune
        train["excess_forward_returns"] = train["forward_returns"] - train["risk_free_rate"]
        target_col = "excess_forward_returns"

    # Basic checks
    for c in NEEDED_FOR_SCORER:
        if c not in train.columns:
            raise ValueError(f"Expected '{c}' in train.csv but not found")

    if target_col not in train.columns:
        raise ValueError(f"Expected target '{target_col}' in train.csv but not found")

    # Build training frame: drop obvious IDs/leaks; keep risk_free_rate
    use_cols = [c for c in train.columns if c not in DROP_ALWAYS]
    train = train[use_cols].copy()



In [7]:
# =========================
# TRAIN OR LOAD PREDICTOR
# =========================
predictor = None

if notebook_mode == "inference":
    predictor = TabularPredictor.load(str(PRETRAINED_MODEL_DIR))
    print(f"[inference] Loaded predictor from: {PRETRAINED_MODEL_DIR}")

else:
    predictor = TabularPredictor(
        label=target_col,
        eval_metric="rmse",
        problem_type="regression",
        path=str(WORKING_MODEL_DIR),
    )

    predictor.fit(
        train_data=train,
        presets=AG_PRESET,
        time_limit=TIME_LIMIT_SECS,
    )

    print(f"[training] Trained. Models saved to: {WORKING_MODEL_DIR}")

# Cache model feature list (works in both modes)
MODEL_FEATURES = predictor.feature_metadata.get_features()



Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.11.13
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Sat Sep 27 10:16:09 UTC 2025
CPU Count:          4
Memory Avail:       30.22 GB / 31.35 GB (96.4%)
Disk Space Avail:   19.50 GB / 19.52 GB (99.9%)
Presets specified: ['best_quality']
Using hyperparameters preset: hyperparameters='zeroshot'
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitti

[1000]	valid_set's rmse: 0.00118659
[2000]	valid_set's rmse: 0.00115001
[3000]	valid_set's rmse: 0.00114062
[4000]	valid_set's rmse: 0.00113762
[5000]	valid_set's rmse: 0.00113557
[6000]	valid_set's rmse: 0.00113327
[7000]	valid_set's rmse: 0.00113209
[8000]	valid_set's rmse: 0.00113074
[9000]	valid_set's rmse: 0.00113007
[10000]	valid_set's rmse: 0.00112911
[1000]	valid_set's rmse: 0.00095888
[2000]	valid_set's rmse: 0.000933351
[3000]	valid_set's rmse: 0.000925804
[4000]	valid_set's rmse: 0.000923462
[5000]	valid_set's rmse: 0.000922295
[6000]	valid_set's rmse: 0.000921071
[7000]	valid_set's rmse: 0.000920562
[8000]	valid_set's rmse: 0.000920002
[9000]	valid_set's rmse: 0.000919423
[10000]	valid_set's rmse: 0.000919013
[1000]	valid_set's rmse: 0.00121781
[2000]	valid_set's rmse: 0.00118545
[3000]	valid_set's rmse: 0.00117852
[4000]	valid_set's rmse: 0.00117508
[5000]	valid_set's rmse: 0.00117246
[6000]	valid_set's rmse: 0.00117153
[7000]	valid_set's rmse: 0.00117021
[8000]	valid_set'

	-0.001	 = Validation score   (-root_mean_squared_error)
	451.19s	 = Training   runtime
	7.53s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ... Training model for up to 4924.39s of the 7623.26s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	37.12s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 ... Training model for up to 4886.98s of the 7585.85s of remaining time.
	-0.0001	 = Validation score   (-root_mean_squared_error)
	71.46s	 = Training   runtime
	0.56s	 = Validation runtime
Fitting model: CatBoost_BAG_L1 ... Training model for up to 4813.91s of the 7512.78s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0003	 = Validation score   (-root_mean_squared_error)
	1326.16s	 = Training   runtime
	0.1s	 

[1000]	valid_set's rmse: 0.000868629
[2000]	valid_set's rmse: 0.000868557
[1000]	valid_set's rmse: 0.000712893
[1000]	valid_set's rmse: 0.00085249
[2000]	valid_set's rmse: 0.000852354
[3000]	valid_set's rmse: 0.00085235
[1000]	valid_set's rmse: 0.000757289
[2000]	valid_set's rmse: 0.000757191
[3000]	valid_set's rmse: 0.000757188
[4000]	valid_set's rmse: 0.000757187
[1000]	valid_set's rmse: 0.000713541
[1000]	valid_set's rmse: 0.000702429
[1000]	valid_set's rmse: 0.000786294
[2000]	valid_set's rmse: 0.000786171
[3000]	valid_set's rmse: 0.000786161
[4000]	valid_set's rmse: 0.00078616
[5000]	valid_set's rmse: 0.00078616
[6000]	valid_set's rmse: 0.000786159
[7000]	valid_set's rmse: 0.000786159
[8000]	valid_set's rmse: 0.000786159
[9000]	valid_set's rmse: 0.000786159


	-0.0008	 = Validation score   (-root_mean_squared_error)
	679.44s	 = Training   runtime
	3.6s	 = Validation runtime
Fitting model: CatBoost_r177_BAG_L1 ... Training model for up to 2438.91s of the 5137.78s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0003	 = Validation score   (-root_mean_squared_error)
	1349.82s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: NeuralNetTorch_r79_BAG_L1 ... Training model for up to 1088.67s of the 3787.54s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0005	 = Validation score   (-root_mean_squared_error)
	478.2s	 = Training   runtime
	0.3s	 = Validation runtime
Fitting model: LightGBM_r131_BAG_L1 ... Training model for up to 610.03s of the 3308.90s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrat

[1000]	valid_set's rmse: 0.000693256
[2000]	valid_set's rmse: 0.000678563
[3000]	valid_set's rmse: 0.000675979
[4000]	valid_set's rmse: 0.000675373
[5000]	valid_set's rmse: 0.000675104
[6000]	valid_set's rmse: 0.000674958


	Ran out of time, early stopping on iteration 6699. Best iteration is:
	[6699]	valid_set's rmse: 0.000674861


[1000]	valid_set's rmse: 0.000563413
[2000]	valid_set's rmse: 0.00055241
[3000]	valid_set's rmse: 0.000550458
[4000]	valid_set's rmse: 0.000549763
[5000]	valid_set's rmse: 0.00054963
[6000]	valid_set's rmse: 0.000549503


	Ran out of time, early stopping on iteration 6748. Best iteration is:
	[6746]	valid_set's rmse: 0.000549434


[1000]	valid_set's rmse: 0.00065913
[2000]	valid_set's rmse: 0.000647169
[3000]	valid_set's rmse: 0.000644927
[4000]	valid_set's rmse: 0.000643476
[5000]	valid_set's rmse: 0.000642855
[6000]	valid_set's rmse: 0.00064249
[7000]	valid_set's rmse: 0.00064226


	Ran out of time, early stopping on iteration 7191. Best iteration is:
	[7109]	valid_set's rmse: 0.000642232


[1000]	valid_set's rmse: 0.000596727
[2000]	valid_set's rmse: 0.000583161
[3000]	valid_set's rmse: 0.000581157
[4000]	valid_set's rmse: 0.000580366
[5000]	valid_set's rmse: 0.000579984
[6000]	valid_set's rmse: 0.000579837
[7000]	valid_set's rmse: 0.000579743


	Ran out of time, early stopping on iteration 7261. Best iteration is:
	[6891]	valid_set's rmse: 0.000579742


[1000]	valid_set's rmse: 0.000609801
[2000]	valid_set's rmse: 0.000600506
[3000]	valid_set's rmse: 0.000598656
[4000]	valid_set's rmse: 0.000598008
[5000]	valid_set's rmse: 0.000597872
[6000]	valid_set's rmse: 0.000597817
[7000]	valid_set's rmse: 0.000597799


	Ran out of time, early stopping on iteration 7695. Best iteration is:
	[7587]	valid_set's rmse: 0.000597769


[1000]	valid_set's rmse: 0.000621326
[2000]	valid_set's rmse: 0.000610362
[3000]	valid_set's rmse: 0.000608336
[4000]	valid_set's rmse: 0.000607794
[5000]	valid_set's rmse: 0.000607443
[6000]	valid_set's rmse: 0.000607298
[7000]	valid_set's rmse: 0.000607359
[1000]	valid_set's rmse: 0.00059595
[2000]	valid_set's rmse: 0.000583645
[3000]	valid_set's rmse: 0.00058097
[4000]	valid_set's rmse: 0.000580316
[5000]	valid_set's rmse: 0.000580239
[6000]	valid_set's rmse: 0.000580167
[7000]	valid_set's rmse: 0.000580158
[8000]	valid_set's rmse: 0.000580103
[9000]	valid_set's rmse: 0.000580075


	Ran out of time, early stopping on iteration 9327. Best iteration is:
	[9323]	valid_set's rmse: 0.000580066


[1000]	valid_set's rmse: 0.000657362
[2000]	valid_set's rmse: 0.000640253
[3000]	valid_set's rmse: 0.000638031
[4000]	valid_set's rmse: 0.000636936
[5000]	valid_set's rmse: 0.00063671
[6000]	valid_set's rmse: 0.000636592
[7000]	valid_set's rmse: 0.000636449
[8000]	valid_set's rmse: 0.000636387
[9000]	valid_set's rmse: 0.000636317
[10000]	valid_set's rmse: 0.000636287


	-0.0006	 = Validation score   (-root_mean_squared_error)
	566.17s	 = Training   runtime
	9.57s	 = Validation runtime
Fitting model: NeuralNetFastAI_r191_BAG_L1 ... Training model for up to 24.48s of the 2723.35s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	Ran out of time, stopping training early. (Stopping on epoch 2)
	Ran out of time, stopping training early. (Stopping on epoch 3)
	Ran out of time, stopping training early. (Stopping on epoch 3)
	Ran out of time, stopping training early. (Stopping on epoch 3)
	Ran out of time, stopping training early. (Stopping on epoch 3)
	Ran out of time, stopping training early. (Stopping on epoch 4)
	Ran out of time, stopping training early. (Stopping on epoch 5)
	Ran out of time, stopping training early. (Stopping on epoch 6)
	-0.0016	 = Validation score   (-root_mean_squared_error)
	22.37s	 = Training   runtime
	0.45s	 = Validation runtime
Fitting model:

[1000]	valid_set's rmse: 0.0004426
[2000]	valid_set's rmse: 0.000437558
[3000]	valid_set's rmse: 0.000436099
[4000]	valid_set's rmse: 0.000435771
[5000]	valid_set's rmse: 0.000435582
[6000]	valid_set's rmse: 0.000435522
[7000]	valid_set's rmse: 0.00043547
[8000]	valid_set's rmse: 0.000435453
[9000]	valid_set's rmse: 0.000435448
[10000]	valid_set's rmse: 0.000435422
[1000]	valid_set's rmse: 0.000431792
[2000]	valid_set's rmse: 0.000428384
[3000]	valid_set's rmse: 0.000427214
[4000]	valid_set's rmse: 0.000426819
[5000]	valid_set's rmse: 0.000426739
[6000]	valid_set's rmse: 0.000426689
[7000]	valid_set's rmse: 0.000426669
[1000]	valid_set's rmse: 0.000462988
[2000]	valid_set's rmse: 0.000458236
[3000]	valid_set's rmse: 0.000457142
[4000]	valid_set's rmse: 0.000456629
[5000]	valid_set's rmse: 0.000456388
[6000]	valid_set's rmse: 0.000456316
[7000]	valid_set's rmse: 0.000456212
[8000]	valid_set's rmse: 0.000456184
[9000]	valid_set's rmse: 0.000456128
[10000]	valid_set's rmse: 0.000456094
[1

	-0.0005	 = Validation score   (-root_mean_squared_error)
	417.54s	 = Training   runtime
	7.23s	 = Validation runtime
Fitting model: LightGBM_BAG_L2 ... Training model for up to 2268.05s of the 2268.01s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	39.15s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L2 ... Training model for up to 2228.65s of the 2228.61s of remaining time.
	-0.0001	 = Validation score   (-root_mean_squared_error)
	75.47s	 = Training   runtime
	0.63s	 = Validation runtime
Fitting model: CatBoost_BAG_L2 ... Training model for up to 2151.65s of the 2151.61s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	Ran out of time, early stopping on iteration 9090.
	Ran out of time, early stopping on iterati

[1000]	valid_set's rmse: 0.00105389
[2000]	valid_set's rmse: 0.00102728
[3000]	valid_set's rmse: 0.0010212
[4000]	valid_set's rmse: 0.00101821
[5000]	valid_set's rmse: 0.00101646
[6000]	valid_set's rmse: 0.00101543
[7000]	valid_set's rmse: 0.00101484
[8000]	valid_set's rmse: 0.00101432
[9000]	valid_set's rmse: 0.00101393
[10000]	valid_set's rmse: 0.00101365
[1000]	valid_set's rmse: 0.00107752
[2000]	valid_set's rmse: 0.00105513
[3000]	valid_set's rmse: 0.00105038
[4000]	valid_set's rmse: 0.00104793
[5000]	valid_set's rmse: 0.00104684
[6000]	valid_set's rmse: 0.0010463
[7000]	valid_set's rmse: 0.00104578
[8000]	valid_set's rmse: 0.00104538
[9000]	valid_set's rmse: 0.00104509
[10000]	valid_set's rmse: 0.00104488
[1000]	valid_set's rmse: 0.000875609
[2000]	valid_set's rmse: 0.000852668
[3000]	valid_set's rmse: 0.000846628
[4000]	valid_set's rmse: 0.000843965
[5000]	valid_set's rmse: 0.000842528
[6000]	valid_set's rmse: 0.000841874
[7000]	valid_set's rmse: 0.000841513
[8000]	valid_set's rm

	-0.001	 = Validation score   (-root_mean_squared_error)
	463.21s	 = Training   runtime
	9.45s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ... Training model for up to 15676.44s of the 23760.55s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	35.89s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 ... Training model for up to 15640.33s of the 23724.44s of remaining time.
	-0.0001	 = Validation score   (-root_mean_squared_error)
	75.22s	 = Training   runtime
	0.76s	 = Validation runtime
Fitting model: CatBoost_BAG_L1 ... Training model for up to 15563.91s of the 23648.01s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0003	 = Validation score   (-root_mean_squared_error)
	1117.02s	 = Training   runtime


[1000]	valid_set's rmse: 0.000645287
[2000]	valid_set's rmse: 0.000645161
[3000]	valid_set's rmse: 0.000645145
[4000]	valid_set's rmse: 0.000645143
[5000]	valid_set's rmse: 0.000645143
[6000]	valid_set's rmse: 0.000645143
[1000]	valid_set's rmse: 0.000768768
[2000]	valid_set's rmse: 0.000768538
[3000]	valid_set's rmse: 0.0007685
[4000]	valid_set's rmse: 0.000768495
[5000]	valid_set's rmse: 0.000768494
[6000]	valid_set's rmse: 0.000768493
[7000]	valid_set's rmse: 0.000768493
[8000]	valid_set's rmse: 0.000768493
[9000]	valid_set's rmse: 0.000768493
[10000]	valid_set's rmse: 0.000768493
[1000]	valid_set's rmse: 0.000696284
[2000]	valid_set's rmse: 0.000696215
[1000]	valid_set's rmse: 0.000665043
[2000]	valid_set's rmse: 0.000664987
[3000]	valid_set's rmse: 0.000664978
[4000]	valid_set's rmse: 0.000664976
[5000]	valid_set's rmse: 0.000664976
[1000]	valid_set's rmse: 0.000795718
[2000]	valid_set's rmse: 0.000795527
[3000]	valid_set's rmse: 0.000795518
[1000]	valid_set's rmse: 0.000665297
[1

	-0.0007	 = Validation score   (-root_mean_squared_error)
	967.52s	 = Training   runtime
	5.86s	 = Validation runtime
Fitting model: CatBoost_r177_BAG_L1 ... Training model for up to 13076.74s of the 21160.84s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0003	 = Validation score   (-root_mean_squared_error)
	1150.63s	 = Training   runtime
	0.11s	 = Validation runtime
Fitting model: NeuralNetTorch_r79_BAG_L1 ... Training model for up to 11925.62s of the 20009.72s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0005	 = Validation score   (-root_mean_squared_error)
	546.47s	 = Training   runtime
	0.28s	 = Validation runtime
Fitting model: LightGBM_r131_BAG_L1 ... Training model for up to 11378.73s of the 19462.83s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldF

[1000]	valid_set's rmse: 0.000576831
[2000]	valid_set's rmse: 0.000563191
[3000]	valid_set's rmse: 0.000561064
[4000]	valid_set's rmse: 0.000560126
[5000]	valid_set's rmse: 0.000559803
[6000]	valid_set's rmse: 0.000559655
[7000]	valid_set's rmse: 0.000559632
[8000]	valid_set's rmse: 0.000559568
[9000]	valid_set's rmse: 0.000559547
[10000]	valid_set's rmse: 0.000559532
[1000]	valid_set's rmse: 0.000665502
[2000]	valid_set's rmse: 0.000652629
[3000]	valid_set's rmse: 0.000650605
[4000]	valid_set's rmse: 0.000649836
[5000]	valid_set's rmse: 0.000649625
[6000]	valid_set's rmse: 0.00064942
[7000]	valid_set's rmse: 0.000649224
[8000]	valid_set's rmse: 0.000649108
[9000]	valid_set's rmse: 0.000649057
[10000]	valid_set's rmse: 0.000649079
[1000]	valid_set's rmse: 0.000569274
[2000]	valid_set's rmse: 0.000560051
[3000]	valid_set's rmse: 0.000557969
[4000]	valid_set's rmse: 0.000557062
[5000]	valid_set's rmse: 0.000556766
[6000]	valid_set's rmse: 0.000556548
[7000]	valid_set's rmse: 0.000556424


	-0.0006	 = Validation score   (-root_mean_squared_error)
	723.09s	 = Training   runtime
	16.46s	 = Validation runtime
Fitting model: NeuralNetFastAI_r191_BAG_L1 ... Training model for up to 10626.68s of the 18710.78s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	186.54s	 = Training   runtime
	0.45s	 = Validation runtime
Fitting model: CatBoost_r9_BAG_L1 ... Training model for up to 10439.43s of the 18523.54s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0005	 = Validation score   (-root_mean_squared_error)
	4951.35s	 = Training   runtime
	0.68s	 = Validation runtime
Fitting model: LightGBM_r96_BAG_L1 ... Training model for up to 5486.55s of the 13570.65s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFi

[1000]	valid_set's rmse: 0.00137346
[2000]	valid_set's rmse: 0.0010312
[3000]	valid_set's rmse: 0.000917925
[4000]	valid_set's rmse: 0.000849801
[5000]	valid_set's rmse: 0.000813845
[6000]	valid_set's rmse: 0.00079453
[7000]	valid_set's rmse: 0.000780129
[8000]	valid_set's rmse: 0.000768153
[9000]	valid_set's rmse: 0.000760125
[10000]	valid_set's rmse: 0.000752406
[1000]	valid_set's rmse: 0.00147735
[2000]	valid_set's rmse: 0.00109706
[3000]	valid_set's rmse: 0.000969694
[4000]	valid_set's rmse: 0.000904562
[5000]	valid_set's rmse: 0.000872266
[6000]	valid_set's rmse: 0.00085528
[7000]	valid_set's rmse: 0.000838242
[8000]	valid_set's rmse: 0.000826883
[9000]	valid_set's rmse: 0.000816142
[10000]	valid_set's rmse: 0.000809636
[1000]	valid_set's rmse: 0.00132206
[2000]	valid_set's rmse: 0.000987765
[3000]	valid_set's rmse: 0.000863283
[4000]	valid_set's rmse: 0.000798426
[5000]	valid_set's rmse: 0.000765419
[6000]	valid_set's rmse: 0.000738716
[7000]	valid_set's rmse: 0.000724714
[8000]	

	-0.0008	 = Validation score   (-root_mean_squared_error)
	211.48s	 = Training   runtime
	6.73s	 = Validation runtime
Fitting model: NeuralNetTorch_r22_BAG_L1 ... Training model for up to 5264.25s of the 13348.35s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0003	 = Validation score   (-root_mean_squared_error)
	1032.07s	 = Training   runtime
	0.32s	 = Validation runtime
Fitting model: XGBoost_r33_BAG_L1 ... Training model for up to 4231.71s of the 12315.81s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0009	 = Validation score   (-root_mean_squared_error)
	133.77s	 = Training   runtime
	0.26s	 = Validation runtime
Fitting model: ExtraTrees_r42_BAG_L1 ... Training model for up to 4097.40s of the 12181.50s of remaining time.
	-0.0002	 = Validation score   (-root_mean_squared_error)
	14.67s	 = Training

[1000]	valid_set's rmse: 0.0022461


	Ran out of time, early stopping on iteration 1079. Best iteration is:
	[1079]	valid_set's rmse: 0.00224433
	-0.0022	 = Validation score   (-root_mean_squared_error)
	121.67s	 = Training   runtime
	0.9s	 = Validation runtime
Fitting model: NeuralNetFastAI_r145_BAG_L1 ... Training model for up to 3.96s of the 8088.07s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	Time limit exceeded... Skipping NeuralNetFastAI_r145_BAG_L1.
Fitting model: XGBoost_r89_BAG_L1 ... Training model for up to 3.31s of the 8087.42s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0042	 = Validation score   (-root_mean_squared_error)
	3.07s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: NeuralNetTorch_r30_BAG_L1 ... Training model for up to 0.08s of the 8084.18s of remaining time.
	Fitting 8 child models (S1F1 - S1F

[1000]	valid_set's rmse: 0.000551365
[2000]	valid_set's rmse: 0.000548355
[3000]	valid_set's rmse: 0.000547549
[4000]	valid_set's rmse: 0.000547257
[5000]	valid_set's rmse: 0.00054712
[6000]	valid_set's rmse: 0.000547086
[7000]	valid_set's rmse: 0.000547063
[8000]	valid_set's rmse: 0.000547067
[1000]	valid_set's rmse: 0.000475845
[2000]	valid_set's rmse: 0.00047252
[3000]	valid_set's rmse: 0.00047195
[1000]	valid_set's rmse: 0.000400409
[2000]	valid_set's rmse: 0.00039913
[1000]	valid_set's rmse: 0.000445684
[2000]	valid_set's rmse: 0.000443598
[3000]	valid_set's rmse: 0.000443138
[4000]	valid_set's rmse: 0.000442825
[5000]	valid_set's rmse: 0.000442521
[6000]	valid_set's rmse: 0.000442386
[7000]	valid_set's rmse: 0.000442283
[8000]	valid_set's rmse: 0.000442262
[9000]	valid_set's rmse: 0.000442226
[10000]	valid_set's rmse: 0.000442212
[1000]	valid_set's rmse: 0.00046799
[2000]	valid_set's rmse: 0.000463215
[3000]	valid_set's rmse: 0.000462142
[4000]	valid_set's rmse: 0.00046161
[5000]

	-0.0004	 = Validation score   (-root_mean_squared_error)
	404.69s	 = Training   runtime
	6.81s	 = Validation runtime
Fitting model: LightGBM_BAG_L2 ... Training model for up to 7666.72s of the 7666.66s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0001	 = Validation score   (-root_mean_squared_error)
	47.7s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L2 ... Training model for up to 7618.72s of the 7618.66s of remaining time.
	-0.0	 = Validation score   (-root_mean_squared_error)
	95.17s	 = Training   runtime
	0.73s	 = Validation runtime
Fitting model: CatBoost_BAG_L2 ... Training model for up to 7521.69s of the 7521.63s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	2149.95s	 = Training   runtime
	0.17s	 = 

[1000]	valid_set's rmse: 0.000118166


	-0.0001	 = Validation score   (-root_mean_squared_error)
	215.39s	 = Training   runtime
	0.31s	 = Validation runtime
Fitting model: CatBoost_r177_BAG_L2 ... Training model for up to 4924.89s of the 4924.84s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	2183.62s	 = Training   runtime
	0.17s	 = Validation runtime
Fitting model: NeuralNetTorch_r79_BAG_L2 ... Training model for up to 2740.56s of the 2740.50s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0004	 = Validation score   (-root_mean_squared_error)
	847.6s	 = Training   runtime
	0.3s	 = Validation runtime
Fitting model: LightGBM_r131_BAG_L2 ... Training model for up to 1892.47s of the 1892.41s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingSt

[1000]	valid_set's rmse: 0.00014905
[1000]	valid_set's rmse: 0.000147835
[1000]	valid_set's rmse: 0.000137003
[1000]	valid_set's rmse: 0.00014207
[1000]	valid_set's rmse: 0.000139355
[1000]	valid_set's rmse: 0.00013696


	-0.0001	 = Validation score   (-root_mean_squared_error)
	133.09s	 = Training   runtime
	0.67s	 = Validation runtime
Fitting model: NeuralNetFastAI_r191_BAG_L2 ... Training model for up to 1757.66s of the 1757.60s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	208.04s	 = Training   runtime
	0.5s	 = Validation runtime
Fitting model: CatBoost_r9_BAG_L2 ... Training model for up to 1548.81s of the 1548.75s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	Ran out of time, early stopping on iteration 1159.
	Ran out of time, early stopping on iteration 1247.
	Ran out of time, early stopping on iteration 1368.
	Ran out of time, early stopping on iteration 1300.
	Ran out of time, early stopping on iteration 1433.
	Ran out of time, early stopping on iteration 15

[1000]	valid_set's rmse: 0.000431475


	Ran out of time, early stopping on iteration 1496. Best iteration is:
	[1493]	valid_set's rmse: 0.000396571


[1000]	valid_set's rmse: 0.000400791
[2000]	valid_set's rmse: 0.000342385


	Ran out of time, early stopping on iteration 2233. Best iteration is:
	[2233]	valid_set's rmse: 0.000335845


[1000]	valid_set's rmse: 0.000360171
[2000]	valid_set's rmse: 0.000319192


	Ran out of time, early stopping on iteration 2369. Best iteration is:
	[2367]	valid_set's rmse: 0.000313368


[1000]	valid_set's rmse: 0.000347871
[2000]	valid_set's rmse: 0.000294982


	Ran out of time, early stopping on iteration 2394. Best iteration is:
	[2394]	valid_set's rmse: 0.000287659


[1000]	valid_set's rmse: 0.000372543
[2000]	valid_set's rmse: 0.000309275


	Ran out of time, early stopping on iteration 2424. Best iteration is:
	[2424]	valid_set's rmse: 0.000300619


[1000]	valid_set's rmse: 0.000351694
[2000]	valid_set's rmse: 0.000319988


	Ran out of time, early stopping on iteration 2647. Best iteration is:
	[2638]	valid_set's rmse: 0.000313


[1000]	valid_set's rmse: 0.000293168
[2000]	valid_set's rmse: 0.000264035


	Ran out of time, early stopping on iteration 2856. Best iteration is:
	[2845]	valid_set's rmse: 0.000257909


[1000]	valid_set's rmse: 0.000356835
[2000]	valid_set's rmse: 0.000312417
[3000]	valid_set's rmse: 0.000301124


	Ran out of time, early stopping on iteration 3322. Best iteration is:
	[3306]	valid_set's rmse: 0.000299968
	-0.0003	 = Validation score   (-root_mean_squared_error)
	58.3s	 = Training   runtime
	1.5s	 = Validation runtime
Fitting model: WeightedEnsemble_L3 ... Training model for up to 808.36s of the 1.40s of remaining time.
	Ensemble Weights: {'ExtraTreesMSE_BAG_L2': 0.478, 'ExtraTreesMSE_BAG_L1': 0.304, 'RandomForestMSE_BAG_L1': 0.087, 'RandomForestMSE_BAG_L2': 0.087, 'NeuralNetFastAI_r191_BAG_L1': 0.043}
	-0.0	 = Validation score   (-root_mean_squared_error)
	0.05s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 24239.54s ... Best model: WeightedEnsemble_L3 | Estimated inference throughput: 44.0 rows/s (1131 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/kaggle/working/AutogluonModels")


[training] Trained. Models saved to: /kaggle/working/AutogluonModels


In [8]:
# =========================
# OPTIONAL: Tune tau/alpha using organizer metric on a holdout
# (Only when training + approach == metric_tune)
# =========================
if notebook_mode == "training" and approach == "metric_tune":
    n = len(train)
    cut = int(n * (1.0 - HOLDOUT_FRAC))
    if cut <= 0 or cut >= n:
        raise ValueError("Bad HOLDOUT_FRAC; leads to empty train or empty holdout.")

    train_tr = train.iloc[:cut].copy()
    train_va = train.iloc[cut:].copy()

    # Refit quickly on the train_tr subset? (optional)
    # For simplicity, we keep the trained predictor and just tune postprocess on the holdout portion.
    # If you want strict separation, train predictor on train_tr from the start.

    # Build solution df for scorer (must contain forward_returns and risk_free_rate)
    # Note: train_va still has original forward_returns and risk_free_rate because we kept them
    solution = train_va[NEEDED_FOR_SCORER].copy()

    # Build X_va: drop label column only; keep other columns
    X_va = train_va.drop(columns=[predictor.label], errors="ignore")

    raw = predictor.predict(X_va).to_numpy()

    taus = np.logspace(-7, -3, 25)
    alphas = np.linspace(0.05, 2.0, 40)

    best_score = -np.inf
    best_tau = TAU_ABS_FOR_SCORER
    best_alpha = ALPHA_FOR_SCORER

    # Make a fresh copy each loop because organizer_score mutates solution
    for tau in taus:
        for alpha in alphas:
            pos = post_process_signal(raw, tau=tau, alpha=alpha)
            sub = pd.DataFrame({"prediction": pos})
            try:
                s = organizer_score(solution.copy(), sub, row_id_column_name="row_id")
            except ParticipantVisibleError:
                continue
            if s > best_score:
                best_score = s
                best_tau = float(tau)
                best_alpha = float(alpha)

    TAU_ABS_FOR_SCORER = best_tau
    ALPHA_FOR_SCORER = best_alpha

    print(f"[metric_tune] Best holdout organizer metric: {best_score:.6f}")
    print(f"[metric_tune] Using tau={TAU_ABS_FOR_SCORER:.6g}, alpha={ALPHA_FOR_SCORER:.6g}")



[metric_tune] Best holdout organizer metric: -inf
[metric_tune] Using tau=9.43717e-05, alpha=0.600132


In [9]:
# =========================
# PREDICT FUNCTION FOR KAGGLE EVAL SERVER
# =========================
def predict(test: pl.DataFrame) -> float:
    """Return a single post-processed position for a single-row Polars DataFrame."""
    if not isinstance(test, pl.DataFrame):
        raise TypeError("predict(test): expected a Polars DataFrame input")
    if test.height != 1:
        raise ValueError(f"predict(test): expected a single-row Polars DataFrame, got {test.height} rows")

    # Drop always-drop columns if present
    drop_cols = [c for c in DROP_ALWAYS if c in test.columns]
    test_pl = test.drop(drop_cols) if drop_cols else test

    # Ensure label is not present
    if predictor.label in test_pl.columns:
        test_pl = test_pl.drop(predictor.label)

    # Polars -> Pandas
    test_pd = test_pl.to_pandas()

    # Align columns to model features (drops extras, fills missing with 0)
    test_pd = test_pd.reindex(columns=MODEL_FEATURES, fill_value=0)

    raw = predictor.predict(test_pd)
    pos = post_process_signal(
        raw,
        tau=TAU_ABS_FOR_SCORER,
        alpha=ALPHA_FOR_SCORER,
        min_investment=MIN_INVESTMENT,
        max_investment=MAX_INVESTMENT,
    )
    return float(np.asarray(pos).ravel()[0])



In [10]:
import kaggle_evaluation.default_inference_server as kis
import os

# ---------- KAGGLE SERVER BOOTSTRAP ----------
inference_server = kis.DefaultInferenceServer(predict)

if os.getenv("KAGGLE_IS_COMPETITION_RERUN"):
    inference_server.serve()
else:
    inference_server.run_local_gateway(('/kaggle/input/hull-tactical-market-prediction/',))