# ðŸ§  Hull Tactical Market Prediction â€” AutoGluon Baseline

This notebook builds a baseline model for the [**Hull Tactical Market Prediction**](https://www.kaggle.com/competitions/hull-tactical-market-prediction) competition using **AutoGluon Tabular**. The goal is to predict trading positions that maximize a Sharpe-like performance metric.  

## Overview
- **Task:** Predict next-period trading positions (long / flat) using engineered financial features.
- **Approach:** Train an AutoGluon model on historical data to predict *forward returns*, then post-process those predictions into positions for scoring and submission.
- **Metric:** Custom approximation of the competitionâ€™s adjusted Sharpe ratio, which penalizes volatility and underperformance.
- **Post-processing:** A unified `post_process_signal()` function ensures parity between local validation and leaderboard logic by converting model predictions into bounded investment positions.

---

## Install Autogluon

In [1]:
from pathlib import Path
WHEELS = Path("/kaggle/input/autogluon-1-4-0-offline")  # <- your dataset

!pip install --no-index --quiet --find-links="{WHEELS}" \
  "torch==2.5.1" "torchvision==0.20.1" "torchaudio==2.5.1" "bitsandbytes>=0.46.1" "mlforecast==0.14.0" "optuna==4.3.0"

!pip install --no-index --quiet --find-links="{WHEELS}" \
    "autogluon.tabular"

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
libcugraph-cu12 25.6.0 requires libraft-cu12==25.6.*, but you have libraft-cu12 25.2.0 which is incompatible.
pylibcugraph-cu12 25.6.0 requires pylibraft-cu12==25.6.*, but you have pylibraft-cu12 25.2.0 which is incompatible.
pylibcugraph-cu12 25.6.0 requires rmm-cu12==25.6.*, but you have rmm-cu12 25.2.0 which is incompatible.[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.5.2 which is incompatible.[0m[31m
[0m

## Parameters and Config

In [2]:
# ============================================================
# Hull Tactical Kaggle â€” AutoGluon train/infer + organizer metric selection
# Copy/paste notebook cell(s)
# ============================================================

import os
from pathlib import Path
import numpy as np
import pandas as pd
import polars as pl

from autogluon.tabular import TabularPredictor

# -------------------------
# USER CONTROLS
# -------------------------
# notebook_mode:
#   "training"  -> fit model (and optionally tune postprocess on holdout), save under /kaggle/working
#   "inference" -> load model from Kaggle dataset input and only predict
notebook_mode = "training"
assert notebook_mode in ("training", "inference")

# approach:
#   "rmse_forward"  -> predict forward_returns (classic regression)
#   "rmse_excess"   -> predict excess returns: forward_returns - risk_free_rate (often aligns better with scorer)
#   "metric_tune"   -> still trains RMSE, but selects tau/alpha by maximizing organizer score on a holdout split
approach = "rmse_excess"
assert approach in ("rmse_forward", "rmse_excess", "metric_tune")


In [3]:

# Where the competition data is
DATA_PATH = "/kaggle/input/hull-tactical-market-prediction/"

# Where a pre-trained AutoGluon model is stored (input dataset)
PRETRAINED_MODEL_DIR = Path("/kaggle/input/hull-tactical-autogluon-train-and-infer-tabular/AutogluonModels")

# Where to write models when training in this notebook
WORKING_MODEL_DIR = Path("/kaggle/working/AutogluonModels")

# Train settings (adjust)
AG_PRESET = "best_quality" 
TIME_LIMIT_SECS = 60 * 60 * 9

# Holdout split for metric_tune
HOLDOUT_FRAC = 0.2

# Postprocess (defaults; may be overwritten by metric tuning)
MIN_INVESTMENT = 0.0
MAX_INVESTMENT = 2.0
TAU_ABS_FOR_SCORER = 9.43717e-05
ALPHA_FOR_SCORER = 0.600132




In [4]:
# =========================
# ORGANIZER SCORER (as provided)
# =========================
import pandas.api.types

class ParticipantVisibleError(Exception):
    pass

def organizer_score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    if not pandas.api.types.is_numeric_dtype(submission['prediction']):
        raise ParticipantVisibleError('Predictions must be numeric')

    solution = solution.copy()
    solution['position'] = submission['prediction']

    if solution['position'].max() > MAX_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].max()} exceeds maximum of {MAX_INVESTMENT}')
    if solution['position'].min() < MIN_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].min()} below minimum of {MIN_INVESTMENT}')

    solution['strategy_returns'] = solution['risk_free_rate'] * (1 - solution['position']) + solution['position'] * solution['forward_returns']

    # Calculate strategy's Sharpe ratio
    strategy_excess_returns = solution['strategy_returns'] - solution['risk_free_rate']
    strategy_excess_cumulative = (1 + strategy_excess_returns).prod()
    strategy_mean_excess_return = (strategy_excess_cumulative) ** (1 / len(solution)) - 1
    strategy_std = solution['strategy_returns'].std()

    trading_days_per_yr = 252
    if strategy_std == 0:
        raise ParticipantVisibleError('Division by zero, strategy std is zero')
    sharpe = strategy_mean_excess_return / strategy_std * np.sqrt(trading_days_per_yr)
    strategy_volatility = float(strategy_std * np.sqrt(trading_days_per_yr) * 100)

    # Calculate market return and volatility
    market_excess_returns = solution['forward_returns'] - solution['risk_free_rate']
    market_excess_cumulative = (1 + market_excess_returns).prod()
    market_mean_excess_return = (market_excess_cumulative) ** (1 / len(solution)) - 1
    market_std = solution['forward_returns'].std()

    market_volatility = float(market_std * np.sqrt(trading_days_per_yr) * 100)

    if market_volatility == 0:
        raise ParticipantVisibleError('Division by zero, market std is zero')

    # Calculate the volatility penalty
    excess_vol = max(0, strategy_volatility / market_volatility - 1.2) if market_volatility > 0 else 0
    vol_penalty = 1 + excess_vol

    # Calculate the return penalty
    return_gap = max(
        0,
        (market_mean_excess_return - strategy_mean_excess_return) * 100 * trading_days_per_yr,
    )
    return_penalty = 1 + (return_gap**2) / 100

    adjusted_sharpe = sharpe / (vol_penalty * return_penalty)
    return min(float(adjusted_sharpe), 1_000_000)


In [5]:

# =========================
# POST-PROCESS: raw prediction -> position in [0,2]
# =========================
def post_process_signal(y_pred,
                        *,
                        tau: float = TAU_ABS_FOR_SCORER,
                        alpha: float = ALPHA_FOR_SCORER,
                        min_investment: float = MIN_INVESTMENT,
                        max_investment: float = MAX_INVESTMENT):
    sig = np.asarray(y_pred, dtype=float).ravel()
    pos = np.where(sig > tau, alpha, 0.0)
    return np.clip(pos, min_investment, max_investment)

# =========================
# COLUMNS
# =========================
# Keep these for scorer / sanity.
NEEDED_FOR_SCORER = ["risk_free_rate", "forward_returns"]

# Non-feature columns to drop at inference & (optionally) training.
# NOTE: do NOT drop risk_free_rate in rmse_excess / metric_tune (it can be a useful feature).
DROP_ALWAYS = ["row_id", "id", "market_forward_excess_returns"]



In [6]:
# =========================
# LOAD TRAIN (only if training)
# =========================
train = None
target_col = None

if notebook_mode == "training":
    train = pd.read_csv(f"{DATA_PATH}train.csv")

    # Choose target based on approach
    if approach == "rmse_forward":
        target_col = "forward_returns"
    else:
        # rmse_excess or metric_tune
        train["excess_forward_returns"] = train["forward_returns"] - train["risk_free_rate"]
        target_col = "excess_forward_returns"

    # Basic checks
    for c in NEEDED_FOR_SCORER:
        if c not in train.columns:
            raise ValueError(f"Expected '{c}' in train.csv but not found")

    if target_col not in train.columns:
        raise ValueError(f"Expected target '{target_col}' in train.csv but not found")

    # Build training frame: drop obvious IDs/leaks; keep risk_free_rate
    use_cols = [c for c in train.columns if c not in DROP_ALWAYS]
    train = train[use_cols].copy()



In [7]:
# =========================
# TRAIN OR LOAD PREDICTOR
# =========================
predictor = None

if notebook_mode == "inference":
    predictor = TabularPredictor.load(str(PRETRAINED_MODEL_DIR))
    print(f"[inference] Loaded predictor from: {PRETRAINED_MODEL_DIR}")

else:
    predictor = TabularPredictor(
        label=target_col,
        eval_metric="rmse",
        problem_type="regression",
        path=str(WORKING_MODEL_DIR),
    )

    predictor.fit(
        train_data=train,
        presets=AG_PRESET,
        time_limit=TIME_LIMIT_SECS,
    )

    print(f"[training] Trained. Models saved to: {WORKING_MODEL_DIR}")

# Cache model feature list (works in both modes)
MODEL_FEATURES = predictor.feature_metadata.get_features()



Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.11.13
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Sat Sep 27 10:16:09 UTC 2025
CPU Count:          4
Memory Avail:       30.22 GB / 31.35 GB (96.4%)
Disk Space Avail:   19.50 GB / 19.52 GB (99.9%)
Presets specified: ['best_quality']
Using hyperparameters preset: hyperparameters='zeroshot'
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitti

[1000]	valid_set's rmse: 0.00118659
[2000]	valid_set's rmse: 0.00115001
[3000]	valid_set's rmse: 0.00114062
[4000]	valid_set's rmse: 0.00113762
[5000]	valid_set's rmse: 0.00113557
[6000]	valid_set's rmse: 0.00113327
[7000]	valid_set's rmse: 0.00113209
[8000]	valid_set's rmse: 0.00113074
[9000]	valid_set's rmse: 0.00113007
[10000]	valid_set's rmse: 0.00112911
[1000]	valid_set's rmse: 0.00095888
[2000]	valid_set's rmse: 0.000933351
[3000]	valid_set's rmse: 0.000925804
[4000]	valid_set's rmse: 0.000923462
[5000]	valid_set's rmse: 0.000922295
[6000]	valid_set's rmse: 0.000921071
[7000]	valid_set's rmse: 0.000920562
[8000]	valid_set's rmse: 0.000920002
[9000]	valid_set's rmse: 0.000919423
[10000]	valid_set's rmse: 0.000919013
[1000]	valid_set's rmse: 0.00121781
[2000]	valid_set's rmse: 0.00118545
[3000]	valid_set's rmse: 0.00117852
[4000]	valid_set's rmse: 0.00117508
[5000]	valid_set's rmse: 0.00117246
[6000]	valid_set's rmse: 0.00117153
[7000]	valid_set's rmse: 0.00117021
[8000]	valid_set'

	-0.001	 = Validation score   (-root_mean_squared_error)
	459.49s	 = Training   runtime
	7.57s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ... Training model for up to 4916.72s of the 7615.47s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	36.78s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 ... Training model for up to 4879.65s of the 7578.41s of remaining time.
	-0.0001	 = Validation score   (-root_mean_squared_error)
	66.15s	 = Training   runtime
	0.62s	 = Validation runtime
Fitting model: CatBoost_BAG_L1 ... Training model for up to 4811.87s of the 7510.62s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0003	 = Validation score   (-root_mean_squared_error)
	1338.01s	 = Training   runtime
	0.1s	 

[1000]	valid_set's rmse: 0.000868629
[2000]	valid_set's rmse: 0.000868557
[1000]	valid_set's rmse: 0.000712893
[1000]	valid_set's rmse: 0.00085249
[2000]	valid_set's rmse: 0.000852354
[3000]	valid_set's rmse: 0.00085235
[1000]	valid_set's rmse: 0.000757289
[2000]	valid_set's rmse: 0.000757191
[3000]	valid_set's rmse: 0.000757188
[4000]	valid_set's rmse: 0.000757187
[1000]	valid_set's rmse: 0.000713541
[1000]	valid_set's rmse: 0.000702429
[1000]	valid_set's rmse: 0.000786294
[2000]	valid_set's rmse: 0.000786171
[3000]	valid_set's rmse: 0.000786161
[4000]	valid_set's rmse: 0.00078616
[5000]	valid_set's rmse: 0.00078616
[6000]	valid_set's rmse: 0.000786159
[7000]	valid_set's rmse: 0.000786159
[8000]	valid_set's rmse: 0.000786159
[9000]	valid_set's rmse: 0.000786159


	-0.0008	 = Validation score   (-root_mean_squared_error)
	605.03s	 = Training   runtime
	2.99s	 = Validation runtime
Fitting model: CatBoost_r177_BAG_L1 ... Training model for up to 2499.40s of the 5198.15s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0003	 = Validation score   (-root_mean_squared_error)
	1328.3s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: NeuralNetTorch_r79_BAG_L1 ... Training model for up to 1170.69s of the 3869.45s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0005	 = Validation score   (-root_mean_squared_error)
	451.01s	 = Training   runtime
	0.3s	 = Validation runtime
Fitting model: LightGBM_r131_BAG_L1 ... Training model for up to 719.26s of the 3418.02s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStra

[1000]	valid_set's rmse: 0.000693256
[2000]	valid_set's rmse: 0.000678563
[3000]	valid_set's rmse: 0.000675979
[4000]	valid_set's rmse: 0.000675373
[5000]	valid_set's rmse: 0.000675104
[6000]	valid_set's rmse: 0.000674958


	Ran out of time, early stopping on iteration 6037. Best iteration is:
	[6025]	valid_set's rmse: 0.000674956


[1000]	valid_set's rmse: 0.000563413
[2000]	valid_set's rmse: 0.00055241
[3000]	valid_set's rmse: 0.000550458
[4000]	valid_set's rmse: 0.000549763
[5000]	valid_set's rmse: 0.00054963
[6000]	valid_set's rmse: 0.000549503
[7000]	valid_set's rmse: 0.000549426
[8000]	valid_set's rmse: 0.000549378


	Ran out of time, early stopping on iteration 8214. Best iteration is:
	[7918]	valid_set's rmse: 0.000549373


[1000]	valid_set's rmse: 0.00065913
[2000]	valid_set's rmse: 0.000647169
[3000]	valid_set's rmse: 0.000644927
[4000]	valid_set's rmse: 0.000643476
[5000]	valid_set's rmse: 0.000642855
[6000]	valid_set's rmse: 0.00064249
[7000]	valid_set's rmse: 0.00064226
[8000]	valid_set's rmse: 0.000642096


	Ran out of time, early stopping on iteration 8658. Best iteration is:
	[8656]	valid_set's rmse: 0.000642018


[1000]	valid_set's rmse: 0.000596727
[2000]	valid_set's rmse: 0.000583161
[3000]	valid_set's rmse: 0.000581157
[4000]	valid_set's rmse: 0.000580366
[5000]	valid_set's rmse: 0.000579984
[6000]	valid_set's rmse: 0.000579837
[7000]	valid_set's rmse: 0.000579743
[8000]	valid_set's rmse: 0.000579749


	Ran out of time, early stopping on iteration 8700. Best iteration is:
	[7419]	valid_set's rmse: 0.000579734


[1000]	valid_set's rmse: 0.000609801
[2000]	valid_set's rmse: 0.000600506
[3000]	valid_set's rmse: 0.000598656
[4000]	valid_set's rmse: 0.000598008
[5000]	valid_set's rmse: 0.000597872
[6000]	valid_set's rmse: 0.000597817
[7000]	valid_set's rmse: 0.000597799
[8000]	valid_set's rmse: 0.000597773
[9000]	valid_set's rmse: 0.000597749


	Ran out of time, early stopping on iteration 9469. Best iteration is:
	[9000]	valid_set's rmse: 0.000597749


[1000]	valid_set's rmse: 0.000621326
[2000]	valid_set's rmse: 0.000610362
[3000]	valid_set's rmse: 0.000608336
[4000]	valid_set's rmse: 0.000607794
[5000]	valid_set's rmse: 0.000607443
[6000]	valid_set's rmse: 0.000607298
[7000]	valid_set's rmse: 0.000607359
[1000]	valid_set's rmse: 0.00059595
[2000]	valid_set's rmse: 0.000583645
[3000]	valid_set's rmse: 0.00058097
[4000]	valid_set's rmse: 0.000580316
[5000]	valid_set's rmse: 0.000580239
[6000]	valid_set's rmse: 0.000580167
[7000]	valid_set's rmse: 0.000580158
[8000]	valid_set's rmse: 0.000580103
[9000]	valid_set's rmse: 0.000580075
[10000]	valid_set's rmse: 0.000580066
[1000]	valid_set's rmse: 0.000657362
[2000]	valid_set's rmse: 0.000640253
[3000]	valid_set's rmse: 0.000638031
[4000]	valid_set's rmse: 0.000636936
[5000]	valid_set's rmse: 0.00063671
[6000]	valid_set's rmse: 0.000636592
[7000]	valid_set's rmse: 0.000636449
[8000]	valid_set's rmse: 0.000636387
[9000]	valid_set's rmse: 0.000636317
[10000]	valid_set's rmse: 0.000636287


	-0.0006	 = Validation score   (-root_mean_squared_error)
	636.34s	 = Training   runtime
	11.6s	 = Validation runtime
Fitting model: NeuralNetFastAI_r191_BAG_L1 ... Training model for up to 61.17s of the 2759.93s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	Ran out of time, stopping training early. (Stopping on epoch 9)
	Ran out of time, stopping training early. (Stopping on epoch 9)
	Ran out of time, stopping training early. (Stopping on epoch 10)
	Ran out of time, stopping training early. (Stopping on epoch 11)
	Ran out of time, stopping training early. (Stopping on epoch 11)
	Ran out of time, stopping training early. (Stopping on epoch 12)
	Ran out of time, stopping training early. (Stopping on epoch 14)
	Ran out of time, stopping training early. (Stopping on epoch 17)
	-0.0014	 = Validation score   (-root_mean_squared_error)
	57.97s	 = Training   runtime
	0.44s	 = Validation runtime
Fitting 

[1000]	valid_set's rmse: 0.000534241
[2000]	valid_set's rmse: 0.000529119
[3000]	valid_set's rmse: 0.000527798
[4000]	valid_set's rmse: 0.000527257
[5000]	valid_set's rmse: 0.000527079
[6000]	valid_set's rmse: 0.000527058
[7000]	valid_set's rmse: 0.000527046
[8000]	valid_set's rmse: 0.000527031
[1000]	valid_set's rmse: 0.000475558
[2000]	valid_set's rmse: 0.000469865
[3000]	valid_set's rmse: 0.000468573
[4000]	valid_set's rmse: 0.000467794
[5000]	valid_set's rmse: 0.000467464
[6000]	valid_set's rmse: 0.000467362
[7000]	valid_set's rmse: 0.000467326
[8000]	valid_set's rmse: 0.000467312
[9000]	valid_set's rmse: 0.000467326
[10000]	valid_set's rmse: 0.000467336
[1000]	valid_set's rmse: 0.000481809
[2000]	valid_set's rmse: 0.000477485
[3000]	valid_set's rmse: 0.000475858
[4000]	valid_set's rmse: 0.000475047
[5000]	valid_set's rmse: 0.000474592
[6000]	valid_set's rmse: 0.000474415
[7000]	valid_set's rmse: 0.000474216
[8000]	valid_set's rmse: 0.000474113
[9000]	valid_set's rmse: 0.000474053


	-0.0005	 = Validation score   (-root_mean_squared_error)
	481.92s	 = Training   runtime
	8.09s	 = Validation runtime
Fitting model: LightGBM_BAG_L2 ... Training model for up to 2201.47s of the 2201.43s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	40.78s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L2 ... Training model for up to 2160.44s of the 2160.39s of remaining time.
	-0.0001	 = Validation score   (-root_mean_squared_error)
	78.44s	 = Training   runtime
	0.71s	 = Validation runtime
Fitting model: CatBoost_BAG_L2 ... Training model for up to 2080.50s of the 2080.46s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	Ran out of time, early stopping on iteration 8789.
	Ran out of time, early stopping on iterati

[1000]	valid_set's rmse: 0.00105389
[2000]	valid_set's rmse: 0.00102728
[3000]	valid_set's rmse: 0.0010212
[4000]	valid_set's rmse: 0.00101821
[5000]	valid_set's rmse: 0.00101646
[6000]	valid_set's rmse: 0.00101543
[7000]	valid_set's rmse: 0.00101484
[8000]	valid_set's rmse: 0.00101432
[9000]	valid_set's rmse: 0.00101393
[10000]	valid_set's rmse: 0.00101365
[1000]	valid_set's rmse: 0.00107752
[2000]	valid_set's rmse: 0.00105513
[3000]	valid_set's rmse: 0.00105038
[4000]	valid_set's rmse: 0.00104793
[5000]	valid_set's rmse: 0.00104684
[6000]	valid_set's rmse: 0.0010463
[7000]	valid_set's rmse: 0.00104578
[8000]	valid_set's rmse: 0.00104538
[9000]	valid_set's rmse: 0.00104509
[10000]	valid_set's rmse: 0.00104488
[1000]	valid_set's rmse: 0.000875609
[2000]	valid_set's rmse: 0.000852668
[3000]	valid_set's rmse: 0.000846628
[4000]	valid_set's rmse: 0.000843965
[5000]	valid_set's rmse: 0.000842528
[6000]	valid_set's rmse: 0.000841874
[7000]	valid_set's rmse: 0.000841513
[8000]	valid_set's rm

	-0.001	 = Validation score   (-root_mean_squared_error)
	473.36s	 = Training   runtime
	9.34s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ... Training model for up to 15672.71s of the 23760.37s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	35.62s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 ... Training model for up to 15636.87s of the 23724.53s of remaining time.
	-0.0001	 = Validation score   (-root_mean_squared_error)
	75.02s	 = Training   runtime
	0.74s	 = Validation runtime
Fitting model: CatBoost_BAG_L1 ... Training model for up to 15560.17s of the 23647.83s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0003	 = Validation score   (-root_mean_squared_error)
	1113.83s	 = Training   runtime


[1000]	valid_set's rmse: 0.000645287
[2000]	valid_set's rmse: 0.000645161
[3000]	valid_set's rmse: 0.000645145
[4000]	valid_set's rmse: 0.000645143
[5000]	valid_set's rmse: 0.000645143
[6000]	valid_set's rmse: 0.000645143
[1000]	valid_set's rmse: 0.000768768
[2000]	valid_set's rmse: 0.000768538
[3000]	valid_set's rmse: 0.0007685
[4000]	valid_set's rmse: 0.000768495
[5000]	valid_set's rmse: 0.000768494
[6000]	valid_set's rmse: 0.000768493
[7000]	valid_set's rmse: 0.000768493
[8000]	valid_set's rmse: 0.000768493
[9000]	valid_set's rmse: 0.000768493
[10000]	valid_set's rmse: 0.000768493
[1000]	valid_set's rmse: 0.000696284
[2000]	valid_set's rmse: 0.000696215
[1000]	valid_set's rmse: 0.000665043
[2000]	valid_set's rmse: 0.000664987
[3000]	valid_set's rmse: 0.000664978
[4000]	valid_set's rmse: 0.000664976
[5000]	valid_set's rmse: 0.000664976
[1000]	valid_set's rmse: 0.000795718
[2000]	valid_set's rmse: 0.000795527
[3000]	valid_set's rmse: 0.000795518
[1000]	valid_set's rmse: 0.000665297
[1

	-0.0007	 = Validation score   (-root_mean_squared_error)
	992.1s	 = Training   runtime
	5.91s	 = Validation runtime
Fitting model: CatBoost_r177_BAG_L1 ... Training model for up to 13076.95s of the 21164.61s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0003	 = Validation score   (-root_mean_squared_error)
	1139.95s	 = Training   runtime
	0.11s	 = Validation runtime
Fitting model: NeuralNetTorch_r79_BAG_L1 ... Training model for up to 11936.57s of the 20024.23s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0005	 = Validation score   (-root_mean_squared_error)
	563.08s	 = Training   runtime
	0.28s	 = Validation runtime
Fitting model: LightGBM_r131_BAG_L1 ... Training model for up to 11373.06s of the 19460.72s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFi

[1000]	valid_set's rmse: 0.000576831
[2000]	valid_set's rmse: 0.000563191
[3000]	valid_set's rmse: 0.000561064
[4000]	valid_set's rmse: 0.000560126
[5000]	valid_set's rmse: 0.000559803
[6000]	valid_set's rmse: 0.000559655
[7000]	valid_set's rmse: 0.000559632
[8000]	valid_set's rmse: 0.000559568
[9000]	valid_set's rmse: 0.000559547
[10000]	valid_set's rmse: 0.000559532
[1000]	valid_set's rmse: 0.000665502
[2000]	valid_set's rmse: 0.000652629
[3000]	valid_set's rmse: 0.000650605
[4000]	valid_set's rmse: 0.000649836
[5000]	valid_set's rmse: 0.000649625
[6000]	valid_set's rmse: 0.00064942
[7000]	valid_set's rmse: 0.000649224
[8000]	valid_set's rmse: 0.000649108
[9000]	valid_set's rmse: 0.000649057
[10000]	valid_set's rmse: 0.000649079
[1000]	valid_set's rmse: 0.000569274
[2000]	valid_set's rmse: 0.000560051
[3000]	valid_set's rmse: 0.000557969
[4000]	valid_set's rmse: 0.000557062
[5000]	valid_set's rmse: 0.000556766
[6000]	valid_set's rmse: 0.000556548
[7000]	valid_set's rmse: 0.000556424


	-0.0006	 = Validation score   (-root_mean_squared_error)
	740.22s	 = Training   runtime
	16.63s	 = Validation runtime
Fitting model: NeuralNetFastAI_r191_BAG_L1 ... Training model for up to 10603.64s of the 18691.30s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	185.5s	 = Training   runtime
	0.45s	 = Validation runtime
Fitting model: CatBoost_r9_BAG_L1 ... Training model for up to 10417.43s of the 18505.09s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0005	 = Validation score   (-root_mean_squared_error)
	4937.73s	 = Training   runtime
	0.69s	 = Validation runtime
Fitting model: LightGBM_r96_BAG_L1 ... Training model for up to 5477.98s of the 13565.64s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFit

[1000]	valid_set's rmse: 0.00137346
[2000]	valid_set's rmse: 0.0010312
[3000]	valid_set's rmse: 0.000917925
[4000]	valid_set's rmse: 0.000849801
[5000]	valid_set's rmse: 0.000813845
[6000]	valid_set's rmse: 0.00079453
[7000]	valid_set's rmse: 0.000780129
[8000]	valid_set's rmse: 0.000768153
[9000]	valid_set's rmse: 0.000760125
[10000]	valid_set's rmse: 0.000752406
[1000]	valid_set's rmse: 0.00147735
[2000]	valid_set's rmse: 0.00109706
[3000]	valid_set's rmse: 0.000969694
[4000]	valid_set's rmse: 0.000904562
[5000]	valid_set's rmse: 0.000872266
[6000]	valid_set's rmse: 0.00085528
[7000]	valid_set's rmse: 0.000838242
[8000]	valid_set's rmse: 0.000826883
[9000]	valid_set's rmse: 0.000816142
[10000]	valid_set's rmse: 0.000809636
[1000]	valid_set's rmse: 0.00132206
[2000]	valid_set's rmse: 0.000987765
[3000]	valid_set's rmse: 0.000863283
[4000]	valid_set's rmse: 0.000798426
[5000]	valid_set's rmse: 0.000765419
[6000]	valid_set's rmse: 0.000738716
[7000]	valid_set's rmse: 0.000724714
[8000]	

	-0.0008	 = Validation score   (-root_mean_squared_error)
	212.36s	 = Training   runtime
	7.68s	 = Validation runtime
Fitting model: NeuralNetTorch_r22_BAG_L1 ... Training model for up to 5253.42s of the 13341.08s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0003	 = Validation score   (-root_mean_squared_error)
	1049.77s	 = Training   runtime
	0.32s	 = Validation runtime
Fitting model: XGBoost_r33_BAG_L1 ... Training model for up to 4203.15s of the 12290.81s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0009	 = Validation score   (-root_mean_squared_error)
	132.81s	 = Training   runtime
	0.25s	 = Validation runtime
Fitting model: ExtraTrees_r42_BAG_L1 ... Training model for up to 4069.84s of the 12157.50s of remaining time.
	-0.0002	 = Validation score   (-root_mean_squared_error)
	15.49s	 = Training

[1000]	valid_set's rmse: 0.000535781
[2000]	valid_set's rmse: 0.000532136
[3000]	valid_set's rmse: 0.000531291
[4000]	valid_set's rmse: 0.000531151
[5000]	valid_set's rmse: 0.000530992
[6000]	valid_set's rmse: 0.00053092
[7000]	valid_set's rmse: 0.000530884
[8000]	valid_set's rmse: 0.00053092
[9000]	valid_set's rmse: 0.000530913
[1000]	valid_set's rmse: 0.00045449
[2000]	valid_set's rmse: 0.000450992
[3000]	valid_set's rmse: 0.000450369
[4000]	valid_set's rmse: 0.000449992
[5000]	valid_set's rmse: 0.000449806
[6000]	valid_set's rmse: 0.000449634
[7000]	valid_set's rmse: 0.000449558
[8000]	valid_set's rmse: 0.000449528
[9000]	valid_set's rmse: 0.000449475
[10000]	valid_set's rmse: 0.000449466
[1000]	valid_set's rmse: 0.000381942
[2000]	valid_set's rmse: 0.000379991
[3000]	valid_set's rmse: 0.000379378
[4000]	valid_set's rmse: 0.000379321
[1000]	valid_set's rmse: 0.000439775
[2000]	valid_set's rmse: 0.000437572
[3000]	valid_set's rmse: 0.000437384
[4000]	valid_set's rmse: 0.00043729
[100

	-0.0004	 = Validation score   (-root_mean_squared_error)
	413.31s	 = Training   runtime
	6.26s	 = Validation runtime
Fitting model: LightGBM_BAG_L2 ... Training model for up to 7661.97s of the 7661.91s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0001	 = Validation score   (-root_mean_squared_error)
	42.98s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L2 ... Training model for up to 7618.70s of the 7618.64s of remaining time.
	-0.0	 = Validation score   (-root_mean_squared_error)
	95.52s	 = Training   runtime
	0.8s	 = Validation runtime
Fitting model: CatBoost_BAG_L2 ... Training model for up to 7521.16s of the 7521.10s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	2146.38s	 = Training   runtime
	0.17s	 =

[1000]	valid_set's rmse: 0.000151462
[1000]	valid_set's rmse: 0.000147744
[1000]	valid_set's rmse: 0.000136658
[1000]	valid_set's rmse: 0.000141713
[1000]	valid_set's rmse: 0.000138661


	-0.0001	 = Validation score   (-root_mean_squared_error)
	125.44s	 = Training   runtime
	0.64s	 = Validation runtime
Fitting model: NeuralNetFastAI_r191_BAG_L2 ... Training model for up to 1667.17s of the 1667.11s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	-0.0002	 = Validation score   (-root_mean_squared_error)
	200.11s	 = Training   runtime
	0.49s	 = Validation runtime
Fitting model: CatBoost_r9_BAG_L2 ... Training model for up to 1466.29s of the 1466.23s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=2, gpus=0)
	Ran out of time, early stopping on iteration 1109.
	Ran out of time, early stopping on iteration 1217.
	Ran out of time, early stopping on iteration 1207.
	Ran out of time, early stopping on iteration 1229.
	Ran out of time, early stopping on iteration 1332.
	Ran out of time, early stopping on iteration 1

[1000]	valid_set's rmse: 0.00044045


	Ran out of time, early stopping on iteration 1066. Best iteration is:
	[1066]	valid_set's rmse: 0.000433709


[1000]	valid_set's rmse: 0.000406771
[2000]	valid_set's rmse: 0.000345027


	Ran out of time, early stopping on iteration 2179. Best iteration is:
	[2176]	valid_set's rmse: 0.000341156


[1000]	valid_set's rmse: 0.000387688
[2000]	valid_set's rmse: 0.000342695


	Ran out of time, early stopping on iteration 2204. Best iteration is:
	[2204]	valid_set's rmse: 0.000337989


[1000]	valid_set's rmse: 0.000350881
[2000]	valid_set's rmse: 0.000294182


	Ran out of time, early stopping on iteration 2338. Best iteration is:
	[2338]	valid_set's rmse: 0.000286803


[1000]	valid_set's rmse: 0.00037206
[2000]	valid_set's rmse: 0.000307416


	Ran out of time, early stopping on iteration 2381. Best iteration is:
	[2381]	valid_set's rmse: 0.000298194


[1000]	valid_set's rmse: 0.000358782
[2000]	valid_set's rmse: 0.000320152


	Ran out of time, early stopping on iteration 2544. Best iteration is:
	[2544]	valid_set's rmse: 0.000313782


[1000]	valid_set's rmse: 0.000309726
[2000]	valid_set's rmse: 0.000275264


	Ran out of time, early stopping on iteration 2685. Best iteration is:
	[2681]	valid_set's rmse: 0.000269315


[1000]	valid_set's rmse: 0.000358582
[2000]	valid_set's rmse: 0.000314745
[3000]	valid_set's rmse: 0.000304209


	Ran out of time, early stopping on iteration 3216. Best iteration is:
	[3208]	valid_set's rmse: 0.000302819
	-0.0003	 = Validation score   (-root_mean_squared_error)
	55.29s	 = Training   runtime
	1.44s	 = Validation runtime
Fitting model: WeightedEnsemble_L3 ... Training model for up to 808.71s of the 1.36s of remaining time.
	Ensemble Weights: {'ExtraTreesMSE_BAG_L2': 0.522, 'ExtraTreesMSE_BAG_L1': 0.304, 'RandomForestMSE_BAG_L2': 0.087, 'RandomForestMSE_BAG_L1': 0.043, 'NeuralNetFastAI_r191_BAG_L1': 0.043}
	-0.0	 = Validation score   (-root_mean_squared_error)
	0.05s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 24250.13s ... Best model: WeightedEnsemble_L3 | Estimated inference throughput: 43.6 rows/s (1131 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/kaggle/working/AutogluonModels")


[training] Trained. Models saved to: /kaggle/working/AutogluonModels


In [8]:
# =========================
# OPTIONAL: Tune tau/alpha using organizer metric on a holdout
# (Only when training + approach == metric_tune)
# =========================
if notebook_mode == "training" and approach == "metric_tune":
    n = len(train)
    cut = int(n * (1.0 - HOLDOUT_FRAC))
    if cut <= 0 or cut >= n:
        raise ValueError("Bad HOLDOUT_FRAC; leads to empty train or empty holdout.")

    train_tr = train.iloc[:cut].copy()
    train_va = train.iloc[cut:].copy()

    # Refit quickly on the train_tr subset? (optional)
    # For simplicity, we keep the trained predictor and just tune postprocess on the holdout portion.
    # If you want strict separation, train predictor on train_tr from the start.

    # Build solution df for scorer (must contain forward_returns and risk_free_rate)
    # Note: train_va still has original forward_returns and risk_free_rate because we kept them
    solution = train_va[NEEDED_FOR_SCORER].copy()

    # Build X_va: drop label column only; keep other columns
    X_va = train_va.drop(columns=[predictor.label], errors="ignore")

    raw = predictor.predict(X_va).to_numpy()

    taus = np.logspace(-7, -3, 25)
    alphas = np.linspace(0.05, 2.0, 40)

    best_score = -np.inf
    best_tau = TAU_ABS_FOR_SCORER
    best_alpha = ALPHA_FOR_SCORER

    # Make a fresh copy each loop because organizer_score mutates solution
    for tau in taus:
        for alpha in alphas:
            pos = post_process_signal(raw, tau=tau, alpha=alpha)
            sub = pd.DataFrame({"prediction": pos})
            try:
                s = organizer_score(solution.copy(), sub, row_id_column_name="row_id")
            except ParticipantVisibleError:
                continue
            if s > best_score:
                best_score = s
                best_tau = float(tau)
                best_alpha = float(alpha)

    TAU_ABS_FOR_SCORER = best_tau
    ALPHA_FOR_SCORER = best_alpha

    print(f"[metric_tune] Best holdout organizer metric: {best_score:.6f}")
    print(f"[metric_tune] Using tau={TAU_ABS_FOR_SCORER:.6g}, alpha={ALPHA_FOR_SCORER:.6g}")



In [9]:
# =========================
# PREDICT FUNCTION FOR KAGGLE EVAL SERVER
# =========================
def predict(test: pl.DataFrame) -> float:
    """Return a single post-processed position for a single-row Polars DataFrame."""
    if not isinstance(test, pl.DataFrame):
        raise TypeError("predict(test): expected a Polars DataFrame input")
    if test.height != 1:
        raise ValueError(f"predict(test): expected a single-row Polars DataFrame, got {test.height} rows")

    # Drop always-drop columns if present
    drop_cols = [c for c in DROP_ALWAYS if c in test.columns]
    test_pl = test.drop(drop_cols) if drop_cols else test

    # Ensure label is not present
    if predictor.label in test_pl.columns:
        test_pl = test_pl.drop(predictor.label)

    # Polars -> Pandas
    test_pd = test_pl.to_pandas()

    # Align columns to model features (drops extras, fills missing with 0)
    test_pd = test_pd.reindex(columns=MODEL_FEATURES, fill_value=0)

    raw = predictor.predict(test_pd)
    pos = post_process_signal(
        raw,
        tau=TAU_ABS_FOR_SCORER,
        alpha=ALPHA_FOR_SCORER,
        min_investment=MIN_INVESTMENT,
        max_investment=MAX_INVESTMENT,
    )
    return float(np.asarray(pos).ravel()[0])



In [10]:
import kaggle_evaluation.default_inference_server as kis
import os

# ---------- KAGGLE SERVER BOOTSTRAP ----------
inference_server = kis.DefaultInferenceServer(predict)

if os.getenv("KAGGLE_IS_COMPETITION_RERUN"):
    inference_server.serve()
else:
    inference_server.run_local_gateway(('/kaggle/input/hull-tactical-market-prediction/',))