# 🧠 Hull Tactical Market Prediction — AutoGluon Baseline

This notebook builds a baseline model for the [**Hull Tactical Market Prediction**](https://www.kaggle.com/competitions/hull-tactical-market-prediction) competition using **AutoGluon Tabular**. The goal is to predict trading positions that maximize a Sharpe-like performance metric.  

## Overview
- **Task:** Predict next-period trading positions (long / flat) using engineered financial features.
- **Approach:** Train an AutoGluon model on historical data to predict *forward returns*, then post-process those predictions into positions for scoring and submission.
- **Metric:** Custom approximation of the competition's adjusted Sharpe ratio, which penalizes volatility and underperformance.
- **Post-processing:** A unified `post_process_signal()` function ensures parity between local validation and leaderboard logic by converting model predictions into bounded investment positions.

---

## Install AutoGluon

In [None]:
# Offline installation: `--no-index` stops pip from contacting a package index,
# so every requirement must be resolvable from the directory given to
# `--find-links`.
from pathlib import Path
WHEELS = Path("/kaggle/input/autogluon-1-4-0-offline")  # <- your dataset; used as the --find-links directory below

# Step 1: install the pinned dependency set from the local wheel directory.
!pip install --no-index --quiet --find-links="{WHEELS}" \
  "torch==2.5.1" "torchvision==0.20.1" "torchaudio==2.5.1" "bitsandbytes>=0.46.1" "mlforecast==0.14.0" "optuna==4.3.0"

# Step 2: install AutoGluon Tabular itself from the same directory.
!pip install --no-index --quiet --find-links="{WHEELS}" \
    "autogluon.tabular"

## Parameters and Config

In [None]:
# ============================================================
# Hull Tactical Kaggle — AutoGluon train/infer + organizer metric selection
# Copy/paste notebook cell(s)
# ============================================================

import os
from pathlib import Path
import numpy as np
import pandas as pd
import polars as pl

from autogluon.tabular import TabularPredictor

# -------------------------
# USER CONTROLS
# -------------------------
# notebook_mode:
#   "training"  -> fit model (and optionally tune postprocess on holdout), save under /kaggle/working
#   "inference" -> load model from Kaggle dataset input and only predict
notebook_mode = "training"
# Explicit raise instead of `assert`: assertions are stripped under `python -O`,
# and the message names the offending value.
if notebook_mode not in ("training", "inference"):
    raise ValueError(
        f"notebook_mode must be 'training' or 'inference', got {notebook_mode!r}"
    )

# approach:
#   "rmse_forward"  -> predict forward_returns (classic regression)
#   "rmse_excess"   -> predict excess returns: forward_returns - risk_free_rate (often aligns better with scorer)
#   "metric_tune"   -> still trains RMSE (on excess returns), but selects tau/alpha by maximizing
#                      organizer score on a holdout split
approach = "rmse_forward"
if approach not in ("rmse_forward", "rmse_excess", "metric_tune"):
    raise ValueError(
        "approach must be one of 'rmse_forward', 'rmse_excess', 'metric_tune', "
        f"got {approach!r}"
    )


In [None]:

# --- Filesystem locations ---------------------------------------------------
# Competition data directory. Kept as a plain string WITH a trailing slash
# because file names are appended to it directly.
DATA_PATH = "/kaggle/input/hull-tactical-market-prediction/"

# Output directory for predictors fitted inside this notebook.
WORKING_MODEL_DIR = Path("/kaggle/working") / "AutogluonModels"

# Input dataset that holds a previously trained AutoGluon predictor.
PRETRAINED_MODEL_DIR = (
    Path("/kaggle/input")
    / "hull-tactical-autogluon-train-and-infer-tabular"
    / "AutogluonModels"
)

# --- Training budget (adjust) -----------------------------------------------
AG_PRESET = "best_quality"
TIME_LIMIT_SECS = 60 * 60 * 0.2  # 0.2 h expressed in seconds (a float)

# --- Validation -------------------------------------------------------------
# Fraction of rows (taken from the end of the frame) held out for metric_tune.
HOLDOUT_FRAC = 0.2

# --- Post-processing defaults (metric tuning may overwrite tau / alpha) -----
MIN_INVESTMENT, MAX_INVESTMENT = 0.0, 2.0
TAU_ABS_FOR_SCORER = 9.43717e-05
ALPHA_FOR_SCORER = 0.600132




In [None]:
# =========================
# ORGANIZER SCORER (as provided)
# =========================
import pandas.api.types

class ParticipantVisibleError(Exception):
    """Raised by `organizer_score` when a submission is rejected or cannot be scored."""

def organizer_score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    """Score a set of positions with a volatility- and return-penalised Sharpe ratio.

    Args:
        solution: Frame with 'forward_returns' and 'risk_free_rate' columns.
            It is copied first, so the caller's frame is not modified.
        submission: Frame with a numeric 'prediction' column holding the
            position per row. The column is assigned as a Series, so pandas
            aligns it to ``solution`` by index label; rows of ``solution``
            whose label is absent from ``submission`` receive NaN. Callers
            must therefore pass frames that share the same index.
        row_id_column_name: Not used by the body.

    Returns:
        The adjusted Sharpe ratio, capped at 1_000_000.

    Raises:
        ParticipantVisibleError: If predictions are not numeric, a position
            lies outside [MIN_INVESTMENT, MAX_INVESTMENT], or the strategy
            std / market volatility is exactly zero.
    """
    if not pandas.api.types.is_numeric_dtype(submission['prediction']):
        raise ParticipantVisibleError('Predictions must be numeric')

    # Work on a copy so the added columns never leak back to the caller.
    solution = solution.copy()
    # Index-aligned assignment (see docstring).
    solution['position'] = submission['prediction']

    if solution['position'].max() > MAX_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].max()} exceeds maximum of {MAX_INVESTMENT}')
    if solution['position'].min() < MIN_INVESTMENT:
        raise ParticipantVisibleError(f'Position of {solution["position"].min()} below minimum of {MIN_INVESTMENT}')

    # Blend of the risk-free rate (weight 1 - position) and the market return (weight position).
    solution['strategy_returns'] = solution['risk_free_rate'] * (1 - solution['position']) + solution['position'] * solution['forward_returns']

    # Calculate strategy's Sharpe ratio
    strategy_excess_returns = solution['strategy_returns'] - solution['risk_free_rate']
    strategy_excess_cumulative = (1 + strategy_excess_returns).prod()
    # Geometric mean of the per-row excess returns.
    strategy_mean_excess_return = (strategy_excess_cumulative) ** (1 / len(solution)) - 1
    # Note: the std is taken over the raw strategy returns, not the excess returns.
    strategy_std = solution['strategy_returns'].std()

    trading_days_per_yr = 252
    if strategy_std == 0:
        raise ParticipantVisibleError('Division by zero, strategy std is zero')
    sharpe = strategy_mean_excess_return / strategy_std * np.sqrt(trading_days_per_yr)
    # Annualised volatility expressed in percent.
    strategy_volatility = float(strategy_std * np.sqrt(trading_days_per_yr) * 100)

    # Calculate market return and volatility
    market_excess_returns = solution['forward_returns'] - solution['risk_free_rate']
    market_excess_cumulative = (1 + market_excess_returns).prod()
    market_mean_excess_return = (market_excess_cumulative) ** (1 / len(solution)) - 1
    market_std = solution['forward_returns'].std()

    market_volatility = float(market_std * np.sqrt(trading_days_per_yr) * 100)

    if market_volatility == 0:
        raise ParticipantVisibleError('Division by zero, market std is zero')

    # Calculate the volatility penalty
    # Penalise only the part of the strategy/market volatility ratio above 1.2.
    # The `> 0` guard is redundant for zero (handled above); it also yields 0 when
    # market_volatility is NaN, since NaN > 0 is False.
    excess_vol = max(0, strategy_volatility / market_volatility - 1.2) if market_volatility > 0 else 0
    vol_penalty = 1 + excess_vol

    # Calculate the return penalty
    # Shortfall versus the market, scaled by 100 * trading_days_per_yr; zero when
    # the strategy's mean excess return is at least the market's.
    return_gap = max(
        0,
        (market_mean_excess_return - strategy_mean_excess_return) * 100 * trading_days_per_yr,
    )
    # The penalty grows quadratically with the shortfall.
    return_penalty = 1 + (return_gap**2) / 100

    adjusted_sharpe = sharpe / (vol_penalty * return_penalty)
    return min(float(adjusted_sharpe), 1_000_000)


In [None]:

# =========================
# POST-PROCESS: raw prediction -> position in [0,2]
# =========================
def post_process_signal(
    y_pred,
    *,
    tau: float = TAU_ABS_FOR_SCORER,
    alpha: float = ALPHA_FOR_SCORER,
    min_investment: float = MIN_INVESTMENT,
    max_investment: float = MAX_INVESTMENT,
):
    """Turn raw model predictions into bounded positions.

    Each prediction strictly greater than ``tau`` maps to ``alpha``; every
    other prediction (including NaN, because the comparison is False) maps
    to 0.0. The result is then clipped to
    ``[min_investment, max_investment]``.

    The keyword defaults are bound once, when the function is defined;
    reassigning the module-level constants afterwards does not change them,
    so pass tuned values explicitly.

    Args:
        y_pred: Scalar or array-like of raw predictions; flattened to 1-D.
        tau: Threshold a prediction must exceed to take a position.
        alpha: Position size used when the threshold is exceeded.
        min_investment: Lower clip bound.
        max_investment: Upper clip bound.

    Returns:
        1-D float ``numpy.ndarray`` of positions, one per prediction.
    """
    signal = np.ravel(np.asarray(y_pred, dtype=float))
    goes_long = signal > tau
    position = np.where(goes_long, alpha, 0.0)
    return np.clip(position, min_investment, max_investment)

# =========================
# COLUMNS
# =========================
# Columns that must stay available for the scorer / sanity checks.
NEEDED_FOR_SCORER = [
    "risk_free_rate",
    "forward_returns",
]

# Non-feature columns removed at inference and (optionally) at training time.
# NOTE: risk_free_rate is intentionally NOT listed here for rmse_excess /
# metric_tune (it can be a useful feature).
DROP_ALWAYS = [
    "row_id",
    "id",
    "market_forward_excess_returns",
]



In [None]:
# =========================
# LOAD TRAIN (only if training)
# =========================
# `train` and `target_col` stay None in inference mode; both are only
# populated when the notebook fits a model itself.
train = None
target_col = None

if notebook_mode == "training":
    train = pd.read_csv(f"{DATA_PATH}train.csv")

    # Basic checks first: the scorer columns are also the inputs of the derived
    # target below, so validate them BEFORE they are used. Otherwise a missing
    # column surfaces as a raw KeyError instead of this explicit message.
    for c in NEEDED_FOR_SCORER:
        if c not in train.columns:
            raise ValueError(f"Expected '{c}' in train.csv but not found")

    # Choose target based on approach
    if approach == "rmse_forward":
        target_col = "forward_returns"
    else:
        # rmse_excess or metric_tune: regress on the return in excess of the risk-free rate
        train["excess_forward_returns"] = train["forward_returns"] - train["risk_free_rate"]
        target_col = "excess_forward_returns"

    # Sanity check that the chosen label really exists in the frame.
    if target_col not in train.columns:
        raise ValueError(f"Expected target '{target_col}' in train.csv but not found")

    # Build training frame: drop obvious IDs/leaks; keep risk_free_rate.
    # forward_returns and risk_free_rate remain in the frame because the
    # holdout scorer reads them from it.
    use_cols = [c for c in train.columns if c not in DROP_ALWAYS]
    train = train[use_cols].copy()



In [None]:
# =========================
# TRAIN OR LOAD PREDICTOR
# =========================
predictor = None

if notebook_mode == "inference":
    # Reuse a predictor that was fitted earlier and shipped as an input dataset.
    predictor = TabularPredictor.load(str(PRETRAINED_MODEL_DIR))
    print(f"[inference] Loaded predictor from: {PRETRAINED_MODEL_DIR}")

else:
    # Target-leak guard. `train` keeps `forward_returns` so the scorer can use
    # it, but when the label is `excess_forward_returns`
    # (= forward_returns - risk_free_rate) leaving `forward_returns` among the
    # features lets the model reconstruct the label almost exactly. At inference
    # that column is absent and gets filled with 0, so predictions would be
    # meaningless. Remove every return-derived column that is not the label.
    leak_cols = [
        c
        for c in ("forward_returns", "excess_forward_returns")
        if c != target_col and c in train.columns
    ]
    fit_data = train.drop(columns=leak_cols)
    # NOTE(review): risk_free_rate is still a feature here. Confirm it is
    # available at inference time; if it is missing from the test frame,
    # predict() fills it with 0.

    predictor = TabularPredictor(
        label=target_col,
        eval_metric="rmse",
        problem_type="regression",
        path=str(WORKING_MODEL_DIR),
    )

    predictor.fit(
        train_data=fit_data,
        presets=AG_PRESET,
        time_limit=TIME_LIMIT_SECS,
    )

    print(f"[training] Trained. Models saved to: {WORKING_MODEL_DIR}")

# Cache model feature list (works in both modes); predict() aligns incoming
# rows to exactly these columns.
MODEL_FEATURES = predictor.feature_metadata.get_features()



In [None]:
# =========================
# OPTIONAL: Tune tau/alpha using organizer metric on a holdout
# (Only when training + approach == metric_tune)
# =========================
if notebook_mode == "training" and approach == "metric_tune":
    n = len(train)
    cut = int(n * (1.0 - HOLDOUT_FRAC))
    if cut <= 0 or cut >= n:
        raise ValueError("Bad HOLDOUT_FRAC; leads to empty train or empty holdout.")

    # Split by row order: the last HOLDOUT_FRAC of the rows form the holdout.
    train_tr = train.iloc[:cut].copy()
    train_va = train.iloc[cut:].copy()

    # Refit quickly on the train_tr subset? (optional)
    # For simplicity, we keep the trained predictor and just tune postprocess on the holdout portion.
    # If you want strict separation, train predictor on train_tr from the start.

    # Build solution df for scorer (must contain forward_returns and risk_free_rate).
    # The index MUST be reset: organizer_score assigns submission['prediction']
    # to the solution by index label, and `sub` below carries a fresh 0..m-1
    # index. With the original labels (cut..n-1) nothing lines up, every
    # position becomes NaN, every score is NaN and no (tau, alpha) pair is
    # ever selected.
    solution = train_va[NEEDED_FOR_SCORER].reset_index(drop=True)

    # Build X_va: drop label column only; keep other columns
    X_va = train_va.drop(columns=[predictor.label], errors="ignore")

    raw = predictor.predict(X_va).to_numpy()

    # Search grid: log-spaced thresholds and evenly spaced position sizes.
    taus = np.logspace(-7, -3, 25)
    alphas = np.linspace(0.05, 2.0, 40)

    best_score = -np.inf
    best_tau = TAU_ABS_FOR_SCORER
    best_alpha = ALPHA_FOR_SCORER

    # organizer_score copies `solution` internally, so the same frame can be
    # reused for every grid point.
    for tau in taus:
        for alpha in alphas:
            pos = post_process_signal(raw, tau=tau, alpha=alpha)
            sub = pd.DataFrame({"prediction": pos})
            try:
                s = organizer_score(solution, sub, row_id_column_name="row_id")
            except ParticipantVisibleError:
                # Unscorable combination (e.g. constant positions -> zero std); skip it.
                continue
            # A NaN score compares False here and is therefore never selected.
            if s > best_score:
                best_score = s
                best_tau = float(tau)
                best_alpha = float(alpha)

    if not np.isfinite(best_score):
        # Nothing on the grid produced a usable score; the defaults are kept.
        print("[metric_tune] WARNING: no grid point produced a finite score; keeping default tau/alpha")

    TAU_ABS_FOR_SCORER = best_tau
    ALPHA_FOR_SCORER = best_alpha

    print(f"[metric_tune] Best holdout organizer metric: {best_score:.6f}")
    print(f"[metric_tune] Using tau={TAU_ABS_FOR_SCORER:.6g}, alpha={ALPHA_FOR_SCORER:.6g}")



In [None]:
# =========================
# PREDICT FUNCTION FOR KAGGLE EVAL SERVER
# =========================
def predict(test: pl.DataFrame) -> float:
    """Return a single post-processed position for a single-row Polars DataFrame.

    Args:
        test: Polars frame containing exactly one row.

    Returns:
        The position for that row as a plain Python float.

    Raises:
        TypeError: If ``test`` is not a Polars DataFrame.
        ValueError: If ``test`` does not contain exactly one row.
    """
    if not isinstance(test, pl.DataFrame):
        raise TypeError("predict(test): expected a Polars DataFrame input")
    n_rows = test.height
    if n_rows != 1:
        raise ValueError(f"predict(test): expected a single-row Polars DataFrame, got {n_rows} rows")

    # Remove the always-dropped ID / leak columns that are actually present.
    unwanted = [name for name in DROP_ALWAYS if name in test.columns]
    features_pl = test
    if unwanted:
        features_pl = features_pl.drop(unwanted)

    # The label must never be fed to the model as an input.
    if predictor.label in features_pl.columns:
        features_pl = features_pl.drop(predictor.label)

    # Convert to pandas, then align to the model's feature list: extra columns
    # are discarded and missing ones are created and filled with 0.
    features_pd = features_pl.to_pandas().reindex(columns=MODEL_FEATURES, fill_value=0)

    positions = post_process_signal(
        predictor.predict(features_pd),
        tau=TAU_ABS_FOR_SCORER,
        alpha=ALPHA_FOR_SCORER,
        min_investment=MIN_INVESTMENT,
        max_investment=MAX_INVESTMENT,
    )
    first = np.asarray(positions).ravel()[0]
    return float(first)



In [None]:
import kaggle_evaluation.default_inference_server as kis
import os

# ---------- KAGGLE SERVER BOOTSTRAP ----------
# Wrap `predict` in the competition's inference server.
inference_server = kis.DefaultInferenceServer(predict)

# KAGGLE_IS_COMPETITION_RERUN unset or empty -> run the local gateway against
# the competition data directory; otherwise start serving.
if not os.getenv("KAGGLE_IS_COMPETITION_RERUN"):
    inference_server.run_local_gateway(('/kaggle/input/hull-tactical-market-prediction/',))
else:
    inference_server.serve()