# AutoGluon inference

This notebook runs inference with a model that was trained with AutoGluon in a separate notebook: https://www.kaggle.com/code/dalloliogm/hull-tactical-autogluon-train-and-infer-tabular

This is a workaround for submitting a model that was trained for multiple hours, since otherwise the Kaggle interface fails.

In [1]:
# Offline installation: every package is installed from a Kaggle dataset of
# pre-downloaded wheels. `--no-index` tells pip not to use a package index and
# `--find-links` points it at the local wheel directory instead.
from pathlib import Path
WHEELS = Path("/kaggle/input/autogluon-1-4-0-offline")  # <- your dataset

# Step 1: install the explicitly versioned dependencies (torch stack, bitsandbytes,
# mlforecast, optuna) before AutoGluon itself.
!pip install --no-index --quiet --find-links="{WHEELS}" \
  "torch==2.5.1" "torchvision==0.20.1" "torchaudio==2.5.1" "bitsandbytes>=0.46.1" "mlforecast==0.14.0" "optuna==4.3.0"

# Step 2: install AutoGluon's tabular module from the same wheel directory.
# NOTE(review): no version is pinned here; whichever autogluon.tabular wheel is
# present in WHEELS gets installed.
!pip install --no-index --quiet --find-links="{WHEELS}" \
    "autogluon.tabular"

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
libcugraph-cu12 25.6.0 requires libraft-cu12==25.6.*, but you have libraft-cu12 25.2.0 which is incompatible.
pylibcugraph-cu12 25.6.0 requires pylibraft-cu12==25.6.*, but you have pylibraft-cu12 25.2.0 which is incompatible.
pylibcugraph-cu12 25.6.0 requires rmm-cu12==25.6.*, but you have rmm-cu12 25.2.0 which is incompatible.[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.5.2 which is incompatible.[0m[31m
[0m

In [2]:
# Execution mode for the whole notebook:
#   "inference" -> load a previously trained predictor from PRETRAINED_MODEL_DIR
#   "training"  -> read train.csv and fit a new predictor in this notebook
# Must be defined before the configuration cell below, which asserts on its value.
notebook_mode = "inference"


In [3]:
from pathlib import Path
import os
import numpy as np
import pandas as pd
import polars as pl
from autogluon.tabular import TabularPredictor
import autogluon

# =========================
# MODE SWITCH
# =========================
# `notebook_mode` is set manually in an earlier cell and must be either
# "training" or "inference"; any other value stops the notebook here.
assert notebook_mode in ("training", "inference")

# Directory of the pre-trained AutoGluon predictor (a Kaggle input).
# Read only when notebook_mode == "inference".
PRETRAINED_MODEL_DIR = Path(
    "/kaggle/input/hull-tactical-autogluon-train-and-infer-tabular/AutogluonModels/ag-20251212_181000/"
)

# Directory handed to TabularPredictor as `path` when notebook_mode == "training".
WORKING_MODEL_DIR = Path("/kaggle/working/AutogluonModels")

# =========================
# CONSTANTS + POSTPROCESS
# =========================
# Position size assigned whenever the raw prediction exceeds the threshold.
ALPHA_FOR_SCORER = 0.600132
# Threshold the raw prediction must strictly exceed to take a position.
TAU_ABS_FOR_SCORER = 9.43717e-05
# Bounds every returned position is clipped to.
MIN_INVESTMENT = 0.0
MAX_INVESTMENT = 2.0


def post_process_signal(y_pred,
                        *,
                        tau: float = TAU_ABS_FOR_SCORER,
                        alpha: float = ALPHA_FOR_SCORER,
                        min_investment: float = MIN_INVESTMENT,
                        max_investment: float = MAX_INVESTMENT):
    """Turn raw model predictions into bounded position sizes.

    Parameters
    ----------
    y_pred : array-like
        Raw predictions; converted to a flat float array.
    tau : float
        Threshold; a prediction must be strictly greater than it to take a position.
    alpha : float
        Position assigned to predictions above ``tau``; all others get 0.0.
    min_investment, max_investment : float
        Lower and upper bounds the positions are clipped to.

    Returns
    -------
    numpy.ndarray
        1-D float array with one position per input prediction.
    """
    flat_signal = np.ravel(np.asarray(y_pred, dtype=float))
    # NaN predictions compare False against tau, so they end up in the 0.0 branch.
    above_threshold = flat_signal > tau
    raw_position = np.where(above_threshold, alpha, 0.0)
    return raw_position.clip(min_investment, max_investment)

# =========================
# DATA
# =========================
# Competition data directory. Used as a plain string prefix (f"{DATA_PATH}train.csv"),
# so the trailing slash is required.
DATA_PATH = "/kaggle/input/hull-tactical-market-prediction/"
# Name of the label column the predictor is trained on.
TARGET = "forward_returns"

# Columns removed from both the training frame and each inference row when present.
# NOTE(review): presumably identifiers plus columns that are unavailable or would
# leak the target at inference time; confirm against the competition data description.
DROP_IF_EXISTS = ["row_id", "id", "risk_free_rate", "market_forward_excess_returns"]


In [4]:

# The training CSV is read only in training mode, which keeps inference runs lightweight.
# In inference mode `train` stays None.
train = None
if notebook_mode == "training":
    train = pd.read_csv(f"{DATA_PATH}train.csv")

    # Fail fast when the label column is missing from the file.
    if TARGET not in train.columns:
        raise ValueError(f"Expected target column '{TARGET}' in train.csv; found: {list(train.columns)}")

    # Keep every column except the known non-feature ones, preserving file order.
    use_cols = list(filter(lambda col: col not in DROP_IF_EXISTS, train.columns))
    train = train.loc[:, use_cols]



In [5]:
# =========================
# LOAD OR TRAIN PREDICTOR
# =========================
predictor = None

if notebook_mode != "inference":
    # Training mode: fit a new regression predictor on `train`.
    predictor = TabularPredictor(
        path=str(WORKING_MODEL_DIR),  # ensures models are written under /kaggle/working
        label=TARGET,
        problem_type="regression",
        eval_metric="rmse",
    )

    predictor.fit(
        train_data=train,
        presets="best_quality",
        time_limit=9 * 60 * 60,  # nine hours, expressed in seconds
    )

    print(f"[training] Trained. Models saved to: {WORKING_MODEL_DIR}")
    # If you want: zip /kaggle/working/AutogluonModels into a dataset later.

else:
    # Inference mode: reuse the predictor stored under PRETRAINED_MODEL_DIR.
    predictor = TabularPredictor.load(str(PRETRAINED_MODEL_DIR))
    print(f"[inference] Loaded predictor from: {PRETRAINED_MODEL_DIR}")

# Feature list taken from the predictor's metadata once, for both modes;
# predict() uses it to align incoming columns.
MODEL_FEATURES = predictor.feature_metadata.get_features()



[inference] Loaded predictor from: /kaggle/input/hull-tactical-autogluon-train-and-infer-tabular/AutogluonModels/ag-20251212_181000


In [6]:
# =========================
# KAGGLE PREDICT FN
# =========================
def predict(test: pl.DataFrame) -> float:
    """Compute the post-processed position for one incoming row.

    Parameters
    ----------
    test : pl.DataFrame
        Polars frame containing exactly one row.

    Returns
    -------
    float
        The position produced by ``post_process_signal`` for that row.

    Raises
    ------
    TypeError
        If ``test`` is not a Polars DataFrame.
    ValueError
        If ``test`` does not contain exactly one row.
    """
    if not isinstance(test, pl.DataFrame):
        raise TypeError("predict(test): expected a Polars DataFrame input")
    if test.height != 1:
        raise ValueError(f"predict(test): expected a single-row Polars DataFrame, got {test.height} rows")

    # Remove the known non-feature columns and, if it slipped in, the target itself.
    # dict.fromkeys de-duplicates while keeping order.
    unwanted = dict.fromkeys((*DROP_IF_EXISTS, TARGET))
    removable = [name for name in unwanted if name in test.columns]
    features_pl = test.drop(removable) if removable else test

    # AutoGluon is fed a pandas frame whose columns match the model's feature list:
    # extra columns are dropped and missing ones are created with value 0.
    features_pd = features_pl.to_pandas().reindex(columns=MODEL_FEATURES, fill_value=0)

    prediction = predictor.predict(features_pd)
    position = post_process_signal(prediction)
    return float(np.ravel(position)[0])



In [7]:
# =========================
# KAGGLE SERVER BOOTSTRAP
# =========================
import kaggle_evaluation.default_inference_server as kis

# Hand predict() to the evaluation server as its per-row callback.
inference_server = kis.DefaultInferenceServer(predict)

# Without KAGGLE_IS_COMPETITION_RERUN in the environment, exercise the server through
# the local gateway against DATA_PATH; with it, serve requests instead.
# NOTE(review): presumably Kaggle sets this variable during the scored rerun; confirm.
if not os.getenv("KAGGLE_IS_COMPETITION_RERUN"):
    inference_server.run_local_gateway((DATA_PATH,))
else:
    inference_server.serve()
