In [None]:
!pip -q install -U "autogluon==1.5.0" "scikit-learn>=1.3" "pandas>=2.0" "numpy>=1.24"

import os, time, json, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, log_loss, accuracy_score, classification_report, confusion_matrix

from autogluon.tabular import TabularPredictor

In [None]:
from sklearn.datasets import fetch_openml

# Pull OpenML dataset 40945 as a DataFrame; "survived" is the label column.
target = "survived"
df = fetch_openml(data_id=40945, as_frame=True).frame

# Only schedule a column for removal when the fetched frame actually has it.
drop_cols = [c for c in ("boat", "body", "home.dest") if c in df.columns]

# One chained pass: integer label, unwanted columns removed, None -> NaN.
df = (
    df.assign(**{target: df[target].astype(int)})
    .drop(columns=drop_cols, errors="ignore")
    .replace({None: np.nan})
)
print("Shape:", df.shape)
print("Target positive rate:", df[target].mean().round(4))
print("Columns:", list(df.columns))

# 80/20 hold-out split, stratified on the label, fixed seed.
train_df, test_df = train_test_split(
    df, test_size=0.2, stratify=df[target], random_state=42
)

In [None]:
def has_gpu():
    """Report whether PyTorch can see a CUDA device.

    Returns:
        The value of ``torch.cuda.is_available()``; ``False`` when importing
        torch or querying CUDA raises any ``Exception``.
    """
    cuda_visible = False
    try:
        import torch

        cuda_visible = torch.cuda.is_available()
    except Exception:
        # Any failure (torch missing, broken install, ...) counts as "no GPU".
        pass
    return cuda_visible

# Request the "extreme" preset only when has_gpu() reports a CUDA device;
# otherwise fall back to "best_quality".
presets = "extreme" if has_gpu() else "best_quality"

# Output directory for the predictor. The default is the original Colab
# location; set AUTOGLUON_SAVE_PATH to run the notebook on a machine where
# /content does not exist or is not writable.
save_path = os.environ.get("AUTOGLUON_SAVE_PATH", "/content/autogluon_titanic_advanced")
os.makedirs(save_path, exist_ok=True)

# Predictor for the `target` label, scored with ROC-AUC, persisted under save_path.
predictor = TabularPredictor(
    label=target,
    eval_metric="roc_auc",
    path=save_path,
    verbosity=2
)

In [None]:
# All fit() arguments gathered in one place so the budget is easy to tweak.
fit_settings = dict(
    train_data=train_df,
    presets=presets,
    time_limit=7 * 60,  # 420
    num_bag_folds=5,
    num_stack_levels=2,
    refit_full=False,  # refit_full() is invoked explicitly later on
)

# Wall-clock timing around the whole fit() call.
start = time.time()
predictor.fit(**fit_settings)
train_time = time.time() - start
print(f"\nTraining done in {train_time:.1f}s with presets='{presets}'")

In [None]:
# Score every trained model on the held-out rows and show the best 15.
lb = predictor.leaderboard(test_df, silent=True)
print("\n=== Leaderboard (top 15) ===")
display(lb.head(15))

proba = predictor.predict_proba(test_df)
pred = predictor.predict(test_df)

y_true = test_df[target].values
# Prefer the probability column labelled 1 (the label was cast to int when
# the data was loaded); otherwise flatten whatever predict_proba returned.
if isinstance(proba, pd.DataFrame) and 1 in proba.columns:
    y_proba = proba[1].values
else:
    y_proba = np.asarray(proba).reshape(-1)

# Builtin round() works whether a metric returns a NumPy scalar or a plain
# Python float; the latter has no .round() method, so calling .round(5) on
# the metric result would raise AttributeError.
print("\n=== Test Metrics ===")
print("ROC-AUC:", round(float(roc_auc_score(y_true, y_proba)), 5))
# Probabilities are clipped away from 0 and 1 before computing the log loss.
print("LogLoss:", round(float(log_loss(y_true, np.clip(y_proba, 1e-6, 1 - 1e-6))), 5))
print("Accuracy:", round(float(accuracy_score(y_true, pred)), 5))
print("\nClassification report:\n", classification_report(y_true, pred))

In [None]:
# Per-class ROC-AUC: how well does the predictor rank passengers inside each pclass?
if "pclass" in test_df.columns:
    print("\n=== Slice AUC by pclass ===")
    for grp, part in test_df.groupby("pclass"):
        # roc_auc_score raises ValueError when y_true holds fewer than two
        # classes, so empty or single-class slices are reported instead of
        # aborting the whole cell.
        if part[target].nunique() < 2:
            print(f"pclass={grp}: AUC=n/a (n={len(part)}, fewer than two classes in slice)")
            continue
        part_proba = predictor.predict_proba(part)
        # Same positive-class extraction as for the full test set.
        if isinstance(part_proba, pd.DataFrame) and 1 in part_proba.columns:
            part_proba = part_proba[1].values
        else:
            part_proba = np.asarray(part_proba).reshape(-1)
        auc = roc_auc_score(part[target].values, part_proba)
        print(f"pclass={grp}: AUC={auc:.4f} (n={len(part)})")

# Feature importance computed on the held-out rows.
fi = predictor.feature_importance(test_df, silent=True)
print("\n=== Feature importance (top 20) ===")
display(fi.head(20))

In [None]:
# Refit the trained models and time the operation.
t0 = time.time()
refit_map = predictor.refit_full()
t_refit = time.time() - t0

print(f"\nrefit_full completed in {t_refit:.1f}s")
print("Refit mapping (sample):", dict(list(refit_map.items())[:5]))

lb_full = predictor.leaderboard(test_df, silent=True)
print("\n=== Leaderboard after refit_full (top 15) ===")
display(lb_full.head(15))

# `model_best` / `model_names()` are the AutoGluon 1.x accessors; the older
# get_model_best() / get_model_names() spellings are deprecated.
best_model = predictor.model_best
# NOTE(review): refit_full() defaults to set_best_to_refit_full=True, so the
# predictor's default model may already be a *_FULL clone at this point. Map
# it back to its parent through refit_map (assumed original -> refit name) so
# the latency benchmark compares the original model against the refit one
# rather than a model against itself. If no parent is found the name is kept.
_refit_parent = {refit_name: orig_name for orig_name, refit_name in refit_map.items()}
best_model = _refit_parent.get(best_model, best_model)
full_candidates = [m for m in predictor.model_names() if m.endswith("_FULL")]

def bench_infer(model_name, df_in, repeats=3):
    """Measure the median latency of ``predictor.predict`` for one model.

    Args:
        model_name: Name forwarded as ``model=`` to ``predictor.predict``.
        df_in: Input rows passed to ``predictor.predict``.
        repeats: Number of timed calls; must be at least 1.

    Returns:
        Median duration in seconds across the timed calls, as a ``float``.

    Raises:
        ValueError: If ``repeats`` is smaller than 1 (the median of zero
            measurements is undefined).
    """
    if repeats < 1:
        raise ValueError(f"repeats must be >= 1, got {repeats}")
    times = []
    for _ in range(repeats):
        # perf_counter() is monotonic and high-resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        t1 = time.perf_counter()
        _ = predictor.predict(df_in, model=model_name)
        times.append(time.perf_counter() - t1)
    return float(np.median(times))

# Latency benchmark input: the first 256 held-out rows, label column removed.
small_batch = test_df.drop(columns=target).iloc[:256]
lat_best = bench_infer(best_model, small_batch)
print(f"\nBest model: {best_model} | median predict() latency on 256 rows: {lat_best:.4f}s")

# Only compare against a refit model when refit_full produced at least one.
if full_candidates:
    # Rank by test score, then take the highest-ranked model whose name ends in _FULL.
    lb_full_sorted = lb_full.sort_values(by="score_test", ascending=False)
    best_full = lb_full_sorted.loc[
        lb_full_sorted["model"].str.endswith("_FULL"), "model"
    ].iloc[0]
    lat_full = bench_infer(best_full, small_batch)
    print(f"Best FULL model: {best_full} | median predict() latency on 256 rows: {lat_full:.4f}s")
    # The denominator is floored at 1e-9 to avoid dividing by zero.
    print(f"Speedup factor (best / full): {lat_best / max(lat_full, 1e-9):.2f}x")

# Distillation is best-effort: a failure is reported and the notebook carries on.
distill_options = {
    "train_data": train_df,
    "time_limit": 4 * 60,
    "augment_method": "spunge",
}
try:
    t0 = time.time()
    distill_result = predictor.distill(**distill_options)
    t_distill = time.time() - t0
    print(f"\nDistillation completed in {t_distill:.1f}s")
except Exception as err:
    print("\nDistillation step failed")
    print("Error:", repr(err))

# Re-score on the held-out rows regardless of whether distillation succeeded.
lb2 = predictor.leaderboard(test_df, silent=True)
print("\n=== Leaderboard after distillation attempt (top 20) ===")
display(lb2.head(20))

# Persist the predictor, then load it back from disk to prove the round trip works.
predictor.save()
reloaded = TabularPredictor.load(save_path)

# Eight label-free held-out rows, sampled with a fixed seed.
sample = test_df.drop(columns=target).sample(n=8, random_state=0)
sample_pred = reloaded.predict(sample)
sample_proba = reloaded.predict_proba(sample)

print("\n=== Reloaded predictor sanity-check ===")
print(sample.assign(pred=sample_pred).head())

print("\nProbabilities (head):")
display(sample_proba.head())

# Leaderboard rows can contain NaN (for example a missing validation score).
# json.dump would write those as the bare token NaN, which is not valid JSON,
# so they are converted to None and serialized as null.
leaderboard_top10 = [
    {
        col: (None if isinstance(val, float) and np.isnan(val) else val)
        for col, val in row.items()
    }
    for row in lb2.head(10).to_dict(orient="records")
]

# `model_best` / `model_names()` are the AutoGluon 1.x accessors; the older
# get_model_best() / get_model_names() spellings are deprecated.
artifacts = {
    "path": save_path,
    "presets": presets,
    "best_model": reloaded.model_best,
    "model_names": reloaded.model_names(),
    "leaderboard_top10": leaderboard_top10,
}
# The summary is written next to the saved predictor.
summary_path = os.path.join(save_path, "run_summary.json")
with open(summary_path, "w") as f:
    json.dump(artifacts, f, indent=2)

print("\nSaved summary to:", summary_path)
print("Done.")

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.5.0
Python Version:     3.12.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Oct  2 10:42:05 UTC 2025
CPU Count:          2
Pytorch Version:    2.9.0+cpu
CUDA Version:       CUDA is not available
Memory Avail:       10.04 GB / 12.67 GB (79.2%)
Disk Space Avail:   85.40 GB / 107.72 GB (79.3%)
Presets specified: ['best_quality']
Using hyperparameters preset: hyperparameters='zeroshot'
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=2, num_bag_folds=5, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of t

Shape: (1309, 11)
Target positive rate: 0.382
Columns: ['pclass', 'survived', 'name', 'sex', 'age', 'sibsp', 'parch', 'ticket', 'fare', 'cabin', 'embarked']


Leaderboard on holdout data (DyStack):
                 model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0      LightGBM_BAG_L2       0.866975   0.837793     roc_auc        1.220580       0.465710  42.812524                 0.035893                0.057318          17.013317            2       True          4
1    LightGBMXT_BAG_L2       0.863580   0.844238     roc_auc        1.228134       0.447580  44.289877                 0.043447                0.039188          18.490669            2       True          3
2  WeightedEnsemble_L4       0.863580   0.844238     roc_auc        1.230090       0.448575  44.294562                 0.001956                0.000994           0.004685            4       True          7
3  WeightedEnsemble_L3       0.863580   0.844238     roc_auc        1.230333       0.448662  44.295093                 0.002198          