In [12]:
# ==============================================
# End-to-end (Gender-Specific Models) + BMI feature:
# Load → Detect ID/Target/Gender → Drop MTRANS/SMOKE → +BMI →
# Split by Gender → Per-gender 5-Fold XGB (ES) → Predict → submission.csv
# ==============================================

# -------- Imports --------
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.base import clone

import xgboost as xgb

# -------- Paths --------
# Relative paths: the three CSVs are read from the current working directory.
TRAIN_PATH = "train.csv"
TEST_PATH = "test.csv"
SAMPLE_SUB_PATH = "sample_submission.csv"

# Seed handed to StratifiedKFold (random_state) and to XGBoost ("seed").
RANDOM_STATE = 42
# Number of stratified folds trained per gender group.
N_FOLDS = 5
# Forwarded to XGBoost as "nthread".
N_JOBS = -1
NUM_CLASSES_EXPECTED = 7   # used only for a sanity warning

# -------- Helpers --------
def norm_col(s: str) -> str:
    """Return a canonical form of a column label for case-insensitive matching.

    The label is converted with ``str()``, any byte-order-mark characters are
    removed, surrounding whitespace is trimmed and the result is lower-cased.
    ``None`` is passed through unchanged.
    """
    if s is None:
        return s
    text = str(s)
    without_bom = text.replace("\ufeff", "")
    return without_bom.strip().lower()

def build_norm_map(cols):
    """Build lookup tables between original and normalized column labels.

    Returns:
        A ``(forward, reverse)`` pair of dicts. ``forward`` maps each original
        label to its ``norm_col`` form; ``reverse`` maps each normalized form
        back to the first original label that produced it.
    """
    forward = {}
    reverse = {}
    for original in cols:
        normalized = norm_col(original)
        forward[original] = normalized
        # setdefault keeps the first original seen for a normalized name.
        reverse.setdefault(normalized, original)
    return forward, reverse

def find_id_and_label(sample_sub, train, test):
    """Work out which columns hold the row ID and the target label.

    Column labels are compared in normalized form (see ``norm_col``). The
    detection runs through these steps, each only if the previous ones left
    the value undecided:

    1. If the sample submission has exactly two columns, the one that also
       appears in ``test`` is the ID and the other is the label; when that is
       ambiguous, a column present in both ``test`` and ``train`` is the ID.
    2. ID fallback: the first of a few conventional names present in both
       ``test`` and ``train``.
    3. Label fallback: the only sample-submission column that is not the ID.
    4. Label fallback: the first conventional label name present in ``train``.
    5. Label fallback: the last ``train`` column that is not the ID.

    Returns:
        A dict with the normalized names (``id_norm``, ``label_norm``) and the
        original headers as spelled in each frame (``id_in_train``,
        ``id_in_test``, ``id_in_sample``, ``label_in_train``,
        ``label_in_sample``); entries are ``None`` when nothing matched.
    """
    sub_fwd, sub_rev = build_norm_map(sample_sub.columns)
    train_fwd, train_rev = build_norm_map(train.columns)
    test_fwd, test_rev = build_norm_map(test.columns)

    sub_names = [sub_fwd[col] for col in sample_sub.columns]
    train_names = [train_fwd[col] for col in train.columns]
    test_names = [test_fwd[col] for col in test.columns]

    id_norm = None
    label_norm = None

    # Step 1: two-column sample submission.
    if len(sub_names) == 2:
        first, second = sub_names
        first_in_test = first in test_names
        second_in_test = second in test_names
        if first_in_test and not second_in_test:
            id_norm, label_norm = first, second
        elif second_in_test and not first_in_test:
            id_norm, label_norm = second, first
        elif first_in_test and first in train_names:
            id_norm, label_norm = first, second
        elif second_in_test and second in train_names:
            id_norm, label_norm = second, first

    # Step 2: conventional ID names shared by train and test.
    if id_norm is None:
        id_norm = next(
            (
                name
                for name in ("id", "row_id", "index", "sample_id")
                if name in test_names and name in train_names
            ),
            None,
        )

    # Step 3: the single non-ID column of the sample submission.
    if label_norm is None:
        non_id = [name for name in sub_names if name != id_norm]
        if len(non_id) == 1:
            label_norm = non_id[0]

    # Step 4: conventional label names found in train.
    if label_norm is None:
        label_norm = next(
            (
                name
                for name in ("label", "target", "class", "y", "weightcategory", "nobeyesdad")
                if name in train_names and name != id_norm
            ),
            None,
        )

    # Step 5: last train column that is not the ID.
    if label_norm is None:
        label_norm = next(
            (name for name in reversed(train_names) if name != id_norm),
            None,
        )

    return {
        "id_norm": id_norm,
        "label_norm": label_norm,
        "id_in_train": train_rev.get(id_norm, None),
        "id_in_test": test_rev.get(id_norm, None),
        "id_in_sample": sub_rev.get(id_norm, None),
        "label_in_train": train_rev.get(label_norm, None),
        "label_in_sample": sub_rev.get(label_norm, None),
    }

def infer_feature_types(df):
    """Split the columns of ``df`` into numeric and categorical by dtype.

    Returns:
        ``(num_cols, cat_cols)``: lists of column labels. Numeric columns are
        those selected by ``np.number``; categorical columns are those with
        ``object``, ``category`` or ``bool`` dtype.
    """
    categorical_frame = df.select_dtypes(include=["object", "category", "bool"])
    numeric_frame = df.select_dtypes(include=[np.number])
    return numeric_frame.columns.tolist(), categorical_frame.columns.tolist()

def detect_gender_column(df):
    """Return the label of the column that encodes gender, or ``None``.

    A column whose normalized name is ``gender`` or ``sex`` wins. Otherwise
    the first column with two or three distinct non-null values, at least one
    starting with "m" and one starting with "f" (case-insensitive), is used.
    """
    # try common names
    for col in df.columns:
        if norm_col(col) in {"gender", "sex"}:
            return col

    # weak fallback: a low-cardinality column whose values look like M/F
    for col in df.columns:
        cleaned = df[col].dropna().astype(str).str.lower().str.strip()
        distinct = pd.Series(cleaned).unique()
        if len(distinct) not in (2, 3):
            continue
        has_m = any(value.startswith("m") for value in distinct)
        has_f = any(value.startswith("f") for value in distinct)
        if has_m and has_f:
            return col
    return None

def split_by_gender(series):
    """Build boolean male/female masks from a gender column.

    Values are stringified, lower-cased and stripped. Entries starting with
    "m", "1" or "true" count as male; entries starting with "f", "0" or
    "false" count as female. If neither pattern matches any row, the most
    frequent value is treated as male and the second most frequent as female.

    Returns:
        ``(male_mask, female_mask)`` as boolean Series aligned with ``series``.
    """
    normalized = series.astype(str).str.lower().str.strip()
    male_mask = normalized.str.startswith(("m", "1", "true"))
    female_mask = normalized.str.startswith(("f", "0", "false"))

    if male_mask.sum() != 0 or female_mask.sum() != 0:
        return male_mask, female_mask

    # Nothing looked like a gender token: fall back to the two most common values.
    ranked = normalized.value_counts().index.tolist()
    if len(ranked) >= 2:
        male_mask = normalized == ranked[0]
        female_mask = normalized == ranked[1]
    return male_mask, female_mask

def add_bmi(df):
    """Add a ``BMI`` column computed as Weight / Height_m**2.

    The frame is modified in place and also returned. If either ``Weight`` or
    ``Height`` is missing, the frame is returned untouched.

    Height units are detected from the data: a median height above 3 is taken
    to mean centimetres and the column is converted to metres.

    Rows whose height is zero, negative or missing get ``NaN`` as BMI. The
    denominator carries no epsilon term: an epsilon turns a zero height into
    an astronomically large "valid" BMI instead of a missing value.
    """
    if ("Weight" in df.columns) and ("Height" in df.columns):
        weight = df["Weight"].astype(float)
        height_m = df["Height"].astype(float)
        # If median height > 3 assume cm and convert to meters
        if height_m.median() > 3.0:
            height_m = height_m / 100.0
        # BMI is undefined for non-positive heights; mask them so they yield NaN.
        height_m = height_m.where(height_m > 0)
        with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
            bmi = weight / height_m.pow(2)
        # Guard against overflow from pathologically tiny heights.
        df["BMI"] = bmi.replace([np.inf, -np.inf], np.nan)
    return df

# -------- Load data --------
# Reads the three CSVs named by the path constants defined earlier in this cell.
train = pd.read_csv(TRAIN_PATH)
test  = pd.read_csv(TEST_PATH)
sample_sub = pd.read_csv(SAMPLE_SUB_PATH)

# Drop not-used columns
for c in ["MTRANS","SMOKE"]:
    if c in train.columns: train.drop(columns=[c], inplace=True)
    if c in test.columns:  test.drop(columns=[c], inplace=True)

# >>> Add BMI feature (train & test) <<<
train = add_bmi(train)
test  = add_bmi(test)

# Resolve the ID and target headers by comparing sample_submission, train and test.
info = find_id_and_label(sample_sub, train, test)

# Original (un-normalized) headers as they appear in each frame; any may be None.
ID_COL_TRAIN   = info["id_in_train"]
ID_COL_TEST    = info["id_in_test"]
ID_COL_SAMPLE  = info["id_in_sample"]
TARGET_COL     = info["label_in_train"]
LABEL_COL_SAMP = info["label_in_sample"]

if TARGET_COL is None:
    raise ValueError("Could not detect the target column. Please ensure sample_submission and train headers align.")
if LABEL_COL_SAMP is None:
    # The label header was not found in sample_submission by name:
    # accept the single non-ID column if there is exactly one.
    ss_cols = list(sample_sub.columns)
    others = [c for c in ss_cols if c != ID_COL_SAMPLE]
    if len(others)==1:
        LABEL_COL_SAMP = others[0]
    else:
        raise ValueError("Could not detect label header in sample_submission.csv")

print(f"[Detected] Target in train: '{TARGET_COL}', Label in sample_sub: '{LABEL_COL_SAMP}'")
if ID_COL_TRAIN and ID_COL_TEST:
    print(f"[Detected] ID in train: '{ID_COL_TRAIN}', ID in test: '{ID_COL_TEST}'")

# -------- Target / Features --------
y = train[TARGET_COL].copy()
X = train.drop(columns=[TARGET_COL]).copy()
if ID_COL_TRAIN in X.columns:
    X.drop(columns=[ID_COL_TRAIN], inplace=True)

test_features = test.copy()
if ID_COL_TEST in test_features.columns:
    test_ids = test_features[ID_COL_TEST].copy()
    test_features.drop(columns=[ID_COL_TEST], inplace=True)
else:
    # No ID column in test: fall back to positional row numbers.
    test_ids = pd.Series(np.arange(len(test_features)), name="id")

# -------- Label encode target --------
le = LabelEncoder()
y_enc = le.fit_transform(y)
classes = list(le.classes_)
if len(classes) != NUM_CLASSES_EXPECTED:
    print(f"[Warn] Expected {NUM_CLASSES_EXPECTED} classes but found {len(classes)}. Proceeding.")

# -------- Detect gender column and split --------
# Detection looks at train features and test features stacked together.
gender_col = detect_gender_column(pd.concat([X, test_features], axis=0))
if gender_col is None:
    raise ValueError("Could not detect a gender column (e.g., 'Gender' or 'SEX'). Please confirm the column name.")

# Masks are built from `train`, which shares its row index with X (X is train minus columns).
# NOTE(review): rows matching neither mask are not used by either gender model below.
male_mask, female_mask = split_by_gender(train[gender_col])
test_male_mask, test_female_mask = split_by_gender(test_features[gender_col])

print(f"[Info] Train male rows: {int(male_mask.sum())} | female rows: {int(female_mask.sum())}")
print(f"[Info] Test  male rows: {int(test_male_mask.sum())} | female rows: {int(test_female_mask.sum())}")

def train_group_and_predict(X_grp, y_enc_grp, test_grp, group_name):
    """Cross-validate XGBoost on one gender group and predict that group's test rows.

    Args:
        X_grp: training features of the group (still containing the gender column).
        y_enc_grp: integer-encoded targets, indexable by position.
        test_grp: test features of the same group.
        group_name: tag used as a prefix in the progress output.

    Returns:
        ``(oof_group, test_group_pred)``: out-of-fold class probabilities for
        the training rows and fold-averaged class probabilities for the test
        rows, both of shape ``(rows, len(classes))``.
    """
    # The gender column is left out of the features: the caller has already
    # split the rows by it.
    feature_names = [name for name in X_grp.columns if name != gender_col]
    group_X = X_grp[feature_names].copy()
    group_test = test_grp[feature_names].copy()

    numeric_features, categorical_features = infer_feature_types(group_X)

    # Fall back to the `sparse` keyword when `sparse_output` is rejected
    # (presumably an older scikit-learn release).
    try:
        encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=True)
    except TypeError:
        encoder = OneHotEncoder(handle_unknown="ignore", sparse=True)

    numeric_branch = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler(with_mean=False)),
    ])
    categorical_branch = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", encoder),
    ])
    # sparse_threshold=1.0 keeps the combined output sparse for DMatrix.
    template = ColumnTransformer(
        transformers=[
            ("num", numeric_branch, numeric_features),
            ("cat", categorical_branch, categorical_features),
        ],
        remainder="drop",
        sparse_threshold=1.0,
    )

    n_classes = len(classes)
    booster_params = {
        "objective": "multi:softprob",
        "num_class": n_classes,
        "eval_metric": "mlogloss",
        "tree_method": "hist",
        "max_depth": 6,
        "min_child_weight": 2,
        "subsample": 0.9,
        "colsample_bytree": 0.9,
        "lambda": 1.0,
        "alpha": 0.0,
        "eta": 0.03,
        "nthread": N_JOBS,
        "seed": RANDOM_STATE,
    }
    max_rounds = 20000
    patience = 200

    splitter = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE)
    oof_group = np.zeros((len(group_X), n_classes), dtype=np.float32)
    test_group_pred = np.zeros((len(group_test), n_classes), dtype=np.float32)
    best_rounds = []

    for fold_no, (train_rows, valid_rows) in enumerate(splitter.split(group_X, y_enc_grp), start=1):
        print(f"\n[{group_name}] Fold {fold_no}/{N_FOLDS}")
        y_fit = y_enc_grp[train_rows]
        y_valid = y_enc_grp[valid_rows]

        # Fit the preprocessing on the training part of the fold only.
        fold_prep = clone(template)
        fit_matrix = xgb.DMatrix(fold_prep.fit_transform(group_X.iloc[train_rows]), label=y_fit)
        valid_matrix = xgb.DMatrix(fold_prep.transform(group_X.iloc[valid_rows]), label=y_valid)

        booster = xgb.train(
            params=booster_params,
            dtrain=fit_matrix,
            num_boost_round=max_rounds,
            evals=[(fit_matrix, "train"), (valid_matrix, "valid")],
            early_stopping_rounds=patience,
            verbose_eval=False,
        )
        best_round = int(booster.best_iteration + 1)
        best_rounds.append(best_round)
        print(f"[{group_name}] Best iteration: {best_round}")

        # Score the held-out part using only the trees up to the best round.
        valid_proba = booster.predict(valid_matrix, iteration_range=(0, best_round))
        oof_group[valid_rows] = valid_proba
        valid_labels = np.argmax(valid_proba, axis=1)
        acc = accuracy_score(y_valid, valid_labels)
        f1m = f1_score(y_valid, valid_labels, average="macro")
        print(f"[{group_name}] Acc: {acc:.4f} | Macro F1: {f1m:.4f}")

        # Each fold contributes an equal share to the averaged test prediction.
        test_matrix = xgb.DMatrix(fold_prep.transform(group_test))
        fold_test_proba = booster.predict(test_matrix, iteration_range=(0, best_round))
        test_group_pred += fold_test_proba / N_FOLDS

    # OOF summary for the group
    oof_labels_grp = np.argmax(oof_group, axis=1)
    acc_g = accuracy_score(y_enc_grp, oof_labels_grp)
    f1_g = f1_score(y_enc_grp, oof_labels_grp, average="macro")
    print(f"\n[{group_name}] OOF Accuracy: {acc_g:.4f} | Macro F1: {f1_g:.4f}")
    print(f"[{group_name}] Best iterations: {best_rounds} | Median: {int(np.median(best_rounds))}")

    return oof_group, test_group_pred

# -------- Run male model --------
# Indices are reset so positional fold indices line up with the group's rows.
X_male = X[male_mask].reset_index(drop=True)
y_male_enc = y_enc[male_mask]
test_male = test_features[test_male_mask].reset_index(drop=True)

male_oof, male_test_pred = train_group_and_predict(X_male, y_male_enc, test_male, "MALE")

# -------- Run female model --------
X_female = X[female_mask].reset_index(drop=True)
y_female_enc = y_enc[female_mask]
test_female = test_features[test_female_mask].reset_index(drop=True)

female_oof, female_test_pred = train_group_and_predict(X_female, y_female_enc, test_female, "FEMALE")

# -------- Combine OOF for overall report --------
# Rows covered by neither mask keep all-zero probabilities, so argmax gives class index 0 for them.
oof_full = np.zeros((len(X), len(classes)), dtype=np.float32)
oof_full[male_mask.values] = male_oof
oof_full[female_mask.values] = female_oof

oof_labels = np.argmax(oof_full, axis=1)
oof_acc = accuracy_score(y_enc, oof_labels)
oof_f1 = f1_score(y_enc, oof_labels, average="macro")
print("\n========== OVERALL OOF ==========")
print(f"OOF Accuracy: {oof_acc:.4f} | OOF Macro F1: {oof_f1:.4f}")
# The report is best-effort: any failure is printed and the run continues.
try:
    print("\nOOF Classification Report:\n",
          classification_report(y_enc, oof_labels, target_names=classes))
except Exception as e:
    print(f"[Info] Could not print classification report: {e}")

# -------- Build full test predictions by placing group preds back to original order --------
# As with the OOF matrix, test rows in neither mask stay all-zero and resolve to class index 0.
test_pred_proba = np.zeros((len(test_features), len(classes)), dtype=np.float32)
test_pred_proba[test_male_mask.values] = male_test_pred
test_pred_proba[test_female_mask.values] = female_test_pred

test_pred_int = np.argmax(test_pred_proba, axis=1)
test_pred_labels = le.inverse_transform(test_pred_int)

# -------- Build submission --------
ss_cols = list(sample_sub.columns)
ID_HEADER = ID_COL_SAMPLE if ID_COL_SAMPLE in sample_sub.columns else None
LABEL_HEADER = LABEL_COL_SAMP

sub = pd.DataFrame()
if ID_HEADER is not None and ID_COL_TEST in test.columns:
    sub[ID_HEADER] = test[ID_COL_TEST].values
elif ID_HEADER is not None:
    # Sample submission has an ID column but test does not: use row positions.
    sub[ID_HEADER] = np.arange(len(test_features))
sub[LABEL_HEADER] = test_pred_labels

# Reorder/complete to match sample_sub exactly
# Any sample_sub column still missing is filled with sample_sub's first value for it.
for c in ss_cols:
    if c not in sub.columns:
        sub[c] = sample_sub[c].iloc[0] if len(sample_sub[c]) else None
sub = sub[ss_cols]

sub.to_csv("submission.csv", index=False)
print("\nSaved submission.csv")
print(sub.head(10))


[Detected] Target in train: 'WeightCategory', Label in sample_sub: 'WeightCategory'
[Detected] ID in train: 'id', ID in test: 'id'
[Info] Train male rows: 7783 | female rows: 7750
[Info] Test  male rows: 2553 | female rows: 2672

[MALE] Fold 1/5
[MALE] Best iteration: 302
[MALE] Acc: 0.8915 | Macro F1: 0.7553

[MALE] Fold 2/5
[MALE] Best iteration: 316
[MALE] Acc: 0.8915 | Macro F1: 0.7552

[MALE] Fold 3/5
[MALE] Best iteration: 390
[MALE] Acc: 0.8838 | Macro F1: 0.7476

[MALE] Fold 4/5
[MALE] Best iteration: 353
[MALE] Acc: 0.8869 | Macro F1: 0.7491

[MALE] Fold 5/5
[MALE] Best iteration: 309
[MALE] Acc: 0.8830 | Macro F1: 0.7465

[MALE] OOF Accuracy: 0.8873 | Macro F1: 0.7508
[MALE] Best iterations: [302, 316, 390, 353, 309] | Median: 316

[FEMALE] Fold 1/5
[FEMALE] Best iteration: 318
[FEMALE] Acc: 0.9174 | Macro F1: 0.7515

[FEMALE] Fold 2/5
[FEMALE] Best iteration: 314
[FEMALE] Acc: 0.9219 | Macro F1: 0.7522

[FEMALE] Fold 3/5
[FEMALE] Best iteration: 298
[FEMALE] Acc: 0.9090 | Ma

In [7]:
# ==============================================================
# Robust tuning for your winning setup (pure XGB, no ensembles)
# Gender-split XGBoost + BMI + OHE + EarlyStopping
# Optuna (tight ranges) + multi-seed CV stability
# Produces: submission.csv
# ==============================================================

import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, f1_score

import xgboost as xgb
import optuna

# ---------------- Config ----------------
# Relative paths: the three CSVs are read from the current working directory.
TRAIN_PATH = "train.csv"
TEST_PATH  = "test.csv"
SAMPLE_SUB_PATH = "sample_submission.csv"

# Seed for the final CV split and the final XGBoost models (train_cv_predict).
RANDOM_STATE = 42
# Forwarded to XGBoost as "nthread".
N_JOBS = -1
# Number of stratified folds, both while tuning and for the final models.
N_FOLDS = 5
# Passed to xgb.train as early_stopping_rounds.
EARLY_STOP = 200

# Tuning budget (tight but effective)
# Number of Optuna trials run for each gender group.
TRIALS_MALE = 30
TRIALS_FEMALE = 30

# Use multiple CV shuffles during tuning for stability
# Each seed drives one StratifiedKFold shuffle and the XGBoost "seed" for that pass.
CV_SEEDS = [42, 2027, 1337]   # add/remove seeds to trade speed vs stability

# ---------------- Helpers ----------------
def add_bmi(df):
    """Add a ``BMI`` column (Weight / Height_m**2), clipped to the range 10-80.

    The frame is modified in place and also returned. If either ``Weight`` or
    ``Height`` is missing, the frame is returned untouched.

    A median height above 3 is taken to mean centimetres and the column is
    converted to metres.

    Rows whose height is zero, negative or missing get ``NaN``. The
    denominator carries no epsilon term: with an epsilon, a zero height
    produces a huge quotient that the clip then turns into a plausible-looking
    BMI of 80.
    """
    if ("Weight" in df.columns) and ("Height" in df.columns):
        weight = df["Weight"].astype(float)
        height_m = df["Height"].astype(float)
        if height_m.median() > 3.0:
            height_m = height_m / 100.0
        # BMI is undefined for non-positive heights; mask them so they yield NaN.
        height_m = height_m.where(height_m > 0)
        with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
            bmi = weight / height_m.pow(2)
        # NaN passes through clip unchanged, so invalid rows stay missing.
        df["BMI"] = bmi.replace([np.inf, -np.inf], np.nan).clip(10, 80)
    return df

def make_preprocessor(num_cols, cat_cols):
    """Create an unfitted ColumnTransformer for the given column lists.

    Numeric columns are median-imputed and scaled without centering;
    categorical columns are imputed with the most frequent value and one-hot
    encoded, ignoring categories unseen at fit time. Columns in neither list
    are dropped, and ``sparse_threshold=1.0`` keeps the output sparse.
    """
    # Fall back to the `sparse` keyword when `sparse_output` is rejected
    # (presumably an older scikit-learn release).
    try:
        encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=True)
    except TypeError:
        encoder = OneHotEncoder(handle_unknown="ignore", sparse=True)

    numeric_steps = [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler(with_mean=False)),
    ]
    categorical_steps = [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", encoder),
    ]
    branches = [
        ("num", Pipeline(numeric_steps), num_cols),
        ("cat", Pipeline(categorical_steps), cat_cols),
    ]
    return ColumnTransformer(
        transformers=branches,
        remainder="drop",
        sparse_threshold=1.0,
    )

def detect_gender(df):
    """Return the label of the first column named "gender" or "sex".

    Matching ignores case, surrounding whitespace and a byte-order mark, and
    works for non-string column labels (they are compared via ``str()``).
    The label is returned exactly as it appears in ``df.columns``.

    Raises:
        ValueError: if no column matches.
    """
    for c in df.columns:
        normalized = str(c).replace("\ufeff", "").strip().lower()
        if normalized in ("gender", "sex"):
            return c
    raise ValueError("Could not detect gender column (expected Gender or Sex).")

def detect_id_and_target(sample_sub):
    """Return ``(id_column, target_column)``: the first two sample-submission headers."""
    headers = sample_sub.columns
    id_header = headers[0]
    target_header = headers[1]
    return id_header, target_header

def infer_cols(df):
    """Split the columns of ``df`` into numeric and categorical by dtype.

    Returns:
        ``(num_cols, cat_cols)``: lists of column labels. Numeric columns are
        those selected by ``np.number``; categorical columns are those with
        ``object``, ``category`` or ``bool`` dtype.
    """
    categorical_frame = df.select_dtypes(include=["object", "category", "bool"])
    numeric_frame = df.select_dtypes(include=[np.number])
    return numeric_frame.columns.tolist(), categorical_frame.columns.tolist()

# Baseline params used before tuning; the ranges in suggest_params are centred on these.
# NOTE(review): this dict is not referenced by any code visible in this cell.
BASELINE = {
    "eta": 0.03,
    "max_depth": 6,
    "min_child_weight": 2.0,
    "subsample": 0.90,
    "colsample_bytree": 0.90,
    "reg_lambda": 1.0,
    "reg_alpha": 0.0,
    "gamma": 0.0,
    "max_delta_step": 0.0,
    "num_boost_round": 20000,  # with ES it will stop earlier
}

def suggest_params(trial):
    """Sample one hyper-parameter configuration from ``trial``.

    The ranges are kept tight around the baseline to avoid CV-overfit drift.
    Values are requested from the trial in a fixed order, and the returned
    dict uses the same key order.
    """
    sampled = {}
    sampled["eta"] = trial.suggest_float("eta", 0.02, 0.06, log=True)
    sampled["max_depth"] = trial.suggest_int("max_depth", 5, 8)
    sampled["min_child_weight"] = trial.suggest_float("min_child_weight", 1.0, 4.5)
    sampled["subsample"] = trial.suggest_float("subsample", 0.75, 0.95)
    sampled["colsample_bytree"] = trial.suggest_float("colsample_bytree", 0.70, 0.95)
    sampled["reg_lambda"] = trial.suggest_float("reg_lambda", 0.8, 2.5, log=True)
    sampled["reg_alpha"] = trial.suggest_float("reg_alpha", 0.0, 0.6)
    sampled["gamma"] = trial.suggest_float("gamma", 0.0, 0.4)
    sampled["max_delta_step"] = trial.suggest_float("max_delta_step", 0.0, 3.0)
    # Early stopping picks the effective iteration; this only bounds the cap.
    sampled["num_boost_round"] = trial.suggest_int("num_boost_round", 2000, 12000, step=1000)
    return sampled

def tune_group(Xg, yg, label, num_class, trials):
    """Tune XGBoost hyper-parameters for one gender group with Optuna.

    Every trial is scored by mean validation accuracy over ``N_FOLDS``
    stratified folds, repeated once per seed in ``CV_SEEDS``. The running
    mean is reported after each seed so that the study's pruner can stop an
    unpromising trial early.

    Args:
        Xg: feature frame of the group.
        yg: integer-encoded targets, indexable by position.
        label: tag used as a prefix in the progress output.
        num_class: number of target classes passed to XGBoost.
        trials: number of Optuna trials to run.

    Returns:
        ``study.best_params``: the best sampled configuration, with the keys
        produced by ``suggest_params``.
    """
    num_cols, cat_cols = infer_cols(Xg)

    # The fold split, the fitted preprocessing and the resulting DMatrix
    # objects depend only on the CV seed, never on the sampled
    # hyper-parameters. Build them once here instead of once per trial.
    cv_data = []
    for cv_seed in CV_SEEDS:
        skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=cv_seed)
        folds = []
        for tr_idx, va_idx in skf.split(Xg, yg):
            pre = make_preprocessor(num_cols, cat_cols)
            Xtr = pre.fit_transform(Xg.iloc[tr_idx])
            Xva = pre.transform(Xg.iloc[va_idx])
            y_va = yg[va_idx]
            folds.append((
                xgb.DMatrix(Xtr, label=yg[tr_idx]),
                xgb.DMatrix(Xva, label=y_va),
                y_va,
            ))
        cv_data.append((cv_seed, folds))

    def objective(trial):
        hp = suggest_params(trial)

        base_params = {
            "objective": "multi:softprob",
            "num_class": num_class,
            "eval_metric": "mlogloss",
            "tree_method": "hist",
            "eta": hp["eta"],
            "max_depth": int(hp["max_depth"]),
            "min_child_weight": float(hp["min_child_weight"]),
            "subsample": float(hp["subsample"]),
            "colsample_bytree": float(hp["colsample_bytree"]),
            "reg_lambda": float(hp["reg_lambda"]),
            "reg_alpha": float(hp["reg_alpha"]),
            "gamma": float(hp["gamma"]),
            "max_delta_step": float(hp["max_delta_step"]),
            "nthread": N_JOBS,
            "verbosity": 0,
        }

        scores_across_seeds = []
        for cv_seed, folds in cv_data:
            # Fresh dict per seed so no state is shared between xgb.train calls.
            params = dict(base_params, seed=cv_seed)
            fold_scores = []

            for dtrain, dvalid, y_va in folds:
                bst = xgb.train(
                    params=params,
                    dtrain=dtrain,
                    num_boost_round=hp["num_boost_round"],
                    evals=[(dtrain, "train"), (dvalid, "valid")],
                    early_stopping_rounds=EARLY_STOP,
                    verbose_eval=False
                )

                pred_va = bst.predict(dvalid, iteration_range=(0, bst.best_iteration + 1))
                y_hat = np.argmax(pred_va, axis=1)
                fold_scores.append(accuracy_score(y_va, y_hat))

            scores_across_seeds.append(np.mean(fold_scores))

            # Report the running mean so the study's pruner (not the TPE
            # sampler) can cut the trial short after a weak first seed.
            trial.report(np.mean(scores_across_seeds), step=len(scores_across_seeds))
            if trial.should_prune():
                raise optuna.TrialPruned()

        return float(np.mean(scores_across_seeds))

    # Seed the sampler so a re-run explores the same configurations; an
    # unseeded study gives different "best" parameters on every execution.
    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE),
    )
    print(f"\n[{label}] Robust tuning: {trials} trials × {len(CV_SEEDS)} seeds × {N_FOLDS}-fold CV")
    study.optimize(objective, n_trials=trials, show_progress_bar=True)

    print(f"[{label}] Best CV Acc: {study.best_value:.5f}")
    print(f"[{label}] Best params:\n{study.best_params}")
    return study.best_params

def train_cv_predict(Xg, yg, Xtestg, params, num_class, label):
    """Train one XGBoost model per CV fold and return averaged test probabilities.

    Args:
        Xg: training features of the group.
        yg: integer-encoded targets, indexable by position.
        Xtestg: test features of the same group.
        params: tuned hyper-parameters (the keys produced by ``suggest_params``).
        num_class: number of target classes.
        label: tag used as a prefix in the progress output.

    Returns:
        Array of shape ``(len(Xtestg), num_class)`` holding the mean of the
        per-fold class probabilities.
    """
    numeric_features, categorical_features = infer_cols(Xg)
    splitter = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE)

    # Fixed settings first, then the tuned values coerced to plain Python types.
    booster_params = {
        "objective": "multi:softprob",
        "num_class": num_class,
        "eval_metric": "mlogloss",
        "tree_method": "hist",
        "nthread": N_JOBS,
        "seed": RANDOM_STATE,
        "verbosity": 0,
        "eta": float(params["eta"]),
        "max_depth": int(params["max_depth"]),
        "min_child_weight": float(params["min_child_weight"]),
        "subsample": float(params["subsample"]),
        "colsample_bytree": float(params["colsample_bytree"]),
        "reg_lambda": float(params["reg_lambda"]),
        "reg_alpha": float(params["reg_alpha"]),
        "gamma": float(params["gamma"]),
        "max_delta_step": float(params["max_delta_step"]),
    }
    max_rounds = int(params["num_boost_round"])

    oof = np.zeros((len(Xg), num_class), dtype=np.float32)
    test_pred = np.zeros((len(Xtestg), num_class), dtype=np.float32)
    best_iters = []

    for fold_no, (train_rows, valid_rows) in enumerate(splitter.split(Xg, yg), start=1):
        print(f"[{label}] Fold {fold_no}/{N_FOLDS}")

        # Fit the preprocessing on the training part of the fold only.
        fold_prep = make_preprocessor(numeric_features, categorical_features)
        fit_features = fold_prep.fit_transform(Xg.iloc[train_rows])
        valid_features = fold_prep.transform(Xg.iloc[valid_rows])
        test_features_tf = fold_prep.transform(Xtestg)

        fit_matrix = xgb.DMatrix(fit_features, label=yg[train_rows])
        valid_matrix = xgb.DMatrix(valid_features, label=yg[valid_rows])
        test_matrix = xgb.DMatrix(test_features_tf)

        booster = xgb.train(
            params=dict(booster_params),
            dtrain=fit_matrix,
            num_boost_round=max_rounds,
            evals=[(fit_matrix, "train"), (valid_matrix, "valid")],
            early_stopping_rounds=EARLY_STOP,
            verbose_eval=False,
        )
        best_iters.append(int(booster.best_iteration + 1))

        # Predict with the trees up to the best early-stopping round.
        used_rounds = (0, booster.best_iteration + 1)
        oof[valid_rows] = booster.predict(valid_matrix, iteration_range=used_rounds)
        test_pred += booster.predict(test_matrix, iteration_range=used_rounds) / N_FOLDS

    y_oof = np.argmax(oof, axis=1)
    oof_acc = accuracy_score(yg, y_oof)
    oof_f1 = f1_score(yg, y_oof, average='macro')
    median_iter = int(np.median(best_iters))
    print(f"[{label}] OOF Acc: {oof_acc:.5f} | OOF F1: {oof_f1:.5f} | median best_iter: {median_iter}")
    return test_pred

# ==============================================================
# Load & Prepare
# ==============================================================

train = pd.read_csv(TRAIN_PATH)
test  = pd.read_csv(TEST_PATH)
sample_sub = pd.read_csv(SAMPLE_SUB_PATH)

# Drop columns that are not used as features
for c in ["MTRANS","SMOKE"]:
    if c in train.columns: train.drop(columns=[c], inplace=True)
    if c in test.columns:  test.drop(columns=[c], inplace=True)

# Add BMI (the only engineered feature)
train = add_bmi(train)
test  = add_bmi(test)

# Detect ID/Target
# Taken positionally: first sample_sub column is the ID, second is the target.
ID_COL, TARGET_COL = detect_id_and_target(sample_sub)
y = train[TARGET_COL].copy()
X = train.drop(columns=[TARGET_COL, ID_COL], errors="ignore")
test_ids = test[ID_COL]
test_X = test.drop(columns=[ID_COL], errors="ignore")

# Label encode target
le = LabelEncoder()
y_enc = le.fit_transform(y)
classes = le.classes_
num_class = len(classes)

# Gender split
# Note: ("m") and ("f") are plain strings, not one-element tuples; startswith accepts both.
# NOTE(review): rows whose gender value starts with neither letter fall into no group.
gender_col = detect_gender(X)
male_mask   = X[gender_col].astype(str).str.lower().str.startswith(("m"))
female_mask = X[gender_col].astype(str).str.lower().str.startswith(("f"))
test_male_mask   = test_X[gender_col].astype(str).str.lower().str.startswith(("m"))
test_female_mask = test_X[gender_col].astype(str).str.lower().str.startswith(("f"))

print(f"Train: males={male_mask.sum()}, females={female_mask.sum()}")
print(f"Test:  males={test_male_mask.sum()}, females={test_female_mask.sum()}")

# Indices are reset so positional fold indices line up with each group's rows.
X_male,   y_male   = X[male_mask].reset_index(drop=True),   y_enc[male_mask]
X_female, y_female = X[female_mask].reset_index(drop=True), y_enc[female_mask]
test_male   = test_X[test_male_mask].reset_index(drop=True)
test_female = test_X[test_female_mask].reset_index(drop=True)

# ==============================================================
# Tune (tight ranges, multi-seed stability) per gender
# ==============================================================

best_male   = tune_group(X_male, y_male, "MALE",   num_class, TRIALS_MALE)
best_female = tune_group(X_female, y_female, "FEMALE", num_class, TRIALS_FEMALE)

# ==============================================================
# Train with best params and predict (5-fold avg)
# ==============================================================

pred_male   = train_cv_predict(X_male, y_male, test_male,   best_male,   num_class, "MALE")
pred_female = train_cv_predict(X_female, y_female, test_female, best_female, num_class, "FEMALE")

# Merge predictions and build submission
# Test rows in neither mask keep all-zero probabilities, so argmax gives class index 0 for them.
final_proba = np.zeros((len(test_X), num_class), dtype=np.float32)
final_proba[test_male_mask.values]   = pred_male
final_proba[test_female_mask.values] = pred_female
final_pred = le.inverse_transform(np.argmax(final_proba, axis=1))

sub = pd.DataFrame({ID_COL: test_ids, TARGET_COL: final_pred})
sub.to_csv("submission.csv", index=False)
print("\nSaved submission.csv ✅")
print(sub.head())


[I 2025-10-23 14:36:15,894] A new study created in memory with name: no-name-b02d92ff-280b-47c7-8868-6387fde293d8


Train: males=7783, females=7750
Test:  males=2553, females=2672

[MALE] Robust tuning: 30 trials × 3 seeds × 5-fold CV


  0%|          | 0/30 [00:00<?, ?it/s]

[I 2025-10-23 14:36:54,931] Trial 0 finished with value: 0.8880036477329076 and parameters: {'eta': 0.03144161771601719, 'max_depth': 5, 'min_child_weight': 2.7951119663071418, 'subsample': 0.8422573187846457, 'colsample_bytree': 0.908021857824381, 'reg_lambda': 1.1384175276155883, 'reg_alpha': 0.21000918009944766, 'gamma': 0.1504247093899292, 'max_delta_step': 1.7038570039360976, 'num_boost_round': 3000}. Best is trial 0 with value: 0.8880036477329076.
[I 2025-10-23 14:37:21,757] Trial 1 finished with value: 0.88894612549456 and parameters: {'eta': 0.04797006861249111, 'max_depth': 7, 'min_child_weight': 2.3862238644362206, 'subsample': 0.7975882567788476, 'colsample_bytree': 0.7690008946376732, 'reg_lambda': 1.094292488954696, 'reg_alpha': 0.008815840511338857, 'gamma': 0.25669800208742877, 'max_delta_step': 1.2436470318358337, 'num_boost_round': 4000}. Best is trial 1 with value: 0.88894612549456.
[I 2025-10-23 14:39:00,817] Trial 2 finished with value: 0.8885601636526639 and parame

[I 2025-10-23 14:48:20,162] A new study created in memory with name: no-name-a9b2c401-980a-4014-b60e-79ea6e83a6f3


[I 2025-10-23 14:48:20,146] Trial 29 pruned. 
[MALE] Best CV Acc: 0.88959
[MALE] Best params:
{'eta': 0.04846505000215754, 'max_depth': 7, 'min_child_weight': 2.918361644831322, 'subsample': 0.8132267926559592, 'colsample_bytree': 0.7559932838553789, 'reg_lambda': 1.6449663834471437, 'reg_alpha': 0.27502101084477665, 'gamma': 0.14721930211811146, 'max_delta_step': 0.624651143585174, 'num_boost_round': 5000}

[FEMALE] Robust tuning: 30 trials × 3 seeds × 5-fold CV


  0%|          | 0/30 [00:00<?, ?it/s]

[I 2025-10-23 14:48:50,332] Trial 0 finished with value: 0.916258064516129 and parameters: {'eta': 0.048688319264963274, 'max_depth': 7, 'min_child_weight': 4.282458257461285, 'subsample': 0.880604318698849, 'colsample_bytree': 0.8254782605511541, 'reg_lambda': 1.6682280071337459, 'reg_alpha': 0.22086085323422852, 'gamma': 0.14432055917399045, 'max_delta_step': 2.9614566154582986, 'num_boost_round': 6000}. Best is trial 0 with value: 0.916258064516129.
[I 2025-10-23 14:49:32,215] Trial 1 finished with value: 0.9164731182795699 and parameters: {'eta': 0.02571027148691823, 'max_depth': 8, 'min_child_weight': 2.7489816751332086, 'subsample': 0.7657517572063801, 'colsample_bytree': 0.7827436783781312, 'reg_lambda': 0.946215979389272, 'reg_alpha': 0.5907662282782293, 'gamma': 0.3385902365745781, 'max_delta_step': 2.7321993822920803, 'num_boost_round': 8000}. Best is trial 1 with value: 0.9164731182795699.
[I 2025-10-23 14:49:57,426] Trial 2 finished with value: 0.9183225806451613 and parame

[Detected] Target in train: 'WeightCategory', Label in sample_sub: 'WeightCategory'
[Detected] ID in train: 'id', ID in test: 'id'
[Info] Train male rows: 8851 | female rows: 8793
[Info] Test  male rows: 2553 | female rows: 2672

[MALE] Fold 1/5
[MALE] Best iteration: 409
[MALE] Acc: 0.8984 | Macro F1: 0.7595

[MALE] Fold 2/5
[MALE] Best iteration: 349
[MALE] Acc: 0.8881 | Macro F1: 0.7526

[MALE] Fold 3/5
[MALE] Best iteration: 275
[MALE] Acc: 0.8825 | Macro F1: 0.7455

[MALE] Fold 4/5
[MALE] Best iteration: 336
[MALE] Acc: 0.8960 | Macro F1: 0.7605

[MALE] Fold 5/5
[MALE] Best iteration: 354
[MALE] Acc: 0.9000 | Macro F1: 0.7617

[MALE] OOF Accuracy: 0.8930 | Macro F1: 0.7560
[MALE] Best iterations: [409, 349, 275, 336, 354] | Median: 349

[FEMALE] Fold 1/5
[FEMALE] Best iteration: 334
[FEMALE] Acc: 0.9176 | Macro F1: 0.7531

[FEMALE] Fold 2/5
[FEMALE] Best iteration: 363
[FEMALE] Acc: 0.9346 | Macro F1: 0.7769

[FEMALE] Fold 3/5
[FEMALE] Best iteration: 326
[FEMALE] Acc: 0.9267 | Ma