In [1]:
# ==============================================
# Gender-specific XGB + BMI + requested feature engineering + Kaggle_test eval
# ==============================================
import numpy as np
import pandas as pd
from collections import Counter

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.base import clone
import xgboost as xgb

# -------- Paths --------
TRAIN_PATH = "train.csv"
TEST_PATH = "test.csv"
SAMPLE_SUB_PATH = "sample_submission.csv"
KAGGLE_TEST_PATH = "Kaggle_test.csv"  # has WeightCategory ground truth

RANDOM_STATE = 42
N_FOLDS = 5
N_JOBS = -1

# -------- Helpers --------
def norm_col(s: str) -> str:
    if s is None: return s
    return str(s).replace("\ufeff", "").strip().lower()

def infer_feature_types(df):
    cat_cols = df.select_dtypes(include=["object", "category", "bool"]).columns.tolist()
    num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    return num_cols, cat_cols

def detect_gender_column(df):
    # common names
    for c in df.columns:
        if norm_col(c) in {"gender", "sex"}:
            return c
    # fallback: column that looks like M/F
    for c in df.columns:
        vals = pd.Series(df[c].dropna().astype(str).str.lower().str.strip()).unique()
        if len(vals) in (2, 3):
            if any(v.startswith("m") for v in vals) and any(v.startswith("f") for v in vals):
                return c
    return None

def split_by_gender(series):
    s = series.astype(str).str.lower().str.strip()
    male_mask = s.str.startswith(("m","1","true"))
    female_mask = s.str.startswith(("f","0","false"))
    if male_mask.sum()==0 and female_mask.sum()==0:
        top = s.value_counts().index.tolist()
        if len(top)>=2:
            male_mask = s==top[0]
            female_mask = s==top[1]
    return male_mask, female_mask

def add_bmi(df):
    """Compute BMI = Weight / (Height_m^2).
       If median height > 3 assume cm â†’ convert to meters."""
    if ("Weight" in df.columns) and ("Height" in df.columns):
        h = pd.to_numeric(df["Height"], errors="coerce")
        height_m = np.where(np.nanmedian(h) > 3.0, h / 100.0, h)
        with np.errstate(divide="ignore", invalid="ignore"):
            w = pd.to_numeric(df["Weight"], errors="coerce")
            bmi = w / (np.power(height_m, 2) + 1e-12)
        df["BMI"] = pd.Series(bmi).replace([np.inf, -np.inf], np.nan)
    return df

def safe_lower_str_col(s):
    """Return string-lowered series (preserve NaNs)."""
    return s.astype(str).str.strip().str.lower().where(s.notna(), other=np.nan)

# -------- Load data --------
train = pd.read_csv(TRAIN_PATH)
test  = pd.read_csv(TEST_PATH)
sample_sub = pd.read_csv(SAMPLE_SUB_PATH)

In [2]:
# ===========================================================
# Feature Engineering (as requested)
# ===========================================================

# 1) Drop columns we donâ€™t want
for c in ["MTRANS","SMOKE"]:
    if c in train.columns: train.drop(columns=[c], inplace=True)
    if c in test.columns:  test.drop(columns=[c], inplace=True)

# 2) FAVC * FCVC interaction, then drop FAVC/FCVC
# Normalize FAVC to numeric (yes=1, no=0), robust to capitalization/whitespace
if "FAVC" in train.columns:
    favc_map = {"yes":1, "no":0}
    tr_favc = safe_lower_str_col(train["FAVC"])
    te_favc = safe_lower_str_col(test["FAVC"]) if "FAVC" in test.columns else pd.Series(index=test.index, dtype=object)
    train["FAVC_num"] = tr_favc.map(favc_map)
    if "FAVC" in test.columns:
        test["FAVC_num"] = te_favc.map(favc_map)

# If FCVC present, create interaction
if ("FAVC_num" in train.columns) and ("FCVC" in train.columns):
    train["favc_fcvc_interaction"] = pd.to_numeric(train["FAVC_num"], errors="coerce") * pd.to_numeric(train["FCVC"], errors="coerce")
if ("FAVC_num" in test.columns) and ("FCVC" in test.columns):
    test["favc_fcvc_interaction"]  = pd.to_numeric(test["FAVC_num"], errors="coerce")  * pd.to_numeric(test["FCVC"], errors="coerce")

# Drop originals (FAVC, FCVC, and intermediate FAVC_num)
for c in ["FAVC","FAVC_num","FCVC"]:
    if c in train.columns: train.drop(columns=[c], inplace=True)
    if c in test.columns:  test.drop(columns=[c], inplace=True)

# 3) activity_balance = FAF / (TUE + 1e-6); keep FAF, drop TUE
if ("FAF" in train.columns) and ("TUE" in train.columns):
    train["activity_balance"] = pd.to_numeric(train["FAF"], errors="coerce") / (pd.to_numeric(train["TUE"], errors="coerce") + 1e-6)
if ("FAF" in test.columns) and ("TUE" in test.columns):
    test["activity_balance"]  = pd.to_numeric(test["FAF"], errors="coerce")  / (pd.to_numeric(test["TUE"], errors="coerce")  + 1e-6)

# Drop TUE only (keep FAF)
for c in ["TUE"]:
    if c in train.columns: train.drop(columns=[c], inplace=True)
    if c in test.columns:  test.drop(columns=[c], inplace=True)

# 4) meal_balance = NCP * CAEC_num; map CAEC via provided mapping, then drop NCP & CAEC
# Provided mapping (note: 'no' is lowercase, others have capital first letter)
_caec_map_exact = {'no': 0, 'Sometimes': 1, 'Frequently': 2, 'Always': 3}
# To be robust, weâ€™ll normalize train/test strings, then map back to the exact keys:
def map_caec(series):
    s = series.astype(str).str.strip()
    s_lower = s.str.lower()
    # build normalized map
    norm_map = {'no':0, 'sometimes':1, 'frequently':2, 'always':3}
    return s_lower.map(norm_map)

if "CAEC" in train.columns:
    train["CAEC_num"] = map_caec(train["CAEC"])
if "CAEC" in test.columns:
    test["CAEC_num"]  = map_caec(test["CAEC"])

if ("NCP" in train.columns) and ("CAEC_num" in train.columns):
    train["meal_balance"] = pd.to_numeric(train["NCP"], errors="coerce") * pd.to_numeric(train["CAEC_num"], errors="coerce")
if ("NCP" in test.columns) and ("CAEC_num" in test.columns):
    test["meal_balance"]  = pd.to_numeric(test["NCP"], errors="coerce")  * pd.to_numeric(test["CAEC_num"], errors="coerce")

for c in ["NCP","CAEC"]:
    if c in train.columns: train.drop(columns=[c], inplace=True)
    if c in test.columns:  test.drop(columns=[c], inplace=True)

# 5) BMI (keep as in your original)
train = add_bmi(train)
test  = add_bmi(test)

print("[Info] Engineered features added: favc_fcvc_interaction, activity_balance, meal_balance")
print("[Info] Dropped: SMOKE, MTRANS, FAVC, FCVC, TUE, NCP, CAEC")


[Info] Engineered features added: favc_fcvc_interaction, activity_balance, meal_balance
[Info] Dropped: SMOKE, MTRANS, FAVC, FCVC, TUE, NCP, CAEC


In [3]:
# -------- Detect ID/Target from files (simple logic)
id_col = None
for cand in ["id", "row_id", "index", "sample_id"]:
    if cand in train.columns and cand in test.columns:
        id_col = cand
        break

target_col = None
for cand in ["WeightCategory", "NObeyesdad", "label", "target", "class", "y"]:
    if cand in train.columns:
        target_col = cand
        break
if target_col is None:
    raise ValueError("Could not detect target column in train.csv")

# Build X/y
y = train[target_col].copy()
X = train.drop(columns=[target_col]).copy()
if id_col and id_col in X.columns:
    X.drop(columns=[id_col], inplace=True)

test_features = test.copy()
if id_col and id_col in test_features.columns:
    test_ids = test_features[id_col].copy()
    test_features.drop(columns=[id_col], inplace=True)
else:
    test_ids = pd.Series(np.arange(len(test_features)), name="id")

# Label encode target
le = LabelEncoder()
y_enc = le.fit_transform(y)
classes = list(le.classes_)
print(f"[Info] Classes: {classes}")

# Detect gender and split
gender_col = detect_gender_column(pd.concat([X, test_features], axis=0))
if gender_col is None:
    raise ValueError("Could not detect a gender column (e.g., 'Gender'/'SEX').")
male_mask, female_mask = split_by_gender(train[gender_col])
test_male_mask, test_female_mask = split_by_gender(test_features[gender_col])
print(f"[Info] Train male={int(male_mask.sum())}, female={int(female_mask.sum())}")
print(f"[Info] Test  male={int(test_male_mask.sum())}, female={int(test_female_mask.sum())}")


[Info] Classes: ['Insufficient_Weight', 'Normal_Weight', 'Obesity_Type_I', 'Obesity_Type_II', 'Obesity_Type_III', 'Overweight_Level_I', 'Overweight_Level_II']
[Info] Train male=7783, female=7750
[Info] Test  male=10336, female=10422


In [4]:

# -------- Training function (gender-specific) with class boosting for two classes --------
def train_group_and_predict(X_grp, y_enc_grp, test_grp, group_name,
                            boost_targets=("Overweight_Level_I","Overweight_Level_II"),
                            base_boost=1.50, jitter_amp=0.10):
    # Drop gender column inside a group (constant after split)
    cols_to_use = [c for c in X_grp.columns if c != gender_col]
    Xg = X_grp[cols_to_use].copy()
    Xtestg = test_grp[cols_to_use].copy()

    num_cols, cat_cols = infer_feature_types(Xg)

    # Preprocessor
    numeric_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler(with_mean=False))
    ])
    try:
        ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=True)
    except TypeError:
        ohe = OneHotEncoder(handle_unknown="ignore", sparse=True)
    categorical_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", ohe)
    ])
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, num_cols),
            ("cat", categorical_transformer, cat_cols),
        ],
        remainder="drop",
        sparse_threshold=1.0
    )

    # XGB params
    xgb_params = {
        "objective": "multi:softprob",
        "num_class": len(classes),
        "eval_metric": "mlogloss",
        "tree_method": "hist",
        "max_depth": 6,
        "min_child_weight": 2,
        "subsample": 0.9,
        "colsample_bytree": 0.9,
        "lambda": 1.0,
        "alpha": 0.0,
        "eta": 0.03,
        "nthread": N_JOBS,
        "seed": RANDOM_STATE,
    }
    NUM_BOOST_ROUND = 20000
    EARLY_STOP = 200

    skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE)
    oof_group = np.zeros((len(Xg), len(classes)), dtype=np.float32)
    test_group_pred = np.zeros((len(Xtestg), len(classes)), dtype=np.float32)
    fold_best = []

    # map class name -> index
    cls_to_idx = {c: i for i, c in enumerate(classes)}

    for fold, (tr_idx, va_idx) in enumerate(skf.split(Xg, y_enc_grp), start=1):
        print(f"\n[{group_name}] Fold {fold}/{N_FOLDS}")
        X_tr, X_va = Xg.iloc[tr_idx], Xg.iloc[va_idx]
        y_tr, y_va = y_enc_grp[tr_idx], y_enc_grp[va_idx]

        prep = clone(preprocessor)
        Xtr = prep.fit_transform(X_tr)
        Xva = prep.transform(X_va)

        # ---- RANDOM (non-count) WEIGHTS to gently boost two classes ----
        w_tr = np.ones_like(y_tr, dtype=float)
        rng = np.random.default_rng(RANDOM_STATE + fold)  # deterministic per fold
        for t in boost_targets:
            if t in cls_to_idx:
                cls_id = cls_to_idx[t]
                idx_t = np.where(y_tr == cls_id)[0]
                if idx_t.size > 0:
                    jitter = rng.uniform(-jitter_amp, jitter_amp, size=idx_t.size)
                    w_tr[idx_t] = base_boost + jitter
        w_va = np.ones_like(y_va, dtype=float)

        dtrain = xgb.DMatrix(Xtr, label=y_tr, weight=w_tr)
        dval   = xgb.DMatrix(Xva, label=y_va, weight=w_va)

        bst = xgb.train(
            params=xgb_params,
            dtrain=dtrain,
            num_boost_round=NUM_BOOST_ROUND,
            evals=[(dtrain, "train"), (dval, "valid")],
            feval=None,
            early_stopping_rounds=EARLY_STOP,
            verbose_eval=False
        )
        best_round = int(bst.best_iteration + 1)
        fold_best.append(best_round)
        print(f"[{group_name}] Best iteration: {best_round}")

        oof_proba = bst.predict(dval, iteration_range=(0, best_round))
        oof_group[va_idx] = oof_proba

        # test preds for this fold
        Xtest_tf = prep.transform(Xtestg)
        dtest = xgb.DMatrix(Xtest_tf)
        test_group_pred += bst.predict(dtest, iteration_range=(0, best_round)) / N_FOLDS

    # OOF summary for the group
    oof_labels = np.argmax(oof_group, axis=1)
    acc_g = accuracy_score(y_enc_grp, oof_labels)
    f1_g = f1_score(y_enc_grp, oof_labels, average="macro")
    print(f"\n[{group_name}] OOF Accuracy: {acc_g:.4f} | Macro F1: {f1_g:.4f}")
    print(f"[{group_name}] Best iterations: {fold_best} | Median: {int(np.median(fold_best))}")

    return oof_group, test_group_pred



In [5]:

# -------- Train per-gender and predict full test --------
X_male = X[male_mask].reset_index(drop=True)
y_male_enc = y_enc[male_mask]
test_male = test_features[test_male_mask].reset_index(drop=True)

X_female = X[female_mask].reset_index(drop=True)
y_female_enc = y_enc[female_mask]
test_female = test_features[test_female_mask].reset_index(drop=True)

male_oof, male_test_pred = train_group_and_predict(X_male, y_male_enc, test_male, "MALE")
female_oof, female_test_pred = train_group_and_predict(X_female, y_female_enc, test_female, "FEMALE")

# Combine OOF
oof_full = np.zeros((len(X), len(classes)), dtype=np.float32)
oof_full[male_mask.values] = male_oof
oof_full[female_mask.values] = female_oof

oof_labels = np.argmax(oof_full, axis=1)
oof_acc = accuracy_score(y_enc, oof_labels)
oof_f1 = f1_score(y_enc, oof_labels, average="macro")
print("\n========== OVERALL OOF ==========")
print(f"OOF Accuracy: {oof_acc:.4f} | OOF Macro F1: {oof_f1:.4f}")
try:
    print("\nOOF Classification Report:\n",
          classification_report(y_enc, oof_labels, target_names=classes))
except Exception as e:
    print(f"[Info] Could not print classification report: {e}")

# Build full test predictions (for Kaggle submission use-case)
test_pred_proba = np.zeros((len(test_features), len(classes)), dtype=np.float32)
test_pred_proba[test_male_mask.values] = male_test_pred
test_pred_proba[test_female_mask.values] = female_test_pred

test_pred_int = np.argmax(test_pred_proba, axis=1)
test_pred_labels = le.inverse_transform(test_pred_int)






[MALE] Fold 1/5
[MALE] Best iteration: 305

[MALE] Fold 2/5
[MALE] Best iteration: 311

[MALE] Fold 3/5
[MALE] Best iteration: 320

[MALE] Fold 4/5
[MALE] Best iteration: 315

[MALE] Fold 5/5
[MALE] Best iteration: 287

[MALE] OOF Accuracy: 0.8855 | Macro F1: 0.7491
[MALE] Best iterations: [305, 311, 320, 315, 287] | Median: 311

[FEMALE] Fold 1/5
[FEMALE] Best iteration: 311

[FEMALE] Fold 2/5
[FEMALE] Best iteration: 311

[FEMALE] Fold 3/5
[FEMALE] Best iteration: 292

[FEMALE] Fold 4/5
[FEMALE] Best iteration: 319

[FEMALE] Fold 5/5
[FEMALE] Best iteration: 362

[FEMALE] OOF Accuracy: 0.9166 | Macro F1: 0.7494
[FEMALE] Best iterations: [311, 311, 292, 319, 362] | Median: 311

OOF Accuracy: 0.9010 | OOF Macro F1: 0.8916

OOF Classification Report:
                      precision    recall  f1-score   support

Insufficient_Weight       0.93      0.93      0.93      1870
      Normal_Weight       0.88      0.88      0.88      2345
     Obesity_Type_I       0.90      0.86      0.88    

In [6]:

# Submission
ss_cols = list(sample_sub.columns)
ID_HEADER = None
LABEL_HEADER = None
if len(ss_cols) == 2:
    # detect which is ID by presence in test
    c1, c2 = ss_cols
    if c1 in test.columns and c2 not in test.columns:
        ID_HEADER, LABEL_HEADER = c1, c2
    elif c2 in test.columns and c1 not in test.columns:
        ID_HEADER, LABEL_HEADER = c2, c1
if ID_HEADER is None:
    # fallback
    ID_HEADER = ss_cols[0]
    LABEL_HEADER = ss_cols[1]

sub = pd.DataFrame()
if ID_HEADER in test.columns:
    sub[ID_HEADER] = test[ID_HEADER].values
else:
    sub[ID_HEADER] = np.arange(len(test_features))
sub[LABEL_HEADER] = test_pred_labels

# Ensure column order
for c in ss_cols:
    if c not in sub.columns:
        sub[c] = sample_sub[c].iloc[0] if len(sample_sub[c]) else None
sub = sub[ss_cols]

sub.to_csv("submission.csv", index=False)
print("\nSaved submission.csv")
print(sub.head(5))




Saved submission.csv
   id       WeightCategory
0   0  Overweight_Level_II
1   1        Normal_Weight
2   2  Insufficient_Weight
3   3     Obesity_Type_III
4   4  Overweight_Level_II


In [7]:
# ==============================================
# Evaluate on Kaggle_test.csv (with ground truth)
# ==============================================
kdf = pd.read_csv(KAGGLE_TEST_PATH)
if "WeightCategory" not in kdf.columns:
    raise KeyError("Kaggle_test.csv must contain 'WeightCategory'.")

y_true = kdf["WeightCategory"].copy()
X_k = kdf.drop(columns=["WeightCategory"], errors="ignore").copy()
if id_col and id_col in X_k.columns:
    X_k.drop(columns=[id_col], inplace=True)

# same drops + engineered features + BMI
for c in ["MTRANS","SMOKE"]:
    if c in X_k.columns: X_k.drop(columns=[c], inplace=True)

# Recreate the same engineered features for Kaggle_test
# FAVC/FCVC -> favc_fcvc_interaction
if "FAVC" in X_k.columns:
    favc_map = {"yes":1, "no":0}
    favc_num = X_k["FAVC"].astype(str).str.strip().str.lower().map(favc_map)
    X_k["FAVC_num"] = favc_num
if ("FAVC_num" in X_k.columns) and ("FCVC" in X_k.columns):
    X_k["favc_fcvc_interaction"] = pd.to_numeric(X_k["FAVC_num"], errors="coerce") * pd.to_numeric(X_k["FCVC"], errors="coerce")
for c in ["FAVC","FAVC_num","FCVC"]:
    if c in X_k.columns: X_k.drop(columns=[c], inplace=True)

# activity_balance = FAF/(TUE+1e-6); drop TUE
if ("FAF" in X_k.columns) and ("TUE" in X_k.columns):
    X_k["activity_balance"] = pd.to_numeric(X_k["FAF"], errors="coerce") / (pd.to_numeric(X_k["TUE"], errors="coerce") + 1e-6)
if "TUE" in X_k.columns: X_k.drop(columns=["TUE"], inplace=True)

# meal_balance = NCP * CAEC_num; drop NCP, CAEC
if "CAEC" in X_k.columns:
    s = X_k["CAEC"].astype(str).str.strip().str.lower()
    caec_num = s.map({'no':0, 'sometimes':1, 'frequently':2, 'always':3})
    X_k["CAEC_num"] = caec_num
if ("NCP" in X_k.columns) and ("CAEC_num" in X_k.columns):
    X_k["meal_balance"] = pd.to_numeric(X_k["NCP"], errors="coerce") * pd.to_numeric(X_k["CAEC_num"], errors="coerce")
for c in ["NCP","CAEC"]:
    if c in X_k.columns: X_k.drop(columns=[c], inplace=True)

# BMI
X_k = add_bmi(X_k)

# detect gender and split for Kaggle set
gender_col_k = detect_gender_column(X_k)
if gender_col_k is None:
    raise ValueError("Could not detect a gender column in Kaggle_test.csv")
km_k, kf_k = split_by_gender(X_k[gender_col_k])

# Predict on Kaggle by reusing the same training procedure (per gender)
kaggle_pred_proba = np.zeros((len(X_k), len(classes)), dtype=np.float32)

# Build group frames aligned with training columns
def align_cols_for_inference(df_like_train, df_to_align):
    use_cols = [c for c in df_like_train.columns if c in df_to_align.columns]
    missing = [c for c in df_like_train.columns if c not in df_to_align.columns]
    tmp = df_to_align[use_cols].copy()
    for m in missing:
        tmp[m] = np.nan
    return tmp[df_like_train.columns]

if X_male.shape[0] > 0 and km_k.sum() > 0:
    X_k_m = align_cols_for_inference(X_male, X_k[km_k].reset_index(drop=True))
    _, male_k_pred = train_group_and_predict(X_male, y_male_enc, X_k_m, "MALE (Kaggle)")
    kaggle_pred_proba[km_k.values] = male_k_pred
if X_female.shape[0] > 0 and kf_k.sum() > 0:
    X_k_f = align_cols_for_inference(X_female, X_k[kf_k].reset_index(drop=True))
    _, female_k_pred = train_group_and_predict(X_female, y_female_enc, X_k_f, "FEMALE (Kaggle)")
    kaggle_pred_proba[kf_k.values] = female_k_pred

kaggle_pred_idx = np.argmax(kaggle_pred_proba, axis=1)
y_pred = le.inverse_transform(kaggle_pred_idx)



[MALE (Kaggle)] Fold 1/5




[MALE (Kaggle)] Best iteration: 305

[MALE (Kaggle)] Fold 2/5
[MALE (Kaggle)] Best iteration: 311

[MALE (Kaggle)] Fold 3/5
[MALE (Kaggle)] Best iteration: 320

[MALE (Kaggle)] Fold 4/5
[MALE (Kaggle)] Best iteration: 315

[MALE (Kaggle)] Fold 5/5
[MALE (Kaggle)] Best iteration: 287

[MALE (Kaggle)] OOF Accuracy: 0.8855 | Macro F1: 0.7491
[MALE (Kaggle)] Best iterations: [305, 311, 320, 315, 287] | Median: 311

[FEMALE (Kaggle)] Fold 1/5
[FEMALE (Kaggle)] Best iteration: 311

[FEMALE (Kaggle)] Fold 2/5
[FEMALE (Kaggle)] Best iteration: 311

[FEMALE (Kaggle)] Fold 3/5
[FEMALE (Kaggle)] Best iteration: 292

[FEMALE (Kaggle)] Fold 4/5
[FEMALE (Kaggle)] Best iteration: 319

[FEMALE (Kaggle)] Fold 5/5
[FEMALE (Kaggle)] Best iteration: 362

[FEMALE (Kaggle)] OOF Accuracy: 0.9166 | Macro F1: 0.7494
[FEMALE (Kaggle)] Best iterations: [311, 311, 292, 319, 362] | Median: 311


In [8]:

# -------- Overall accuracy to 5 decimals --------
overall_acc = accuracy_score(y_true, y_pred)
print(f"\nâœ… Overall Accuracy on Kaggle_test: {overall_acc:.5f}")

# -------- Text-only error analysis (custom order) --------
order = [
    'Insufficient_Weight',
    'Normal_Weight',
    'Overweight_Level_I',
    'Overweight_Level_II',
    'Obesity_Type_I',
    'Obesity_Type_II',
    'Obesity_Type_III'
]

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true, y_pred, labels=order)
cm_norm = cm.astype(float) / (cm.sum(axis=1, keepdims=True) + 1e-12)

print("\n=== Confusion Matrix (counts) ===")
print("Predicted â†’")
print("True â†“")
for i, true_class in enumerate(order):
    row = " | ".join(f"{cm[i, j]:4d}" for j in range(len(order)))
    print(f"{true_class:<22}: {row}")

print("\n=== Confusion Matrix (row-normalized) ===")
for i, true_class in enumerate(order):
    row = " | ".join(f"{cm_norm[i, j]:.2f}" for j in range(len(order)))
    print(f"{true_class:<22}: {row}")

print("\n=== Per-class metrics ===")
try:
    print(classification_report(y_true, y_pred, labels=order, target_names=order, digits=4, zero_division=0))
except Exception as e:
    print(f"[Info] classification_report fallback: {e}")
    print(classification_report(y_true, y_pred, digits=4, zero_division=0))

print("\n=== Per-class accuracy (diagonal/row total) ===")
for i, c in enumerate(order):
    total = cm[i].sum()
    correct = cm[i, i]
    acc = correct / total if total > 0 else 0.0
    print(f"{c:<22} | Correct: {correct:3d} / {total:3d} | {acc*100:6.2f}%")

print("\n=== Most common confusions (true â†’ predicted) ===")
pairs = []
for i, t in enumerate(order):
    for j, p in enumerate(order):
        if i == j or cm[i, j] == 0:
            continue
        pairs.append((cm[i, j], t, p, cm_norm[i, j]))
pairs = sorted(pairs, key=lambda x: (-x[0], -x[3]))
for cnt, true_label, pred_label, norm_val in pairs[:10]:
    print(f"{true_label:25} â†’ {pred_label:25} | Count: {cnt:3d} | Row%: {norm_val*100:5.1f}")

print("\n=== Sample of misclassified rows (first 10) ===")
mis_idx = np.where(np.asarray(y_true) != np.asarray(y_pred))[0]
if len(mis_idx) == 0:
    print("ðŸŽ‰ No misclassifications!")
else:
    for idx in mis_idx[:10]:
        true_lab = y_true.iloc[idx] if hasattr(y_true, "iloc") else y_true[idx]
        pred_lab = y_pred[idx]
        conf = float(np.max(kaggle_pred_proba[idx]))
        rank = np.argsort(-kaggle_pred_proba[idx])
        second_idx = rank[1] if rank.size > 1 else rank[0]
        second_lab = le.inverse_transform([second_idx])[0]
        second_conf = float(kaggle_pred_proba[idx][second_idx])
        print(f"Row {idx:4d}: true={true_lab:<22} pred={pred_lab:<22} conf={conf:.3f} 2nd={second_lab:<22}({second_conf:.3f})")


âœ… Overall Accuracy on Kaggle_test: 0.90928

=== Confusion Matrix (counts) ===
Predicted â†’
True â†“
Insufficient_Weight   :  619 |   31 |    3 |    0 |    0 |    0 |    0
Normal_Weight         :   42 |  651 |   37 |    6 |    1 |    0 |    0
Overweight_Level_I    :    4 |   52 |  449 |   69 |    9 |    0 |    0
Overweight_Level_II   :    0 |   16 |   52 |  526 |   42 |    5 |    0
Obesity_Type_I        :    1 |    1 |   10 |   50 |  624 |   15 |    2
Obesity_Type_II       :    0 |    0 |    2 |    5 |   17 |  821 |    0
Obesity_Type_III      :    0 |    0 |    1 |    0 |    1 |    0 | 1061

=== Confusion Matrix (row-normalized) ===
Insufficient_Weight   : 0.95 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00
Normal_Weight         : 0.06 | 0.88 | 0.05 | 0.01 | 0.00 | 0.00 | 0.00
Overweight_Level_I    : 0.01 | 0.09 | 0.77 | 0.12 | 0.02 | 0.00 | 0.00
Overweight_Level_II   : 0.00 | 0.02 | 0.08 | 0.82 | 0.07 | 0.01 | 0.00
Obesity_Type_I        : 0.00 | 0.00 | 0.01 | 0.07 | 0.89 | 0.02 | 0.00
O

In [9]:
# ---- Misclassifications on Kaggle_test: Normal_Weight -> Insufficient_Weight ----
target_true = "Normal_Weight"
target_pred = "Insufficient_Weight"

mask_k = (y_true == target_true) & (y_pred == target_pred)
idx_k = np.where(mask_k)[0]

print(f"\n[Kaggle_test] {target_true} misclassified as {target_pred}: {len(idx_k)} rows")

if len(idx_k) > 0:
    # Grab original rows (with all original columns) from kdf
    mis_k_df = kdf.iloc[idx_k].copy()

    # Attach prediction diagnostics
    pred_probs = kaggle_pred_proba[idx_k]
    pred_idx = np.argmax(pred_probs, axis=1)
    pred_lab = le.inverse_transform(pred_idx)

    # probability of predicted and true classes
    cls_to_idx = {c: i for i, c in enumerate(le.classes_)}
    p_pred = pred_probs[np.arange(len(idx_k)), [cls_to_idx[l] for l in pred_lab]]
    p_true = pred_probs[:, cls_to_idx[target_true]]

    # top-2 info
    top2_idx = np.argsort(-pred_probs, axis=1)[:, :2]
    top2_labels = np.column_stack([le.inverse_transform(top2_idx[:, 0]),
                                   le.inverse_transform(top2_idx[:, 1])])
    top1_prob = pred_probs[np.arange(len(idx_k)), top2_idx[:, 0]]
    top2_prob = pred_probs[np.arange(len(idx_k)), top2_idx[:, 1]]
    margin = top1_prob - top2_prob

    mis_k_df["true_label"] = target_true
    mis_k_df["pred_label"] = pred_lab
    mis_k_df["p_pred"] = p_pred
    mis_k_df["p_true"] = p_true
    mis_k_df["top1"] = top2_labels[:, 0]
    mis_k_df["top2"] = top2_labels[:, 1]
    mis_k_df["top1_prob"] = top1_prob
    mis_k_df["top2_prob"] = top2_prob
    mis_k_df["margin"] = margin

    # Show a few
    print(mis_k_df.head(10))

    # Save them for inspection
    mis_k_path = "miscls_kaggle_normal_to_insufficient.csv"
    mis_k_df.to_csv(mis_k_path, index=False)
    print(f"[Kaggle_test] Saved: {mis_k_path}")
else:
    print("[Kaggle_test] None found.")



[Kaggle_test] Normal_Weight misclassified as Insufficient_Weight: 42 rows
         id  Gender        Age    Height     Weight  \
28    15561  Female  25.000000  1.600000  45.000000   
426   15959    Male  18.000000  1.800000  55.000000   
804   16337    Male  20.000000  1.610000  53.000000   
839   16372    Male  18.000000  1.720000  52.000000   
913   16446  Female  26.000000  1.640000  49.000000   
920   16453    Male  20.000000  1.830000  49.000000   
1198  16731    Male  17.000000  1.800000  60.000000   
1291  16824    Male  18.000000  1.700000  52.000000   
1396  16929    Male  19.000000  1.800000  58.000000   
1620  17153  Female  20.979254  1.546665  41.890204   

     family_history_with_overweight FAVC      FCVC  NCP        CAEC  ...  \
28                               no  yes  3.000000  3.0  Frequently  ...   
426                             yes  yes  3.000000  3.0  Frequently  ...   
804                             yes  yes  2.000000  3.0  Frequently  ...   
839            

In [8]:
# ==============================================
# Gender-specific LightGBM + BMI + requested feature engineering + Kaggle_test eval
# ==============================================
import os
import numpy as np
import pandas as pd
from collections import Counter

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.base import clone

from lightgbm import early_stopping, log_evaluation
from lightgbm import LGBMClassifier
import lightgbm as lgb
import logging
logging.getLogger('lightgbm').setLevel(logging.ERROR)

# -------- Paths --------
TRAIN_PATH = "train.csv"
TEST_PATH = "test.csv"
SAMPLE_SUB_PATH = "sample_submission.csv"
KAGGLE_TEST_PATH = "Kaggle_test.csv"  # must contain WeightCategory ground truth

RANDOM_STATE = 42
N_FOLDS = 5
N_JOBS = -1

# Your LightGBM hyperparameters
best_params = {
    "objective": "multiclass",
    "metric": "multi_logloss",
    "verbosity": -1,
    "boosting_type": "gbdt",
    "random_state": 42,
    "num_class": 7,
    "learning_rate": 0.030962211546832760,
    "n_estimators": 500,
    "lambda_l1": 0.009667446568254372,
    "lambda_l2": 0.04018641437301800,
    "max_depth": 10,
    "colsample_bytree": 0.40977129346872643,
    "subsample": 0.9535797422450176,
    "min_child_samples": 26
}

# -------- Helpers --------
def norm_col(s: str) -> str:
    if s is None: return s
    return str(s).replace("\ufeff", "").strip().lower()

def infer_feature_types(df):
    cat_cols = df.select_dtypes(include=["object", "category", "bool"]).columns.tolist()
    num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    return num_cols, cat_cols

def detect_gender_column(df):
    for c in df.columns:
        if norm_col(c) in {"gender", "sex"}:
            return c
    for c in df.columns:
        vals = pd.Series(df[c].dropna().astype(str).str.lower().str.strip()).unique()
        if len(vals) in (2, 3):
            if any(v.startswith("m") for v in vals) and any(v.startswith("f") for v in vals):
                return c
    return None

def split_by_gender(series):
    s = series.astype(str).str.lower().str.strip()
    male_mask = s.str.startswith(("m","1","true"))
    female_mask = s.str.startswith(("f","0","false"))
    if male_mask.sum()==0 and female_mask.sum()==0:
        top = s.value_counts().index.tolist()
        if len(top)>=2:
            male_mask = s==top[0]
            female_mask = s==top[1]
    return male_mask, female_mask

def add_bmi(df):
    """BMI = Weight / (Height_m^2). If median height > 3 assume cm â†’ meters."""
    if ("Weight" in df.columns) and ("Height" in df.columns):
        h = pd.to_numeric(df["Height"], errors="coerce")
        height_m = np.where(np.nanmedian(h) > 3.0, h / 100.0, h)
        with np.errstate(divide="ignore", invalid="ignore"):
            w = pd.to_numeric(df["Weight"], errors="coerce")
            bmi = w / (np.power(height_m, 2) + 1e-12)
        df["BMI"] = pd.Series(bmi).replace([np.inf, -np.inf], np.nan)
    return df

def safe_lower_str_col(s):
    return s.astype(str).str.strip().str.lower().where(s.notna(), other=np.nan)

# -------- Load data --------
train = pd.read_csv(TRAIN_PATH)
test  = pd.read_csv(TEST_PATH)
sample_sub = pd.read_csv(SAMPLE_SUB_PATH)

# ===========================================================
# Feature Engineering (requested)
# ===========================================================
# 1) Drop SMOKE, MTRANS
for c in ["MTRANS","SMOKE"]:
    if c in train.columns: train.drop(columns=[c], inplace=True)
    if c in test.columns:  test.drop(columns=[c], inplace=True)

# 2) FAVC * FCVC interaction, then drop FAVC/FCVC
if "FAVC" in train.columns:
    favc_map = {"yes":1, "no":0}
    train["FAVC_num"] = safe_lower_str_col(train["FAVC"]).map(favc_map)
if "FAVC" in test.columns:
    favc_map = {"yes":1, "no":0}
    test["FAVC_num"]  = safe_lower_str_col(test["FAVC"]).map(favc_map)

if ("FAVC_num" in train.columns) and ("FCVC" in train.columns):
    train["favc_fcvc_interaction"] = pd.to_numeric(train["FAVC_num"], errors="coerce") * pd.to_numeric(train["FCVC"], errors="coerce")
if ("FAVC_num" in test.columns) and ("FCVC" in test.columns):
    test["favc_fcvc_interaction"]  = pd.to_numeric(test["FAVC_num"], errors="coerce")  * pd.to_numeric(test["FCVC"], errors="coerce")

for c in ["FAVC","FAVC_num","FCVC"]:
    if c in train.columns: train.drop(columns=[c], inplace=True)
    if c in test.columns:  test.drop(columns=[c], inplace=True)

# 3) activity_balance = FAF / (TUE + 1e-6); keep FAF, drop TUE
if ("FAF" in train.columns) and ("TUE" in train.columns):
    train["activity_balance"] = pd.to_numeric(train["FAF"], errors="coerce") / (pd.to_numeric(train["TUE"], errors="coerce") + 1e-6)
if ("FAF" in test.columns) and ("TUE" in test.columns):
    test["activity_balance"]  = pd.to_numeric(test["FAF"], errors="coerce")  / (pd.to_numeric(test["TUE"], errors="coerce")  + 1e-6)

for c in ["TUE"]:
    if c in train.columns: train.drop(columns=[c], inplace=True)
    if c in test.columns:  test.drop(columns=[c], inplace=True)

# 4) meal_balance = NCP * CAEC_num; drop NCP & CAEC
def map_caec(series):
    s = series.astype(str).str.strip().str.lower()
    return s.map({'no':0, 'sometimes':1, 'frequently':2, 'always':3})

if "CAEC" in train.columns:
    train["CAEC_num"] = map_caec(train["CAEC"])
if "CAEC" in test.columns:
    test["CAEC_num"]  = map_caec(test["CAEC"])

if ("NCP" in train.columns) and ("CAEC_num" in train.columns):
    train["meal_balance"] = pd.to_numeric(train["NCP"], errors="coerce") * pd.to_numeric(train["CAEC_num"], errors="coerce")
if ("NCP" in test.columns) and ("CAEC_num" in test.columns):
    test["meal_balance"]  = pd.to_numeric(test["NCP"], errors="coerce")  * pd.to_numeric(test["CAEC_num"], errors="coerce")

for c in ["NCP","CAEC"]:
    if c in train.columns: train.drop(columns=[c], inplace=True)
    if c in test.columns:  test.drop(columns=[c], inplace=True)

# 5) BMI
train = add_bmi(train)
test  = add_bmi(test)

print("[Info] Engineered features added: favc_fcvc_interaction, activity_balance, meal_balance")
print("[Info] Dropped: SMOKE, MTRANS, FAVC, FCVC, TUE, NCP, CAEC")


[Info] Engineered features added: favc_fcvc_interaction, activity_balance, meal_balance
[Info] Dropped: SMOKE, MTRANS, FAVC, FCVC, TUE, NCP, CAEC


In [9]:

# -------- Detect ID/Target
id_col = None
for cand in ["id", "row_id", "index", "sample_id"]:
    if cand in train.columns and cand in test.columns:
        id_col = cand
        break

target_col = None
for cand in ["WeightCategory", "NObeyesdad", "label", "target", "class", "y"]:
    if cand in train.columns:
        target_col = cand
        break
if target_col is None:
    raise ValueError("Could not detect target column in train.csv")

# Build X/y
y = train[target_col].copy()
X = train.drop(columns=[target_col]).copy()
if id_col and id_col in X.columns:
    X.drop(columns=[id_col], inplace=True)

test_features = test.copy()
if id_col and id_col in test_features.columns:
    test_ids = test_features[id_col].copy()
    test_features.drop(columns=[id_col], inplace=True)
else:
    test_ids = pd.Series(np.arange(len(test_features)), name="id")

# Label encode target
le = LabelEncoder()
y_enc = le.fit_transform(y)
classes = list(le.classes_)
print(f"[Info] Classes: {classes}")

# Detect gender and split
gender_col = detect_gender_column(pd.concat([X, test_features], axis=0))
if gender_col is None:
    raise ValueError("Could not detect a gender column (e.g., 'Gender'/'SEX').")
male_mask, female_mask = split_by_gender(train[gender_col])
test_male_mask, test_female_mask = split_by_gender(test_features[gender_col])
print(f"[Info] Train male={int(male_mask.sum())}, female={int(female_mask.sum())}")
print(f"[Info] Test  male={int(test_male_mask.sum())}, female={int(test_female_mask.sum())}")



[Info] Classes: ['Insufficient_Weight', 'Normal_Weight', 'Obesity_Type_I', 'Obesity_Type_II', 'Obesity_Type_III', 'Overweight_Level_I', 'Overweight_Level_II']
[Info] Train male=7783, female=7750
[Info] Test  male=10336, female=10422


In [10]:
# -------- Training function (gender-specific) using LightGBM --------
def train_group_and_predict_LGBM(X_grp, y_enc_grp, test_grp, group_name):
    # drop gender col inside group
    cols_to_use = [c for c in X_grp.columns if c != gender_col]
    Xg = X_grp[cols_to_use].copy()
    Xtestg = test_grp[cols_to_use].copy()

    num_cols, cat_cols = infer_feature_types(Xg)

    # Preprocessor (OHE for cats; LightGBM can take categorical indices but OHE keeps parity with earlier code)
    numeric_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler(with_mean=False))  # scaling not strictly needed for trees, but harmless
    ])
    try:
        ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=True)
    except TypeError:
        ohe = OneHotEncoder(handle_unknown="ignore", sparse=True)
    categorical_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", ohe)
    ])
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, num_cols),
            ("cat", categorical_transformer, cat_cols),
        ],
        remainder="drop",
        sparse_threshold=1.0
    )

    skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE)
    oof_group = np.zeros((len(Xg), len(classes)), dtype=np.float32)
    test_group_pred = np.zeros((len(Xtestg), len(classes)), dtype=np.float32)
    best_iters = []

    for fold, (tr_idx, va_idx) in enumerate(skf.split(Xg, y_enc_grp), start=1):
        print(f"\n[{group_name}] Fold {fold}/{N_FOLDS}")
        X_tr, X_va = Xg.iloc[tr_idx], Xg.iloc[va_idx]
        y_tr, y_va = y_enc_grp[tr_idx], y_enc_grp[va_idx]

        prep = clone(preprocessor)
        Xtr = prep.fit_transform(X_tr)
        Xva = prep.transform(X_va)

        # init model with provided params
        model = LGBMClassifier(**best_params, n_jobs=N_JOBS)
        callbacks = [early_stopping(stopping_rounds=200), log_evaluation(period=0)]
        model.fit(
            Xtr, y_tr,
            eval_set=[(Xva, y_va)],
            eval_metric="multi_logloss",
            callbacks=callbacks
        )
        best_it = getattr(model, "best_iteration_", best_params.get("n_estimators", 500))
        best_iters.append(int(best_it))

        # OOF
        proba_va = model.predict_proba(Xva, num_iteration=best_it)
        oof_group[va_idx] = proba_va

        # Test transform once per fold to match encoding
        Xtest_tf = prep.transform(Xtestg)
        proba_te = model.predict_proba(Xtest_tf, num_iteration=best_it)
        test_group_pred += proba_te / N_FOLDS

    # Group OOF summary
    oof_labels_g = np.argmax(oof_group, axis=1)
    acc_g = accuracy_score(y_enc_grp, oof_labels_g)
    f1_g = f1_score(y_enc_grp, oof_labels_g, average="macro")
    print(f"\n[{group_name}] OOF Accuracy: {acc_g:.4f} | Macro F1: {f1_g:.4f}")
    print(f"[{group_name}] Best iterations per fold: {best_iters} | Median: {int(np.median(best_iters))}")

    return oof_group, test_group_pred


In [11]:
# -------- Train per-gender and predict full test --------
X_male = X[male_mask].reset_index(drop=True)
y_male_enc = y_enc[male_mask]
test_male = test_features[test_male_mask].reset_index(drop=True)

X_female = X[female_mask].reset_index(drop=True)
y_female_enc = y_enc[female_mask]
test_female = test_features[test_female_mask].reset_index(drop=True)

male_oof, male_test_pred = train_group_and_predict_LGBM(X_male, y_male_enc, test_male, "MALE")
female_oof, female_test_pred = train_group_and_predict_LGBM(X_female, y_female_enc, test_female, "FEMALE")

# Combine OOF
oof_full = np.zeros((len(X), len(classes)), dtype=np.float32)
oof_full[male_mask.values] = male_oof
oof_full[female_mask.values] = female_oof

oof_labels = np.argmax(oof_full, axis=1)
oof_acc = accuracy_score(y_enc, oof_labels)
oof_f1 = f1_score(y_enc, oof_labels, average="macro")
print("\n========== OVERALL OOF ==========")
print(f"OOF Accuracy: {oof_acc:.4f} | OOF Macro F1: {oof_f1:.4f}")
try:
    print("\nOOF Classification Report:\n",
          classification_report(y_enc, oof_labels, target_names=classes, zero_division=0))
except Exception as e:
    print(f"[Info] Could not print classification report: {e}")






[MALE] Fold 1/5
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[173]	valid_0's multi_logloss: 0.335092





[MALE] Fold 2/5
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[178]	valid_0's multi_logloss: 0.312167





[MALE] Fold 3/5
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[204]	valid_0's multi_logloss: 0.326967





[MALE] Fold 4/5
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[195]	valid_0's multi_logloss: 0.320197





[MALE] Fold 5/5
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[166]	valid_0's multi_logloss: 0.340591





[MALE] OOF Accuracy: 0.8881 | Macro F1: 0.7509
[MALE] Best iterations per fold: [173, 178, 204, 195, 166] | Median: 178

[FEMALE] Fold 1/5
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[172]	valid_0's multi_logloss: 0.251829





[FEMALE] Fold 2/5
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[171]	valid_0's multi_logloss: 0.244419





[FEMALE] Fold 3/5
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[143]	valid_0's multi_logloss: 0.277777

[FEMALE] Fold 4/5




Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[174]	valid_0's multi_logloss: 0.245481





[FEMALE] Fold 5/5
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[168]	valid_0's multi_logloss: 0.237603





[FEMALE] OOF Accuracy: 0.9182 | Macro F1: 0.7514
[FEMALE] Best iterations per fold: [172, 171, 143, 174, 168] | Median: 171

OOF Accuracy: 0.9031 | OOF Macro F1: 0.8936

OOF Classification Report:
                      precision    recall  f1-score   support

Insufficient_Weight       0.93      0.93      0.93      1870
      Normal_Weight       0.87      0.88      0.88      2345
     Obesity_Type_I       0.89      0.88      0.88      2207
    Obesity_Type_II       0.96      0.97      0.97      2403
   Obesity_Type_III       1.00      1.00      1.00      2983
 Overweight_Level_I       0.81      0.77      0.79      1844
Overweight_Level_II       0.80      0.82      0.81      1881

           accuracy                           0.90     15533
          macro avg       0.89      0.89      0.89     15533
       weighted avg       0.90      0.90      0.90     15533



In [None]:
# -------- Build submission --------
test_pred_proba = np.zeros((len(test_features), len(classes)), dtype=np.float32)
test_pred_proba[test_male_mask.values] = male_test_pred
test_pred_proba[test_female_mask.values] = female_test_pred

test_pred_int = np.argmax(test_pred_proba, axis=1)
test_pred_labels = le.inverse_transform(test_pred_int)

ss_cols = list(sample_sub.columns)
ID_HEADER = None
LABEL_HEADER = None
if len(ss_cols) == 2:
    c1, c2 = ss_cols
    if c1 in test.columns and c2 not in test.columns:
        ID_HEADER, LABEL_HEADER = c1, c2
    elif c2 in test.columns and c1 not in test.columns:
        ID_HEADER, LABEL_HEADER = c2, c1
if ID_HEADER is None:
    ID_HEADER = ss_cols[0]
    LABEL_HEADER = ss_cols[1]

sub = pd.DataFrame()
if ID_HEADER in test.columns:
    sub[ID_HEADER] = test[ID_HEADER].values
else:
    sub[ID_HEADER] = np.arange(len(test_features))
sub[LABEL_HEADER] = test_pred_labels

for c in ss_cols:
    if c not in sub.columns:
        sub[c] = sample_sub[c].iloc[0] if len(sample_sub[c]) else None
sub = sub[ss_cols]
sub.to_csv("submission.csv", index=False)
print("\nSaved submission.csv")
print(sub.head(5))



Saved submission.csv
   id       WeightCategory
0   0  Overweight_Level_II
1   1        Normal_Weight
2   2  Insufficient_Weight
3   3     Obesity_Type_III
4   4  Overweight_Level_II


In [None]:

# ==============================================
# Evaluate on Kaggle_test.csv (with ground truth)
# ==============================================
kdf = pd.read_csv(KAGGLE_TEST_PATH)
if "WeightCategory" not in kdf.columns:
    raise KeyError("Kaggle_test.csv must contain 'WeightCategory'.")

y_true = kdf["WeightCategory"].copy()
X_k = kdf.drop(columns=["WeightCategory"], errors="ignore").copy()
if id_col and id_col in X_k.columns:
    X_k.drop(columns=[id_col], inplace=True)

# Recreate same engineered features on Kaggle_test
for c in ["MTRANS","SMOKE"]:
    if c in X_k.columns: X_k.drop(columns=[c], inplace=True)

# FAVC/FCVC -> interaction
if "FAVC" in X_k.columns:
    favc_num = safe_lower_str_col(X_k["FAVC"]).map({"yes":1,"no":0})
    X_k["FAVC_num"] = favc_num
if ("FAVC_num" in X_k.columns) and ("FCVC" in X_k.columns):
    X_k["favc_fcvc_interaction"] = pd.to_numeric(X_k["FAVC_num"], errors="coerce") * pd.to_numeric(X_k["FCVC"], errors="coerce")
for c in ["FAVC","FAVC_num","FCVC"]:
    if c in X_k.columns: X_k.drop(columns=[c], inplace=True)

# activity_balance
if ("FAF" in X_k.columns) and ("TUE" in X_k.columns):
    X_k["activity_balance"] = pd.to_numeric(X_k["FAF"], errors="coerce") / (pd.to_numeric(X_k["TUE"], errors="coerce") + 1e-6)
if "TUE" in X_k.columns: X_k.drop(columns=["TUE"], inplace=True)

# meal_balance
if "CAEC" in X_k.columns:
    X_k["CAEC_num"] = map_caec(X_k["CAEC"])
if ("NCP" in X_k.columns) and ("CAEC_num" in X_k.columns):
    X_k["meal_balance"] = pd.to_numeric(X_k["NCP"], errors="coerce") * pd.to_numeric(X_k["CAEC_num"], errors="coerce")
for c in ["NCP","CAEC"]:
    if c in X_k.columns: X_k.drop(columns=[c], inplace=True)

# BMI
X_k = add_bmi(X_k)

# detect gender split for Kaggle set
gender_col_k = detect_gender_column(X_k)
if gender_col_k is None:
    raise ValueError("Could not detect a gender column in Kaggle_test.csv")
km_k, kf_k = split_by_gender(X_k[gender_col_k])

# ---- Re-train each gender model on full train-group and infer on Kaggle subset ----
def align_cols_like(train_like_df, to_align_df):
    use_cols = [c for c in train_like_df.columns if c in to_align_df.columns]
    missing = [c for c in train_like_df.columns if c not in to_align_df.columns]
    tmp = to_align_df[use_cols].copy()
    for m in missing:
        tmp[m] = np.nan
    return tmp[train_like_df.columns]

kaggle_pred_proba = np.zeros((len(X_k), len(classes)), dtype=np.float32)

def train_full_and_predict_LGBM(X_full, y_full, X_eval, name):
    # drop gender col inside group
    cols_to_use = [c for c in X_full.columns if c != gender_col]
    Xf = X_full[cols_to_use].copy()
    Xe = X_eval[cols_to_use].copy()

    num_cols, cat_cols = infer_feature_types(Xf)
    numeric_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler(with_mean=False))
    ])
    try:
        ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=True)
    except TypeError:
        ohe = OneHotEncoder(handle_unknown="ignore", sparse=True)
    categorical_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", ohe)
    ])
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, num_cols),
            ("cat", categorical_transformer, cat_cols),
        ],
        remainder="drop",
        sparse_threshold=1.0
    )

    prep = clone(preprocessor)
    Xtr = prep.fit_transform(Xf)
    Xev = prep.transform(Xe)

    model = LGBMClassifier(**best_params, n_jobs=N_JOBS)

    # Full-train: no eval_set here (no early stopping). Older LightGBM versions don't accept 'verbose' kw.
    model.fit(Xtr, y_full)

    # If no validation was used, best_iteration_ may be None. Fall back to default (all trees).
    best_it = getattr(model, "best_iteration_", None)
    if best_it is not None:
        proba = model.predict_proba(Xev, num_iteration=best_it)
    else:
        proba = model.predict_proba(Xev)

    print(f"[Kaggle] Trained {name} on {Xf.shape[0]} rows; infer {Xev.shape[0]} rows.")
    return proba

# Build train groups aligned to X
X_full_male   = X[male_mask].reset_index(drop=True)
y_full_male   = y_enc[male_mask]
X_full_female = X[female_mask].reset_index(drop=True)
y_full_female = y_enc[female_mask]

X_k_male   = align_cols_like(X_full_male,   X_k[km_k].reset_index(drop=True))
X_k_female = align_cols_like(X_full_female, X_k[kf_k].reset_index(drop=True))

if X_full_male.shape[0] > 0 and X_k_male.shape[0] > 0:
    kaggle_pred_proba[km_k.values] = train_full_and_predict_LGBM(X_full_male, y_full_male, X_k_male, "MALE")
if X_full_female.shape[0] > 0 and X_k_female.shape[0] > 0:
    kaggle_pred_proba[kf_k.values] = train_full_and_predict_LGBM(X_full_female, y_full_female, X_k_female, "FEMALE")

kaggle_pred_idx = np.argmax(kaggle_pred_proba, axis=1)
y_pred = le.inverse_transform(kaggle_pred_idx)




[Kaggle] Trained MALE on 7783 rows; infer 2553 rows.
[Kaggle] Trained FEMALE on 7750 rows; infer 2672 rows.




In [15]:
# -------- Overall accuracy to 5 decimals --------
overall_acc = accuracy_score(y_true, y_pred)
print(f"\nâœ… LightGBM Overall Accuracy on Kaggle_test: {overall_acc:.5f}")

# -------- Detailed analysis --------
order = [
    'Insufficient_Weight',
    'Normal_Weight',
    'Overweight_Level_I',
    'Overweight_Level_II',
    'Obesity_Type_I',
    'Obesity_Type_II',
    'Obesity_Type_III'
]

cm = confusion_matrix(y_true, y_pred, labels=order)
cm_norm = cm.astype(float) / (cm.sum(axis=1, keepdims=True) + 1e-12)

print("\n=== Confusion Matrix (counts) ===")
print("Predicted â†’")
print("True â†“")
for i, true_class in enumerate(order):
    row = " | ".join(f"{cm[i, j]:4d}" for j in range(len(order)))
    print(f"{true_class:<22}: {row}")

print("\n=== Confusion Matrix (row-normalized) ===")
for i, true_class in enumerate(order):
    row = " | ".join(f"{cm_norm[i, j]:.2f}" for j in range(len(order)))
    print(f"{true_class:<22}: {row}")

print("\n=== Per-class metrics ===")
try:
    print(classification_report(y_true, y_pred, labels=order, target_names=order, digits=4, zero_division=0))
except Exception as e:
    print(f"[Info] classification_report fallback: {e}")
    print(classification_report(y_true, y_pred, digits=4, zero_division=0))


âœ… LightGBM Overall Accuracy on Kaggle_test: 0.90488

=== Confusion Matrix (counts) ===
Predicted â†’
True â†“
Insufficient_Weight   :  610 |   40 |    3 |    0 |    0 |    0 |    0
Normal_Weight         :   37 |  661 |   33 |    5 |    1 |    0 |    0
Overweight_Level_I    :    4 |   62 |  435 |   71 |   11 |    0 |    0
Overweight_Level_II   :    0 |   16 |   61 |  505 |   54 |    5 |    0
Obesity_Type_I        :    1 |    1 |   10 |   41 |  632 |   16 |    2
Obesity_Type_II       :    0 |    0 |    1 |    4 |   15 |  824 |    1
Obesity_Type_III      :    0 |    1 |    0 |    0 |    1 |    0 | 1061

=== Confusion Matrix (row-normalized) ===
Insufficient_Weight   : 0.93 | 0.06 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00
Normal_Weight         : 0.05 | 0.90 | 0.04 | 0.01 | 0.00 | 0.00 | 0.00
Overweight_Level_I    : 0.01 | 0.11 | 0.75 | 0.12 | 0.02 | 0.00 | 0.00
Overweight_Level_II   : 0.00 | 0.02 | 0.10 | 0.79 | 0.08 | 0.01 | 0.00
Obesity_Type_I        : 0.00 | 0.00 | 0.01 | 0.06 | 0.90 | 0.02

In [16]:
# ==============================================
# Gender-specific CatBoost + BMI + Age rounding
# Train on train.csv â†’ Tune per fold â†’ Predict Kaggle_test.csv
# Prints OOF metrics, Kaggle accuracy, confusion matrix, and report
# ==============================================

import os
import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

from catboost import CatBoostClassifier, Pool

# -------- Paths --------
TRAIN_PATH = "train.csv"           # your training file
KAGGLE_TEST_PATH = "Kaggle_test.csv"   # contains WeightCategory ground truth

# -------- Config --------
RANDOM_STATE = 42
N_FOLDS = 5
USE_GPU = True        # set False if GPU not available
ITERATIONS = 20000
EARLY_STOP = 200
LEARNING_RATE = 0.03
DEPTH = 8
L2_REG = 3.0

# -------- Helpers --------
def norm_col(s):
    if s is None: return s
    return str(s).replace("\ufeff", "").strip().lower()

def detect_gender_column(df):
    # prefer common names
    for c in df.columns:
        if norm_col(c) in {"gender", "sex"}:
            return c
    # fallback: values that look like M/F
    for c in df.columns:
        vals = pd.Series(df[c].dropna().astype(str).str.lower().str.strip()).unique()
        if len(vals) in (2, 3):
            if any(v.startswith("m") for v in vals) and any(v.startswith("f") for v in vals):
                return c
    return None

def split_by_gender(series):
    s = series.astype(str).str.lower().str.strip()
    male_mask = s.str.startswith(("m","1","true"))
    female_mask = s.str.startswith(("f","0","false"))
    if male_mask.sum()==0 and female_mask.sum()==0:
        top = s.value_counts().index.tolist()
        if len(top)>=2:
            male_mask = s==top[0]
            female_mask = s==top[1]
    return male_mask, female_mask

def add_bmi(df):
    """BMI = Weight / Height(m)^2 ; auto-handle cm heights."""
    if ("Weight" in df.columns) and ("Height" in df.columns):
        h = pd.to_numeric(df["Height"], errors="coerce")
        height_m = np.where(np.nanmedian(h) > 3.0, h/100.0, h)
        w = pd.to_numeric(df["Weight"], errors="coerce")
        with np.errstate(divide="ignore", invalid="ignore"):
            bmi = w / (np.power(height_m, 2) + 1e-12)
        df["BMI"] = pd.Series(bmi).replace([np.inf, -np.inf], np.nan)
    return df

def round_age_inplace(df):
    if "Age" in df.columns:
        df["Age"] = pd.to_numeric(df["Age"], errors="coerce").round().astype("Int64")

def cat_cols_of(df):
    return df.select_dtypes(include=["object", "category", "bool"]).columns.tolist()

def pool_from_df(X, y=None, cat_cols=None):
    cat_idx = [X.columns.get_loc(c) for c in (cat_cols or []) if c in X.columns]
    return Pool(X, label=y, cat_features=cat_idx)

# -------- Load & prepare --------
train = pd.read_csv(TRAIN_PATH)
kdf   = pd.read_csv(KAGGLE_TEST_PATH)

if "WeightCategory" not in kdf.columns:
    raise KeyError("Kaggle_test.csv must contain 'WeightCategory' as ground truth.")

# Minimal cleaning to mirror past runs
for c in ["MTRANS","SCC"]:
    if c in train.columns: train.drop(columns=[c], inplace=True)
    if c in kdf.columns:   kdf.drop(columns=[c], inplace=True)

# Add BMI + round Age
train = add_bmi(train)
kdf   = add_bmi(kdf)
round_age_inplace(train)
round_age_inplace(kdf)

# --- Target / features
target_col = None
for cand in ["WeightCategory", "NObeyesdad", "label", "target", "class", "y"]:
    if cand in train.columns:
        target_col = cand
        break
if target_col is None:
    raise ValueError("Could not detect target column in train.csv.")

y = train[target_col].copy()
X = train.drop(columns=[target_col]).copy()

# label encode target
le = LabelEncoder()
y_enc = le.fit_transform(y)
classes = list(le.classes_)
print(f"[Info] Classes: {classes}")

# detect gender
gender_col = detect_gender_column(X)
if gender_col is None:
    raise ValueError("Could not detect a gender column (e.g. Gender/SEX).")

male_mask, female_mask = split_by_gender(train[gender_col])
print(f"[Info] Train male={int(male_mask.sum())} | female={int(female_mask.sum())}")

# convenience splits
X_male   = X.loc[male_mask].reset_index(drop=True)
y_male   = y_enc[male_mask]
X_female = X.loc[female_mask].reset_index(drop=True)
y_female = y_enc[female_mask]

# -------- per-gender CV training --------
def train_cv_catboost(Xg, yg, group_name):
    # drop gender column inside a group to avoid leakage
    cols = [c for c in Xg.columns if c != gender_col]
    Xg = Xg[cols].copy()
    cat_cols = cat_cols_of(Xg)

    skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE)

    oof_proba = np.zeros((len(Xg), len(classes)), dtype=np.float32)
    best_iters = []

    for fold, (tr_idx, va_idx) in enumerate(skf.split(Xg, yg), start=1):
        print(f"\n[{group_name}] Fold {fold}/{N_FOLDS}")

        X_tr, X_va = Xg.iloc[tr_idx], Xg.iloc[va_idx]
        y_tr, y_va = yg[tr_idx], yg[va_idx]

        train_pool = pool_from_df(X_tr, y_tr, cat_cols)
        valid_pool = pool_from_df(X_va, y_va, cat_cols)

        model = CatBoostClassifier(
            loss_function="MultiClass",
            eval_metric="MultiClass",           # stable early stopping
            auto_class_weights="Balanced",      # handle imbalance
            learning_rate=LEARNING_RATE,
            depth=DEPTH,
            l2_leaf_reg=L2_REG,
            iterations=ITERATIONS,
            random_seed=RANDOM_STATE,
            task_type="GPU" if USE_GPU else "CPU",
            verbose=False
        )

        model.fit(
            train_pool,
            eval_set=valid_pool,
            use_best_model=True,
            early_stopping_rounds=EARLY_STOP,
            verbose=False
        )
        best_it = getattr(model, "best_iteration_", None)
        if best_it is None:
            best_it = ITERATIONS
        best_iters.append(int(best_it))

        # OOF proba
        oof_proba[va_idx] = model.predict_proba(valid_pool)

        # fold metrics
        pred = np.argmax(oof_proba[va_idx], axis=1)
        acc = accuracy_score(y_va, pred)
        f1m = f1_score(y_va, pred, average="macro")
        print(f"[{group_name}] best_iter={best_it} | Acc={acc:.4f} | MacroF1={f1m:.4f}")

    # group summary
    labels = np.argmax(oof_proba, axis=1)
    acc_g = accuracy_score(yg, labels)
    f1_g  = f1_score(yg, labels, average="macro")
    print(f"\n[{group_name}] OOF Accuracy: {acc_g:.4f} | Macro F1: {f1_g:.4f}")
    print(f"[{group_name}] Best iterations per fold: {best_iters} | Median: {int(np.median(best_iters))}")

    return oof_proba, cols, cat_cols, int(np.median(best_iters))

male_oof, male_cols, male_cat_cols, male_best_it = train_cv_catboost(X_male, y_male, "MALE")
female_oof, female_cols, female_cat_cols, female_best_it = train_cv_catboost(X_female, y_female, "FEMALE")

# overall OOF
oof_all = np.zeros((len(X), len(classes)), dtype=np.float32)
oof_all[male_mask.values] = male_oof
oof_all[female_mask.values] = female_oof
oof_labels = np.argmax(oof_all, axis=1)
print("\n========== OVERALL OOF ==========")
print("OOF Accuracy:", f"{accuracy_score(y_enc, oof_labels):.4f}",
      "| Macro F1:", f"{f1_score(y_enc, oof_labels, average='macro'):.4f}")
print(classification_report(y_enc, oof_labels, target_names=classes, zero_division=0))

# -------- Train on FULL gender data and evaluate on Kaggle_test --------
# Prepare Kaggle set the same way
y_true = kdf["WeightCategory"].copy()
X_k = kdf.drop(columns=["WeightCategory"], errors="ignore").copy()

for c in ["MTRANS","SCC"]:
    if c in X_k.columns: X_k.drop(columns=[c], inplace=True)
X_k = add_bmi(X_k)
round_age_inplace(X_k)

# split kaggle by gender
gcol_k = detect_gender_column(X_k)
if gcol_k is None:
    raise ValueError("Could not detect a gender column in Kaggle_test.csv")
km_k, kf_k = split_by_gender(X_k[gcol_k])

def train_full_and_predict_catboost(X_full, y_full, X_eval, cols_keep, cat_cols, name, best_iters):
    if len(X_eval) == 0 or len(X_full) == 0:
        return np.zeros((len(X_eval), len(classes)), dtype=np.float32)

    Xf = X_full[cols_keep].copy()
    Xe = X_eval[cols_keep].copy()

    train_pool = pool_from_df(Xf, y_full, cat_cols)
    test_pool  = pool_from_df(Xe, None, cat_cols)

    model = CatBoostClassifier(
        loss_function="MultiClass",
        eval_metric="MultiClass",
        auto_class_weights="Balanced",
        learning_rate=LEARNING_RATE,
        depth=DEPTH,
        l2_leaf_reg=L2_REG,
        iterations=max(best_iters, EARLY_STOP*3),  # safe cap if best_iters small
        random_seed=RANDOM_STATE,
        task_type="GPU" if USE_GPU else "CPU",
        verbose=False
    )
    # small internal split for best_model; keeps behavior close to CV
    model.fit(train_pool, use_best_model=True, verbose=False)

    print(f"[Kaggle] Trained {name}: used_itersâ‰ˆ{getattr(model,'best_iteration_', None) or model.get_params().get('iterations')}")
    return model.predict_proba(test_pool)

# set up full gender frames (drop gender col within group)
X_m_full = X.loc[male_mask, male_cols].reset_index(drop=True)
y_m_full = y_enc[male_mask]
X_f_full = X.loc[female_mask, female_cols].reset_index(drop=True)
y_f_full = y_enc[female_mask]

Xk_male   = X_k.loc[km_k, X_m_full.columns].reset_index(drop=True)
Xk_female = X_k.loc[kf_k, X_f_full.columns].reset_index(drop=True)

k_pred_proba = np.zeros((len(X_k), len(classes)), dtype=np.float32)
if len(Xk_male) > 0:
    k_pred_proba[km_k.values] = train_full_and_predict_catboost(
        X_m_full, y_m_full, Xk_male, X_m_full.columns, cat_cols_of(X_m_full),
        "MALE (full)", male_best_it
    )
if len(Xk_female) > 0:
    k_pred_proba[kf_k.values] = train_full_and_predict_catboost(
        X_f_full, y_f_full, Xk_female, X_f_full.columns, cat_cols_of(X_f_full),
        "FEMALE (full)", female_best_it
    )

# predictions on Kaggle_test
k_idx = np.argmax(k_pred_proba, axis=1)
y_pred = le.inverse_transform(k_idx)

[Info] Classes: ['Insufficient_Weight', 'Normal_Weight', 'Obesity_Type_I', 'Obesity_Type_II', 'Obesity_Type_III', 'Overweight_Level_I', 'Overweight_Level_II']
[Info] Train male=7783 | female=7750

[MALE] Fold 1/5




[MALE] best_iter=49 | Acc=0.8439 | MacroF1=0.7170

[MALE] Fold 2/5
[MALE] best_iter=277 | Acc=0.8831 | MacroF1=0.7487

[MALE] Fold 3/5
[MALE] best_iter=101 | Acc=0.8645 | MacroF1=0.7283

[MALE] Fold 4/5
[MALE] best_iter=942 | Acc=0.8817 | MacroF1=0.7443

[MALE] Fold 5/5
[MALE] best_iter=284 | Acc=0.8695 | MacroF1=0.8281

[MALE] OOF Accuracy: 0.8686 | Macro F1: 0.7445
[MALE] Best iterations per fold: [49, 277, 101, 942, 284] | Median: 277

[FEMALE] Fold 1/5
[FEMALE] best_iter=307 | Acc=0.9019 | MacroF1=0.7353

[FEMALE] Fold 2/5
[FEMALE] best_iter=122 | Acc=0.9045 | MacroF1=0.7373

[FEMALE] Fold 3/5
[FEMALE] best_iter=186 | Acc=0.8974 | MacroF1=0.7273

[FEMALE] Fold 4/5
[FEMALE] best_iter=123 | Acc=0.8968 | MacroF1=0.7266

[FEMALE] Fold 5/5
[FEMALE] best_iter=65 | Acc=0.8981 | MacroF1=0.7270

[FEMALE] OOF Accuracy: 0.8997 | Macro F1: 0.7307
[FEMALE] Best iterations per fold: [307, 122, 186, 123, 65] | Median: 123

OOF Accuracy: 0.8841 | Macro F1: 0.8733
                     precision    

You should provide test set for use best model. use_best_model parameter has been switched to false value.


[Kaggle] Trained MALE (full): used_itersâ‰ˆ600


You should provide test set for use best model. use_best_model parameter has been switched to false value.


[Kaggle] Trained FEMALE (full): used_itersâ‰ˆ600


In [17]:

# -------- Accuracy & confusion matrix on Kaggle_test --------
overall_acc = accuracy_score(y_true, y_pred)
print(f"\nâœ… Overall Accuracy on Kaggle_test: {overall_acc:.5f}")

order = classes  # keep the learned class order
cm = confusion_matrix(y_true, y_pred, labels=order)
cm_norm = cm.astype(float) / (cm.sum(axis=1, keepdims=True) + 1e-12)

print("\n=== Confusion Matrix (counts) ===")
print("Predicted â†’")
print("True â†“")
for i, true_class in enumerate(order):
    row = " | ".join(f"{cm[i, j]:4d}" for j in range(len(order)))
    print(f"{true_class:<22}: {row}")

print("\n=== Confusion Matrix (row-normalized) ===")
for i, true_class in enumerate(order):
    row = " | ".join(f"{cm_norm[i, j]:.2f}" for j in range(len(order)))
    print(f"{true_class:<22}: {row}")

print("\n=== Per-class metrics (Kaggle_test) ===")
print(classification_report(y_true, y_pred, labels=order, target_names=order, digits=4, zero_division=0))


âœ… Overall Accuracy on Kaggle_test: 0.90182

=== Confusion Matrix (counts) ===
Predicted â†’
True â†“
Insufficient_Weight   :  623 |   28 |    0 |    0 |    0 |    2 |    0
Normal_Weight         :   47 |  643 |    1 |    0 |    0 |   38 |    8
Obesity_Type_I        :    1 |    1 |  617 |   21 |    3 |   13 |   47
Obesity_Type_II       :    0 |    0 |   27 |  810 |    0 |    2 |    6
Obesity_Type_III      :    0 |    0 |    1 |    0 | 1061 |    1 |    0
Overweight_Level_I    :    3 |   58 |    8 |    0 |    1 |  448 |   65
Overweight_Level_II   :    0 |   13 |   46 |    4 |    0 |   68 |  510

=== Confusion Matrix (row-normalized) ===
Insufficient_Weight   : 0.95 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00
Normal_Weight         : 0.06 | 0.87 | 0.00 | 0.00 | 0.00 | 0.05 | 0.01
Obesity_Type_I        : 0.00 | 0.00 | 0.88 | 0.03 | 0.00 | 0.02 | 0.07
Obesity_Type_II       : 0.00 | 0.00 | 0.03 | 0.96 | 0.00 | 0.00 | 0.01
Obesity_Type_III      : 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | 0.00 | 0.00
O