In [None]:
from sklearn.model_selection import StratifiedKFold

# Pool the original train and validation partitions so that every sample
# takes part in the stratified cross-validation below.
X_cv = np.concatenate([X_train, X_val])
y_cv = np.concatenate([y_train, y_val])

skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=SEED)

# One row per fold: [accuracy, precision, recall, f1, false-alarm rate].
cv_results = []

# enumerate(..., start=1) gives a 1-based fold number without a manual counter.
for fold, (train_idx, test_idx) in enumerate(skf.split(X_cv, y_cv), start=1):
    X_tr, X_te = X_cv[train_idx], X_cv[test_idx]
    y_tr, y_te = y_cv[train_idx], y_cv[test_idx]

    # internal 80/20 split for training/validation (same as TabNet CV)
    # NOTE(review): X_va / y_va are not passed to fit() below, so the model is
    # trained on 80% of the fold's training data and the remaining 20% is
    # unused. Presumably this keeps the training size comparable to the TabNet
    # CV run -- confirm, or wire them in as an eval_set.
    X_tr2, X_va, y_tr2, y_va = train_test_split(
        X_tr, y_tr,
        test_size=0.2,
        random_state=SEED,
        stratify=y_tr
    )

    print(f"\n--- XGBoost Fold {fold} ---")

    # A fresh classifier per fold so no state is shared between folds.
    clf_fold = XGBClassifier(**xgb_best_params)

    clf_fold.fit(X_tr2, y_tr2)

    y_pred_fold = clf_fold.predict(X_te)

    acc  = accuracy_score(y_te, y_pred_fold)
    prec = precision_score(y_te, y_pred_fold, zero_division=0)
    rec  = recall_score(y_te, y_pred_fold, zero_division=0)
    f1   = f1_score(y_te, y_pred_fold, zero_division=0)

    # Pin the label order so the matrix is always 2x2 and the four-way unpack
    # cannot fail when a class is missing from both y_te and the predictions.
    # Assumes binary labels encoded as 0 (negative) / 1 (positive), matching
    # the default pos_label=1 used by the scorers above -- TODO confirm.
    tn, fp, fn, tp = confusion_matrix(y_te, y_pred_fold, labels=[0, 1]).ravel()
    # False-alarm rate = FP / (FP + TN); defined as 0.0 when there are no negatives.
    far = fp / (fp + tn) if (fp + tn) > 0 else 0.0

    print(
        f"Fold {fold} – "
        f"Acc:{acc:.4f}, Prec:{prec:.4f}, Rec:{rec:.4f}, "
        f"F1:{f1:.4f}, FAR:{far:.6f}"
    )

    cv_results.append([acc, prec, rec, f1, far])

# Stack the per-fold metric rows into a 2-D array (one row per fold,
# one column per metric) so each metric can be aggregated column-wise.
cv_results = np.array(cv_results)
metrics = ["Accuracy", "Precision", "Recall", "F1-score", "FAR"]

print("\n===== 3-Fold Stratified Cross-Validation (XGBoost) – Summary (mean ± std) =====")
# Transposing yields one column per metric, in the same order as `metrics`.
for m, fold_values in zip(metrics, cv_results.T):
    mean_val, std_val = fold_values.mean(), fold_values.std()
    print(f"{m:<10}: {mean_val:.4f} ± {std_val:.4f}")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Pair every feature name with the importance reported by the fitted model
# and order the table from most to least important.
xgb_importances = xgb_clf.feature_importances_
xgb_feat_imp_df = pd.DataFrame(
    {"Feature": feature_names, "Importance": xgb_importances}
).sort_values(by="Importance", ascending=False)

# Theme first, then the font settings, so the rcParams below are applied last.
sns.set_theme(style="whitegrid")
plt.rcParams.update({
    "font.family": "Arial",
    "font.size": 10,
    "axes.titlesize": 11,
    "axes.labelsize": 10,
    "xtick.labelsize": 9,
    "ytick.labelsize": 9,
    "legend.fontsize": 9,
})

# Only the highest-ranked features are plotted.
top_n = 10
top_feat = xgb_feat_imp_df.head(top_n)

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    data=top_feat,
    x="Importance",
    y="Feature",
    hue="Feature",   # colour each bar by its own feature ...
    dodge=False,
    legend=False,    # ... without drawing a legend for it
    palette="Blues_r",
    ax=ax,
)

# Leave 10% headroom to the right of the longest bar.
ax.set_xlim(0, top_feat["Importance"].max() * 1.1)
sns.despine(top=True, right=True)

ax.set_xlabel("Feature Importance")
ax.set_ylabel("Feature")
fig.tight_layout()
plt.show()
