# 05 — Model Optimization & Final Selection

## HumanForYou — Employee Attrition Prediction

---

### Objective

**Fine-tune the top model(s)** from the benchmark to maximize predictive performance:

1. Hyperparameter tuning via GridSearchCV / RandomizedSearchCV
2. Threshold calibration to optimize the Precision-Recall trade-off
3. Feature importance analysis (SHAP for explainability)
4. Fairness evaluation on sensitive variables
5. Final model selection and business recommendations

> **Adapted from** the confidence calibration methodology used in a previous detection project — same threshold optimization logic applied to classification.
>
> This notebook expects preprocessed data from **03** and benchmark results from **04**.

## Section 1: Setup

In [None]:
# ==============================================================================
# IMPORTS
# ==============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import time
import joblib
from pathlib import Path
warnings.filterwarnings("ignore")

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, classification_report, confusion_matrix,
    roc_curve, precision_recall_curve, make_scorer
)
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier

# Plot defaults: matplotlib rcParams are set first, then the seaborn theme.
plt.rcParams["figure.figsize"] = (12, 6)
plt.rcParams["figure.dpi"] = 100
sns.set_theme(style="whitegrid")

# --- Path Configuration ---
# The project root is the first of (cwd, parent of cwd) that contains
# data/raw; if neither does, the hard-coded workstation path is used.
_cwd = Path.cwd()
PROJECT_ROOT = next(
    (root for root in (_cwd, _cwd.parent) if (root / "data" / "raw").exists()),
    Path(r"c:\Users\yanis\Documents\CESI\A5\AI Project\HumanForYou"),
)

OUTPUT_DIR = str(PROJECT_ROOT / "outputs")


def _read_output(stem):
    """Read ``<OUTPUT_DIR>/<stem>.csv`` into a DataFrame."""
    return pd.read_csv(f"{OUTPUT_DIR}/{stem}.csv")


# Load data (targets are squeezed from one-column frames to Series)
X_train = _read_output("X_train")
X_test = _read_output("X_test")
y_train = _read_output("y_train").squeeze()
y_test = _read_output("y_test").squeeze()

# Non-SMOTE for fair CV
# (presumably the training split before SMOTE resampling — inferred from the
# file names; confirm against the preprocessing notebook)
X_train_ns = _read_output("X_train_no_smote")
y_train_ns = _read_output("y_train_no_smote").squeeze()

# Load benchmark results to pick top models
benchmark = _read_output("model_benchmark_results")
top3 = benchmark.head(3)[["Model", "F1-Score", "AUC-ROC"]]
print("Top 3 from benchmark:")
print(top3.to_string(index=False))

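## Section 2: Hyperparameter Tuning

Tune the top candidate models with stratified 5-fold cross-validation, using GridSearchCV for small grids and RandomizedSearchCV (50 iterations) when a grid exceeds 200 combinations.

> **Note**: We tune on non-SMOTE data and let each model handle class imbalance natively during CV: Random Forest uses `class_weight='balanced'` and XGBoost searches over `scale_pos_weight`; Gradient Boosting has no class-weight option in its grid.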

In [None]:
# ==============================================================================
# HYPERPARAMETER GRIDS
# ==============================================================================

# Search spaces, declared separately from the estimators they belong to.
_RF_GRID = {
    "n_estimators": [100, 200, 300],
    "max_depth": [10, 15, 20, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
}
_XGB_GRID = {
    "n_estimators": [100, 200, 300],
    "max_depth": [3, 5, 7, 9],
    "learning_rate": [0.01, 0.05, 0.1, 0.2],
    "subsample": [0.7, 0.8, 1.0],
    "scale_pos_weight": [1, 3, 5],
}
_GB_GRID = {
    "n_estimators": [100, 200, 300],
    "max_depth": [3, 5, 7],
    "learning_rate": [0.01, 0.05, 0.1],
    "min_samples_split": [2, 5, 10],
    "subsample": [0.7, 0.8, 1.0],
}

# Each entry pairs an unfitted estimator ("model") with its grid ("params").
# NOTE(review): only Random Forest sets class_weight and only XGBoost searches
# scale_pos_weight; Gradient Boosting has no imbalance handling configured here.
TUNING_CONFIGS = {
    "Random Forest": {
        "model": RandomForestClassifier(
            random_state=42, n_jobs=-1, class_weight="balanced"
        ),
        "params": _RF_GRID,
    },
    "XGBoost": {
        "model": XGBClassifier(random_state=42, eval_metric="logloss", verbosity=0),
        "params": _XGB_GRID,
    },
    "Gradient Boosting": {
        "model": GradientBoostingClassifier(random_state=42),
        "params": _GB_GRID,
    },
}

# Report the size of every full grid (product of the per-parameter list lengths).
print(f"Tuning {len(TUNING_CONFIGS)} models...")
for name, cfg in TUNING_CONFIGS.items():
    option_counts = [len(options) for options in cfg["params"].values()]
    n_combos = np.prod(option_counts)
    print(f"  {name}: {n_combos} parameter combinations")

In [None]:
# ==============================================================================
# GRID SEARCH / RANDOMIZED SEARCH
# ==============================================================================

# Shared CV splitter; every search below scores candidates with F1.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
tuned_models = {}
banner = "=" * 60

for name, cfg in TUNING_CONFIGS.items():
    print(f"\n{banner}")
    print(f"Tuning: {name}")
    print(f"{banner}")

    estimator, grid = cfg["model"], cfg["params"]
    n_combos = np.prod([len(options) for options in grid.values()])
    shared_kwargs = {"scoring": "f1", "cv": cv, "n_jobs": -1, "verbose": 0}

    # Exhaustive search for grids of at most 200 combinations; larger grids
    # are sampled with 50 random draws instead.
    if n_combos <= 200:
        search = GridSearchCV(estimator, grid, **shared_kwargs)
        search_type = "GridSearchCV"
    else:
        search = RandomizedSearchCV(
            estimator, grid, n_iter=50, random_state=42, **shared_kwargs
        )
        search_type = "RandomizedSearchCV (50 iters)"

    started = time.time()
    search.fit(X_train_ns, y_train_ns)
    elapsed = time.time() - started

    # Keep the best estimator found by the search for the later sections.
    tuned_models[name] = search.best_estimator_

    print(f"  Search: {search_type}")
    print(f"  Best F1 (CV): {search.best_score_:.4f}")
    print(f"  Best params:  {search.best_params_}")
    print(f"  Time: {elapsed:.1f}s")

## Section 3: Threshold Calibration

For each tuned model, sweep classification thresholds to find the **optimal operating point** that balances Precision and Recall for HumanForYou's business needs.

> In HR attrition, **Recall is slightly more valuable** than Precision — missing an at-risk employee is more costly than a false alert.

In [None]:
# ==============================================================================
# THRESHOLD CALIBRATION
# ==============================================================================

def calibrate_threshold(model, X_test, y_test, name, target_recall=0.75):
    """
    Sweep decision thresholds and pick an operating point for ``model``.

    The second column of ``model.predict_proba(X_test)`` is binarised at every
    threshold from 0.10 to 0.90 in steps of 0.01; precision, recall and F1 are
    computed against ``y_test`` for each threshold (undefined scores count as 0).

    Selection rule: the highest-F1 threshold among those whose recall is at
    least ``target_recall``. If no threshold reaches that recall, the
    highest-F1 threshold overall is used instead. Ties go to the lowest
    threshold, because rows are ordered by ascending threshold and ``idxmax``
    returns the first maximum.

    Parameters
    ----------
    model : fitted estimator exposing ``predict_proba``.
    X_test : features passed to ``predict_proba``.
    y_test : true binary labels aligned with ``X_test``.
    name : model label; not used by this function, kept so existing callers
        keep working.
    target_recall : minimum recall a threshold must reach to be preferred.

    Returns
    -------
    (df_cal, best_row) : the full sweep as a DataFrame with columns
        ``threshold``, ``precision``, ``recall``, ``f1``, and the selected row
        of that table as a Series.
    """
    y_proba = model.predict_proba(X_test)[:, 1]

    # Build the grid from an integer point count and round to two decimals.
    # A float step in np.arange accumulates representation error, giving
    # values such as 0.35000000000000003 that would end up in saved results.
    thresholds = np.round(np.linspace(0.10, 0.90, 81), 2)

    records = []
    for t in thresholds:
        y_pred = (y_proba >= t).astype(int)
        records.append({
            "threshold": float(t),
            "precision": precision_score(y_test, y_pred, zero_division=0),
            "recall": recall_score(y_test, y_pred, zero_division=0),
            "f1": f1_score(y_test, y_pred, zero_division=0),
        })

    df_cal = pd.DataFrame(records)

    # Prefer thresholds meeting the recall target; otherwise fall back to the
    # unconstrained F1 optimum.
    candidates = df_cal[df_cal["recall"] >= target_recall]
    pool = candidates if len(candidates) > 0 else df_cal
    best_row = pool.loc[pool["f1"].idxmax()]

    return df_cal, best_row

# One subplot per tuned model; a single Axes is wrapped so it can be iterated.
# NOTE(review): the threshold is chosen using X_test / y_test, so metrics
# measured on that same split at this threshold are optimistically biased.
calibration_results = {}
n_models = len(tuned_models)
fig, axes = plt.subplots(1, n_models, figsize=(6 * n_models, 5))
if n_models == 1:
    axes = [axes]

# (column in the sweep table, legend label, extra line styling)
curve_specs = (
    ("precision", "Precision", {}),
    ("recall", "Recall", {}),
    ("f1", "F1-Score", {"linestyle": "--"}),
)

for panel, (name, model) in zip(axes, tuned_models.items()):
    df_cal, best = calibrate_threshold(model, X_test, y_test, name)
    calibration_results[name] = {"table": df_cal, "best": best}

    for column, label, extra in curve_specs:
        panel.plot(df_cal["threshold"], df_cal[column], label=label, linewidth=2, **extra)

    # Mark the selected operating point.
    panel.axvline(
        best["threshold"],
        color="red",
        linestyle=":",
        alpha=0.7,
        label=f"Optimal: {best['threshold']:.2f}",
    )
    panel.set_title(f"{name}", fontweight="bold")
    panel.set_xlabel("Threshold")
    panel.set_ylabel("Score")
    panel.set_xlim(0.1, 0.9)
    panel.set_ylim(0, 1.02)
    panel.legend(fontsize=9)

    print(f"{name} — Optimal threshold: {best['threshold']:.2f}")
    print(f"  Precision: {best['precision']:.4f}  Recall: {best['recall']:.4f}  F1: {best['f1']:.4f}")

plt.suptitle("Threshold Calibration — Precision / Recall Trade-off", fontsize=14, fontweight="bold")
plt.tight_layout()
plt.savefig(f"{OUTPUT_DIR}/threshold_calibration.png", bbox_inches="tight")
plt.show()

## Section 4: Feature Importance & Explainability

Use both built-in feature importances and SHAP values for interpretability.

> **ALTAI Requirement 4 (Transparency)**: The model must be explainable to HR stakeholders.

In [None]:
# ==============================================================================
# FEATURE IMPORTANCE — Built-in (Tree models)
# ==============================================================================

# Pick the best tuned model for detailed analysis: the one whose selected
# operating point has the highest F1 in the calibration results.
best_name = max(calibration_results, key=lambda k: calibration_results[k]["best"]["f1"])
best_model = tuned_models[best_name]
print(f"Detailed analysis on: {best_name}")

# Feature importance (only for estimators exposing feature_importances_)
if hasattr(best_model, "feature_importances_"):
    # Label importances with the columns of the frame the models were fitted
    # on (X_train_ns), not X_train, so names always match the fitted features.
    importances = pd.Series(best_model.feature_importances_, index=X_train_ns.columns)
    top20 = importances.nlargest(20)

    fig, ax = plt.subplots(figsize=(10, 8))
    # Ascending sort so the most important feature is drawn at the top.
    top20.sort_values().plot(kind="barh", ax=ax, color="#3498db")
    ax.set_xlabel("Feature Importance")
    ax.set_title(f"Top 20 Feature Importances — {best_name}", fontweight="bold")
    plt.tight_layout()
    plt.savefig(f"{OUTPUT_DIR}/feature_importance.png", bbox_inches="tight")
    plt.show()

In [None]:
# ==============================================================================
# SHAP VALUES — Explainability
# ==============================================================================

# Only the import is guarded: an ImportError raised later (inside the SHAP
# analysis itself) must not be reported as "SHAP not installed".
try:
    import shap
except ImportError:
    print("SHAP not installed. Install with: pip install shap")
    print("Falling back to built-in feature importance only.")
else:
    explainer = shap.TreeExplainer(best_model)
    # Use a sample for speed (at most 500 test rows, fixed seed)
    X_sample = X_test.sample(min(500, len(X_test)), random_state=42)
    shap_values = explainer.shap_values(X_sample)

    # Reduce per-class output to the positive class (index 1). Depending on
    # the SHAP version and estimator, per-class values arrive as a list of
    # 2-D arrays or as one 3-D array (samples, features, classes).
    if isinstance(shap_values, list):
        shap_vals = shap_values[1]
    elif np.ndim(shap_values) == 3:
        shap_vals = shap_values[:, :, 1]
    else:
        shap_vals = shap_values

    # Summary plot
    fig, ax = plt.subplots(figsize=(12, 10))
    shap.summary_plot(shap_vals, X_sample, show=False, max_display=20)
    plt.title(f"SHAP Summary — {best_name}", fontweight="bold")
    plt.tight_layout()
    plt.savefig(f"{OUTPUT_DIR}/shap_summary.png", bbox_inches="tight")
    plt.show()

    print("SHAP analysis complete — see plot for feature impact directions.")

## Section 5: Fairness Evaluation

**ALTAI Requirement 5**: Check for discriminatory bias in predictions across sensitive groups.

In [None]:
# ==============================================================================
# FAIRNESS METRICS — Using unscaled data for correct group identification
# ==============================================================================

# Group membership is read from the unscaled test features when that file
# exists (per the original note, those hold binary 0/1 columns rather than
# standardized floats).
import os

unscaled_test_path = f"{OUTPUT_DIR}/X_test_unscaled.csv"
has_unscaled = os.path.exists(unscaled_test_path)
if has_unscaled:
    X_test_unscaled = pd.read_csv(unscaled_test_path)
else:
    # Fallback: a plain copy of the scaled test features. No rounding or
    # inverse scaling is applied, so group values are whatever X_test holds.
    X_test_unscaled = X_test.copy()
print(
    "Using unscaled test data for fairness analysis."
    if has_unscaled
    else "WARNING: unscaled data not found, using scaled data (may be inaccurate)."
)

# Find sensitive columns by substring match on the column name
all_columns = list(X_test_unscaled.columns)
gender_cols = [c for c in all_columns if "Gender" in c]
marital_cols = [c for c in all_columns if "MaritalStatus" in c]

print("\nFAIRNESS EVALUATION")
print("=" * 65)

# Predictions of the selected model at its calibrated threshold.
# assumes rows of X_test_unscaled align positionally with X_test — TODO confirm
best_threshold = calibration_results[best_name]["best"]["threshold"]
y_proba = best_model.predict_proba(X_test)[:, 1]
above_threshold = y_proba >= best_threshold
y_pred = above_threshold.astype(int)

def fairness_report(X_unscaled, y_true, y_pred, group_col, group_name):
    """
    Print and return per-group recall and predicted-positive rate.

    For every distinct value of ``X_unscaled[group_col]`` the function
    computes recall and the share of positive predictions on the rows of that
    group, prints one line per group, and — when at least two groups were
    evaluated — prints the disparate-impact ratio (lowest predicted-positive
    rate divided by the highest).

    Parameters
    ----------
    X_unscaled : DataFrame containing ``group_col``.
    y_true : true binary labels, positionally aligned with ``X_unscaled``.
    y_pred : predicted binary labels, positionally aligned with ``X_unscaled``.
    group_col : name of the column that defines the groups.
    group_name : human-readable name printed in the section header.

    Returns
    -------
    dict mapping each group value to
    ``{"n": ..., "recall": ..., "pred_positive_rate": ...}``.
    """
    print(f"\n  {group_name} (column: {group_col})")
    print(f"  {'-'*50}")

    # Work on positional arrays so any array-like (Series with an arbitrary
    # index, list, ndarray) is masked the same way.
    y_true_arr = np.asarray(y_true)
    y_pred_arr = np.asarray(y_pred)
    group_values = X_unscaled[group_col]

    rates = {}
    for g in sorted(group_values.unique()):
        mask = (group_values == g).to_numpy()
        n = mask.sum()
        if n == 0:
            # Reached for NaN group values, which never compare equal.
            continue
        tp_rate = recall_score(y_true_arr[mask], y_pred_arr[mask], zero_division=0)
        pred_pos_rate = y_pred_arr[mask].mean()
        rates[g] = {"n": n, "recall": tp_rate, "pred_positive_rate": pred_pos_rate}
        # NumPy integer scalars are not instances of int, so they are listed
        # explicitly to get the "Group N" label for numeric group codes.
        is_numeric = isinstance(g, (int, float, np.integer, np.floating))
        label = f"Group {g}" if is_numeric else str(g)
        print(f"    {label} (n={n}): Recall={tp_rate:.3f}, Pred+ rate={pred_pos_rate:.3f}")

    # Disparate impact. Groups with a zero positive rate are kept: a group
    # that is never flagged is the strongest disparity (ratio 0) and must not
    # be silently dropped from the comparison.
    if len(rates) >= 2:
        pos_rates = [v["pred_positive_rate"] for v in rates.values()]
        highest = max(pos_rates)
        if highest > 0:
            di = min(pos_rates) / highest
            print(f"    -> Disparate Impact: {di:.3f} {'OK (> 0.8)' if di > 0.8 else 'WARNING: potential bias (< 0.8)'}")
        else:
            print("    -> Disparate Impact: undefined (no positive predictions in any group)")
    return rates

# Report on every gender column, then on the first marital-status column only
# (marital_cols[:1] is empty when no such column exists).
sensitive_columns = [(gender_col, "Gender") for gender_col in gender_cols]
sensitive_columns += [(marital_col, "Marital Status") for marital_col in marital_cols[:1]]

for column, label in sensitive_columns:
    fairness_report(X_test_unscaled, y_test, y_pred, column, label)

## Section 6: Final Model Export & Business Recommendations

In [None]:
# ==============================================================================
# FINAL MODEL — Save
# ==============================================================================

import json

# Operating point selected for the best model during threshold calibration.
chosen_point = calibration_results[best_name]["best"]
best_threshold = chosen_point["threshold"]

# Save model
model_path = f"{OUTPUT_DIR}/final_model.joblib"
joblib.dump(best_model, model_path)

# Save operating point (values cast to plain floats / strings for JSON)
operating_point = {
    "model_name": best_name,
    "optimal_threshold": float(best_threshold),
    "test_metrics": {
        metric: float(chosen_point[metric]) for metric in ("precision", "recall", "f1")
    },
    "best_params": {str(k): str(v) for k, v in best_model.get_params().items()},
}

Path(OUTPUT_DIR, "operating_point.json").write_text(json.dumps(operating_point, indent=2))

print(f"Final model saved: {model_path}")
print(f"Operating point: threshold = {best_threshold:.2f}")
# Labels are left-padded to 11 characters so the values line up.
for label, metric in (("Precision:", "precision"), ("Recall:", "recall"), ("F1-Score:", "f1")):
    print(f"  {label:<11}{operating_point['test_metrics'][metric]:.4f}")

In [None]:
# ==============================================================================
# CLASSIFICATION REPORT — Final model at optimal threshold
# ==============================================================================

# Hard predictions of the final model at the calibrated threshold.
positive_proba = best_model.predict_proba(X_test)[:, 1]
y_final = (positive_proba >= best_threshold).astype(int)

class_labels = ["Stay", "Leave"]

print(f"FINAL CLASSIFICATION REPORT — {best_name} @ threshold={best_threshold:.2f}")
print("=" * 65)
print(classification_report(y_test, y_final, target_names=class_labels))

# Confusion matrix (rows: actual class, columns: predicted class)
cm = confusion_matrix(y_test, y_final)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    ax=ax,
    xticklabels=class_labels,
    yticklabels=class_labels,
    annot_kws={"size": 16},
)
ax.set_xlabel("Predicted", fontsize=12)
ax.set_ylabel("Actual", fontsize=12)
ax.set_title(f"Final Model — {best_name} (threshold={best_threshold:.2f})", fontsize=14, fontweight="bold")
plt.tight_layout()
plt.savefig(f"{OUTPUT_DIR}/final_confusion_matrix.png", bbox_inches="tight")
plt.show()

In [None]:
# ==============================================================================
# BUSINESS RECOMMENDATIONS
# ==============================================================================

rule = "=" * 65
print(rule)
print("BUSINESS RECOMMENDATIONS FOR HUMANFORYOU")
print(rule)

# The recommendation text is printed only when the selected model exposes
# feature importances (the same condition under which `importances` is built).
if hasattr(best_model, "feature_importances_"):
    top5 = importances.nlargest(5)
    # One numbered line per top feature, ranks starting at 1.
    ranked_factors = "\n".join(
        f"  {rank}. {feat} (importance: {imp:.4f})"
        for rank, (feat, imp) in enumerate(top5.items(), start=1)
    )
    print(f"""
Based on {best_name} analysis, the top attrition risk factors are:

{ranked_factors}

RECOMMENDED ACTIONS:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
1. EARLY WARNING SYSTEM
   Deploy this model monthly to score all employees.
   Flag those with predicted attrition probability > {best_threshold:.0%} for HR review.

2. TARGETED RETENTION
   Focus retention efforts on the top risk factors identified above.
   Design specific programs addressing each factor.

3. MONITORING & GOVERNANCE
   Retrain the model annually with fresh data.
   Monitor fairness metrics quarterly (disparate impact > 0.8).
   Maintain human oversight: model flags → HR interview → decision.

4. ETHICAL SAFEGUARDS
   Never use the model as sole basis for employment decisions.
   Ensure transparency: employees can request explanation of their risk score.
   Regular bias audits per ALTAI guidelines.
""")

print("\n✓ Project pipeline complete.")
print(f"  Deliverables generated in: {OUTPUT_DIR}")