In [None]:
# 03_model_evaluation.ipynb
# -----------------------------------------------------------
# Purpose: Train and evaluate machine learning models for credit risk scoring.
# Steps:
# 1. Load processed feature data
# 2. Split data into training and test sets
# 3. Train baseline Logistic Regression and XGBoost models
# 4. Evaluate using accuracy, precision, recall, F1-score, and ROC-AUC
# 5. Perform model explainability using SHAP
# 6. Save the best model
# -----------------------------------------------------------

# Import libraries
from __future__ import annotations
import pandas as pd
import numpy as np
from typing import Tuple, Dict

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
    confusion_matrix, classification_report
)
import xgboost as xgb
import shap
import matplotlib.pyplot as plt
import joblib

# ---------------------------
# 1. Load Processed Data
# ---------------------------

def load_features(filepath: str) -> pd.DataFrame:
    """
    Read the processed feature table from a CSV file.

    Args:
        filepath (str): Location of the CSV file to read.
    Returns:
        pd.DataFrame: The parsed contents of the file.
    """
    features = pd.read_csv(filepath)
    return features

# Location of the engineered feature table, relative to the working directory.
data_path: str = "../data/processed/credit_data_features.csv"
df: pd.DataFrame = load_features(data_path)

# Preview the first rows as a quick sanity check on the load.
display(df.head())
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would only add a stray "None" to the output.
df.info()

# ---------------------------
# 2. Define Features and Target
# ---------------------------

TARGET_COL: str = "loan_status"  # Adjust if your target column has a different name

# Fail fast when the label column is absent from the loaded data.
if TARGET_COL not in df.columns:
    raise ValueError(f"Target column '{TARGET_COL}' not found in dataset.")

# Labels come from the target column; every remaining column is a feature.
y: pd.Series = df[TARGET_COL]
X: pd.DataFrame = df.drop(columns=[TARGET_COL])

# Hold out 20% of the rows for testing, stratified on the labels, with a
# fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

print(f"Training set size: {X_train.shape}, Test set size: {X_test.shape}")

# ---------------------------
# 3. Model Training
# ---------------------------

def train_logistic_regression(X_train: pd.DataFrame, y_train: pd.Series) -> LogisticRegression:
    """
    Fit a logistic regression classifier on the training data.

    The estimator is configured with the 'liblinear' solver and an iteration
    cap of 1000.

    Args:
        X_train (pd.DataFrame): Training features.
        y_train (pd.Series): Training target.
    Returns:
        LogisticRegression: The fitted estimator.
    """
    classifier = LogisticRegression(solver='liblinear', max_iter=1000)
    # fit() returns the estimator itself, so the fitted model is returned directly.
    return classifier.fit(X_train, y_train)

def train_xgboost(X_train: pd.DataFrame, y_train: pd.Series) -> xgb.XGBClassifier:
    """
    Fit an XGBoost classifier on the training data.

    Args:
        X_train (pd.DataFrame): Training features.
        y_train (pd.Series): Training target.
    Returns:
        xgb.XGBClassifier: The fitted classifier.
    """
    # Hyperparameters are gathered in one place so they are easy to scan and tweak.
    hyperparams = {
        "n_estimators": 200,
        "learning_rate": 0.05,
        "max_depth": 4,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "random_state": 42,
        "eval_metric": 'logloss',
    }
    booster = xgb.XGBClassifier(**hyperparams)
    booster.fit(X_train, y_train)
    return booster

# Fit both candidate models on the same training split so that their
# test-set metrics can be compared directly later on.
log_reg_model = train_logistic_regression(X_train, y_train)
xgb_model = train_xgboost(X_train, y_train)

# ---------------------------
# 4. Evaluation
# ---------------------------

def evaluate_model(model, X_test: pd.DataFrame, y_test: pd.Series) -> Dict[str, float]:
    """
    Evaluate a model using common classification metrics.

    Accuracy, precision, recall and F1 are computed from the hard predictions
    returned by ``model.predict``. ROC-AUC is computed from a continuous
    score: the second column of ``predict_proba`` when the model provides it,
    otherwise the output of ``decision_function``. If the model offers
    neither, ROC-AUC is reported as NaN.

    Args:
        model: Trained model exposing ``predict`` (sklearn or XGBoost).
        X_test (pd.DataFrame): Test features.
        y_test (pd.Series): True test labels.
    Returns:
        Dict[str, float]: Metrics keyed by "accuracy", "precision", "recall",
            "f1_score" and "roc_auc", each as a built-in float.
    """
    y_pred = model.predict(X_test)

    # ROC-AUC needs a ranking score rather than hard labels. Prefer class
    # probabilities; fall back to decision values, which roc_auc_score also
    # accepts, so margin-based estimators are not silently scored as NaN.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        y_score = None

    # Coerce to built-in floats so the result matches the annotated return type.
    metrics = {
        "accuracy": float(accuracy_score(y_test, y_pred)),
        "precision": float(precision_score(y_test, y_pred, zero_division=0)),
        "recall": float(recall_score(y_test, y_pred, zero_division=0)),
        "f1_score": float(f1_score(y_test, y_pred, zero_division=0)),
        "roc_auc": float(roc_auc_score(y_test, y_score)) if y_score is not None else float(np.nan),
    }
    return metrics

# Score both fitted models on the held-out test split.
log_reg_metrics = evaluate_model(log_reg_model, X_test, y_test)
xgb_metrics = evaluate_model(xgb_model, X_test, y_test)

# Metric dictionaries, logistic regression first.
for heading, scores in (
    ("\nLogistic Regression Metrics:\n", log_reg_metrics),
    ("\nXGBoost Metrics:\n", xgb_metrics),
):
    print(heading, scores)

# Classification report for each model, in the same order.
for heading, fitted in (
    ("\nLogistic Regression Classification Report:\n", log_reg_model),
    ("\nXGBoost Classification Report:\n", xgb_model),
):
    print(heading)
    print(classification_report(y_test, fitted.predict(X_test)))

# ---------------------------
# 5. Model Explainability with SHAP
# ---------------------------

def explain_model_with_shap(model, X_sample: pd.DataFrame) -> None:
    """
    Plot a SHAP bar summary for a model's predictions on a data sample.

    Args:
        model: Trained model (e.g., XGBoost).
        X_sample (pd.DataFrame): Rows to explain; the same frame is also
            handed to the explainer when it is constructed.
    """
    # Build the explainer and immediately apply it to the sample.
    explanation = shap.Explainer(model, X_sample)(X_sample)

    # Bar-style summary plot of the computed SHAP values
    shap.summary_plot(explanation, X_sample, plot_type="bar")
    plt.show()

# Run SHAP explanation for XGBoost (using a sample of test data)
# The sample size is capped at the number of available rows: DataFrame.sample
# raises ValueError when asked for more rows than exist (without replacement),
# which would happen on test sets smaller than 200 rows.
X_sample = X_test.sample(n=min(200, len(X_test)), random_state=42)
explain_model_with_shap(xgb_model, X_sample)

# ---------------------------
# 6. Save Best Model
# ---------------------------

# XGBoost is kept only when its ROC-AUC is strictly higher; in every other
# case (including a tie) the logistic regression model is kept.
if xgb_metrics['roc_auc'] > log_reg_metrics['roc_auc']:
    best_model = xgb_model
else:
    best_model = log_reg_model

# Persist the selected model to disk.
model_path = "../app/model.pkl"
joblib.dump(best_model, model_path)
print(f"Best model saved at {model_path}")
