In [3]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.6.0


In [None]:
"""
Advanced LLM-Augmented Credit Risk Pipeline (XGBoost + Optuna + WOE + SHAP + GPT-4o)

Dataset: UCI Default of Credit Card Clients (30,000 observations)
"""

import pandas as pd
import numpy as np
import shap
import optuna
import openai
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, brier_score_loss
from sklearn.calibration import CalibratedClassifierCV
from sklearn.preprocessing import KBinsDiscretizer
from xgboost import XGBClassifier

openai.api_key = ""



URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls"
df = pd.read_excel(URL, header=1)

df.rename(columns={"default payment next month": "target"}, inplace=True)
df = df.dropna()



# Add debt-to-income proxy (using bill amounts / income)
df["income_log"] = np.log(df["LIMIT_BAL"] + 1)
df["avg_bill"] = df[[f"BILL_AMT{i}" for i in range(1,7)]].mean(axis=1)
df["avg_pay_amt"] = df[[f"PAY_AMT{i}" for i in range(1,7)]].mean(axis=1)

# Utilization (bill / credit limit)
df["utilization"] = df["avg_bill"] / (df["LIMIT_BAL"] + 1)

# Payment consistency (variance of payments)
df["pay_var"] = df[[f"PAY_AMT{i}" for i in range(1,7)]].var(axis=1)

# Next-month delinquency signal: PAY_0 is most predictive
df["delq_severity"] = df["PAY_0"].clip(lower=0)



bin_cols = ["utilization", "avg_bill", "avg_pay_amt", "pay_var"]

woe_transformers = {}
for col in bin_cols:
    kb = KBinsDiscretizer(n_bins=8, encode="ordinal", strategy="quantile")
    df[col + "_bin"] = kb.fit_transform(df[[col]])
    woe_transformers[col] = kb

# Replace bins with numeric codes
bin_features = [col + "_bin" for col in bin_cols]


model_features = [
    "LIMIT_BAL", "SEX", "EDUCATION", "MARRIAGE", "AGE",
    "delq_severity", "income_log"
] + bin_features

X = df[model_features]
y = df["target"]


# -------------------------------------------------------------------------
# 5. TRAIN / TEST SPLIT
# -------------------------------------------------------------------------

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42
)



def objective(trial):
    params = {
        "max_depth": trial.suggest_int("max_depth", 3, 8),
        "eta": trial.suggest_float("eta", 0.01, 0.3),
        "subsample": trial.suggest_float("subsample", 0.5, 0.9),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 0.9),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        "lambda": trial.suggest_float("lambda", 1e-3, 10.0),
        "alpha": trial.suggest_float("alpha", 1e-3, 10.0),
    }

    model = XGBClassifier(
        n_estimators=400,
        objective="binary:logistic",
        eval_metric="logloss",
        tree_method="hist",
        **params
    )

    model.fit(X_train, y_train)
    preds = model.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, preds)
    return auc


print("\nRunning Optuna hyperparameter tuning (this will take 30–60 seconds)...")
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=25)

print("\nBest Parameters:", study.best_params)




best_params = study.best_params

model = XGBClassifier(
    n_estimators=500,
    objective="binary:logistic",
    eval_metric="logloss",
    tree_method="hist",
    **best_params
)

cal_model = CalibratedClassifierCV(model, cv=3, method="sigmoid")
cal_model.fit(X_train, y_train)

test_proba = cal_model.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, test_proba)
brier = brier_score_loss(y_test, test_proba)

print(f"\nAUC = {auc:.4f}")
print(f"Brier Score = {brier:.4f} (lower is better)")




fitted_xgb = cal_model.calibrated_classifiers_[0].estimator

explainer = shap.TreeExplainer(fitted_xgb)
shap_values = explainer.shap_values(X_test)

# pick one borrower
idx = X_test.index[5]
x_row = X_test.loc[idx]
shap_row = shap_values[X_test.index.get_loc(idx)]
prob_default = float(test_proba[X_test.index.get_loc(idx)])





REASON_DICT = {
    "utilization_bin": "High credit utilization pattern",
    "avg_bill_bin": "Elevated outstanding balance levels",
    "avg_pay_amt_bin": "Low payment capacity",
    "pay_var_bin": "Inconsistent payment behavior",
    "delq_severity": "Recent delinquency severity",
}

def build_adverse_reasons(x_row, shap_row, top_k=4):
    pairs = sorted(zip(x_row.index, shap_row), key=lambda x: -x[1])[:top_k]
    reasons = []
    for f, c in pairs:
        readable = REASON_DICT.get(f, f.replace("_", " ").title())
        reasons.append(f"{readable} (SHAP {c:+.3f})")
    return reasons

reasons = build_adverse_reasons(x_row, shap_row)

print("\nAdverse Action Reasons:")
for r in reasons:
    print("-", r)




def make_underwriting_prompt(features, shap_row, reasons, prob):
    feat_str = "\n".join([f"- {k}: {v}" for k, v in features.items()])
    reasons_str = "\n".join([f"- {r}" for r in reasons])

    return f"""
You are an expert senior credit underwriter AND a model-risk governance reviewer.
You have access to borrower data, risk estimates, model explanations, and credit policy rules.

Borrower Snapshot:
{feat_str}

Model Estimated Probability of Default: {prob:.2%}

Principal Risk Drivers:
{reasons_str}

TASKS:
1. Provide a **regulator-aligned underwriting narrative** explaining the risk in plain language
   without referencing SHAP or model internals.
2. Provide **4–6 borrower-friendly improvement steps** (credit behavior, balances, utilization).
3. Provide a **Model Risk Management (MRM) note**:
   - Check for anomalous SHAP patterns
   - Check for potential drift signals
   - Check if risk drivers are consistent with credit policy
4. Provide a **Fair Lending Compliance note** ensuring:
   - No protected-class features are used
   - Explanation does not proxy for race, gender, age improperly

Answer in structured sections.
"""

import os
from openai import OpenAI
client = OpenAI(api_key="")

def call_gpt_4o(prompt):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0.2,
    )
    return response.choices[0].message.content


prompt = make_underwriting_prompt(x_row, shap_row, reasons, prob_default)

print("\n--- GPT-4o Advanced Underwriting Narrative ---\n")
print(call_gpt_4o(prompt))


[I 2025-11-28 21:23:06,048] A new study created in memory with name: no-name-2fb2381d-5e31-4573-b4e7-79b333594c68



Running Optuna hyperparameter tuning (this will take 30–60 seconds)...


[I 2025-11-28 21:23:06,378] Trial 0 finished with value: 0.7588653053145651 and parameters: {'max_depth': 6, 'eta': 0.2348583284433932, 'subsample': 0.698944536152778, 'colsample_bytree': 0.5211833892988962, 'min_child_weight': 7, 'lambda': 3.2371892104652122, 'alpha': 8.192697039681098}. Best is trial 0 with value: 0.7588653053145651.
[I 2025-11-28 21:23:06,763] Trial 1 finished with value: 0.7550788067844494 and parameters: {'max_depth': 6, 'eta': 0.09002447617931815, 'subsample': 0.7220018565666114, 'colsample_bytree': 0.8658371883257701, 'min_child_weight': 9, 'lambda': 5.518848967871336, 'alpha': 2.0349555101336114}. Best is trial 0 with value: 0.7588653053145651.
[I 2025-11-28 21:23:07,062] Trial 2 finished with value: 0.7643462443934446 and parameters: {'max_depth': 5, 'eta': 0.06670489904761134, 'subsample': 0.6217369759869346, 'colsample_bytree': 0.7885311381706236, 'min_child_weight': 9, 'lambda': 5.216314475262835, 'alpha': 3.8482387506986453}. Best is trial 2 with value: 0.


Best Parameters: {'max_depth': 3, 'eta': 0.015412006804828274, 'subsample': 0.8840462863313182, 'colsample_bytree': 0.5407486695380321, 'min_child_weight': 3, 'lambda': 0.14603740382266173, 'alpha': 4.706843321494689}

AUC = 0.7695
Brier Score = 0.1379 (lower is better)

Adverse Action Reasons:
- Sex (SHAP +0.081)
- High credit utilization pattern (SHAP +0.078)
- Age (SHAP +0.072)
- Marriage (SHAP +0.029)

--- GPT-4o Advanced Underwriting Narrative ---

### 1. Regulator-Aligned Underwriting Narrative

The borrower has a credit limit of $230,000 and is currently exhibiting a probability of default estimated at 11.51%. The key risk factors identified include the borrower's gender, high credit utilization, age, and marital status. The borrower is utilizing a significant portion of their available credit, which indicates a higher risk of financial strain. Additionally, the borrower's age and marital status contribute to the risk profile, as these factors can influence financial stability 