# Part A: Probability of Default (PD) Computation

### Import libraries

In [None]:
# Load libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import joblib

### Customize

In [None]:
# User-adjustable settings for Part A
# Input CSV holding the historical credit records
my_credit_data = "0901 Credit_data.csv"
# Columns of the CSV that are used as model inputs
my_features = ["Income", "Credit_Score", "Loan", "Age"]
# Fraction of the rows held out for testing
my_test_size = 0.3
# File names under which the fitted model and scaler are stored
my_logit = "0901 logistic_credit_model.joblib"
my_scaler = "0901 credit_scaler.joblib"


###  Load and prepare the dataset

In [None]:
# Read the historical credit records used to fit the model
df = pd.read_csv(my_credit_data)

# Separate the model inputs from the default flag
features = my_features
X, y = df[features], df['Default']  # target: 0 = repaid, 1 = defaulted

# Standardize the features: learn the scaling first, then apply it
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

### Train and save model

In [None]:
# Split the raw (unscaled) rows first so that the scaler can be fitted on the
# training portion only. A scaler fitted on the full dataset (as X_scaled is)
# lets test-set means and variances leak into training, which can bias the
# validation metrics. random_state is unchanged, so the split stays
# reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=my_test_size, random_state=42
)

# Re-fit the scaler on the training rows, then apply that same transform to
# the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Train logistic regression model on the standardized training features
model = LogisticRegression()
model.fit(X_train, y_train)

# Logistic Regression Equation
# NOTE: the coefficients apply to the standardized features, not to the raw
# column values, because the model was fitted on scaler output.
coefficients = model.coef_[0]
intercept = model.intercept_[0]
equation_terms = [f"{coef:.4f}*{feature}" for coef, feature in zip(coefficients, features)]
logistic_equation = f"P(Default) = 1 / (1 + exp(-({intercept:.4f} + " + " + ".join(equation_terms) + ")) )"

print("\nLogistic Regression Equation:")
print(logistic_equation)

# Save model and scaler for reuse; new data has to go through the saved
# scaler before it is passed to the saved model.
joblib.dump(model, my_logit)
joblib.dump(scaler, my_scaler)

# Part B: Model Validation 

### Import libraries

In [None]:
# Import libraries
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_auc_score, roc_curve, classification_report, confusion_matrix
from matplotlib.backends.backend_pdf import PdfPages

### Customize

In [None]:
# Output file names for Part B
my_pdf = "0901 Model_Validation.pdf"  # multi-page validation report
my_roc = "0901 ROC_Curve.png"  # ROC curve image
my_matrix = "0901 Confusion_Matrix.png"  # confusion-matrix heatmap image

### Predict probabilities and compute validation metrics

In [None]:
# Default probabilities for the held-out rows, cut at 0.5 to get class labels
y_proba_test = model.predict_proba(X_test)[:, 1]
y_pred = (y_proba_test >= 0.5).astype(int)

# Label-based summaries: confusion matrix and per-class report
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=6)

# Probability-based summaries: AUC, the Gini derived from it, and ROC points
auc = roc_auc_score(y_test, y_proba_test)
gini = 2 * auc - 1
fpr, tpr, thresholds = roc_curve(y_test, y_proba_test)

# KS statistic (no chart): rank rows from highest to lowest predicted
# probability and compare the cumulative shares of defaults and non-defaults.
df_ks = pd.DataFrame({'y_true': y_test, 'y_proba': y_proba_test}).sort_values(
    'y_proba', ascending=False
)
events = df_ks['y_true']
non_events = 1 - events
df_ks['event'] = events
df_ks['non_event'] = non_events
df_ks['cum_event_rate'] = events.cumsum() / events.sum()
df_ks['cum_non_event_rate'] = non_events.cumsum() / non_events.sum()
df_ks['ks'] = (df_ks['cum_event_rate'] - df_ks['cum_non_event_rate']).abs()

# Largest gap between the two cumulative curves, and the probability there
ks_idx = df_ks['ks'].idxmax()
ks_stat = df_ks['ks'].max()
ks_threshold = df_ks.loc[ks_idx, 'y_proba']

### Display results on screen and save files 

In [None]:
# --- Display on Screen ---
# Per-class report first, then the scalar metrics, then the confusion matrix
print("\nClassification Report:\n", report)
metric_lines = [
    f"AUC: {auc:.6f}",
    f"Gini Coefficient: {gini:.6f}",
    f"KS Statistic: {ks_stat:.6f} at Threshold = {ks_threshold:.4f}",
]
print(*metric_lines, sep="\n")
print("\nConfusion Matrix:\n", conf_matrix)


   

In [None]:
 # Save Outputs to PDF and PNGs
# Write three pages to the PDF (ROC curve, confusion matrix, text summary);
# the two charts are also saved as PNG files and shown on screen.
with PdfPages(my_pdf) as pdf:

    # ROC Curve
    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, label=f"ROC Curve (AUC = {auc:.6f})")
    plt.plot([0, 1], [0, 1], linestyle='--', color='gray')  # diagonal reference line
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("Receiver Operating Characteristic (ROC) Curve")
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.tight_layout()
    # Save to PNG and PDF before show(), while the figure is still current
    plt.savefig(my_roc)
    pdf.savefig()
    plt.show()
    plt.close()

    # Confusion Matrix
    plt.figure(figsize=(6, 5))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=["Repaid", "Defaulted"],
                yticklabels=["Repaid", "Defaulted"])
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Confusion Matrix")
    plt.tight_layout()
    plt.savefig(my_matrix)
    pdf.savefig()
    plt.show()
    plt.close()

    # Text Summary Page
    fig, ax = plt.subplots(figsize=(8.5, 11))
    ax.axis("off")
    # Build the page line by line without leading indentation: indenting the
    # template would shift only the first line of the multi-line report and
    # misalign its header row against the columns below it.
    text = "\n".join([
        "Model Validation Report",
        "",
        f"AUC Score: {auc:.6f}",
        f"Gini Coefficient: {gini:.6f}",
        f"KS Statistic: {ks_stat:.6f} at Threshold = {ks_threshold:.4f}",
        "",
        "Classification Report:",
        report,
    ])
    ax.text(0, 1, text, ha="left", va="top", fontsize=12, family="monospace")
    pdf.savefig(fig)
    plt.close(fig)

print("\nROC, Confusion Matrix displayed and saved as PNGs.")
# Report the file name that was actually written rather than a fixed string
print(f"Model validation PDF saved as '{my_pdf}'")
