# In [4]:
import os
import uuid

import dash
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
from dash import dcc, html
from dash.dependencies import Input, Output, State

# Load the pre-trained model.
# The location can be overridden through the LOAN_MODEL_PATH environment
# variable; when it is unset the original hard-coded path is used.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only point this at model files you trust.
model_path = os.environ.get(
    "LOAN_MODEL_PATH",
    r"C:\Users\david\Documents\School\DSC 450\loan_default_xgb_model.pkl",
)
try:
    model = joblib.load(model_path)
except FileNotFoundError as err:
    # Re-raise with a friendlier message, keeping the original error as the cause.
    raise FileNotFoundError(
        f"Model file not found at {model_path}. Ensure the file path is correct."
    ) from err

# Feature columns, in the order used to arrange the model input.
# predict_default reindexes its one-row DataFrame with this list before
# calling the model, so the order here is the order the model sees.
training_features = [
    # Placeholder columns (always filled with 0 by the callback)
    "Cert",
    "Fund",
    # Raw numeric inputs
    "Credit Score",
    "Loan Amount",
    "Income",
    "Debt-to-Income Ratio",
    "Past Defaults",
    # 0/1 indicator columns
    "Employment Status_Self-Employed",
    "Employment Status_Unemployed",
    "Credit Score Category_Low",
    "Credit Score Category_Medium",
    "Income Category_Low",
    "Income Category_Medium",
    "Employment Status Simplified_Non-Employed",
]

# Create the "assets" folder (relative to the working directory) up front so
# plots can be written into it; an already existing folder is left untouched.
os.makedirs(name="assets", exist_ok=True)

# Function to generate SHAP plots with unique filenames
def generate_shap_plot(input_data, model, feature_names=None):
    """Render a SHAP summary plot for ``input_data`` and save it as a PNG.

    Args:
        input_data: Feature matrix handed to the explainer and to the plot.
        model: Fitted model passed to ``shap.TreeExplainer``.
        feature_names: Optional column labels forwarded to
            ``shap.summary_plot``. ``None`` (the default) keeps the
            labelling shap chooses on its own.

    Returns:
        The path of the saved image inside the "assets" folder. Every call
        writes a new, uniquely named file.
    """
    explainer = shap.TreeExplainer(model)
    explanation = explainer(input_data)

    # Use the ``.values`` attribute when the result has one; otherwise treat
    # the result itself as the array of SHAP values.
    shap_matrix = explanation.values if hasattr(explanation, "values") else explanation

    # Generate a unique filename for each SHAP plot
    unique_filename = f"shap_summary_plot_{uuid.uuid4().hex}.png"
    shap_plot_path = os.path.join("assets", unique_filename)

    # The folder may have been removed since start-up; recreate it if needed.
    os.makedirs("assets", exist_ok=True)

    fig = plt.figure(figsize=(8, 6))
    try:
        shap.summary_plot(
            shap_matrix, input_data, feature_names=feature_names, show=False
        )
        plt.savefig(shap_plot_path, bbox_inches="tight")
    finally:
        # Always release the figure, even when plotting or saving fails,
        # so repeated failures do not pile up open figures.
        plt.close(fig)

    return shap_plot_path  # Return the saved plot path

# Initialize the Dash app
app = dash.Dash(__name__)

# App layout: a heading, the applicant form, the Predict button, and two
# placeholders (text result and SHAP image) that the callback fills in.
# Style dictionaries use camelCased keys (marginTop, fontWeight), which is the
# form Dash documents for the `style` property.
app.layout = html.Div([
    html.H1("Loan Default Prediction App"),
    html.P("Enter details below to predict whether a loan applicant will default:"),

    html.Div([
        html.Label("Credit Score:"),
        dcc.Input(id="credit_score", type="number", placeholder="Enter credit score"),

        html.Label("Loan Amount ($):"),
        dcc.Input(id="loan_amount", type="number", placeholder="Enter loan amount"),

        html.Label("Employment Status:"),
        dcc.Dropdown(
            id="employment_status",
            options=[
                {"label": "Employed", "value": "Employed"},
                {"label": "Self-Employed", "value": "Self-Employed"},
                {"label": "Unemployed", "value": "Unemployed"}
            ],
            placeholder="Select employment status"
        ),

        html.Label("Income ($):"),
        dcc.Input(id="income", type="number", placeholder="Enter income"),

        html.Label("Debt-to-Income Ratio:"),
        dcc.Input(id="dti_ratio", type="number", placeholder="Enter DTI ratio", step=0.01),

        html.Label("Past Defaults:"),
        dcc.Input(id="past_defaults", type="number", placeholder="Enter number of past defaults")
    ]),

    html.Button("Predict", id="predict_button", n_clicks=0),

    html.Div(id="prediction_output", style={"marginTop": "20px", "fontWeight": "bold"}),

    html.Img(id="shap_plot", style={"marginTop": "20px", "width": "600px"})  # Display SHAP plot
])

# Helper: turn the raw form values into one row keyed by training column name
def _build_feature_row(credit_score, loan_amount, employment_status, income,
                       dti_ratio, past_defaults):
    """Encode the form values as a dict keyed by the training column names.

    Callers must pass non-None ``credit_score`` and ``income``; both are
    compared against numeric thresholds here.
    """
    # NOTE(review): the number of past defaults is exponentiated before it is
    # given to the model (an empty field becomes exp(0) == 1.0). Presumably
    # this mirrors a transform applied to the training data -- confirm.
    past_defaults_scaled = np.exp(past_defaults) if past_defaults is not None else 1.0

    return {
        "Cert": 0,  # Placeholder
        "Fund": 0,  # Placeholder
        "Credit Score": credit_score,
        "Loan Amount": loan_amount,
        "Income": income,
        "Debt-to-Income Ratio": dti_ratio,
        "Past Defaults": past_defaults_scaled,
        # "Employed" is the baseline: both indicators stay 0.
        "Employment Status_Self-Employed": int(employment_status == "Self-Employed"),
        "Employment Status_Unemployed": int(employment_status == "Unemployed"),
        # Credit score bands: below 600 is Low, 600-749 is Medium,
        # 750 and above is the baseline (both indicators 0).
        "Credit Score Category_Low": int(credit_score < 600),
        "Credit Score Category_Medium": int(600 <= credit_score < 750),
        # Income bands: below 50000 is Low, 50000-99999 is Medium,
        # 100000 and above is the baseline (both indicators 0).
        "Income Category_Low": int(income < 50000),
        "Income Category_Medium": int(50000 <= income < 100000),
        "Employment Status Simplified_Non-Employed": int(employment_status != "Employed"),
    }


# Callback to process input and predict
@app.callback(
    [Output("prediction_output", "children"),
     Output("shap_plot", "src")],  # Update SHAP plot dynamically
    [Input("predict_button", "n_clicks")],
    # The form fields are State rather than Input: editing a field must not
    # re-run the model (and write another plot); only a button click does.
    [State("credit_score", "value"),
     State("loan_amount", "value"),
     State("employment_status", "value"),
     State("income", "value"),
     State("dti_ratio", "value"),
     State("past_defaults", "value")]
)
def predict_default(n_clicks, credit_score, loan_amount, employment_status, income, dti_ratio, past_defaults):
    """Predict default for the submitted applicant and render its SHAP plot.

    Args:
        n_clicks: Click count of the Predict button; nothing is predicted
            until it is positive.
        credit_score, loan_amount, income, dti_ratio: Numeric form values
            (``None`` when the field is empty).
        employment_status: Selected dropdown value, or ``None``.
        past_defaults: Number of past defaults; an empty field is treated
            as zero past defaults.

    Returns:
        A ``(message, image_src)`` pair for the two outputs. ``image_src`` is
        an empty string whenever no prediction was made.
    """
    if not n_clicks:
        return "Enter details and click 'Predict' to see the result.", ""

    # Refuse to predict on incomplete input instead of failing on None
    # comparisons or silently encoding an unselected employment status.
    required = {
        "Credit Score": credit_score,
        "Loan Amount": loan_amount,
        "Employment Status": employment_status,
        "Income": income,
        "Debt-to-Income Ratio": dti_ratio,
    }
    missing = [label for label, value in required.items() if value is None]
    if missing:
        return f"Please provide: {', '.join(missing)}.", ""

    row = _build_feature_row(
        credit_score, loan_amount, employment_status, income, dti_ratio, past_defaults
    )
    # Arrange the columns in the order listed in training_features.
    input_data = pd.DataFrame([row])[training_features]

    # Ensure SHAP receives a numpy array
    input_data_np = input_data.values

    prediction = model.predict(input_data_np)[0]
    prediction_prob = model.predict_proba(input_data_np)[0][1]

    # Generate SHAP plot and get the saved file path
    shap_plot_path = generate_shap_plot(input_data_np, model)

    result_text = (
        f"The applicant is likely to default (Probability: {prediction_prob:.2f})"
        if prediction == 1 or prediction_prob > 0.5
        else f"The applicant is unlikely to default (Probability: {prediction_prob:.2f})"
    )

    # The browser fetches the image from Dash's /assets/ route.
    return result_text, f"/assets/{os.path.basename(shap_plot_path)}"  # Return relative path

# Run the app
if __name__ == "__main__":
    # `app.run` supersedes the deprecated `app.run_server`; fall back to the
    # old name only on Dash releases that do not provide `run` yet.
    run_app = getattr(app, "run", None) or app.run_server
    run_app(debug=True, port=8051)
