In [1]:
# ============================================
# MODEL TRAINING SCRIPT (train_model.py)
# ============================================
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
import joblib

# Load data
data = pd.read_excel("credit_card_data1.xlsx")
data = data.drop(columns=['ID'])
target = 'default payment next month'

# Feature engineering
data['AVG_BILL_AMT'] = data[['BILL_AMT1','BILL_AMT2','BILL_AMT3','BILL_AMT4','BILL_AMT5','BILL_AMT6']].mean(axis=1)
data['AVG_PAY_AMT'] = data[['PAY_AMT1','PAY_AMT2','PAY_AMT3','PAY_AMT4','PAY_AMT5','PAY_AMT6']].mean(axis=1)
data['PAY_RATIO'] = data['AVG_PAY_AMT'] / (data['AVG_BILL_AMT'] + 1)
data['LIMIT_USAGE'] = data['AVG_BILL_AMT'] / (data['LIMIT_BAL'] + 1)
data['PAY_HISTORY_MEAN'] = data[['PAY_0','PAY_2','PAY_3','PAY_4','PAY_5','PAY_6']].mean(axis=1)

# Define features and target
X = data.drop(columns=[target])
y = data[target]

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# SMOTE
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X_train_scaled, y_train)

# Train model
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_res, y_res)

# Save model and scaler
joblib.dump(rf, "credit_default_model.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(X.columns.tolist(), "feature_columns.pkl")

print("‚úÖ Model, Scaler, and Feature Columns saved successfully!")


‚úÖ Model, Scaler, and Feature Columns saved successfully!


In [5]:
# ============================================
# FIXED DASH APP ‚Äî Credit Card Default Predictor
# ============================================
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import pandas as pd
import numpy as np
import joblib

# Load trained model and preprocessing tools
model = joblib.load("credit_default_model.pkl")
scaler = joblib.load("scaler.pkl")
feature_cols = joblib.load("feature_columns.pkl")

# Initialize app
app = dash.Dash(__name__)
app.title = "Credit Card Default Predictor"

# Layout
app.layout = html.Div([
    html.H2("üí≥ Credit Card Default Prediction", style={'textAlign': 'center'}),

    html.Div([
        html.Label("LIMIT_BAL"),
        dcc.Input(id="LIMIT_BAL", type="number", value=50000),

        html.Label("AGE"),
        dcc.Input(id="AGE", type="number", value=35),

        html.Label("SEX (1=Male, 2=Female)"),
        dcc.Input(id="SEX", type="number", value=2),

        html.Label("EDUCATION (1=Grad, 2=Uni, 3=HS, etc.)"),
        dcc.Input(id="EDUCATION", type="number", value=2),

        html.Label("MARRIAGE (1=Married, 2=Single)"),
        dcc.Input(id="MARRIAGE", type="number", value=1),

        html.Label("PAY_0 (Most recent payment delay, -1=On-time, 0=OK, 1+=Delay)"),
        dcc.Input(id="PAY_0", type="number", value=0),

        html.Label("AVG_BILL_AMT (Average Bill Amount)"),
        dcc.Input(id="AVG_BILL_AMT", type="number", value=20000),

        html.Label("AVG_PAY_AMT (Average Payment Amount)"),
        dcc.Input(id="AVG_PAY_AMT", type="number", value=5000),
    ], style={'display': 'grid', 'gridTemplateColumns': '1fr 1fr', 'gap': '10px'}),

    html.Br(),
    html.Button("Predict Default", id="predict_btn", n_clicks=0, style={'width': '100%', 'padding': '10px'}),
    html.Br(), html.Br(),
    html.Div(id="prediction_output", style={'textAlign': 'center', 'fontSize': 20, 'fontWeight': 'bold'})
])

# Prediction callback
@app.callback(
    Output("prediction_output", "children"),
    [Input("predict_btn", "n_clicks")],
    [
        State("LIMIT_BAL", "value"),
        State("AGE", "value"),
        State("SEX", "value"),
        State("EDUCATION", "value"),
        State("MARRIAGE", "value"),
        State("PAY_0", "value"),
        State("AVG_BILL_AMT", "value"),
        State("AVG_PAY_AMT", "value")
    ]
)
def predict_default(n_clicks, LIMIT_BAL, AGE, SEX, EDUCATION, MARRIAGE, PAY_0, AVG_BILL_AMT, AVG_PAY_AMT):
    if n_clicks == 0:
        return ""

    # Derived features
    PAY_RATIO = AVG_PAY_AMT / (AVG_BILL_AMT + 1)
    LIMIT_USAGE = AVG_BILL_AMT / (LIMIT_BAL + 1)
    PAY_HISTORY_MEAN = PAY_0  # Simplified: using recent delay as proxy

    # Construct full feature vector dynamically
    # Fill missing PAY_2‚ÄìPAY_6 etc. with reasonable placeholders (0 or mean)
    input_dict = {
        'LIMIT_BAL': LIMIT_BAL,
        'SEX': SEX,
        'EDUCATION': EDUCATION,
        'MARRIAGE': MARRIAGE,
        'AGE': AGE,
        'PAY_0': PAY_0,
        'PAY_2': 0, 'PAY_3': 0, 'PAY_4': 0, 'PAY_5': 0, 'PAY_6': 0,
        'BILL_AMT1': AVG_BILL_AMT, 'BILL_AMT2': AVG_BILL_AMT, 'BILL_AMT3': AVG_BILL_AMT,
        'BILL_AMT4': AVG_BILL_AMT, 'BILL_AMT5': AVG_BILL_AMT, 'BILL_AMT6': AVG_BILL_AMT,
        'PAY_AMT1': AVG_PAY_AMT, 'PAY_AMT2': AVG_PAY_AMT, 'PAY_AMT3': AVG_PAY_AMT,
        'PAY_AMT4': AVG_PAY_AMT, 'PAY_AMT5': AVG_PAY_AMT, 'PAY_AMT6': AVG_PAY_AMT,
        'AVG_BILL_AMT': AVG_BILL_AMT,
        'AVG_PAY_AMT': AVG_PAY_AMT,
        'PAY_RATIO': PAY_RATIO,
        'LIMIT_USAGE': LIMIT_USAGE,
        'PAY_HISTORY_MEAN': PAY_HISTORY_MEAN
    }

    # Create DataFrame with same columns as training
    df_input = pd.DataFrame([input_dict])
    df_input = df_input.reindex(columns=feature_cols, fill_value=0)

    # Scale + predict
    scaled_input = scaler.transform(df_input)
    pred = model.predict(scaled_input)[0]
    prob = model.predict_proba(scaled_input)[0][1]

    result = "‚ö†Ô∏è High Risk of Default" if pred == 1 else "‚úÖ Low Risk (No Default)"
    return f"{result} (Probability: {prob:.2f})"

# Run app
if __name__ == '__main__':
    app.run(debug=True)
