In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the data
data = pd.read_csv("balanced_loan_dataset.csv", encoding='ISO-8859-1')

# Drop identifier, date and other columns that are not used downstream.
# errors='ignore' keeps the cell runnable when some of them are absent.
data = data.drop(columns=[
    'Unnamed: 0', 'id', 'member_id',
    'open_acc', 'funded_amnt_inv', 'next_pymnt_d',
    'last_credit_pull_d', 'last_pymnt_d', 'emp_length',
    'mths_since_last_delinq', 'issue_d'
], errors='ignore')

# Fix percentage column: strip the literal '%' sign, then parse as a number.
# Anything that still cannot be parsed becomes NaN (errors='coerce').
data['revol_util'] = pd.to_numeric(
    data['revol_util'].astype(str).str.replace('%', '', regex=False),
    errors='coerce',
)

# Fill missing values in 'revol_util' with the column mean
mean_value = data['revol_util'].mean()
data['revol_util'] = data['revol_util'].fillna(mean_value)

# Drop any rows that still have missing values (in any remaining column)
data = data.dropna()

# Encode text columns into numbers.
# This has to happen BEFORE X and y are sliced out of `data`: the slices are
# taken once and are not updated by later column assignments, so encoding
# afterwards would never reach the features or the target. It stays after
# dropna() so that NaN is not turned into the string 'nan' and kept as a class.
for column in data.select_dtypes(include='object').columns:
    data[column] = LabelEncoder().fit_transform(data[column].astype(str))

# Use only these columns for training and prediction
# NOTE(review): 'total_pymnt' looks like an amount that is only known once a
# loan has run its course - confirm it is really available at prediction time.
selected_features = ['loan_amnt', 'int_rate', 'installment', 'annual_inc', 'dti', 'revol_util', 'total_pymnt']

# Define X and y
X = data[selected_features]
y = data["repay_fail"]

# Train-test split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=15)

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Baseline: logistic regression on the selected features.
# max_iter is raised to 1000 iterations for the solver.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
predictions = model.predict(X_test)

# Share of held-out rows whose predicted label equals the true label
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=predictions))

Accuracy: 0.906


In [45]:
from sklearn.tree import DecisionTreeClassifier

# Single decision tree, capped at depth 10, with a fixed seed.
model = DecisionTreeClassifier(max_depth=10, random_state=21).fit(X_train, y_train)
predictions = model.predict(X_test)

# Share of held-out rows whose predicted label equals the true label
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=predictions))

Accuracy: 0.9456666666666667


In [49]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with depth-limited trees.
# random_state is pinned (same seed as the decision-tree and XGBoost cells):
# without it every run bootstraps different samples and the printed accuracy
# cannot be reproduced.
model = RandomForestClassifier(max_depth=10, random_state=21)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Share of held-out rows whose predicted label equals the true label
print("Accuracy:", accuracy_score(y_test, predictions))

Accuracy: 0.9400833333333334


In [33]:
import xgboost as xgb

# Gradient-boosted trees with library defaults and a fixed seed.
model = xgb.XGBClassifier(random_state=21).fit(X_train, y_train)
predictions = model.predict(X_test)

# Share of held-out rows whose predicted label equals the true label
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=predictions))

Accuracy: 0.974


In [51]:
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fpdf import FPDF

# === Final Prediction Function with Reasoning ===
def calculate_and_predict(loan_amnt, int_rate, installment, annual_income,
                          monthly_debt, credit_balance, credit_limit, term_months):
    """Turn raw loan-form values into model features, predict, and explain.

    The prediction comes from the module-level ``model``, i.e. whichever
    classifier cell was executed most recently in this kernel.

    Parameters
    ----------
    loan_amnt : float
        Requested principal.
    int_rate : float
        Annual interest rate in percent (12 means 12%).
    installment : float
        Monthly installment.
    annual_income : float
        Yearly income; must be > 0 because it is the DTI denominator.
    monthly_debt : float
        Total monthly debt payments.
    credit_balance : float
        Current revolving balance.
    credit_limit : float
        Revolving credit limit; must be > 0 because it is the utilisation
        denominator.
    term_months : float
        Loan term in months.

    Returns
    -------
    tuple
        ``(dti_text, revol_util_text, total_payment_text, prediction_html,
        reasons_html, chart_path)``. When an input is missing or a
        denominator is not positive, the three text fields are ``"—"``,
        ``prediction_html`` carries the validation message, ``reasons_html``
        is empty and ``chart_path`` is ``None``.
    """
    # --- Input validation -------------------------------------------------
    # A missing value is treated as None; zero income / limit would otherwise
    # end in a ZeroDivisionError further down.
    form_values = {
        "Loan Amount": loan_amnt,
        "Interest Rate": int_rate,
        "Installment": installment,
        "Annual Income": annual_income,
        "Total Monthly Debt": monthly_debt,
        "Credit Balance": credit_balance,
        "Credit Limit": credit_limit,
        "Loan Term": term_months,
    }
    blank = [name for name, value in form_values.items() if value is None]
    problem = None
    if blank:
        problem = "Please fill in: " + ", ".join(blank)
    elif annual_income <= 0:
        problem = "Annual Income must be greater than zero"
    elif credit_limit <= 0:
        problem = "Credit Limit must be greater than zero"
    if problem is not None:
        warning = f"<div style='color:#c62828;font-weight:bold;font-size:18px;'>⚠️ {problem}</div>"
        return "—", "—", "—", warning, "", None

    # --- Derived metrics --------------------------------------------------
    # DTI: monthly debt as a percentage of monthly income.
    dti = round((monthly_debt / (annual_income / 12)) * 100, 2)
    # Revolving utilisation: balance as a percentage of the limit.
    revol_util = round((credit_balance / credit_limit) * 100, 2)
    # Principal plus simple interest over the whole term.
    total_pymnt = round(loan_amnt + (loan_amnt * int_rate * term_months / (100 * 12)), 2)

    # --- Prediction input -------------------------------------------------
    # One-row frame whose column names and order match the feature list the
    # models were fitted on; a bare nested list drops the names and makes
    # scikit-learn warn about missing feature names.
    input_data = pd.DataFrame([{
        "loan_amnt": loan_amnt,
        "int_rate": int_rate,
        "installment": installment,
        "annual_inc": annual_income,
        "dti": dti,
        "revol_util": revol_util,
        "total_pymnt": total_pymnt,
    }])
    prediction = model.predict(input_data)[0]

    try:
        probs = model.predict_proba(input_data)[0]
    except (AttributeError, NotImplementedError):
        # Only a classifier without probability support lands here; any other
        # failure is a real error and is allowed to surface.
        probs = [0.5, 0.5]

    # --- Pie chart --------------------------------------------------------
    labels = ['🟢 Repaid (0)', '🔴 Defaulted (1)']
    chart_path = "prediction_chart.png"
    fig, ax = plt.subplots()
    try:
        ax.pie(probs, labels=labels, autopct='%1.1f%%', colors=['#aed581', '#ef9a9a'])
        ax.set_title("Prediction Probability Breakdown")
        fig.savefig(chart_path)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

    # --- Final prediction message ----------------------------------------
    # Icon, colour and wording all follow the predicted class (the icon used
    # to be a green dot regardless of the outcome).
    repaid = prediction == 0
    plain_result = "Loan will be REPAID" if repaid else "Loan will DEFAULT"
    icon = "🟢" if repaid else "🔴"
    colour = "#2e7d32" if repaid else "#c62828"
    final_result = f"<div style='color:{colour};font-weight:bold;font-size:18px;'>{icon} {plain_result}</div>"

    # --- Reasoning --------------------------------------------------------
    # Rule-of-thumb flags shown next to the prediction; they are computed
    # from the inputs only and do not come from the model.
    reasons = []
    if dti < 35:
        reasons.append("✅ Low Debt-to-Income Ratio")
    else:
        reasons.append("⚠️ High Debt-to-Income Ratio")

    if revol_util < 30:
        reasons.append("✅ Low Revolving Credit Usage")
    else:
        reasons.append("⚠️ High Credit Usage")

    if annual_income > 100000:
        reasons.append("✅ Good Annual Income")
    else:
        reasons.append("⚠️ Low Annual Income")

    # Affordable = rate under 15% and installment under 40% of monthly income.
    if int_rate < 15 and installment < 0.4 * (annual_income / 12):
        reasons.append("✅ Affordable Interest and EMI")
    else:
        reasons.append("⚠️ Risky Interest/EMI")

    reason_block = "<br>".join(reasons)

    return f"{dti}%", f"{revol_util}%", f"₹{total_pymnt}", final_result, reason_block, chart_path

# === Gradio UI ===
# Two-column page: the applicant form on the left, the model's answer on the right.
with gr.Blocks() as ui:
    gr.Markdown("# 💼 Loan Default Prediction Tool")
    gr.Markdown("Use the form on the left to enter loan application details. Results will be displayed on the right.")

    with gr.Row():
        # Left column: raw applicant inputs plus the trigger button
        with gr.Column():
            annual_income = gr.Number(label="🧾 Annual Income (₹)")
            monthly_debt = gr.Number(label="💳 Total Monthly Debt (₹)")
            credit_balance = gr.Number(label="🔄 Credit Balance (₹)")
            credit_limit = gr.Number(label="🏦 Credit Limit (₹)")
            loan_amnt = gr.Number(label="💰 Loan Amount (₹)")
            int_rate = gr.Number(label="📈 Interest Rate (%)")
            term_months = gr.Number(label="⏳ Loan Term (months)")
            installment = gr.Number(label="📆 Installment (₹/month)")
            submit = gr.Button("🔍 Predict Now")

        # Right column: derived metrics, verdict, reasons and probability chart
        with gr.Column():
            dti_out = gr.Textbox(label="📉 DTI (%)")
            revol_out = gr.Textbox(label="🔁 Revolving Utilization (%)")
            total_pymnt_out = gr.Textbox(label="💸 Total Payment Made")
            pred_out = gr.HTML(label="🎯 Final Prediction")
            reason_out = gr.HTML(label="📌 Reason for Decision")
            chart_out = gr.Image(label="📈 Prediction Chart")

    # The list order is what binds components to the callback: inputs follow
    # calculate_and_predict's positional parameters, outputs follow the order
    # of its returned tuple.
    form_inputs = [
        loan_amnt, int_rate, installment, annual_income,
        monthly_debt, credit_balance, credit_limit, term_months,
    ]
    result_outputs = [
        dti_out, revol_out, total_pymnt_out,
        pred_out, reason_out, chart_out,
    ]
    submit.click(fn=calculate_and_predict, inputs=form_inputs, outputs=result_outputs)

# share=True asks Gradio for a public link in addition to the local server
ui.launch(share=True)


* Running on local URL:  http://127.0.0.1:7864
* Running on public URL: https://07634f5a457c4eaaf9.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


