<a href="https://colab.research.google.com/github/Venkat18-bit/Credit_Scoring_Model/blob/main/Credit_Scoring_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install Gradio, which the demo cell below imports as `gr`; --quiet trims pip's log output.
!pip install gradio --quiet

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Seed NumPy's global generator so every run draws the same synthetic applicants
np.random.seed(42)

# Draw the four raw features for 1000 applicants.
# The draws consume the seeded stream in this exact order, so do not reorder them.
n_samples = 1000
data = dict(
    income=np.random.normal(loc=50000, scale=15000, size=n_samples),
    debts=np.random.exponential(scale=20000, size=n_samples),
    payment_history=np.random.poisson(lam=2, size=n_samples),
    credit_utilization=np.random.uniform(low=0, high=1, size=n_samples),
)
df = pd.DataFrame(data)

# Rule-based label: an applicant is creditworthy (1) only when all four hold:
# income above 40k, debts below 30k, fewer than 3 late payments, utilization below 0.5
conditions = (
    df['income'].gt(40000)
    & df['debts'].lt(30000)
    & df['payment_history'].lt(3)
    & df['credit_utilization'].lt(0.5)
)
df['creditworthy'] = conditions.astype(int)

# Derived features, appended after the label so the column order stays
# raw features -> creditworthy -> engineered features
df['debt_to_income_ratio'] = df['debts'].div(df['income'])
df['total_late_payments_score'] = df['payment_history'].mul(10)

# Separate the label from the six model inputs, then hold out 20% for testing
y = df['creditworthy']
X = df.drop(columns=['creditworthy'])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Dataset created and split. Shape:", df.shape)
df.head()

Dataset created and split. Shape: (1000, 7)


Unnamed: 0,income,debts,payment_history,credit_utilization,creditworthy,debt_to_income_ratio,total_late_payments_score
0,57450.712295,3666.022703,1,0.027107,1,0.063812,10
1,47926.035482,2208.976346,0,0.783502,0,0.046091,0
2,59715.328072,20235.682294,3,0.010513,0,0.338869,30
3,72845.447846,24515.89879,2,0.76189,0,0.336547,20
4,46487.699379,641.914935,0,0.893304,0,0.013808,0


In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, classification_report
from sklearn.preprocessing import StandardScaler
import joblib

# Standardize features: statistics come from the training split only and are
# then applied unchanged to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate classifiers, all with a fixed random_state
models = {
    'Logistic Regression': LogisticRegression(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42)
}

trained_models = {}
evaluations = {}

for name, model in models.items():
    # Logistic regression is the only model fed the standardized matrices;
    # the tree-based models are fit on the raw feature frames.
    use_scaled = name == 'Logistic Regression'
    train_features = X_train_scaled if use_scaled else X_train
    test_features = X_test_scaled if use_scaled else X_test

    model.fit(train_features, y_train)
    y_pred = model.predict(test_features)
    # Column 1 of predict_proba is the probability of the positive class (creditworthy = 1)
    y_prob = model.predict_proba(test_features)[:, 1]

    trained_models[name] = model
    evaluations[name] = {
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1-Score': f1_score(y_test, y_pred),
        'ROC-AUC': roc_auc_score(y_test, y_prob),
    }

# Report every metric per model, four decimals each
for name, metrics in evaluations.items():
    print(f"\n{name} Metrics:")
    for metric in metrics:
        print(f"{metric}: {metrics[metric]:.4f}")


Logistic Regression Metrics:
Precision: 0.7273
Recall: 0.5854
F1-Score: 0.6486
ROC-AUC: 0.9449

Decision Tree Metrics:
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
ROC-AUC: 1.0000

Random Forest Metrics:
Precision: 1.0000
Recall: 0.9268
F1-Score: 0.9620
ROC-AUC: 1.0000


In [None]:
import gradio as gr
import numpy as np

def predict_creditworthiness(income, debts, payment_history, credit_utilization, model_name):
    """Predict creditworthiness for a single applicant with the selected model.

    Args:
        income: Applicant income as entered in the UI.
        debts: Applicant debts as entered in the UI.
        payment_history: Number of late payments as entered in the UI.
        credit_utilization: Credit utilization ratio from the slider.
        model_name: Key into ``trained_models`` selecting which classifier to use.

    Returns:
        A string with the predicted label and the probability of the
        creditworthy class, or a string starting with ``"Error:"`` when an
        input is missing or the model name is unknown.
    """
    # A cleared Gradio field is delivered as None; report it instead of
    # failing later with a TypeError on the arithmetic below.
    raw_inputs = {
        'income': income,
        'debts': debts,
        'payment_history': payment_history,
        'credit_utilization': credit_utilization,
    }
    missing = [label for label, value in raw_inputs.items() if value is None]
    if missing:
        return f"Error: missing value for {', '.join(missing)}"

    if model_name not in trained_models:
        return f"Error: unknown model {model_name!r}"

    # Feature engineering (mirrors the derived columns built for training).
    # Non-positive income yields a ratio of 0 rather than dividing by zero.
    debt_to_income = debts / income if income > 0 else 0
    total_late_score = payment_history * 10

    # One-row frame with the same column names and order as the training
    # frame: the scaler and tree models were fitted on a DataFrame, and
    # scikit-learn warns when such estimators are later given a bare array.
    input_data = pd.DataFrame([{
        'income': income,
        'debts': debts,
        'payment_history': payment_history,
        'credit_utilization': credit_utilization,
        'debt_to_income_ratio': debt_to_income,
        'total_late_payments_score': total_late_score,
    }])

    model = trained_models[model_name]
    if model_name == 'Logistic Regression':
        # Only logistic regression was trained on standardized features
        features = scaler.transform(input_data)
    else:
        features = input_data

    prediction = model.predict(features)[0]
    # Index 1 of predict_proba is the probability of class 1 (creditworthy)
    probability = model.predict_proba(features)[0][1]

    result = "Creditworthy" if prediction == 1 else "Not Creditworthy"
    return f"Prediction: {result} (Probability: {probability:.2%})"

def show_evaluations(model_name):
    """Format the stored test-set metrics of ``model_name`` as one line per metric.

    Looks the model up in ``evaluations`` and renders Precision, Recall,
    F1-Score and ROC-AUC (in that order) with four decimal places.
    """
    scores = evaluations[model_name]
    metric_order = ('Precision', 'Recall', 'F1-Score', 'ROC-AUC')
    return "\n".join(f"{label}: {scores[label]:.4f}" for label in metric_order)

# Gradio interface: inputs on the left, prediction and metrics on the right
with gr.Blocks(title="Credit Scoring Model") as demo:
    gr.Markdown("# Credit Scoring Model")
    gr.Markdown("Predict creditworthiness using financial data. Select a model and input values.")

    with gr.Row():
        # Left column: model selector and applicant inputs
        with gr.Column():
            model_choice = gr.Dropdown(
                choices=list(trained_models),
                label="Select Model",
                value="Random Forest",
            )
            income_input = gr.Number(label="Income (USD)", value=50000)
            debts_input = gr.Number(label="Debts (USD)", value=10000)
            payment_history_input = gr.Number(label="Late Payments (last year)", value=1)
            credit_utilization_input = gr.Slider(
                minimum=0,
                maximum=1,
                label="Credit Utilization Ratio",
                value=0.3,
            )
            predict_btn = gr.Button("Predict")

        # Right column: prediction text and the selected model's metrics
        with gr.Column():
            output = gr.Textbox(label="Prediction Result")
            eval_btn = gr.Button("Show Model Evaluation")
            eval_output = gr.Textbox(label="Metrics (Precision, Recall, F1, ROC-AUC)")

    # Input order must match the parameter order of predict_creditworthiness
    predict_btn.click(
        fn=predict_creditworthiness,
        inputs=[
            income_input,
            debts_input,
            payment_history_input,
            credit_utilization_input,
            model_choice,
        ],
        outputs=output,
    )
    eval_btn.click(fn=show_evaluations, inputs=model_choice, outputs=eval_output)

# Launch Gradio (public URL provided by Colab); debug=True keeps the cell
# running so errors and logs stay visible, share=True requests a public link
demo.launch(debug=True, share=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://364c464c68a89cb59a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


