<a href="https://colab.research.google.com/github/Vernikkumar/CodeAlpha_MachineLearning/blob/main/CreditScoring.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Credit Scoring Model - Simple Version
# Author: Your Name

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# -----------------------------
# Step 1: Create Sample Dataset
# -----------------------------
# (In real projects, load from CSV instead)
np.random.seed(42)
n = 500  # number of samples

data = pd.DataFrame({
    "income": np.random.randint(2000, 10000, n),
    "debts": np.random.randint(0, 5000, n),
    "payment_history": np.random.randint(0, 2, n),  # 0 = bad, 1 = good
    "age": np.random.randint(18, 65, n),
    "Creditworthy": np.random.randint(0, 2, n)      # Target (0 = No, 1 = Yes)
})

print("Sample Data:")
print(data.head())

# -----------------------------
# Step 2: Features and Target
# -----------------------------
X = data.drop("Creditworthy", axis=1)
y = data["Creditworthy"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features (important for Logistic Regression)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# -----------------------------
# Step 3: Train Models
# -----------------------------
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier()
}

# -----------------------------
# Step 4: Evaluate Models
# -----------------------------
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc = roc_auc_score(y_test, y_pred)

    print(f"\n{name}:")
    print(f"Accuracy:  {acc:.2f}")
    print(f"Precision: {prec:.2f}")
    print(f"Recall:    {rec:.2f}")
    print(f"F1-Score:  {f1:.2f}")
    print(f"ROC-AUC:   {roc:.2f}")


Sample Data:
   income  debts  payment_history  age  Creditworthy
0    9270   1895                0   63             0
1    9603   2733                0   40             1
2    2860   3863                0   49             1
3    7390   4721                0   34             0
4    7226   4014                0   34             1

Logistic Regression:
Accuracy:  0.53
Precision: 0.57
Recall:    0.54
F1-Score:  0.55
ROC-AUC:   0.53

Decision Tree:
Accuracy:  0.42
Precision: 0.46
Recall:    0.43
F1-Score:  0.44
ROC-AUC:   0.42

Random Forest:
Accuracy:  0.52
Precision: 0.55
Recall:    0.59
F1-Score:  0.57
ROC-AUC:   0.51
