In [1]:
# ================================
# Loan Approval Prediction using CatBoost (Best for this dataset)
# ================================

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from catboost import CatBoostClassifier

# 1. Load Dataset
df = pd.read_csv("loan_data_set.csv")

# 2. Drop Loan_ID
df.drop(columns=["Loan_ID"], inplace=True)

# 3. Handle Missing Values
for col in df.columns:
    if df[col].dtype == "object":
        df[col].fillna(df[col].mode()[0], inplace=True)
    else:
        df[col].fillna(df[col].median(), inplace=True)

# 4. Target Encoding
y = df["Loan_Status"].map({"N": 0, "Y": 1})
X = df.drop("Loan_Status", axis=1)

# 5. Identify categorical features
cat_features = [i for i, col in enumerate(X.columns) if X[col].dtype == "object"]

# 6. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# 7. Model
model = CatBoostClassifier(
    iterations=600,
    depth=6,
    learning_rate=0.03,
    loss_function="Logloss",
    eval_metric="F1",
    class_weights=[1.3, 1.0],  # penalize false approvals
    random_state=42,
    verbose=False
)

# 8. Train
model.fit(X_train, y_train, cat_features=cat_features)

# 9. Predict
y_pred = model.predict(X_test)

# 10. Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting value

Accuracy: 0.8292682926829268

Confusion Matrix:
 [[24 14]
 [ 7 78]]

Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.63      0.70        38
           1       0.85      0.92      0.88        85

    accuracy                           0.83       123
   macro avg       0.81      0.77      0.79       123
weighted avg       0.83      0.83      0.82       123

