<a href="https://colab.research.google.com/github/Zuhair0000/Loan_Approval_Prediction/blob/main/loan_approval_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import Libraries**

In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# **Load Dataset**

In [48]:
df = pd.read_csv("loan_approval_dataset.csv")
df.columns = df.columns.str.strip()

In [49]:
df.isnull().sum()

Unnamed: 0,0
loan_id,0
no_of_dependents,0
education,0
self_employed,0
income_annum,0
loan_amount,0
loan_term,0
cibil_score,0
residential_assets_value,0
commercial_assets_value,0


In [50]:
df.columns

Index(['loan_id', 'no_of_dependents', 'education', 'self_employed',
       'income_annum', 'loan_amount', 'loan_term', 'cibil_score',
       'residential_assets_value', 'commercial_assets_value',
       'luxury_assets_value', 'bank_asset_value', 'loan_status'],
      dtype='object')

In [51]:
X = df.drop(columns=["loan_id", "loan_status"])
y = df["loan_status"]

# **Train-Test Split**

In [52]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# **Data Preprocessing**

In [53]:
categorical_features = ["education", "self_employed" ]

numerical_features = ["no_of_dependents", "income_annum", "loan_amount",
                      "loan_term", "cibil_score", "residential_assets_value",
                      "commercial_assets_value", "luxury_assets_value", "bank_asset_value"]

In [54]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [55]:
categorical_enc = Pipeline([
    ("impouter", SimpleImputer(strategy="most_frequent")),
    ("encoder", OneHotEncoder())
])

numerical_enc = Pipeline([
    ("imputer", SimpleImputer(strategy="mean")),
    ("scaler", StandardScaler())
])

preprocessor = ColumnTransformer(transformers=[
    ("cat", categorical_enc, categorical_features),
    ("num", numerical_enc, numerical_features)
])

In [56]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# **Model Training**

### **Logistic Regressio**

In [57]:
from sklearn.linear_model import LogisticRegression

lr = Pipeline([
    ("preprocessing", preprocessor),
    ("model", LogisticRegression(random_state=42))
])
lr.fit(X_train, y_train)

In [58]:
lr_pred = lr.predict(X_test)

### **Random Forest**

In [59]:
from sklearn.ensemble import RandomForestClassifier
rf = Pipeline([
    ("preprocessing", preprocessor),
    ("model", RandomForestClassifier(n_estimators=200, criterion='entropy', random_state=42))
])
rf.fit(X_train, y_train)

In [60]:
rf_pred = rf.predict(X_test)

### **XGB model**

In [61]:
from xgboost import XGBClassifier

xgb = Pipeline([
    ("preprocessing", preprocessor),
    ("model", XGBClassifier())
])

xgb.fit(X_train, y_train)

In [62]:
xgb_pred = xgb.predict(X_test)

# **Evaluation & Comparison**

In [63]:
from sklearn.metrics import(
    accuracy_score,
    f1_score,
    confusion_matrix,
    classification_report
    )

In [64]:
def evaluate_model(y_pred, y_test):
    return{
        "Accuracy Score": accuracy_score(y_pred, y_test),
        "F1_score": f1_score(y_pred, y_test),
        "Confusion": confusion_matrix(y_pred, y_test)
    }

In [65]:
lr_result = evaluate_model(lr_pred, y_test)
lr_result

{'Accuracy Score': 0.905152224824356,
 'F1_score': 0.8716323296354992,
 'Confusion': array([[498,  43],
        [ 38, 275]])}

In [66]:
rf_result = evaluate_model(rf_pred, y_test)
rf_result

{'Accuracy Score': 0.9800936768149883,
 'F1_score': 0.9732283464566929,
 'Confusion': array([[528,   9],
        [  8, 309]])}

In [67]:
xgb_result = evaluate_model(xgb_pred, y_test)
xgb_result

{'Accuracy Score': 0.9824355971896955,
 'F1_score': 0.976303317535545,
 'Confusion': array([[530,   9],
        [  6, 309]])}

In [68]:
results_df = pd.DataFrame({
    "Logistic Regression": lr_result,
    "Random Forest": rf_result,
    "Gradient Boosting": xgb_result
}).T

results_df

Unnamed: 0,Accuracy Score,F1_score,Confusion
Logistic Regression,0.905152,0.871632,"[[498, 43], [38, 275]]"
Random Forest,0.980094,0.973228,"[[528, 9], [8, 309]]"
Gradient Boosting,0.982436,0.976303,"[[530, 9], [6, 309]]"
