# **ELEVVO PATHWAYS MACHINE LEARNING INTERNSHIP**
---
# **Task 04: Loan Approval Prediction Description**
---
# **Submitted By: Muhammad Usman**

## **Import all important Libraries**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings('ignore')

## **Load the Dataset**

In [None]:
# Read the loan-approval CSV (absolute path kept exactly as in the original run).
df = pd.read_csv('/content/loan_approval_dataset.csv')
# Strip stray whitespace from the header names right after loading, so the
# preview below and every later cell see the same clean column names no matter
# which cell is executed first.
df.columns = df.columns.str.strip()
df.head(3)

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,1,2,Graduate,No,9600000,29900000,12,778,2400000,17600000,22700000,8000000,Approved
1,2,0,Not Graduate,Yes,4100000,12200000,8,417,2700000,2200000,8800000,3300000,Rejected
2,3,3,Graduate,No,9100000,29700000,20,506,7100000,4500000,33300000,12800000,Rejected


## **DATA PREPROCESSING**

## **Define Categorical and Numerical Columns**

In [19]:
# Columns that hold numeric values (imputed with the median further down).
num_cols = [
    'no_of_dependents',
    'income_annum',
    'loan_amount',
    'loan_term',
    'cibil_score',
    'residential_assets_value',
    'commercial_assets_value',
    'luxury_assets_value',
    'bank_asset_value',
]

# Columns that hold string categories (imputed with the mode, then label-encoded).
cat_cols = [
    'education',
    'self_employed',
]

## **Impute Numerical Columns with median**

In [20]:
# Remove surrounding whitespace from the header names so they match `num_cols`.
df.columns = df.columns.map(str.strip)

# Fill missing numeric entries with the per-column median.
# NOTE(review): the imputer is fitted on the full frame, i.e. before the
# train/test split, so test rows contribute to the medians.
num_imputer = SimpleImputer(strategy='median')
numeric_filled = num_imputer.fit_transform(df[num_cols])
df[num_cols] = numeric_filled

## **Impute categorical columns with mode of data**

In [21]:
# Fill gaps in the categorical columns with each column's most frequent value.
# NOTE(review): fitted on the full frame, before the train/test split.
cat_imputer = SimpleImputer(strategy='most_frequent')
cat_imputer.fit(df[cat_cols])
df[cat_cols] = cat_imputer.transform(df[cat_cols])

## **Encode categorical variables**

In [22]:
# Encode each categorical column as integers with its OWN fitted encoder.
# Refitting a single shared encoder would discard the mapping of every column
# except the last one, making it impossible to decode or inspect the classes
# of earlier columns afterwards.
label_encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    # Keep the fitted encoder so `label_encoders[col].classes_` and
    # `inverse_transform` remain available for this column.
    label_encoders[col] = le

## **Split Features and Target into Train and Test Sets**

In [None]:
# Predictors: every column except the row identifier and the target itself.
X = df.drop(columns=['loan_id', 'loan_status'])
# Target: the loan decision label.
y = df['loan_status']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

## **Feature Scaling**

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## **Function to train and evaluate model**


In [None]:
def evaluate_model(model, X_train, X_test, y_train, y_test, model_name):
    """Fit ``model`` on the training data and print its test-set metrics.

    The model is fitted in place on ``(X_train, y_train)``, predictions are
    made for ``X_test``, and a confusion matrix plus a classification report
    (both computed against ``y_test``) are printed under a header containing
    ``model_name``. Nothing is returned.
    """
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    print(f"\n{model_name} Results:")
    # Each metric is computed only when its section is printed, in this order.
    metric_sections = (
        ("Confusion Matrix:", confusion_matrix),
        ("Classification Report:", classification_report),
    )
    for heading, metric_fn in metric_sections:
        print(heading)
        print(metric_fn(y_test, predicted))

## **Train and evaluate Logistic Regression**

In [None]:
# Baseline: logistic regression trained on the scaled training split.
lr = LogisticRegression(random_state=42)
evaluate_model(
    model=lr,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    model_name="Logistic Regression",
)


Logistic Regression Results:
Confusion Matrix:
[[498  38]
 [ 43 275]]
Classification Report:
              precision    recall  f1-score   support

    Approved       0.92      0.93      0.92       536
    Rejected       0.88      0.86      0.87       318

    accuracy                           0.91       854
   macro avg       0.90      0.90      0.90       854
weighted avg       0.90      0.91      0.90       854



## **Train and evaluate Decision Tree**

In [None]:
# Baseline: decision tree trained on the same scaled training split.
dt = DecisionTreeClassifier(random_state=42)
evaluate_model(
    model=dt,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    model_name="Decision Tree",
)


Decision Tree Results:
Confusion Matrix:
[[527   9]
 [ 10 308]]
Classification Report:
              precision    recall  f1-score   support

    Approved       0.98      0.98      0.98       536
    Rejected       0.97      0.97      0.97       318

    accuracy                           0.98       854
   macro avg       0.98      0.98      0.98       854
weighted avg       0.98      0.98      0.98       854



## **Initializing SMOTE for handling class imbalance**

In [None]:
# Resample the training split only; the test split is left untouched.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train, y_train)
X_train_smote, y_train_smote = resampled

## **Train and evaluate models with SMOTE**

In [None]:
print("\nResults with SMOTE:")
# Train a fresh estimator (same hyper-parameters) on the resampled data rather
# than refitting `lr`: refitting would silently overwrite the baseline model
# fitted earlier, leaving no way to compare or reuse it afterwards.
lr_smote = LogisticRegression(random_state=42)
evaluate_model(lr_smote, X_train_smote, X_test, y_train_smote, y_test, "Logistic Regression (SMOTE)")


Results with SMOTE:

Logistic Regression (SMOTE) Results:
Confusion Matrix:
[[486  50]
 [ 28 290]]
Classification Report:
              precision    recall  f1-score   support

    Approved       0.95      0.91      0.93       536
    Rejected       0.85      0.91      0.88       318

    accuracy                           0.91       854
   macro avg       0.90      0.91      0.90       854
weighted avg       0.91      0.91      0.91       854



In [18]:
# Train a fresh tree (same hyper-parameters) on the resampled data rather than
# refitting `dt`, so the baseline tree fitted earlier is not overwritten.
dt_smote = DecisionTreeClassifier(random_state=42)
evaluate_model(dt_smote, X_train_smote, X_test, y_train_smote, y_test, "Decision Tree (SMOTE)")


Decision Tree (SMOTE) Results:
Confusion Matrix:
[[524  12]
 [ 10 308]]
Classification Report:
              precision    recall  f1-score   support

    Approved       0.98      0.98      0.98       536
    Rejected       0.96      0.97      0.97       318

    accuracy                           0.97       854
   macro avg       0.97      0.97      0.97       854
weighted avg       0.97      0.97      0.97       854

