In [1]:
# IMPORTING THE NECESSARY LIBRARIES

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder         # USED FOR ENCODING THE STRING VALUES
from sklearn.model_selection import train_test_split   # USED FOR SPLITTING DATA FOR TRAINING AND TESTING
from sklearn.linear_model import LogisticRegression    # USED FOR IMPORTING LOGISTIC REGRESSION MODEL
from sklearn.tree import DecisionTreeClassifier        # USED FOR IMPORTING DECISION TREE MODEL
from sklearn.ensemble import RandomForestClassifier    # USED FOR IMPORTING RANDOM FOREST CLASSIFIER MODEL
from sklearn.metrics import accuracy_score as ac       # USED FOR ACCURACY TEST
from sklearn.metrics import confusion_matrix           # USED TO CREATE CONFUSION MATRIX

In [2]:
# READ THE LOAN DATASET FROM THE EXCEL WORKBOOK AND DISCARD THE "Loan_ID" COLUMN
# IN THE SAME STEP, SO `data` NEVER CARRIES THAT COLUMN.

data = pd.read_excel("loan-predictionUC.csv.xlsx").drop(columns="Loan_ID")

In [None]:
# LIST THE NAMES OF ALL COLUMNS CURRENTLY IN THE DATASET

data.columns.tolist()

In [4]:
# REMOVE EVERY ROW THAT CONTAINS AT LEAST ONE MISSING (NaN) VALUE

data = data.dropna(axis=0, how="any")

In [5]:
# ONE-HOT ENCODE THE "Dependents" COLUMN: IT IS REPLACED BY ONE DUMMY COLUMN
# PER DISTINCT VALUE IT CONTAINS.

dummy_columns = ["Dependents"]
data = pd.get_dummies(data, columns=dummy_columns)

In [6]:
# REPLACE EACH CATEGORICAL TEXT COLUMN WITH INTEGER CODES
# The same encoder object is re-fitted for every column, one after another.
# The code mappings noted below are the ones recorded by the original author;
# they depend on the distinct values actually present in the data.

le = LabelEncoder()
for text_column in (
    "Gender",         # FEMALE=0 , MALE=1
    "Married",        # NO=0 , YES=1
    "Education",      # GRADUATE=0 , NOT GRADUATE=1
    "Self_Employed",  # NO=0 , YES=1
    "Property_Area",  # RURAL=0 , SEMI-URBAN=1 , SEMIURBAN=2 , URBAN=3
):
    data[text_column] = le.fit_transform(data[text_column])

In [None]:
# DEFINING THE FEATURE AND LABEL DATASETS AND SPLITTING THE DATASET INTO TRAIN AND TEST DATA

# Everything except the target column is a feature; "Loan_Status" is the label.
feature = data.drop(columns='Loan_Status')
label = data['Loan_Status']

# 75% / 25% train/test split. A fixed random_state makes the shuffle - and
# therefore every accuracy figure computed from this split - reproducible
# across kernel restarts and re-runs of this cell.
feature_train, feature_test, label_train, label_test = train_test_split(
    feature, label, test_size=0.25, random_state=42
)

In [None]:
# SHOW THE SHAPE OF EACH PART OF THE SPLIT, ONE PER LINE, IN THIS ORDER:
# feature_train, feature_test, label_train, label_test
for split_part in (feature_train, feature_test, label_train, label_test):
    print(split_part.shape)

In [None]:
# DEFINING THE ML MODELS
# - The features are fed in unscaled (no scaling step precedes this cell), and
#   the default cap of 100 solver iterations can stop logistic regression
#   before it converges, so the iteration budget is raised.
# - The decision tree and random forest use randomness while fitting; a fixed
#   random_state makes their fitted models, and the reported metrics, repeatable.
LR = LogisticRegression(max_iter=1000)
DT = DecisionTreeClassifier(random_state=42)
RF = RandomForestClassifier(random_state=42)

# TRAINING THE MODELS

LR.fit(feature_train,label_train)
DT.fit(feature_train,label_train)
RF.fit(feature_train,label_train)


In [None]:
# ACCURACY OF EACH MODEL ON THE SAME DATA IT WAS TRAINED ON
print("TRAIN DATA ACCURACY")
for fitted_name, fitted_model in (
    ("Logistic Regression:", LR),
    ("Decision Tree:", DT),
    ("Random Forest:", RF),
):
    print(fitted_name, ac(label_train, fitted_model.predict(feature_train)))

In [None]:
# ACCURACY OF EACH MODEL ON THE HELD-OUT TEST DATA

print("TEST DATA ACCURACY")
for fitted_name, fitted_model in (
    ("Logistic Regression:", LR),
    ("Decision Tree:", DT),
    ("Random Forest:", RF),
):
    print(fitted_name, ac(label_test, fitted_model.predict(feature_test)))

In [None]:
# BUILDING THE INPUT FOR PREDICTION
# One hand-written record with 14 values, presumably ordered like the training
# feature columns - TODO confirm against feature.columns.
sample_row = np.array([1, 1, 0, 1, 3000, 0, 120.0, 360.0, 1.0, 3,1,0,0,0])

# Stack 120 copies of that record into a (120, 14) array; every row is the
# same, so each model returns 120 identical predictions.
new_data = np.tile(sample_row, (120, 1))

# PREDICTING WITH EACH TRAINED MODEL (logistic regression, decision tree, random forest)
lr_prediction, dt_prediction, rf_prediction = (
    fitted.predict(new_data) for fitted in (LR, DT, RF)
)

# SHOWING THE PREDICTIONS

for caption, predicted in (
    ("Logistic Regression:", lr_prediction),
    ("Decision Tree:", dt_prediction),
    ("Random Forest:", rf_prediction),
):
    print(caption, predicted)



In [None]:
# CREATING THE CONFUSION MATRICES FOR THE MODELS

# Score every model on the held-out test features, so each prediction lines up
# row-for-row with label_test. (Predictions made on any other input cannot be
# compared against label_test.)
lr_test_prediction = LR.predict(feature_test)
dt_test_prediction = DT.predict(feature_test)
rf_test_prediction = RF.predict(feature_test)

# Pin the class order explicitly so all three matrices share the same
# row/column order, taken from the classes the fitted model learned.
class_labels = list(LR.classes_)

lr_cm = confusion_matrix(label_test, lr_test_prediction, labels=class_labels)
dt_cm = confusion_matrix(label_test, dt_test_prediction, labels=class_labels)
rf_cm = confusion_matrix(label_test, rf_test_prediction, labels=class_labels)

# confusion_matrix puts the ACTUAL classes on the rows and the PREDICTED
# classes on the columns, so the table headers are derived from the class labels.
row_names = ["Actual {}".format(cls) for cls in class_labels]
column_names = ["Predicted {}".format(cls) for cls in class_labels]

lr_table = pd.DataFrame(lr_cm, columns=column_names, index=row_names)
dt_table = pd.DataFrame(dt_cm, columns=column_names, index=row_names)
rf_table = pd.DataFrame(rf_cm, columns=column_names, index=row_names)

print("Logistic Regression:")
print(lr_table.to_string())
print("----------------------------------------------")
print("Decision Tree:")
print(dt_table.to_string())
print("----------------------------------------------")
print("Random Forest:")
print(rf_table.to_string())
print("----------------------------------------------")