<a href="https://colab.research.google.com/github/ankur-omar/loan-approval-prediction/blob/master/Loan_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

  # **`Loan Prediction Project Using Neural network`**




In [56]:
#import some neccessary library
import numpy as np
import pandas as pd


# **Data Preprocessing**

In [57]:
#upload the data set 
data =pd.read_csv("loan_training.csv")


In [58]:
#check top 5 record in data
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [59]:
#check the datatype of features
data.dtypes

Loan_ID               object
Gender                object
Married               object
Dependents            object
Education             object
Self_Employed         object
ApplicantIncome        int64
CoapplicantIncome    float64
LoanAmount           float64
Loan_Amount_Term     float64
Credit_History       float64
Property_Area         object
Loan_Status           object
dtype: object

In [60]:
#check the shape of data
data.shape

(614, 13)

In [61]:
#check missing values in columns
data.isnull().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

# **Filling missing Values**

# 1--> Categorical- using mode


In [62]:
# filling missing values of categorical variables with mode
data["Gender"].fillna(data["Gender"].mode()[0],inplace =True)
data["Married"].fillna(data["Married"].mode()[0],inplace =True)
data["Dependents"].fillna(data["Dependents"].mode()[0],inplace =True)
data["Self_Employed"].fillna(data["Self_Employed"].mode()[0],inplace =True)
#data["LoanAmount"].fillna(data["LoanAmount"].mode()[0],inplace =True)
data["Credit_History"].fillna(data["Credit_History"].mode()[0],inplace =True)
data["Loan_Amount_Term"].fillna(data["Loan_Amount_Term"].mode()[0],inplace =True)

In [63]:
# for Continuous data with mean

data["LoanAmount"].fillna(data["LoanAmount"].mean(),inplace =True)

In [64]:
data.isnull().sum()

Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

# **Convert Categories into Number**


In [65]:
#converting categories into number using map function

data["Gender"] =data["Gender"].map({"Male":0,"Female":1})
data["Married"] =data["Married"].map({"Yes":1,"No":0})
data["Dependents"] =data["Dependents"].map({"0":0,"1":1,"2":2,"3+":3})
data["Education"] =data["Education"].map({"Graduate":1,"Not Graduate":0})
data["Self_Employed"] =data["Self_Employed"].map({"Yes":1,"No":0})
data["Property_Area"] =data["Property_Area"].map({"Rural":0,"Semiurban":1,"Urban":3})
data["Loan_Status"] =data["Loan_Status"].map({"Y":1,"N":0})


In [66]:
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,0,0,0,1,0,5849,0.0,146.412162,360.0,1.0,3,1
1,LP001003,0,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0,0
2,LP001005,0,1,0,1,1,3000,0.0,66.0,360.0,1.0,3,1
3,LP001006,0,1,0,0,0,2583,2358.0,120.0,360.0,1.0,3,1
4,LP001008,0,0,0,1,0,6000,0.0,141.0,360.0,1.0,3,1


# **Bringing all the variable in range 0 to 1**

# Xnorm =X-X_min/X_max-X_min

In [67]:
#bringing variable in the range 0 to 1
for i in data.columns[1:]:
  data[i] =(data[i]-data[i].min())/(data[i].max()-data[i].min())


In [68]:
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,0.0,0.0,0.0,1.0,0.0,0.070489,0.0,0.19886,0.74359,1.0,1.0,1.0
1,LP001003,0.0,1.0,0.333333,1.0,0.0,0.05483,0.036192,0.172214,0.74359,1.0,0.0,0.0
2,LP001005,0.0,1.0,0.0,1.0,1.0,0.03525,0.0,0.082489,0.74359,1.0,1.0,1.0
3,LP001006,0.0,1.0,0.0,0.0,0.0,0.030093,0.056592,0.160637,0.74359,1.0,1.0,1.0
4,LP001008,0.0,0.0,0.0,1.0,0.0,0.072356,0.0,0.191027,0.74359,1.0,1.0,1.0


In [69]:
#saving the preprocessed data
data.to_csv("pre_processed_loan_data.csv",index =False)

# **Steps to Build a Neural Network Using Keras**

**1->** Loading the dataset
**2->** Creating training and validation set
**3->** Defining the architecture of the model
**4->** Compiling the Model(define loss function,optimizer)
**5->** Training the model
**6->** Evaluating model performance on training and validation set

# **1-> Loading the Dataset**

In [70]:
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
%matplotlib inline


In [71]:
#Loading Pre-processed dataset
data1 =pd.read_csv("pre_processed_loan_data.csv")


In [72]:
data1.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,0.0,0.0,0.0,1.0,0.0,0.070489,0.0,0.19886,0.74359,1.0,1.0,1.0
1,LP001003,0.0,1.0,0.333333,1.0,0.0,0.05483,0.036192,0.172214,0.74359,1.0,0.0,0.0
2,LP001005,0.0,1.0,0.0,1.0,1.0,0.03525,0.0,0.082489,0.74359,1.0,1.0,1.0
3,LP001006,0.0,1.0,0.0,0.0,0.0,0.030093,0.056592,0.160637,0.74359,1.0,1.0,1.0
4,LP001008,0.0,0.0,0.0,1.0,0.0,0.072356,0.0,0.191027,0.74359,1.0,1.0,1.0


In [73]:
data1.isnull().sum()

Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [74]:
data1.dtypes

Loan_ID               object
Gender               float64
Married              float64
Dependents           float64
Education            float64
Self_Employed        float64
ApplicantIncome      float64
CoapplicantIncome    float64
LoanAmount           float64
Loan_Amount_Term     float64
Credit_History       float64
Property_Area        float64
Loan_Status          float64
dtype: object

In [75]:
#drop the Loan_Id 
data1 =data1.drop("Loan_ID",axis =1)

In [76]:
data1.shape

(614, 12)

In [77]:
#separating the independent and dependent variable

#storing all the independent variables as X
x =data1.drop("Loan_Status",axis =1)

#storing dependent variable as y
y =data1["Loan_Status"]



In [78]:
x.shape,y.shape

((614, 11), (614,))

# **2-> Creating Training And Validation Set**

In [79]:
#creating training and validation set

#stratify will make sure that the distribution of classes in training and validation set it similar
#random state to regenerate the same training and validation set
#test size 0.2 will keep 20% data in validation  set and remaining 80% data i training set

x_train,x_test,y_train,y_test  =train_test_split(x,y,stratify =data1['Loan_Status'],random_state =10, test_size = 0.2)



In [80]:
#shape of training and validation set

(x_train.shape,y_train.shape),(x_test.shape,y_test.shape)

(((491, 11), (491,)), ((123, 11), (123,)))

# **3-> Defining The Architecture Of the Model**

In [81]:
import keras
import tensorflow as tf


In [82]:
#importing the sequential model

from keras.models import Sequential

In [83]:
#importing different layers from keras

from keras.layers import InputLayer,Dense

In [84]:
#number of input neurons
x_train.shape

(491, 11)

In [85]:
#number of feature in data
x_train.shape[1]

11

In [86]:
#defining input_neurons
input_neurons =x_train.shape[1]


In [87]:
# number of output neuraons
#since loan prediction is binary classification problem hence we willhave only 1 output layer



In [88]:
# defining output neurons
output_neurons =1


In [89]:
# number of hidden layers and hidden neuraons

#it is a hyperparameter  and we can pick the hidden layers and hidden neuron as our own


In [90]:
#define hidden layer and neuron  in each layer
number_of_hidden_layer =2
neuron_hidden_layer_1 =10
neuron_hidden_layer_2 =5


In [91]:
# define activation of different layer
# i have picked relu as a activation fucntion for hidden layer
#since it is a binary classification problem i have used sigmoid fuction in the last layer


In [92]:
#defining the architecture of the model 
model = Sequential()
model.add(InputLayer(input_shape =(input_neurons,)))
model.add(Dense(units =neuron_hidden_layer_1,activation ='relu'))
model.add(Dense(units =neuron_hidden_layer_2,activation ='relu'))
model.add(Dense(units =output_neurons,activation ='sigmoid'))


In [93]:
# show the summary about model architecture
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 10)                120       
_________________________________________________________________
dense_4 (Dense)              (None, 5)                 55        
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 6         
Total params: 181
Trainable params: 181
Non-trainable params: 0
_________________________________________________________________


In [94]:
#number of parameters between input and first hidden layer
input_neurons*neuron_hidden_layer_1

110

In [95]:
# adding the bias for each neuraon in first hidden layer
input_neurons*neuron_hidden_layer_1+10

120

In [96]:
#number of parameters between  first hidden layer and second hidden layer
neuron_hidden_layer_1*neuron_hidden_layer_2+5

55

In [97]:
#number of parameters between second hidden layer nad output layer

neuron_hidden_layer_2*output_neurons+1

6

# **Compiling the Model (define loss function , optimizer)**

In [98]:
model.compile(loss ='binary_crossentropy',optimizer ='Adam',metrics =['accuracy'])

# **5-> Training The Model**

In [103]:
model_history =model.fit(x_train,y_train, epochs =50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# **6-> Evaluating Model Performance On Validation set**

In [104]:
predictions =model.predict_classes(x_test)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [105]:
#calculating the accuracy on validation set
accuracy_score(y_test,predictions)

0.8130081300813008