## Importing Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

## Loading Data

In [None]:
# Read the loan-application records from the CSV file into a DataFrame
dataset = pd.read_csv(
    'Loan_Application.csv'
)

In [None]:
# Preview the first rows of the raw data to check that the file loaded as expected
dataset.head()

Unnamed: 0,CustomerNumber,CreditScore,City,Gender,Age,Home_Living_Since,Avg_Acc_Bal,Assets,First Loan,Co-applicant,Mon_Sal,Loan
0,1,619,Bangalore,Female,41,2,0.0,1,1,1,50674.44,1
1,2,608,Hyderabad,Female,40,1,41903.93,1,0,1,56271.29,0
2,3,502,Bangalore,Female,41,8,79830.4,3,1,0,56965.785,1
3,4,699,Bangalore,Female,38,1,0.0,2,0,0,46913.315,0
4,5,850,Hyderabad,Female,42,2,62755.41,1,1,1,39542.05,0


## Data Pre-processing

### Dummy Variables



In [None]:
# One-hot encode the categorical columns (City, Gender). drop_first=True drops
# one level per column so the indicator columns are not perfectly collinear.
# dtype=int pins the indicators to 0/1 integers: pandas >= 2.0 defaults to bool,
# and mixing bool with numeric columns turns `.values` into an object array.
dataset_dummy = pd.get_dummies(dataset, drop_first=True, dtype=int)

In [None]:
# Preview the encoded frame: City and Gender are now indicator columns
dataset_dummy.head()

Unnamed: 0,CustomerNumber,CreditScore,Age,Home_Living_Since,Avg_Acc_Bal,Assets,First Loan,Co-applicant,Mon_Sal,Loan,City_Chennai,City_Hyderabad,Gender_Male
0,1,619,41,2,0.0,1,1,1,50674.44,1,0,0,0
1,2,608,40,1,41903.93,1,0,1,56271.29,0,0,1,0
2,3,502,41,8,79830.4,3,1,0,56965.785,1,0,0,0
3,4,699,38,1,0.0,2,0,0,46913.315,0,0,0,0
4,5,850,42,2,62755.41,1,1,1,39542.05,0,0,1,0


In [None]:
# List the column names available for feature / target selection
dataset_dummy.columns

Index(['CustomerNumber', 'CreditScore', 'Age', 'Home_Living_Since',
       'Avg_Acc_Bal', 'Assets', 'First Loan', 'Co-applicant', 'Mon_Sal',
       'Loan', 'City_Chennai', 'City_Hyderabad', 'Gender_Male'],
      dtype='object')

In [None]:
# Feature matrix: the 11 predictor columns (CustomerNumber and the target
# 'Loan' are intentionally left out), as a NumPy array
X = dataset_dummy[[
    'CreditScore', 'Age', 'Home_Living_Since', 'Avg_Acc_Bal',
    'Assets', 'First Loan', 'Co-applicant', 'Mon_Sal',
    'City_Chennai', 'City_Hyderabad', 'Gender_Male',
]].values

In [None]:
# Target: the 'Loan' column, kept as a single-column 2-D array (n_samples x 1)
y = dataset_dummy[['Loan']].values

### Train-Test Split

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 0,
)

### Normalization - Feature Standardization

In [None]:
# Standardize the features: the scaler learns mean/std from the training split
# only, and that same transform is then applied to both splits
from sklearn.preprocessing import StandardScaler
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Model Building - ANN


In [None]:
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

### NN Architecture

In [None]:
# Create the sequential (layer-by-layer) network together with its first
# hidden layer: 6 ReLU units fed by the 11 input features
classifier = Sequential([
    Dense(units = 6, activation = 'relu', input_dim = 11),
])

In [None]:
# Optional regularization (currently disabled): uncomment the line below to
# apply dropout with rate 0.1 to the outputs of the first hidden layer.

#classifier.add(Dropout(rate = 0.1))

A dropout rate of 0.1 means that 10% of the layer's neurons are randomly disabled at each training iteration.
If required, increase the rate in steps of 0.1, up to 0.5, to improve the accuracy.
At the extreme, a rate of 1 would disable every neuron, so the network could learn nothing (under-fitting).
In general, don't go over 0.5.

In [None]:
# Second hidden layer: another 6 ReLU units
classifier.add(
    Dense(units = 6, activation = 'relu')
)

In [None]:
# Optional regularization (currently disabled): uncomment to apply dropout
# with rate 0.1 to the outputs of the second hidden layer.
# classifier.add(Dropout(rate = 0.1))

In [None]:
# Output layer: a single sigmoid unit, i.e. one value in (0, 1) per sample
classifier.add(
    Dense(units = 1, activation = 'sigmoid')
)

### Model Compilation

In [None]:
# Configure training: SGD optimizer, binary cross-entropy loss, and accuracy
# tracked as the reported metric
classifier.compile(
    loss = 'binary_crossentropy',
    optimizer = 'sgd',
    metrics = ['accuracy'],
)

### Model Fitting

In [None]:
# Train for 5 epochs with mini-batches of 10 samples.
# The test split is also used as the validation data reported after each epoch.
classifier.fit(
    X_train, y_train,
    batch_size = 10,
    epochs = 5,
    validation_data = (X_test, y_test),
)

Train on 8000 samples, validate on 2000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7feacb5d6cc0>

## Model Evaluation

In [None]:
# Predict on the test set and threshold the sigmoid outputs at 0.5,
# giving a boolean label per sample
y_pred = classifier.predict(X_test) > 0.5

In [None]:
# Confusion matrix of the thresholded predictions against the true test labels
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_true = y_test, y_pred = y_pred)
print(cm)

[[1543   52]
 [ 291  114]]


## Early Stopping and Model Checkpoint

In [None]:
# Stop training once the validation loss has gone 10 epochs without improving.
early_stoppping = EarlyStopping(monitor='val_loss', verbose=1, patience=10)

# Save the model to 'best_model.h5' whenever validation accuracy improves.
# The monitored key must match the name Keras logs: the model is compiled with
# metrics=['accuracy'], which Keras >= 2.3 / tf.keras 2.x report as
# 'val_accuracy'. With the old key 'val_acc' the metric is never found and no
# checkpoint is written.
best_model = ModelCheckpoint('best_model.h5', monitor='val_accuracy', verbose=1, save_best_only=True)

# Multiply the learning rate by 0.2 after 5 epochs without validation-loss
# improvement, never going below 0.001.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=5, min_lr=0.001)

In [None]:
# Same training call as before, now with the three callbacks attached.
# NOTE(review): with epochs = 5, the patience values configured on the
# callbacks (10 and 5 epochs) cannot be reached, so early stopping and the
# learning-rate reduction never trigger in this run.
classifier.fit(
    X_train, y_train,
    batch_size = 10,
    epochs = 5,
    validation_data = (X_test, y_test),
    callbacks = [early_stoppping, best_model, reduce_lr],
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f9363c02b38>

## Grid Search CV

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense

def build_classifier(optimizer = 'adam'):
    """Build and compile the ANN that the grid search trains.

    Architecture: 11 inputs -> Dense(15, relu) -> Dense(10, relu) ->
    Dense(1, sigmoid), compiled with binary cross-entropy loss and the
    accuracy metric.

    Parameters
    ----------
    optimizer : default 'adam'
        Optimizer passed straight to ``compile``. The grid search supplies
        this value from the 'optimizer' entry of its parameter grid.

    Returns
    -------
    The compiled Sequential model.
    """
    model = Sequential()

    model.add(Dense(units = 15, activation = 'relu', input_dim = 11))
    model.add(Dense(units = 10, activation = 'relu'))
    model.add(Dense(units = 1, activation = 'sigmoid'))
    # Use the optimizer that was passed in. It used to be hard-coded to
    # 'adam', so every 'optimizer' candidate in the grid trained the same model.
    model.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model
    
# Wrap the model factory so scikit-learn can use it as an estimator.
# NOTE: this rebinds `classifier`, replacing the Sequential model trained above.
classifier = KerasClassifier(build_fn = build_classifier)

# Candidate hyper-parameters; the epoch counts are kept tiny so the search
# finishes quickly (larger alternative: 'epochs': [100, 500])
parameters = {
    'batch_size': [100, 300],
    'epochs': [2, 3],
    'optimizer': ['adam', 'rmsprop'],
}

# Exhaustive search over the grid, scored by accuracy with 2-fold
# cross-validation (alternative noted by the author: cv = 10)
grid_search = GridSearchCV(
    estimator = classifier,
    param_grid = parameters,
    scoring = 'accuracy',
    cv = 2,
)

In [None]:
# Run the search on the training split only; the assignment keeps the fitted
# search object under the same name
grid_search = grid_search.fit(
    X_train,
    y_train,
)

Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
# Pull the winning parameter combination and its cross-validated score
# from the fitted search, then report both
best_parameters, best_accuracy = grid_search.best_params_, grid_search.best_score_

print('best_parameters: ', best_parameters)
print('best accuracy: ', best_accuracy)

best_parameters:  {'batch_size': 100, 'epochs': 3, 'optimizer': 'rmsprop'}
best accuracy:  0.7983750000000001
