In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns
import math
import tensorflow as tf
import keras
import keras_tuner as kt

from tensorflow.keras import callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Activation, Dropout, InputLayer, Input, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Accuracy
from keras import layers, Sequential
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import scale
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from keras import backend as K

In [2]:
df = pd.read_csv('../split_train_test_data./loan_approval_dataset.csv')
df = df.rename(columns=lambda x: x.strip())
print(df.shape)
print(df.dtypes)

(4269, 13)
loan_id                      int64
no_of_dependents             int64
education                   object
self_employed               object
income_annum                 int64
loan_amount                  int64
loan_term                    int64
cibil_score                  int64
residential_assets_value     int64
commercial_assets_value      int64
luxury_assets_value          int64
bank_asset_value             int64
loan_status                 object
dtype: object


In [3]:
df = df.drop('loan_id', axis=1).copy()
df = df.replace({'self_employed': {' No': 0, ' Yes': 1}})
df = df.replace({'education': {' Not Graduate': 0, ' Graduate': 1}})
df = df.replace({'loan_status': {' Approved': 1, ' Rejected': 0}})
# df.head()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4269 entries, 0 to 4268
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype
---  ------                    --------------  -----
 0   no_of_dependents          4269 non-null   int64
 1   education                 4269 non-null   int64
 2   self_employed             4269 non-null   int64
 3   income_annum              4269 non-null   int64
 4   loan_amount               4269 non-null   int64
 5   loan_term                 4269 non-null   int64
 6   cibil_score               4269 non-null   int64
 7   residential_assets_value  4269 non-null   int64
 8   commercial_assets_value   4269 non-null   int64
 9   luxury_assets_value       4269 non-null   int64
 10  bank_asset_value          4269 non-null   int64
 11  loan_status               4269 non-null   int64
dtypes: int64(12)
memory usage: 400.3 KB


In [4]:
X = df.drop('loan_status', axis=1).copy()
y = df['loan_status'].copy()

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=345, test_size=0.2)
X_train = scale(X_train)
X_test = scale(X_test)

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

print(f"""
X_train shape = {X_train.shape}
X_test shape = {X_test.shape}
y_train shape = {y_train.shape}
y_test shape = {y_test.shape}
""")


X_train shape = (3415, 11)
X_test shape = (854, 11)
y_train shape = (3415, 2)
y_test shape = (854, 2)



In [8]:
print(y_train[2])

[1. 0.]


## Create Model

In [18]:
def build_model(hp):
    model = Sequential()
    counter=0
    model.add(Input((17,)))
    for i in range(hp.Int('num_layers',min_value=1,max_value=10)):
        model.add(Dense(hp.Int('units'+str(i),min_value=8,max_value=40,step=2),activation=hp.Choice('activation' + str(i),values=['relu'])))
        model.add(Dropout(hp.Choice('drp'+str(i),values=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9])))
    model.add(Dense(2, activation='softmax'))
    
    model.compile(optimizer=hp.Choice('optimizer',values=['adam', 'nadam']),loss='binary_crossentropy',metrics=['accuracy'])
    return model

In [19]:
tuner=kt.RandomSearch(build_model,objective='val_accuracy',max_trials=50)

In [20]:
tuner.search(X_train,y_train,epochs=5,validation_split=0.2)
tuner.get_best_hyperparameters()[0].values

Trial 10 Complete [00h 00m 13s]
val_accuracy: 0.898975133895874

Best val_accuracy So Far: 0.9019033908843994
Total elapsed time: 00h 01m 37s


{'num_layers': 8,
 'units0': 26,
 'activation0': 'relu',
 'drp0': 0.4,
 'optimizer': 'nadam',
 'units1': 34,
 'activation1': 'relu',
 'drp1': 0.3,
 'units2': 8,
 'activation2': 'relu',
 'drp2': 0.1,
 'units3': 34,
 'activation3': 'relu',
 'drp3': 0.3,
 'units4': 34,
 'activation4': 'relu',
 'drp4': 0.7,
 'units5': 18,
 'activation5': 'relu',
 'drp5': 0.4,
 'units6': 12,
 'activation6': 'relu',
 'drp6': 0.5,
 'units7': 38,
 'activation7': 'relu',
 'drp7': 0.2,
 'units8': 40,
 'activation8': 'relu',
 'drp8': 0.9,
 'units9': 24,
 'activation9': 'relu',
 'drp9': 0.6}

In [35]:
model=tuner.get_best_models(num_models=1)[0]
model.summary()

  saveable.load_own_variables(weights_store.get(inner_path))


In [44]:
model = keras.Sequential()

model.add(Input((12,)))
model.add(Dense(12,activation='tanh'))
model.add(Dropout(0))
model.add(Dense(32,activation='tanh'))
model.add(Dropout(0))
model.add(Dense(36,activation='sigmoid'))
model.add(Dropout(0))
model.add(Dense(16,activation='relu'))
model.add(Dropout(0))
model.add(Dense(32,activation='tanh'))
model.add(Dropout(0))
# model.add(Dense(32,activation='tanh'))
# model.add(Dropout(0.4))
# model.add(Dense(16,activation='relu'))
# model.add(Dropout(0.6))
# model.add(Dense(40,activation='sigmoid'))
# model.add(Dropout(0.4))
model.add(Dense(2,activation='softmax'))
model.summary()
model.compile(optimizer='nadam',loss='binary_crossentropy',metrics=['accuracy'])

In [40]:
# Creating the callback feature to stop the training in-Between, in case of no improvement
call=callbacks.EarlyStopping(patience=20,min_delta=0.0001,restore_best_weights=True)
# Fitting the model
history=model.fit(X_train,y_train,validation_data=(X_val,y_val),epochs=50, callbacks=[call])

Epoch 1/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.6334 - loss: 0.6246 - val_accuracy: 0.6413 - val_loss: 0.8142
Epoch 2/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9059 - loss: 0.2593 - val_accuracy: 0.6164 - val_loss: 1.0429
Epoch 3/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9164 - loss: 0.2264 - val_accuracy: 0.5915 - val_loss: 1.0946
Epoch 4/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9312 - loss: 0.1978 - val_accuracy: 0.5944 - val_loss: 1.2400
Epoch 5/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9367 - loss: 0.1950 - val_accuracy: 0.5944 - val_loss: 1.2327
Epoch 6/50
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9376 - loss: 0.1834 - val_accuracy: 0.5974 - val_loss: 1.3260
Epoch 7/50
[1m86/86[0m [32m━━━━━━━━━━