## Imports

In [1]:
# Import all relevant packages
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing

## Mapping and Updating Data

In [2]:
raw_data = pd.read_csv('loan_approval_dataset.csv')
data = raw_data.copy()

data[' loan_status'] = data[' loan_status'].map({' Approved': 1, ' Rejected': 0})
data[' education'] = data[' education'].map({' Graduate': 1, ' Not Graduate': 0})
data[' self_employed'] = data[' self_employed'].map({' Yes': 1, ' No': 0})

data.to_csv('loan_updated_data.csv', header = False)


## Create Inputs and Targets

In [3]:
raw_csv_data = np.loadtxt('loan_updated_data.csv', delimiter = ',')
all_unscaled_inputs = raw_csv_data[:, 1:-1]
all_targets = raw_csv_data[:, -1]
all_targets

array([1., 0., 0., ..., 0., 1., 1.])

In [21]:
shuffle_indices = np.arange(all_unscaled_inputs.shape[0])
np.random.shuffle(shuffle_indices)

all_unscaled_inputs = all_unscaled_inputs[shuffle_indices]
all_targets = all_targets[shuffle_indices]


## Balancing the Dataset

In [22]:
num_zero_targets = 4269 - int(np.sum(all_targets))

one_targets_count = 0
indices_to_remove = []

for i in range(all_targets.shape[0]):
    
    if all_targets[i] == 1:
        one_targets_count += 1
        
        if one_targets_count > num_zero_targets:
            indices_to_remove.append(i)

unscaled_updated_inputs = np.delete(all_unscaled_inputs, indices_to_remove, axis = 0)
updated_targets = np.delete(all_targets, indices_to_remove, axis = 0)

    

## Standardize Inputs

In [23]:
scaled_inputs = preprocessing.scale(unscaled_updated_inputs)

## Shuffle Once More

In [24]:
shuffle_indices = np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffle_indices)

scaled_shuffled_inputs = scaled_inputs[shuffle_indices]
shuffled_targets = updated_targets[shuffle_indices]

## Splitting into Training, Validation, and Test

In [26]:
sample_count = scaled_shuffled_inputs.shape[0]

training_count = int(sample_count * 0.8)
validation_count = int(sample_count * 0.1)
test_count = int(sample_count * 0.1)

training_inputs = scaled_shuffled_inputs[:training_count]
training_targets = shuffled_targets[:training_count]

validation_inputs = scaled_shuffled_inputs[training_count: training_count + validation_count]
validation_targets = shuffled_targets[training_count: training_count + validation_count]

test_inputs = scaled_shuffled_inputs[training_count+validation_count:]
test_targets = shuffled_targets[training_count + validation_count:]


print(np.sum(training_targets), training_count, np.sum(training_targets) / training_count)
print(np.sum(validation_targets), validation_count, np.sum(validation_targets) / validation_count)
print(np.sum(test_targets), test_count, np.sum(test_targets) / test_count)

1289.0 2580 0.4996124031007752
169.0 322 0.5248447204968945
155.0 322 0.4813664596273292


## Move data to an external npz file

In [27]:
np.savez('Training Data', inputs= training_inputs, targets = training_targets)
np.savez('Validation Data', inputs = validation_inputs, targets = validation_targets)
np.savez('Testing Data', inputs = test_inputs, targets = test_targets)

## Create final input and target variables for training / verifying / testing the model

In [28]:
npz = np.load('Training Data.npz')

inputs_for_training, targets_for_training = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)

npz = np.load('Validation Data.npz')

inputs_for_validation, targets_for_validation = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)

npz = np.load('Testing Data.npz')

inputs_for_testing, targets_for_testing = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)




Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  inputs_for_training, targets_for_training = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  inputs_for_training, targets_for_training = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  inputs_for_validation, targets_for_validation = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  inputs_for_validation, targets_for_validation = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.o

In [40]:
input_size = 11
output_size = 2
hidden_layer_size = 150
BATCH_SIZE = 75
EPOCHS = 100

model = tf.keras.Sequential([
    
    tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
    tf.keras.layers.Dense(output_size, activation = 'softmax')
])

model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

model.fit(inputs_for_training,
          targets_for_training,
          batch_size = BATCH_SIZE,
          epochs = EPOCHS,
          callbacks = [early_stopping],
          validation_data = (inputs_for_validation, targets_for_validation),
          verbose = 2
         )

Epoch 1/100
35/35 - 1s - loss: 0.4196 - accuracy: 0.8399 - val_loss: 0.2543 - val_accuracy: 0.9068 - 1s/epoch - 37ms/step
Epoch 2/100
35/35 - 0s - loss: 0.2238 - accuracy: 0.9186 - val_loss: 0.2246 - val_accuracy: 0.9130 - 124ms/epoch - 4ms/step
Epoch 3/100
35/35 - 0s - loss: 0.1967 - accuracy: 0.9318 - val_loss: 0.2243 - val_accuracy: 0.9161 - 129ms/epoch - 4ms/step
Epoch 4/100
35/35 - 0s - loss: 0.1824 - accuracy: 0.9345 - val_loss: 0.2237 - val_accuracy: 0.9130 - 137ms/epoch - 4ms/step
Epoch 5/100
35/35 - 0s - loss: 0.1718 - accuracy: 0.9364 - val_loss: 0.1985 - val_accuracy: 0.9224 - 180ms/epoch - 5ms/step
Epoch 6/100
35/35 - 0s - loss: 0.1603 - accuracy: 0.9450 - val_loss: 0.1966 - val_accuracy: 0.9193 - 148ms/epoch - 4ms/step
Epoch 7/100
35/35 - 0s - loss: 0.1496 - accuracy: 0.9488 - val_loss: 0.1952 - val_accuracy: 0.9193 - 150ms/epoch - 4ms/step
Epoch 8/100
35/35 - 0s - loss: 0.1437 - accuracy: 0.9523 - val_loss: 0.1906 - val_accuracy: 0.9317 - 147ms/epoch - 4ms/step
Epoch 9/10

<keras.callbacks.History at 0x18f5c333400>

In [41]:
test_loss, test_accuracy = model.evaluate(inputs_for_testing, targets_for_testing)




In [42]:
print('\nTest loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))


Test loss: 0.16. Test accuracy: 94.75%
