In [16]:
import tensorflow as tf
from keras import layers, models
from keras.models import Sequential
from keras.layers import Dense, Dropout
import pandas as pd 
import random 
from sklearn.model_selection import train_test_split

random.seed(42) #in case we will use random somewhere

data = pd.read_csv("../data/processed/processed_credit_risk_dataset.csv")

In [17]:
y = data['loan_status']
X = data.drop('loan_status',axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, stratify=y, random_state= 42) #stratify to handle imbalance in target lables

In [18]:
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dropout(0.3), # Dropout layer to prevent overfitting 
    Dense(1, activation='sigmoid') # Output layer for binary classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [19]:
from keras.optimizers import Adam

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [20]:
model.summary()


In [21]:
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 836us/step - accuracy: 0.6771 - loss: 173.3493 - val_accuracy: 0.7823 - val_loss: 1.8633
Epoch 2/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 584us/step - accuracy: 0.7822 - loss: 2.2094 - val_accuracy: 0.7822 - val_loss: 0.7867
Epoch 3/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 657us/step - accuracy: 0.7756 - loss: 0.6290 - val_accuracy: 0.7820 - val_loss: 0.5262
Epoch 4/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 631us/step - accuracy: 0.7838 - loss: 0.6071 - val_accuracy: 0.7820 - val_loss: 0.5249
Epoch 5/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 529us/step - accuracy: 0.7787 - loss: 0.5289 - val_accuracy: 0.7820 - val_loss: 0.5244
Epoch 6/100
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 520us/step - accuracy: 0.7776 - loss: 0.5405 - val_accuracy: 0.7820 - val_loss: 0.5243
Epoch 7/

In [22]:
# Trainin NN is quite fast so we can try to do more epochs to see if we can get better results

history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/500
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 738us/step - accuracy: 0.7870 - loss: 0.5632 - val_accuracy: 0.7849 - val_loss: 0.5203
Epoch 2/500
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 629us/step - accuracy: 0.7846 - loss: 0.5209 - val_accuracy: 0.7843 - val_loss: 0.5206
Epoch 3/500
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 561us/step - accuracy: 0.7858 - loss: 0.5189 - val_accuracy: 0.7859 - val_loss: 0.5191
Epoch 4/500
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 565us/step - accuracy: 0.7871 - loss: 0.5171 - val_accuracy: 0.7855 - val_loss: 0.5174
Epoch 5/500
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 693us/step - accuracy: 0.7811 - loss: 0.5240 - val_accuracy: 0.7898 - val_loss: 0.5096
Epoch 6/500
[1m764/764[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 569us/step - accuracy: 0.7851 - loss: 0.5193 - val_accuracy: 0.7849 - val_loss: 0.5209
Epoch 7/50

In [24]:
# Adding more epochs did not improve the model so we will stick with 100 epochs
# Predicting on test data

y_pred = model.predict(X_test)

# Converting the predicted values to binary
y_pred = [1 if i>=0.5 else 0 for i in y_pred]

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix: \n", confusion_matrix(y_test, y_pred))
print("\nClassification Report: \n", classification_report(y_test, y_pred))

# Saving the model
model.save('../models/credit_risk_model.h5')

[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 306us/step




Accuracy:  0.7870120304443898

Confusion Matrix: 
 [[6365    4]
 [1731   46]]

Classification Report: 
               precision    recall  f1-score   support

           0       0.79      1.00      0.88      6369
           1       0.92      0.03      0.05      1777

    accuracy                           0.79      8146
   macro avg       0.85      0.51      0.47      8146
weighted avg       0.82      0.79      0.70      8146



In [None]:
# The model has an accuracy of 0.79 which is quite good. We can use this model to predict the loan status of new customers.
# The precision and recall for both classes is also quite good.
# We can try to use this model to predict the loan status of new customers.