In [25]:
# Add Libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Input
from tensorflow.keras.callbacks import EarlyStopping


In [45]:
# Load the dataset
data1 = pd.read_csv('../data/bank-additional-full_normalised.csv')
# Check if all values are between 0 and 1
data1.head()

Unnamed: 0,age,job=housemaid,job=services,job=admin.,job=blue-collar,job=technician,job=retired,job=management,job=unemployed,job=self-employed,...,previous,poutcome=nonexistent,poutcome=failure,poutcome=success,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,class
0,0.209877,0,0,0,0,0,0,0,0,0,...,0.0,1,0,0,1.0,0.882307,0.376569,0.98073,1.0,0
1,0.296296,0,0,1,0,0,0,0,0,0,...,0.0,1,0,0,1.0,0.484412,0.615063,0.981183,1.0,0
2,0.246914,1,0,0,0,0,0,0,0,0,...,0.0,1,0,0,0.9375,0.698753,0.60251,0.957379,0.859735,0
3,0.160494,0,1,0,0,0,0,0,0,0,...,0.142857,0,1,0,0.333333,0.26968,0.192469,0.150759,0.512287,0
4,0.530864,0,0,0,1,0,0,0,0,0,...,0.0,1,0,0,0.333333,0.340608,0.154812,0.17479,0.512287,1


In [26]:
# Load the dataset
data = pd.read_csv('../data/bank-additional-full_normalised.csv')
# Check if all values are between 0 and 1
if not ((data >= 0) & (data <= 1)).all().all():
    print("Values outside the range [0, 1] found in the dataset.")
    print(data[(data < 0) | (data > 1)])

In [27]:
# Separate features and target
X = data.drop('class', axis=1)
y = data['class']

In [46]:
type(y)

pandas.core.series.Series

In [28]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [43]:
X_train.shape[1]

62

In [61]:
# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# Define the DNN model
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])


# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

from sklearn.utils.class_weight import compute_class_weight

# Compute class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
# class_weights_dict = {cls: weight for cls, weight in zip(np.unique(y_train), class_weights)}
class_weights_dict  = {0 : class_weights[0], 1 : class_weights[1]};
print(class_weights_dict)

{0: 0.5639419456425002, 1: 4.409796573875803}


In [None]:
# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

# Train the model

history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    class_weight=class_weights_dict
)


Epoch 1/100
[1m824/824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7812 - loss: 0.4202 - val_accuracy: 0.8140 - val_loss: 0.3914
Epoch 2/100
[1m824/824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8292 - loss: 0.2998 - val_accuracy: 0.8193 - val_loss: 0.3539
Epoch 3/100
[1m824/824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8382 - loss: 0.2800 - val_accuracy: 0.8267 - val_loss: 0.3208
Epoch 4/100
[1m824/824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8437 - loss: 0.2653 - val_accuracy: 0.8156 - val_loss: 0.3539
Epoch 5/100
[1m824/824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8491 - loss: 0.2578 - val_accuracy: 0.8463 - val_loss: 0.2993
Epoch 6/100
[1m824/824[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8477 - loss: 0.2481 - val_accuracy: 0.8219 - val_loss: 0.3861
Epoch 7/100
[1m824/82

In [58]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.4f}')

[1m258/258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 800us/step - accuracy: 0.8839 - loss: 0.5917
Test Accuracy: 0.8843


In [59]:
from sklearn.metrics import classification_report

# Evaluate the model


# Get predictions
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)  # Convert probabilities to binary classes

# Generate classification report
report = classification_report(y_test, y_pred_classes)
print(report)

[1m258/258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 940us/step
              precision    recall  f1-score   support

           0       0.89      0.99      0.94      7334
           1       0.20      0.02      0.03       904

    accuracy                           0.88      8238
   macro avg       0.54      0.50      0.49      8238
weighted avg       0.82      0.88      0.84      8238

