'''

Author: Prathamesh Deshpande<br>
Date: 19-11-24<br>
Last modified by: Prathamesh Deshpande<br>
Last modified time:<br>
Title: Multi layer neural network to Predict Subscription to Term Deposit<br>

'''

Import Libraries

In [None]:
import pandas as pd  
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OrdinalEncoder,LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

In [2]:
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import Dense,Dropout # type: ignore

Load Dataset

In [3]:
df = pd.read_csv("bank.csv",sep=";")

Analyzing The Dataset

In [4]:
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no


In [5]:
df.sample(5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
4249,35,admin.,married,tertiary,no,1145,yes,no,cellular,2,feb,82,2,-1,0,unknown,no
3936,34,management,married,tertiary,no,3050,yes,no,cellular,13,may,469,1,-1,0,unknown,yes
2428,28,services,single,secondary,no,229,yes,no,unknown,9,may,322,1,-1,0,unknown,no
3161,24,technician,married,secondary,no,556,yes,no,cellular,18,aug,460,2,-1,0,unknown,yes
1373,56,blue-collar,married,primary,no,379,no,no,cellular,13,aug,58,5,-1,0,unknown,no


In [6]:
df.shape

(4521, 17)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4521 entries, 0 to 4520
Data columns (total 17 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        4521 non-null   int64 
 1   job        4521 non-null   object
 2   marital    4521 non-null   object
 3   education  4521 non-null   object
 4   default    4521 non-null   object
 5   balance    4521 non-null   int64 
 6   housing    4521 non-null   object
 7   loan       4521 non-null   object
 8   contact    4521 non-null   object
 9   day        4521 non-null   int64 
 10  month      4521 non-null   object
 11  duration   4521 non-null   int64 
 12  campaign   4521 non-null   int64 
 13  pdays      4521 non-null   int64 
 14  previous   4521 non-null   int64 
 15  poutcome   4521 non-null   object
 16  y          4521 non-null   object
dtypes: int64(7), object(10)
memory usage: 600.6+ KB


In [8]:
df.describe()

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous
count,4521.0,4521.0,4521.0,4521.0,4521.0,4521.0,4521.0
mean,41.170095,1422.657819,15.915284,263.961292,2.79363,39.766645,0.542579
std,10.576211,3009.638142,8.247667,259.856633,3.109807,100.121124,1.693562
min,19.0,-3313.0,1.0,4.0,1.0,-1.0,0.0
25%,33.0,69.0,9.0,104.0,1.0,-1.0,0.0
50%,39.0,444.0,16.0,185.0,2.0,-1.0,0.0
75%,49.0,1480.0,21.0,329.0,3.0,-1.0,0.0
max,87.0,71188.0,31.0,3025.0,50.0,871.0,25.0


Identifying Null Values

In [9]:
df.isnull().sum()

age          0
job          0
marital      0
education    0
default      0
balance      0
housing      0
loan         0
contact      0
day          0
month        0
duration     0
campaign     0
pdays        0
previous     0
poutcome     0
y            0
dtype: int64

Encoding Categorical Columns

In [10]:
ordinal_categorical_columns =categorical_columns = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome']
encoder = OrdinalEncoder()
df_encoded = df.copy()
df_encoded[ordinal_categorical_columns] = encoder.fit_transform(df_encoded[ordinal_categorical_columns])

In [11]:
df_encoded

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,10.0,1.0,0.0,0.0,1787,0.0,0.0,0.0,19,10.0,79,1,-1,0,3.0,no
1,33,7.0,1.0,1.0,0.0,4789,1.0,1.0,0.0,11,8.0,220,1,339,4,0.0,no
2,35,4.0,2.0,2.0,0.0,1350,1.0,0.0,0.0,16,0.0,185,1,330,1,0.0,no
3,30,4.0,1.0,2.0,0.0,1476,1.0,1.0,2.0,3,6.0,199,4,-1,0,3.0,no
4,59,1.0,1.0,1.0,0.0,0,1.0,0.0,2.0,5,8.0,226,1,-1,0,3.0,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4516,33,7.0,1.0,1.0,0.0,-333,1.0,0.0,0.0,30,5.0,329,5,-1,0,3.0,no
4517,57,6.0,1.0,2.0,1.0,-3313,1.0,1.0,2.0,9,8.0,153,1,-1,0,3.0,no
4518,57,9.0,1.0,1.0,0.0,295,0.0,0.0,0.0,19,1.0,151,11,-1,0,3.0,no
4519,28,1.0,1.0,1.0,0.0,1137,0.0,0.0,0.0,6,3.0,129,4,211,3,1.0,no


In [12]:
le = LabelEncoder()
df_encoded['y'] =  le.fit_transform(df_encoded['y'])

In [13]:
df_encoded

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,10.0,1.0,0.0,0.0,1787,0.0,0.0,0.0,19,10.0,79,1,-1,0,3.0,0
1,33,7.0,1.0,1.0,0.0,4789,1.0,1.0,0.0,11,8.0,220,1,339,4,0.0,0
2,35,4.0,2.0,2.0,0.0,1350,1.0,0.0,0.0,16,0.0,185,1,330,1,0.0,0
3,30,4.0,1.0,2.0,0.0,1476,1.0,1.0,2.0,3,6.0,199,4,-1,0,3.0,0
4,59,1.0,1.0,1.0,0.0,0,1.0,0.0,2.0,5,8.0,226,1,-1,0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4516,33,7.0,1.0,1.0,0.0,-333,1.0,0.0,0.0,30,5.0,329,5,-1,0,3.0,0
4517,57,6.0,1.0,2.0,1.0,-3313,1.0,1.0,2.0,9,8.0,153,1,-1,0,3.0,0
4518,57,9.0,1.0,1.0,0.0,295,0.0,0.0,0.0,19,1.0,151,11,-1,0,3.0,0
4519,28,1.0,1.0,1.0,0.0,1137,0.0,0.0,0.0,6,3.0,129,4,211,3,1.0,0


In [14]:
df_encoded['y'].value_counts()

0    4000
1     521
Name: y, dtype: int64

Train Test Split

In [15]:
# Define features (X) and target (y)
X = df_encoded.drop(columns=['y'])  # Features are all columns except 'y'
y = df_encoded['y']  # Target variable

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Feature Scaling

In [16]:
# Scale the features using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [17]:
X_train.shape

(3616, 16)

In [18]:
X_test.shape

(905, 16)

In [19]:
y_train.shape[0]

3616

In [20]:
y_test.shape[0]

905

Model Training

In [29]:
# Build the Multi-Layer Neural Network
model = Sequential()

# Input layer and first hidden layer with 128 neurons and ReLU activation
model.add(Dense(128, input_dim=X_train_scaled.shape[1], activation='relu'))
model.add(Dropout(0.2))  # Add dropout to prevent overfitting

# Second hidden layer with 64 neurons
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))  # Dropout

# Third hidden layer with 32 neurons
model.add(Dense(32, activation='relu'))

# Output layer with 1 neuron (binary classification) and sigmoid activation
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_scaled, y_train, epochs=13, batch_size=32, validation_split=0.2)

# Make predictions on the test set
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)  # Convert predictions to 0 or 1

Epoch 1/13


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8429 - loss: 0.4280 - val_accuracy: 0.8964 - val_loss: 0.2573
Epoch 2/13
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8942 - loss: 0.2731 - val_accuracy: 0.9019 - val_loss: 0.2565
Epoch 3/13
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8851 - loss: 0.2751 - val_accuracy: 0.8950 - val_loss: 0.2506
Epoch 4/13
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8990 - loss: 0.2565 - val_accuracy: 0.8978 - val_loss: 0.2512
Epoch 5/13
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8901 - loss: 0.2485 - val_accuracy: 0.8964 - val_loss: 0.2536
Epoch 6/13
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8933 - loss: 0.2471 - val_accuracy: 0.9006 - val_loss: 0.2506
Epoch 7/13
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━

Evaluate Model Performance

In [30]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:\n', class_report)


Accuracy: 0.8906077348066298
Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.97      0.94       807
           1       0.49      0.24      0.33        98

    accuracy                           0.89       905
   macro avg       0.70      0.61      0.63       905
weighted avg       0.87      0.89      0.87       905

