In [1]:
import tensorflow as tf
print(tf.__version__)

2.4.1


In [11]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import zipfile

#### Load the UCI Bank Marketing Dataset

In [12]:
'''
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip
zip_ref = zipfile.ZipFile('bank-additional.zip', 'r')
zip_ref.extractall('')
zip_ref.close()
'''

"\n!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip\nzip_ref = zipfile.ZipFile('bank-additional.zip', 'r')\nzip_ref.extractall('')\nzip_ref.close()\n"

In [13]:
# Load the CSV file into a pandas DataFrame
bank_dataframe = pd.read_csv('bank-additional/bank-additional-full.csv', delimiter=';')
print(bank_dataframe.shape)
bank_dataframe.head()

(41188, 21)


Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


In [14]:
# Select features from the DataFrame
features = ['age', 'job', 'marital', 'education', 'default', 'balance', 'housing', 'loan',  'campaign', 'pdays', 'poutcome']
labels = ['y']

bank_dataframe = bank_dataframe.filter(features + labels)
bank_dataframe.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,campaign,pdays,poutcome,y
0,56,housemaid,married,basic.4y,no,no,no,1,999,nonexistent,no
1,57,services,married,high.school,unknown,no,no,1,999,nonexistent,no
2,37,services,married,high.school,no,yes,no,1,999,nonexistent,no
3,40,admin.,married,basic.6y,no,no,no,1,999,nonexistent,no
4,56,services,married,high.school,no,no,yes,1,999,nonexistent,no


In [15]:
 print(bank_dataframe['job'].unique())

['housemaid' 'services' 'admin.' 'blue-collar' 'technician' 'retired'
 'management' 'unemployed' 'self-employed' 'unknown' 'entrepreneur'
 'student']


In [16]:
# Replacing null values with most frequent value
# Now we are imputing those 2 Missing Values with the Most Frequent value in the num-of-doors column using pandas fillna() method.
categorical_features = ['default', 'housing', 'job', 'loan', 'education',  'poutcome']

for feature in categorical_features:
    bank_dataframe[feature] = bank_dataframe[feature].fillna(bank_dataframe[feature].value_counts().index[0])
bank_dataframe.shape

(41188, 11)

In [17]:
# Convert the categorical features in the DataFrame to one-hot encodings
from sklearn.preprocessing import LabelBinarizer
#!pip install category_encoders
import category_encoders as ce
#encoder = LabelBinarizer()

for feature in categorical_features:
    encoder = ce.BinaryEncoder(cols = [feature])

    df = encoder.fit_transform(bank_dataframe[feature])
    bank_dataframe = pd.concat([bank_dataframe,df], axis =1)
    bank_dataframe = bank_dataframe.drop[feature]

bank_dataframe.head()

  elif pd.api.types.is_categorical(cols):


TypeError: 'method' object is not subscriptable

In [36]:
# Shuffle the DataFrame
bank_dataframe = bank_dataframe.sample(frac=1).reset_index(drop=True)
bank_dataframe

Unnamed: 0,age,job,marital,education,default,housing,loan,campaign,pdays,poutcome,y
0,72,retired,divorced,university.degree,no,no,no,1,999,nonexistent,no
1,56,entrepreneur,married,basic.9y,no,yes,no,12,999,nonexistent,no
2,48,blue-collar,married,basic.9y,no,yes,no,5,999,nonexistent,no
3,46,technician,married,professional.course,no,no,no,1,999,nonexistent,no
4,48,management,married,university.degree,no,yes,no,2,999,nonexistent,no
...,...,...,...,...,...,...,...,...,...,...,...
41183,58,management,married,basic.4y,no,yes,no,5,999,nonexistent,no
41184,40,blue-collar,married,basic.4y,no,no,no,6,999,nonexistent,no
41185,42,services,married,professional.course,no,no,no,2,999,nonexistent,no
41186,38,blue-collar,married,basic.4y,unknown,yes,no,1,999,nonexistent,no


#### Create the Dataset object

Most of the introductory articles on TensorFlow would introduce you with the feed_dict method of feeding the data to the model. feed_dict processes the input data in a single thread and while the data is being loaded and processed on CPU, the GPU remains idle and when the GPU is training a batch of data, CPU remains in the idle state. The developers of TensorFlow have advised not to use this method during training or repeated validation of the same datasets.

In [90]:
bank_dataset = tf.data.Dataset.from_tensor_slices(dict(bank_dataframe))
bank_dataset.element_spec

{'age': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'job': TensorSpec(shape=(), dtype=tf.string, name=None),
 'marital': TensorSpec(shape=(), dtype=tf.string, name=None),
 'education': TensorSpec(shape=(), dtype=tf.string, name=None),
 'default': TensorSpec(shape=(), dtype=tf.string, name=None),
 'housing': TensorSpec(shape=(), dtype=tf.string, name=None),
 'loan': TensorSpec(shape=(), dtype=tf.string, name=None),
 'contact': TensorSpec(shape=(), dtype=tf.string, name=None),
 'month': TensorSpec(shape=(), dtype=tf.string, name=None),
 'day_of_week': TensorSpec(shape=(), dtype=tf.string, name=None),
 'duration': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'campaign': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'pdays': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'previous': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'poutcome': TensorSpec(shape=(), dtype=tf.string, name=None),
 'emp.var.rate': TensorSpec(shape=(), dtype=tf.float64, name=None),
 'cons.price

In [93]:
# Filter the Dataset to retain only entries with a 'divorced' marital status
bank_dataset = bank_dataset.filter(lambda x : tf.equal(x['marital'], tf.constant([b'divorced']))[0] )

In [96]:
# First check that there are records in the dataset for non-married individuals

def check_divorced():
    bank_dataset_iterable = iter(bank_dataset)
    for x in bank_dataset_iterable:
        if x['marital'] != 'divorced':
            print('Found a person with marital status: {}'.format(x['marital']))
            return
    print('No non-divorced people were found!')

check_divorced()

No non-divorced people were found!


In [99]:
# Filter the Dataset to retain only entries with a 'divorced' marital status
bank_dataset = bank_dataset.filter(lambda x : tf.equal(x['marital'], tf.constant([b'divorced']))[0] )

In [102]:
bank_dataset.element_spec

{'age': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'job': TensorSpec(shape=(), dtype=tf.string, name=None),
 'marital': TensorSpec(shape=(), dtype=tf.string, name=None),
 'education': TensorSpec(shape=(), dtype=tf.string, name=None),
 'default': TensorSpec(shape=(), dtype=tf.string, name=None),
 'housing': TensorSpec(shape=(), dtype=tf.string, name=None),
 'loan': TensorSpec(shape=(), dtype=tf.string, name=None),
 'contact': TensorSpec(shape=(), dtype=tf.string, name=None),
 'month': TensorSpec(shape=(), dtype=tf.string, name=None),
 'day_of_week': TensorSpec(shape=(), dtype=tf.string, name=None),
 'duration': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'campaign': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'pdays': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'previous': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'poutcome': TensorSpec(shape=(), dtype=tf.string, name=None),
 'emp.var.rate': TensorSpec(shape=(), dtype=tf.float64, name=None),
 'cons.price

#### Map a function over the dataset

In [None]:
# Convert the label ('y') to an integer instead of 'yes' or 'no'

def map_label(x):
    if x['y'] == tf.constant([b'no'], dtype=tf.string) :
        x['y'] = 0
    else:
        x['y'] = 1
    return x

bank_dataset = bank_dataset.map(map_label)

In [103]:
# Inspect the Dataset object
bank_dataset.element_spec

{'age': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'job': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'education': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'default': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'housing': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'loan': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'campaign': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'pdays': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'poutcome': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'y': TensorSpec(shape=(), dtype=tf.int32, name=None)}

In [105]:
# Remove the 'marital' column
def remove_marital(x):
    dict_temp = {}
    for key,val in x.items():
        if key!='marital':
            dict_temp[key] = val
    return dict_temp

bank_dataset = bank_dataset.map(remove_marital)

In [106]:
# Inspect the Dataset object
bank_dataset.element_spec

{'age': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'job': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'education': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'default': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'housing': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'loan': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'campaign': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'pdays': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'poutcome': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'y': TensorSpec(shape=(), dtype=tf.int32, name=None)}

#### Create input and output data tuples

In [108]:
# Create an input and output tuple for the dataset

def map_feature_label(x):
    features = [[x['age']], [x['balance']], [x['campaign']], x['contact'], x['default'],x['education'], x['housing'], x['job'], x['loan'], [x['pdays']], x['poutcome']]
    return (tf.concat(features, axis=0), x['y'])

In [109]:
# Map this function over the dataset
bank_dataset = bank_dataset.map(map_feature_label)

KeyError: in user code:

    <ipython-input-108-f5d178d26437>:4 map_feature_label  *
        features = [[x['age']], [x['balance']], [x['campaign']], x['contact'], x['default'],x['education'], x['housing'], x['job'], x['loan'], [x['pdays']], x['poutcome']]

    KeyError: 'balance'


In [110]:
# Inspect the Dataset object

bank_dataset.element_spec

{'age': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'job': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'education': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'default': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'housing': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'loan': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'campaign': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'pdays': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'poutcome': TensorSpec(shape=(), dtype=tf.int32, name=None),
 'y': TensorSpec(shape=(), dtype=tf.int32, name=None)}

#### Split into a training and a validation set

In [111]:
# Determine the length of the Dataset

dataset_length = 0
for _ in bank_dataset:
    dataset_length += 1
print(dataset_length)

4612


In [112]:
bank_dataframe.marital.value_counts()

married     24928
single      11568
divorced     4612
unknown        80
Name: marital, dtype: int64

In [113]:
# Make training and validation sets from the dataset
train_len = int(0.7 * dataset_length)
train_data = bank_dataset.take(train_len)
val_data = bank_dataset.skip(train_len)

#### Build a classification model

Now let's build a model to classify the features.

In [114]:
# Build a classifier model

from tensorflow.keras.layers import Dense, Input, Concatenate, BatchNormalization
from tensorflow.keras import Sequential

model = Sequential()
model.add(Input(shape=(30,)))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(400, activation='relu'))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(400, activation='relu'))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(1, activation='sigmoid'))

In [115]:
# Compile the model

optimizer = tf.keras.optimizers.Adam(1e-4)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [116]:
# Show the model summary

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization (BatchNo (None, 30)                120       
_________________________________________________________________
dense (Dense)                (None, 400)               12400     
_________________________________________________________________
batch_normalization_1 (Batch (None, 400)               1600      
_________________________________________________________________
dense_1 (Dense)              (None, 400)               160400    
_________________________________________________________________
batch_normalization_2 (Batch (None, 400)               1600      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 401       
Total params: 176,521
Trainable params: 174,861
Non-trainable params: 1,660
______________________________________________

#### Train the model

In [117]:
# Create batched training and validation datasets

train_data = train_data.batch(batch_size=20, drop_remainder=True)
val_data = val_data.batch(batch_size=100, drop_remainder=True)

In [118]:
# Shuffle the training data

train_data = train_data.shuffle(1000)

In [120]:
# Fit the model
history = model.fit(train_data, validation_data=val_data, epochs=5)

Epoch 1/5


ValueError: in user code:

    C:\Users\danie\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    C:\Users\danie\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\danie\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\danie\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\danie\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:531 train_step  **
        y_pred = self(x, training=True)
    C:\Users\danie\anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:885 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs,
    C:\Users\danie\anaconda3\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:155 assert_input_compatibility
        raise ValueError('Layer ' + layer_name + ' expects ' +

    ValueError: Layer sequential expects 1 inputs, but it received 10 input tensors. Inputs received: [<tf.Tensor 'ExpandDims:0' shape=(20, 1) dtype=int64>, <tf.Tensor 'ExpandDims_1:0' shape=(20, 1) dtype=int64>, <tf.Tensor 'ExpandDims_2:0' shape=(20, 1) dtype=int32>, <tf.Tensor 'ExpandDims_3:0' shape=(20, 1) dtype=int32>, <tf.Tensor 'ExpandDims_4:0' shape=(20, 1) dtype=int32>, <tf.Tensor 'ExpandDims_5:0' shape=(20, 1) dtype=int32>, <tf.Tensor 'ExpandDims_6:0' shape=(20, 1) dtype=int32>, <tf.Tensor 'ExpandDims_7:0' shape=(20, 1) dtype=int64>, <tf.Tensor 'ExpandDims_8:0' shape=(20, 1) dtype=int32>, <tf.Tensor 'ExpandDims_9:0' shape=(20, 1) dtype=int32>]


In [None]:
# Plot the training and validation accuracy

plt.plot(history.history['accuracy'], label='Training')
plt.plot(history.history['val_accuracy'], label='validation')
plt.legend()
plt.xlabel('Epochs')
plt.ylabel('Accuracy')