### Tensorflow Implementation

- Ionosphere dataset (`ionosphere.data`)
- Sequential Model
- Monitoring training as well as test (Validation)
- L2 Regularization
- Dropouts
- Batch Norm
- Early Stopping

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay, f1_score

from sklearn.preprocessing  import StandardScaler, LabelEncoder

import tensorflow as tf

In [None]:
# --- Reproducibility: seed NumPy and TensorFlow with the same value ---
RANDOM_STATE = 24
np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

# --- Training hyper-parameters ---
# TEST_SIZE = 0.2
ALPHA = 0.001            # learning rate handed to the Adam optimizer
WEIGHT_DECAY = 0.001     # for regularization
EPOCHS = 200
BATCH_SIZE = 16
TRAIN_SIZE = int(18 * BATCH_SIZE)   # number of training rows: a whole multiple of the batch size

# --- Callback settings ---
PATIENCE = 20            # early stopping
LR_PATIENCE = 10
LR_FACTOR = 0.1          # factor by which the learning rate is reduced

# --- Plot appearance ---
CMAP = 'coolwarm' # plt.cm.Spectral

params = dict([
    ('legend.fontsize', 'medium'),
    ('figure.figsize', (15,6)),
    ('axes.labelsize', 'large'),
    ('axes.titlesize', 'large'),
    ('xtick.labelsize', 'medium'),
    ('ytick.labelsize', 'medium'),
])

plt.rcParams.update(params)

### BASICS

In [None]:
# Physical devices (optional):

# physical_device = tf.config.list_physical_devices('GPU')
# if len(physical_device) > 0:
#   tf.config.experimental.set_memory_growth(physical_device[0], True)
#
# Enable the lines above only when working on a local machine with a GPU and
# TensorFlow should not occupy the whole GPU at once, so that other processes
# can still use it.

In [None]:
# Directory layout, expressed relative to the notebook's working directory
subDir = 'ionosphere'
inpDir = os.path.join(os.pardir, os.pardir, 'input')   # two levels up
outDir = os.path.join(os.pardir, 'output')             # one level up
modelDir = os.path.join(os.pardir, 'models')           # one level up

In [None]:
# When working on our own machine, load the data via inpDir and subDir instead:

# data_df = pd.read_csv(os.path.join(inpDir, subDir, 'ionosphere.data'), header=None)
# data_df.head()

In [None]:
# Prefer the Google Drive copy when it is present (Colab); otherwise fall back to
# the project-relative location built from inpDir / subDir, so the notebook is
# not tied to one absolute path.
COLAB_DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/Datasets/ionosphere.data'
LOCAL_DATA_PATH = os.path.join(inpDir, subDir, 'ionosphere.data')

data_path = COLAB_DATA_PATH if os.path.exists(COLAB_DATA_PATH) else LOCAL_DATA_PATH

# The file has no header row, so columns are numbered 0..n-1
data_df = pd.read_csv(data_path, header=None)

data_df.head()

In [None]:
# Per-column dtype and non-null count of the raw frame
data_df.info()

In [None]:
# Summary statistics, transposed so that each row describes one column
data_df.describe().T

In [None]:
# The last column holds the class label; every other column is a feature
features_df = data_df.drop(columns=data_df.columns[-1])
labels = data_df.iloc[:, -1]
features_df.shape, labels.shape

In [None]:
# Stratified split; TRAIN_SIZE is an integer, i.e. a number of rows rather than a fraction
X_train, X_test, y_train, y_test = train_test_split(
    features_df,
    labels,
    train_size=TRAIN_SIZE,
    stratify=labels,
    random_state=RANDOM_STATE,
)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Absolute pairwise correlations between the features, annotated cell by cell
fig, ax = plt.subplots(figsize=(15,10))

sns.heatmap(
    features_df.corr().abs(),
    ax=ax,
    cmap='Blues',
    annot=True,
    fmt='.2g',
    cbar=False,
);

Keep the model clean and never let our model assume anything on its own. It is therefore always a good idea to encode the targets explicitly; here we use label encoding.

In [None]:
# Targets: fit the encoder on the training labels only, then reuse it for the test labels
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# Features: fit the scaler on the training split only, then apply it to the test split
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Original class labels as stored by the fitted LabelEncoder (position = encoded value)
class_names = list(le.classes_)
class_names

In [None]:
# Container types of the four splits after scaling / encoding
type(X_train), type(y_train), type(X_test), type(y_test)

All of the above are NumPy ndarrays.

### MODEL

The `Sequential` model takes its layers as a list.

In [None]:
# Sequential Model
# Baseline network: three ReLU hidden layers followed by a 2-unit output layer
# that has no activation (the loss below is configured with from_logits=True).
initializer = tf.keras.initializers.GlorotUniform(seed=RANDOM_STATE)

optimizer = tf.keras.optimizers.Adam(learning_rate=ALPHA)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1], )),

    # Hidden layers: 26 -> 18 -> 10 units, created in this order
    *[tf.keras.layers.Dense(n_units, activation='relu', kernel_initializer=initializer)
      for n_units in (26, 18, 10)],

    # Output layer
    tf.keras.layers.Dense(2, kernel_initializer=initializer),
])

In [None]:
model.summary()             # layer widths shrink by 8 at each step (26 -> 18 -> 10 -> 2)

In [None]:
# Attach the optimizer and loss defined with the model, and track accuracy as the metric
model.compile(optimizer = optimizer, loss = loss_fn, metrics=['accuracy'])

In [None]:
# The test split doubles as the validation set that is monitored every epoch.
# validation_data is passed as a tuple (x_val, y_val), which is the documented form;
# a list is not guaranteed to be unpacked into features and labels.
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    verbose=2)

In [None]:
# One row per epoch, one column per quantity recorded in history.history
loss_df = pd.DataFrame(history.history)
loss_df.head()

In [None]:
def _resolve_history_columns(hist_df):
    """Return the (train_metric, train_loss, val_metric, val_loss) column names.

    Columns are looked up by name ('loss' / 'val_loss' plus the first metric that
    has a matching 'val_' column), so the result does not depend on the order in
    which the history was recorded or on extra columns such as the learning rate.
    If the expected names are missing, the first four columns are used in the
    positional layout (metric, loss, val_metric, val_loss).
    """
    columns = list(hist_df.columns)
    if 'loss' in columns and 'val_loss' in columns:
        not_metrics = {'loss', 'val_loss', 'lr', 'learning_rate'}
        train_metrics = [col for col in columns
                         if col not in not_metrics
                         and not str(col).startswith('val_')
                         and f'val_{col}' in columns]
        if train_metrics:
            metric = train_metrics[0]
            return metric, 'loss', f'val_{metric}', 'val_loss'
    return tuple(columns[:4])


def _plot_history_panel(ax, hist_df, train_col, val_col, best_epoch,
                        best_label, text_xy, fontsize=12):
    """Draw one train-vs-validation curve pair on `ax` and mark `best_epoch`."""
    props = dict(boxstyle='round', facecolor='aqua', alpha=0.4)

    hist_df.plot(y=[train_col, val_col], ax=ax, colormap=plt.cm.coolwarm)

    # Text box with the values reached in the last epoch
    summary = '{} : \n   train : {:6.4f}\n     test : {:6.4f}'.format(
        str(train_col).capitalize(),
        hist_df[train_col].iloc[-1],
        hist_df[val_col].iloc[-1])
    ax.text(text_xy[0], text_xy[1], summary, transform=ax.transAxes,
            fontsize=fontsize, verticalalignment='top', bbox=props)

    # Place the annotation text one tenth of the plotted range above the point
    y_min = min(hist_df[train_col].min(), hist_df[val_col].min())
    y_max = max(hist_df[train_col].max(), hist_df[val_col].max())
    offset = (y_max - y_min) / 10.0

    best_value = hist_df.loc[best_epoch, val_col]
    ax.annotate(f'{best_label} : {best_value:6.4}',              # text to print
                xy=(best_epoch, best_value),                    # arrow tip
                xytext=(best_epoch, best_value + offset),       # text position
                fontsize=fontsize, va='bottom', ha='right', bbox=props,
                arrowprops=dict(facecolor='cyan', shrink=0.05))

    ax.axvline(best_epoch, color='g', linestyle='-.', lw=3)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(str(train_col).capitalize())
    ax.grid()


def fn_plot_tf_hist(hist_df):
    """Plot loss (left) and metric (right) curves of a Keras training history.

    Args:
        hist_df: DataFrame built from `history.history`, one row per epoch. It
            must contain a training/validation loss pair and a
            training/validation metric pair.

    Both panels mark the epoch with the lowest validation loss (the first one if
    the minimum is tied) and show the values reached in the last epoch.
    """
    metric, loss, val_metric, val_loss = _resolve_history_columns(hist_df)

    # Scalar index label of the best epoch; idxmin returns the first minimum
    best_epoch = hist_df[val_loss].idxmin()

    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    _plot_history_panel(axes[0], hist_df, loss, val_loss, best_epoch,
                        best_label='Min', text_xy=(0.3, 0.95))
    _plot_history_panel(axes[1], hist_df, metric, val_metric, best_epoch,
                        best_label='Best', text_xy=(0.3, 0.2))

    plt.suptitle('Training Performance')
    plt.tight_layout()
    plt.show()

In [None]:
# Training curves of the model fitted above
fn_plot_tf_hist(loss_df)

In [None]:
# Raw network output for the first training row; the output layer has no
# activation, so these values are logits rather than probabilities
pred = model(X_train[:1]).numpy()
pred

In [None]:
# Loss Function
# Same configuration as the loss the model was compiled with (from_logits=True)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Loss of the single prediction above against its true label
loss_fn(y_train[:1], pred)

Train Predictions

In [None]:
# Wrap the trained network with a Softmax layer so it returns class probabilities
pred_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])

# Probabilities for every training row; the arg-max along axis 1 is the predicted class
y_pred = pred_model(X_train).numpy()

print(f'Accuracy :{accuracy_score(y_train, np.argmax(y_pred, axis=1)):7.4f}')

In [None]:
# Per-class precision / recall / F1 on the training split
print(classification_report(y_train, y_pred.argmax(axis=1)))

In [None]:
# Confusion matrix of the training predictions, labelled with the original class names
cm = confusion_matrix(y_train, np.argmax(y_pred, axis=1))
display_labels = class_names

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)

fig, ax = plt.subplots(figsize=(4, 4))
disp.plot(ax=ax, cmap='Greens', colorbar=False)

Test Predictions

In [None]:
# Class predictions for the test split: softmax probabilities -> arg-max class index.
# Note that y_pred holds class indices here, not probabilities.
pred = pred_model(X_test)
y_pred = pred.numpy().argmax(axis=1)
y_true = y_test

len(y_true), len(y_pred)

### L2 *Regularization*

In [None]:
# Drop the previous experiment's objects; the next cell creates fresh ones
del initializer
del optimizer
del loss_fn
del model

In [None]:
initializer = tf.keras.initializers.GlorotUniform(seed=RANDOM_STATE)

optimizer = tf.keras.optimizers.Adam(learning_rate=ALPHA)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# L2 penalty applied to the kernel of every Dense layer, the output layer included
regularizer = tf.keras.regularizers.L2(l2=0.05)

model = tf.keras.Sequential([

    tf.keras.Input(shape=(X_train.shape[1], )),

    tf.keras.layers.Dense(26, activation='relu', kernel_regularizer=regularizer, kernel_initializer=initializer),  # First Hidden Layer

    tf.keras.layers.Dense(18, activation='relu', kernel_regularizer=regularizer, kernel_initializer=initializer),  # Second Hidden Layer

    tf.keras.layers.Dense(10, activation='relu', kernel_regularizer=regularizer, kernel_initializer=initializer),  # Third Hidden Layer

    tf.keras.layers.Dense(2, kernel_regularizer=regularizer, kernel_initializer=initializer)    # Output Layer
])


model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# validation_data is passed as a tuple (x_val, y_val), which is the documented form;
# a list is not guaranteed to be unpacked into features and labels.
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    verbose=2)

In [None]:
# Training curves of the L2-regularized model
loss_df = pd.DataFrame(history.history)
fn_plot_tf_hist(loss_df)

In [None]:
# Drop the previous experiment's objects; the next cell creates fresh ones
del initializer
del optimizer
del loss_fn
del model

In [None]:
initializer = tf.keras.initializers.GlorotUniform(seed=RANDOM_STATE)

optimizer = tf.keras.optimizers.Adam(learning_rate=ALPHA)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

activation = 'relu'

# Each hidden "set" is Dense -> BatchNormalization -> Activation: the Dense layer
# has no activation of its own, so normalization is applied before the ReLU.
model = tf.keras.Sequential([

    tf.keras.Input(shape=(X_train.shape[1], )),
    ##------------
    ## Set - 1
    ##------------
    tf.keras.layers.Dense(26, kernel_initializer=initializer),  # First Hidden Layer
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation(activation=activation),

    ##------------
    ## Set 2
    ## -----------

    tf.keras.layers.Dense(18, kernel_initializer=initializer),  # Second Hidden Layer
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation(activation=activation),

    ##------------
    ## Set 3
    ## -----------

    tf.keras.layers.Dense(10, kernel_initializer=initializer),  # Third Hidden Layer
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation(activation=activation),

    ##------------
    ## Set 4
    ## -----------
    tf.keras.layers.Dense(2, kernel_initializer=initializer)    # Output Layer
])


model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# validation_data is passed as a tuple (x_val, y_val), which is the documented form;
# a list is not guaranteed to be unpacked into features and labels.
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    verbose=2)

In [None]:
# Training curves of the batch-normalized model
loss_df = pd.DataFrame(history.history)
fn_plot_tf_hist(loss_df)

In [None]:
# Wrap the trained network with a Softmax layer so it returns class probabilities
pred_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])

# Probabilities for every training row; the arg-max along axis 1 is the predicted class
y_pred = pred_model(X_train).numpy()

print(f'Accuracy :{accuracy_score(y_train, np.argmax(y_pred, axis=1)):7.4f}')

In [None]:
# Per-class precision / recall / F1 on the training split
print(classification_report(y_train, y_pred.argmax(axis=1)))

In [None]:
def fn_plot_confusion_matrix(y_test, y_pred, labels):

  display_labels = class_names

  cm = confusion_matrix(y_train, y_pred)

  disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels= display_labels)

  fig, ax = plt.subplots(figsize = (4,4))

  disp.plot(ax = ax, cmap = 'Greens', colorbar=False)

In [None]:
fn_plot_confusion_matrix(y_test, y_pred.argmax(axis=1), labels=class_names)

In [None]:
# Confusion matrix of the training predictions, labelled with the original class names
cm = confusion_matrix(y_train, np.argmax(y_pred, axis=1))
display_labels = class_names

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)

fig, ax = plt.subplots(figsize=(4, 4))
disp.plot(ax=ax, cmap='Greens', colorbar=False)

## DropOut
0.2-0.3-0.4

In [None]:
# Drop the previous experiment's objects; the next cell creates fresh ones
del initializer
del optimizer
del loss_fn
del model

In [None]:
initializer = tf.keras.initializers.GlorotUniform(seed=RANDOM_STATE)

optimizer = tf.keras.optimizers.Adam(learning_rate=ALPHA)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

activation = 'relu'

# Dropout rates per hidden layer: they decrease by 0.1 from the first (widest)
# hidden layer to the third
dor1 = 0.4
dor2 = 0.3
dor3 = 0.2


# Each hidden "set" is Dense -> Activation -> Dropout; batch normalization is
# switched off in this experiment (commented out below).
model = tf.keras.Sequential([

    tf.keras.Input(shape=(X_train.shape[1], )),
    ##------------
    ## Set - 1
    ##------------
    tf.keras.layers.Dense(26, kernel_initializer=initializer),  # First Hidden Layer
    # tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation(activation=activation),
    tf.keras.layers.Dropout(rate=dor1, seed=RANDOM_STATE),


    ##------------
    ## Set 2
    ## -----------

    tf.keras.layers.Dense(18, kernel_initializer=initializer),  # Second Hidden Layer
    # tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation(activation=activation),
    tf.keras.layers.Dropout(rate=dor2, seed=RANDOM_STATE),

    ##------------
    ## Set 3
    ## -----------

    tf.keras.layers.Dense(10, kernel_initializer=initializer),  # Third Hidden Layer
    # tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation(activation=activation),
    tf.keras.layers.Dropout(rate=dor3, seed=RANDOM_STATE),

    ##------------
    ## Set 4
    ## -----------
    tf.keras.layers.Dense(2, kernel_initializer=initializer)    # Output Layer
])


model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# validation_data is passed as a tuple (x_val, y_val), which is the documented form;
# a list is not guaranteed to be unpacked into features and labels.
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    verbose=2)

In [None]:
# Training curves of the dropout model
loss_df = pd.DataFrame(history.history)
fn_plot_tf_hist(loss_df)

### Dropout, Batch Norm, Early Stopping, Learning rate Schedule, Save Checkpoints

In [None]:
# Drop the previous experiment's objects; the next cell creates fresh ones
del initializer
del optimizer
del loss_fn
del model

In [None]:
initializer = tf.keras.initializers.GlorotUniform(seed=RANDOM_STATE)

optimizer = tf.keras.optimizers.Adam(learning_rate=ALPHA)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

activation = 'relu'

model = tf.keras.Sequential([

    tf.keras.Input(shape=(X_train.shape[1], )),

    ##------------
    ## Sets 1-3: Dense -> BatchNormalization -> Activation, with 26, 18 and 10 units.
    ## Layers are created in exactly this order.
    ##------------
    *[layer
      for n_units in (26, 18, 10)
      for layer in (
          tf.keras.layers.Dense(n_units, kernel_initializer=initializer),
          tf.keras.layers.BatchNormalization(),
          tf.keras.layers.Activation(activation=activation),
      )],

    ##------------
    ## Set 4: output layer, no activation (the loss expects logits)
    ##------------
    tf.keras.layers.Dense(2, kernel_initializer=initializer),
])

model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

In [None]:
# All three callbacks watch the validation loss.

# File the checkpoint callback writes to: <modelDir>/<subDir>/all_in.keras
checkpoint_filepath = os.path.join(modelDir, subDir, 'all_in.keras')

# Checkpointing, configured with save_best_only=True
chkpt_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_filepath,
    monitor = 'val_loss',
    verbose=2,
    save_best_only=True)

# Early stopping with a patience of PATIENCE epochs; restore_best_weights=True
es_callback = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    patience = PATIENCE,
    verbose=2,
    restore_best_weights = True)

# Learning-rate reduction by LR_FACTOR with a patience of LR_PATIENCE epochs,
# bounded below by min_lr
lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor = 'val_loss',
    factor = LR_FACTOR,
    patience = LR_PATIENCE,
    verbose = 2,
    min_lr = 1e-6)

In [None]:
# Train with checkpointing, early stopping and learning-rate reduction enabled.
# validation_data is passed as a tuple (x_val, y_val), which is the documented form;
# a list is not guaranteed to be unpacked into features and labels.
history = model.fit(x=X_train,
                    y=y_train,
                    validation_data=(X_test, y_test),
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    callbacks=[chkpt_callback, es_callback, lr_callback],
                    verbose=2)

In [None]:
# Training curves of the run with callbacks
loss_df = pd.DataFrame(history.history)
fn_plot_tf_hist(loss_df)