# Task

 Build a DNN with 20 hidden layers of 100 neurons each (that's too many, but it's the point of this exercise). Use He initialization and the Swish activation function.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


import tensorflow as tf

# Read data

In [2]:
# Download (or load from the local Keras cache) the CIFAR10 dataset.
cifar10 = tf.keras.datasets.cifar10.load_data()
(X_train_full, y_train_full), (X_test, y_test) = cifar10

# Hold out the first 5,000 training samples for validation and train on the rest.
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

# Build Network

In [3]:
# Seed TensorFlow's global random generator before any weights are created.
tf.random.set_seed(2024)

# Baseline network: flatten each 32x32x3 image, then stack 20 hidden Dense
# layers of 100 units each (Swish activation, He-normal initialization).
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=[32, 32, 3]))

for _ in range(20):
    model.add(
        tf.keras.layers.Dense(
            units=100,
            kernel_initializer="he_normal",
            activation="swish",
        )
    )

In [4]:
# Softmax output layer: one unit per CIFAR10 class.
# NOTE: this appends to `model`, so re-running the cell adds another layer.
model.add(tf.keras.layers.Dense(units=10, activation="softmax"))

In [5]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 3072)              0         
                                                                 
 dense (Dense)               (None, 100)               307300    
                                                                 
 dense_1 (Dense)             (None, 100)               10100     
                                                                 
 dense_2 (Dense)             (None, 100)               10100     
                                                                 
 dense_3 (Dense)             (None, 100)               10100     
                                                                 
 dense_4 (Dense)             (None, 100)               10100     
                                                                 
 dense_5 (Dense)             (None, 100)               1

# Training the model

 Using Nadam optimization and early stopping, train the network on the CIFAR10 dataset. You can load it with tf.keras.datasets.cifar10.load_data(). The dataset is composed of 60,000 32 × 32–pixel color images (50,000 for training, 10,000 for testing) with 10 classes, so you'll need a softmax output layer with 10 neurons. Remember to search for the right learning rate each time you change the model's architecture or hyperparameters.

In [5]:
# Nadam optimizer with a small learning rate; integer class labels are used
# directly, hence the *sparse* categorical cross-entropy loss.
optimizer = tf.keras.optimizers.Nadam(learning_rate=5e-5)

model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [7]:
# Early stopping with a patience of 20 epochs and best-weight restoration enabled.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=20,
    restore_best_weights=True,
)
# Checkpoint callback configured to keep only the best model seen so far.
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="models/my_cifar10_model",
    save_best_only=True,
)

In [8]:
from pathlib import Path

# Each training run writes its TensorBoard logs to its own sub-directory:
# my_logs/my_cifar10_logs/run_XXX
run_index = 1  # increment every time you train the model
run_logdir = Path("my_logs", "my_cifar10_logs", f"run_{run_index:03d}")
tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=run_logdir)

# Callbacks passed to `model.fit` for the baseline model.
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]

In [9]:
%load_ext tensorboard
%tensorboard --logdir=.my_logs/my_cifar10_logs

Reusing TensorBoard on port 6006 (pid 6772), started 0:05:18 ago. (Use '!kill 6772' to kill it.)

In [10]:
# Train the baseline model for up to 100 epochs, validating on the held-out
# split after each epoch; the callbacks handle early stopping, checkpointing
# and TensorBoard logging.
history_base_model = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=callbacks,
)

Epoch 1/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 2/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 3/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 4/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 5/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 6/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 7/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 8/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 9/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 10/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 11/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 12/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 13/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 14/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 15/100
Epoch 16/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 17/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 22/100
Epoch 23/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 24/100
Epoch 25/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 26/100
Epoch 27/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100


INFO:tensorflow:Assets written to: models\my_cifar10_model\assets


Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100


In [11]:
# Validation [loss, accuracy] of the trained baseline model.
model.evaluate(X_valid, y_valid)



[1.5083513259887695, 0.48240000009536743]

This gives poor accuracy on the validation set (about 48%). Let's see if we can improve it with batch normalization.

# Implement Batch Normalization in the same model architecture

In [12]:
# Seed TensorFlow's global random generator (same seed as the baseline model).
tf.random.set_seed(2024)

# Same 20 x 100 architecture as the baseline, but every hidden block is
# Dense -> BatchNormalization -> Swish, i.e. normalization is applied
# before the activation function.
model_bn = tf.keras.Sequential()
model_bn.add(tf.keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model_bn.add(tf.keras.layers.Dense(100, kernel_initializer="he_normal"))
    model_bn.add(tf.keras.layers.BatchNormalization())
    model_bn.add(tf.keras.layers.Activation("swish"))

# Softmax output layer: one unit per CIFAR10 class.
model_bn.add(tf.keras.layers.Dense(10, activation="softmax"))

# Learning rate is 10x the baseline's (5e-4 vs 5e-5).
optimizer = tf.keras.optimizers.Nadam(learning_rate=0.0005)
model_bn.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=20,
                                                     restore_best_weights=True)
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint("models/my_cifar10_bn_model",
                                                         save_best_only=True)
run_index = 1 # increment every time you train the model
# Log under "my_logs" -- the same root directory the baseline run uses -- so
# that all runs can be compared side by side in a single TensorBoard instance.
run_logdir = Path("my_logs") / "my_cifar10_logs" / f"run_bn_{run_index:03d}"
tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]

# Train for up to 100 epochs, validating on the held-out split each epoch.
history_batch_norm = model_bn.fit(X_train, y_train, epochs=100,
          validation_data=(X_valid, y_valid),
          callbacks=callbacks)




Epoch 1/100


INFO:tensorflow:Assets written to: models\my_cifar10_bn_model\assets


Epoch 2/100


INFO:tensorflow:Assets written to: models\my_cifar10_bn_model\assets


Epoch 3/100


INFO:tensorflow:Assets written to: models\my_cifar10_bn_model\assets


Epoch 4/100


INFO:tensorflow:Assets written to: models\my_cifar10_bn_model\assets


Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100


INFO:tensorflow:Assets written to: models\my_cifar10_bn_model\assets


Epoch 9/100
Epoch 10/100
Epoch 11/100


INFO:tensorflow:Assets written to: models\my_cifar10_bn_model\assets


Epoch 12/100
Epoch 13/100


INFO:tensorflow:Assets written to: models\my_cifar10_bn_model\assets


Epoch 14/100
Epoch 15/100
Epoch 16/100


INFO:tensorflow:Assets written to: models\my_cifar10_bn_model\assets


Epoch 17/100
Epoch 18/100


INFO:tensorflow:Assets written to: models\my_cifar10_bn_model\assets


Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100


Evaluate the model

In [13]:
# Validation [loss, accuracy] of the trained batch-normalization model.
model_bn.evaluate(X_valid, y_valid)



[1.4500573873519897, 0.4966000020503998]

# Replace Batch Normalization with the SELU activation function

In [14]:
# Seed TensorFlow's global random generator (same seed as the other models).
tf.random.set_seed(2024)

# Same 20 x 100 architecture, but with SELU activations and LeCun-normal
# initialization instead of batch normalization.
model_selu = tf.keras.Sequential()
model_selu.add(tf.keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model_selu.add(tf.keras.layers.Dense(100,
                                    kernel_initializer="lecun_normal",
                                    activation="selu"))

# Softmax output layer: one unit per CIFAR10 class.
model_selu.add(tf.keras.layers.Dense(10, activation="softmax"))

optimizer = tf.keras.optimizers.Nadam(learning_rate=7e-4)
model_selu.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=20, restore_best_weights=True)
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    "models/my_cifar10_selu_model", save_best_only=True)
run_index = 1 # increment every time you train the model
# Log under "my_logs" -- the same root directory the baseline run uses -- so
# that all runs can be compared side by side in a single TensorBoard instance.
run_logdir = Path("my_logs") / "my_cifar10_logs" / f"run_selu_{run_index:03d}"
tensorboard_cb = tf.keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]

# Standardize the inputs using statistics computed over the training samples
# only (axis 0); the same training-set statistics are applied to the
# validation and test sets.
X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

# Train on the standardized data for up to 100 epochs.
history_selu = model_selu.fit(X_train_scaled, y_train, epochs=100,
          validation_data=(X_valid_scaled, y_valid),
          callbacks=callbacks)


Epoch 1/100


INFO:tensorflow:Assets written to: models\my_cifar10_selu_model\assets


Epoch 2/100


INFO:tensorflow:Assets written to: models\my_cifar10_selu_model\assets


Epoch 3/100


INFO:tensorflow:Assets written to: models\my_cifar10_selu_model\assets


Epoch 4/100


INFO:tensorflow:Assets written to: models\my_cifar10_selu_model\assets


Epoch 5/100


INFO:tensorflow:Assets written to: models\my_cifar10_selu_model\assets


Epoch 6/100


INFO:tensorflow:Assets written to: models\my_cifar10_selu_model\assets


Epoch 7/100


INFO:tensorflow:Assets written to: models\my_cifar10_selu_model\assets


Epoch 8/100


INFO:tensorflow:Assets written to: models\my_cifar10_selu_model\assets


Epoch 9/100
Epoch 10/100


INFO:tensorflow:Assets written to: models\my_cifar10_selu_model\assets


Epoch 11/100
Epoch 12/100


INFO:tensorflow:Assets written to: models\my_cifar10_selu_model\assets


Epoch 13/100
Epoch 14/100


INFO:tensorflow:Assets written to: models\my_cifar10_selu_model\assets


Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100


In [15]:
# Validation [loss, accuracy] of the SELU model; it was trained on standardized
# inputs, so it is evaluated on the standardized validation set.
model_selu.evaluate(X_valid_scaled, y_valid)



[1.4724277257919312, 0.5112000107765198]