In [36]:
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [37]:
# Load the Fashion-MNIST dataset (note: fashion_mnist, not the digits MNIST).
(X_train_full, y_train_full), (X_test, y_test) = (
    tf.keras.datasets.fashion_mnist.load_data()
)

# Rescale the images by 1/255 (presumably maps 0-255 pixel intensities to [0, 1]).
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0

# Hold out the first 5000 training samples for validation; train on the rest.
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

In [38]:
# Seed NumPy's and TensorFlow's global random generators.
np.random.seed(42)
tf.random.set_seed(42)

# Baseline network: 28x28 input flattened, two hidden Dense layers (300 and 100
# units, He-normal kernel initializer) each followed by a separate LeakyReLU
# layer, and a 10-unit softmax output layer.
LAYERS = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(300, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(10, activation="softmax"),
]

model = tf.keras.models.Sequential(LAYERS)

In [39]:
# Compile the baseline model: plain SGD with a learning rate of 1e-3,
# sparse categorical cross-entropy loss, accuracy as the reported metric.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


In [40]:
# Print the layer-by-layer summary of the baseline model: each layer's
# output shape and parameter count, followed by the overall totals.
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_4 (Flatten)         (None, 784)               0         
                                                                 
 dense_12 (Dense)            (None, 300)               235500    
                                                                 
 leaky_re_lu_2 (LeakyReLU)   (None, 300)               0         
                                                                 
 dense_13 (Dense)            (None, 100)               30100     
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_14 (Dense)            (None, 10)                1010      
                                                                 
Total params: 266,610
Trainable params: 266,610
Non-tr

In [41]:
# Train the baseline model for 10 epochs, reporting loss/accuracy on the
# validation split after every epoch; verbose=2 logs one line per epoch.
# The value returned by fit() is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
    verbose=2,
)

Epoch 1/10
1719/1719 - 6s - loss: 1.2977 - accuracy: 0.6057 - val_loss: 0.8853 - val_accuracy: 0.7094 - 6s/epoch - 4ms/step
Epoch 2/10
1719/1719 - 5s - loss: 0.7965 - accuracy: 0.7353 - val_loss: 0.7112 - val_accuracy: 0.7612 - 5s/epoch - 3ms/step
Epoch 3/10
1719/1719 - 5s - loss: 0.6834 - accuracy: 0.7714 - val_loss: 0.6455 - val_accuracy: 0.7866 - 5s/epoch - 3ms/step
Epoch 4/10
1719/1719 - 5s - loss: 0.6255 - accuracy: 0.7918 - val_loss: 0.5936 - val_accuracy: 0.8046 - 5s/epoch - 3ms/step
Epoch 5/10
1719/1719 - 5s - loss: 0.5885 - accuracy: 0.8033 - val_loss: 0.5629 - val_accuracy: 0.8148 - 5s/epoch - 3ms/step
Epoch 6/10
1719/1719 - 5s - loss: 0.5618 - accuracy: 0.8105 - val_loss: 0.5402 - val_accuracy: 0.8200 - 5s/epoch - 3ms/step
Epoch 7/10
1719/1719 - 4s - loss: 0.5413 - accuracy: 0.8171 - val_loss: 0.5214 - val_accuracy: 0.8256 - 4s/epoch - 3ms/step
Epoch 8/10
1719/1719 - 5s - loss: 0.5255 - accuracy: 0.8212 - val_loss: 0.5132 - val_accuracy: 0.8258 - 5s/epoch - 3ms/step
Epoch 9/

# Batch Normalization approach 1: BN after the activation function

In [42]:
# Drop the name `model` (the baseline network); the next cell rebinds it to
# the batch-normalised network.
# NOTE(review): `del` only unbinds the name. Keras' automatic naming counters
# keep counting (the next summary reports "sequential_5"); if fresh layer names
# are wanted, tf.keras.backend.clear_session() is presumably needed too - confirm.
del model

In [43]:
# Approach 1: batch normalization AFTER the activation function.
# One BatchNormalization layer normalises the flattened input, and one more
# follows each hidden Dense layer, whose ReLU is applied inside the Dense layer.
LAYERS_BN = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.BatchNormalization(),  # BN on the 784 flattened input features
    tf.keras.layers.Dense(300, activation="relu"),
    tf.keras.layers.BatchNormalization(),  # BN on the post-ReLU output (300 features)
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.BatchNormalization(),  # BN on the post-ReLU output (100 features)
    tf.keras.layers.Dense(10, activation="softmax")
]

# Rebind `model` to the batch-normalised network.
model = tf.keras.models.Sequential(LAYERS_BN)

In [44]:
# Print the summary of the batch-normalised model; the BatchNormalization rows
# show the extra parameters (4 per feature) that these layers add.
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_5 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization_9 (Batc  (None, 784)              3136      
 hNormalization)                                                 
                                                                 
 dense_15 (Dense)            (None, 300)               235500    
                                                                 
 batch_normalization_10 (Bat  (None, 300)              1200      
 chNormalization)                                                
                                                                 
 dense_16 (Dense)            (None, 100)               30100     
                                                                 
 batch_normalization_11 (Bat  (None, 100)             

$784 = \text{number of input neurons}$

Each BatchNormalization layer keeps 4 parameters per feature: mean and variance (tracked as moving averages), $\gamma$ and $\beta$.

$784 \times 4 + 300 \times 4 + 100 \times 4 = 4736$

In [45]:
# BatchNormalization parameters: 4 per feature for the three BN layers
# (784, 300 and 100 features), i.e. 3136 + 1200 + 400.
sum((784 * 4, 300 * 4, 100 * 4))

4736

In [46]:
# Total parameters of the batch-normalised model:
# 266,610 (total reported by the baseline summary for the three Dense layers)
# + 4,736 (BatchNormalization parameters computed above).
266610 + 4736

271346

In [47]:
# Trainable parameters of the batch-normalised model.
#
# Of the four vectors held by each BatchNormalization layer, only gamma and
# beta are trainable; the mean and variance are calculated internally and are
# not trained. Hence only half of the 4,736 BN parameters are counted here.
# (`/` is true division, so the result is displayed as a float.)
266610 + 4736 / 2

268978.0

In [48]:
# Grab the first BatchNormalization layer: in LAYERS_BN index 0 is the Flatten
# layer and index 1 is the BN layer that directly follows it.
bn1 = model.layers[1]

In [49]:
# For every variable of the first BatchNormalization layer, print its name
# together with its `trainable` flag.
for var in bn1.variables:
    print(f"{var.name} {var.trainable}")

batch_normalization_9/gamma:0 True
batch_normalization_9/beta:0 True
batch_normalization_9/moving_mean:0 False
batch_normalization_9/moving_variance:0 False


In [50]:
# Compile the approach-1 (BN after activation) model with the same settings as
# the baseline: SGD with learning rate 1e-3, sparse categorical cross-entropy
# loss, accuracy metric.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [51]:
# Train the approach-1 model for 10 epochs with per-epoch validation
# (verbose=2 logs one line per epoch).
# NOTE: this rebinds `history`, replacing the value from the baseline run.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
    verbose=2,
)

Epoch 1/10
1719/1719 - 10s - loss: 0.8532 - accuracy: 0.7153 - val_loss: 0.5578 - val_accuracy: 0.8198 - 10s/epoch - 6ms/step
Epoch 2/10
1719/1719 - 8s - loss: 0.5734 - accuracy: 0.8032 - val_loss: 0.4764 - val_accuracy: 0.8434 - 8s/epoch - 5ms/step
Epoch 3/10
1719/1719 - 8s - loss: 0.5176 - accuracy: 0.8190 - val_loss: 0.4404 - val_accuracy: 0.8506 - 8s/epoch - 4ms/step
Epoch 4/10
1719/1719 - 8s - loss: 0.4792 - accuracy: 0.8331 - val_loss: 0.4186 - val_accuracy: 0.8566 - 8s/epoch - 5ms/step
Epoch 5/10
1719/1719 - 8s - loss: 0.4516 - accuracy: 0.8408 - val_loss: 0.4039 - val_accuracy: 0.8628 - 8s/epoch - 5ms/step
Epoch 6/10
1719/1719 - 8s - loss: 0.4361 - accuracy: 0.8462 - val_loss: 0.3927 - val_accuracy: 0.8646 - 8s/epoch - 4ms/step
Epoch 7/10
1719/1719 - 8s - loss: 0.4223 - accuracy: 0.8510 - val_loss: 0.3815 - val_accuracy: 0.8654 - 8s/epoch - 5ms/step
Epoch 8/10
1719/1719 - 9s - loss: 0.4098 - accuracy: 0.8557 - val_loss: 0.3751 - val_accuracy: 0.8688 - 9s/epoch - 5ms/step
Epoch 

# Batch Normalization approach 2: BN before the activation function

In [52]:
# Drop the name `model` (the approach-1 network); the next cell rebinds it to
# the approach-2 network.
# NOTE(review): `del` only unbinds the name; it presumably does not reset Keras'
# global state (see tf.keras.backend.clear_session()) - confirm if that matters.
del model

In [53]:
# Approach 2: batch normalization BEFORE the activation function.
# Each hidden Dense layer is linear (no activation) and is followed by a
# BatchNormalization layer and then a separate ReLU Activation layer.
# The hidden Dense layers are created with use_bias=False - presumably because
# the BatchNormalization layer that follows has its own learned offset (beta),
# which would make a Dense bias redundant.
LAYERS_BN_BIAS_FALSE = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.BatchNormalization(),  # BN on the 784 flattened input features
    tf.keras.layers.Dense(300, use_bias=False),
    tf.keras.layers.BatchNormalization(),  # BN on the pre-activation output
    tf.keras.layers.Activation("relu"),
    tf.keras.layers.Dense(100, use_bias=False),
    tf.keras.layers.BatchNormalization(),  # BN on the pre-activation output
    tf.keras.layers.Activation("relu"),
    tf.keras.layers.Dense(10, activation="softmax")
]

# Rebind `model` to the approach-2 network.
model = tf.keras.models.Sequential(LAYERS_BN_BIAS_FALSE)

In [54]:
# Compile the approach-2 (BN before activation) model with the same settings
# as the previous models: SGD with learning rate 1e-3, sparse categorical
# cross-entropy loss, accuracy metric.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [55]:
# Train the approach-2 model for 10 epochs with per-epoch validation
# (verbose=2 logs one line per epoch).
# NOTE: this rebinds `history`, replacing the value from the previous run.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
    verbose=2,
)

Epoch 1/10
1719/1719 - 9s - loss: 1.0727 - accuracy: 0.6686 - val_loss: 0.6811 - val_accuracy: 0.7888 - 9s/epoch - 5ms/step
Epoch 2/10
1719/1719 - 7s - loss: 0.6868 - accuracy: 0.7783 - val_loss: 0.5586 - val_accuracy: 0.8194 - 7s/epoch - 4ms/step
Epoch 3/10
1719/1719 - 8s - loss: 0.6024 - accuracy: 0.7998 - val_loss: 0.5016 - val_accuracy: 0.8340 - 8s/epoch - 4ms/step
Epoch 4/10
1719/1719 - 8s - loss: 0.5515 - accuracy: 0.8149 - val_loss: 0.4670 - val_accuracy: 0.8434 - 8s/epoch - 5ms/step
Epoch 5/10
1719/1719 - 7s - loss: 0.5175 - accuracy: 0.8241 - val_loss: 0.4431 - val_accuracy: 0.8502 - 7s/epoch - 4ms/step
Epoch 6/10
1719/1719 - 8s - loss: 0.4949 - accuracy: 0.8297 - val_loss: 0.4246 - val_accuracy: 0.8556 - 8s/epoch - 5ms/step
Epoch 7/10
1719/1719 - 8s - loss: 0.4772 - accuracy: 0.8368 - val_loss: 0.4105 - val_accuracy: 0.8586 - 8s/epoch - 5ms/step
Epoch 8/10
1719/1719 - 8s - loss: 0.4618 - accuracy: 0.8389 - val_loss: 0.4008 - val_accuracy: 0.8624 - 8s/epoch - 4ms/step
Epoch 9/