<a href="https://colab.research.google.com/github/dominiksakic/NETworkingMay/blob/main/12_batch_normalization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- We will train the same model as in notebook 11:
  - MNIST CONVNET
  - Residual connection
  - Batch normalization (new!)
  

In [1]:
from keras.datasets import mnist
# Fetch MNIST and split each (images, labels) pair into its own names.
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


## Example of batch normalization: centering the data around zero
- normalized_data = (data - np.mean(data, axis=...)) / np.std(data, axis=...)


- x = ...
- x = layers.Conv2D(32, 3, use_bias = False)(x)
- x = layers.BatchNormalization()(x)

No learned bias vector is needed, because the BatchNormalization layer will take care of centering the layer's output on zero.
--> This makes the network a bit leaner when using BatchNormalization.

- Recommended: place the activation AFTER the batch normalization (try and test it with the below networks!)

- Example how not to do it:
x = layers.Conv2D(32, 3, activation="relu")(x)
x = layers.BatchNormalization()(x)

- Example how to do it:
x = layers.Conv2D(32, 3, use_bias=False)(x)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)

- idea is:
  - Batch normalization will center the inputs on zero, while the relu activation uses zero as a pivot for keeping or dropping activated channels.
  - Doing normalization before the activation maximizes the utilization of the relu.



In [3]:
from tensorflow import keras
from tensorflow.keras import layers

def residual_block(x, filters, pooling=False):
    """Run `x` through two Conv -> BatchNorm -> ReLU stages and add a shortcut.

    Args:
        x: Input feature-map tensor. Its last axis is treated as the channel
            axis (it is compared against `filters` via `shape[-1]`).
        filters: Number of output channels of both 3x3 convolutions and of
            the 1x1 shortcut projection, when a projection is needed.
        pooling: If True, downsample the main branch with 2x2 max pooling
            (stride 2) and the shortcut with a 1x1 convolution of stride 2.

    Returns:
        The element-wise sum of the main branch and the (possibly projected)
        shortcut.
    """
    residual = x

    # The convolutions carry no bias: the BatchNormalization layer that
    # follows each of them already re-centers the output.
    x = layers.Conv2D(filters, 3, use_bias=False, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Conv2D(filters, 3, use_bias=False, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    if pooling:
        # padding="same" makes the pooled size ceil(n / 2), which is what the
        # stride-2 1x1 convolution on the shortcut produces. With the default
        # "valid" padding an odd spatial size (e.g. 7 -> 3 vs. 4) would make
        # the two branches disagree and the addition below fail. For even
        # sizes both paddings give the same result.
        x = layers.MaxPooling2D(pool_size=2, strides=2, padding="same")(x)
        residual = layers.Conv2D(filters, 1, strides=2, use_bias=False)(residual)
        residual = layers.BatchNormalization()(residual)
    elif filters != residual.shape[-1]:
        # Channel counts differ: project the shortcut so the shapes match.
        residual = layers.Conv2D(filters, 1, padding="same", use_bias=False)(residual)
        residual = layers.BatchNormalization()(residual)

    x = layers.add([x, residual])
    return x


# Assemble the classifier: rescale pixels, normalize, three residual blocks,
# global average pooling, then a 10-way softmax.
inputs = keras.Input(shape=(28, 28, 1))
x = layers.Rescaling(1./255)(inputs)
x = layers.BatchNormalization()(x)
x = residual_block(x, 8, pooling=True)    # 28x28 -> 14x14
x = residual_block(x, 16, pooling=True)   # 14x14 -> 7x7
# No pooling in the last block: the GlobalAveragePooling2D layer that follows
# already collapses the spatial dimensions.
x = residual_block(x, 32, pooling=False)
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10, activation="softmax")(x)

# Build the model once (the original cell constructed it twice).
model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()

In [4]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy as the reported metric.
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Fit on the unscaled training images; the model's Rescaling layer maps the
# pixel values itself.
history = model.fit(x=train_images, y=train_labels, batch_size=128, epochs=20)

Epoch 1/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 15ms/step - accuracy: 0.7108 - loss: 0.9850
Epoch 2/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.9749 - loss: 0.1006
Epoch 3/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9835 - loss: 0.0594
Epoch 4/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9869 - loss: 0.0457
Epoch 5/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9903 - loss: 0.0369
Epoch 6/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9907 - loss: 0.0326
Epoch 7/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9914 - loss: 0.0275
Epoch 8/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9926 - loss: 0.0254
Epoch 9/20
[1m469/469[0m [32m━━━━━

In [5]:
# For comparison, notebook 11 reported a test accuracy of 0.9706000089645386.
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f"test_acc: {test_acc}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.9839 - loss: 0.0589
test_acc: 0.9861000180244446
