<a href="https://colab.research.google.com/github/dominiksakic/NETworkingMay/blob/main/11_residual_connections.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
# Example of residual connections to fight the vanishing gradients

from tensorflow import keras
from tensorflow.keras import layers

# Input -> Block -> Input + Block result -> Forward
inputs = keras.Input(shape=(32, 32, 3))
x = layers.Conv2D(32, 3, activation="relu")(inputs)
residual = x # Set aside
x = layers.Conv2D(64, 3, activation="relu", padding="same")(x)
residual = layers.Conv2D(64, 1)(residual) # Project the residual to the correct shape to add it
x = layers.add([x, residual])

In [19]:
# Case max-pooling
inputs = keras.Input(shape=(32, 32, 3))
x = layers.Conv2D(32, 3, activation="relu")(inputs)
residual = x

x = layers.Conv2D(64, 3, activation="relu", padding="same")(x)
x = layers.MaxPooling2D(2, padding="same")(x)

residual = layers.Conv2D(64, 1, strides=2)(residual) # You have to match the ouput size of the prev layer before adding
x = layers.add([x, residual])

In [32]:
# simple convnet implementation

# Util function that scales the residual block correctly
def residual_block(x, filters, pooling=False):
    residual = x
    x = layers.Conv2D(filters, 3, activation="relu", padding="same")(x)
    x = layers.Conv2D(filters, 3, activation="relu", padding="same")(x)

    if pooling:
        x = layers.MaxPooling2D(pool_size=2, strides=2, padding="same")(x)
        residual = layers.Conv2D(filters, 1, strides=2, padding="same")(residual)
    elif filters != residual.shape[-1]:
        residual = layers.Conv2D(filters, 1, padding="same")(residual)

    x = layers.add([x, residual])
    return x

inputs = keras.Input(shape=(28, 28, 1))
x = layers.Rescaling(1./255)(inputs)
x = residual_block(x, 8, pooling=True)
x = residual_block(x, 16, pooling=True)
x = residual_block(x, 32, pooling=False)
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10, activation="softmax")(x)
model = keras.Model(inputs, outputs)

model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()

In [33]:
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
%matplotlib inline

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize Training and Test data
train_images = train_images.reshape((60000, 28, 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28, 28))
test_images = test_images.astype("float32") / 255

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

In [35]:
history = model.fit(train_images, train_labels, epochs=20, batch_size=128)

Epoch 1/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8995 - loss: 0.3335
Epoch 2/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9133 - loss: 0.2899
Epoch 3/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9238 - loss: 0.2514
Epoch 4/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9263 - loss: 0.2492
Epoch 5/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9333 - loss: 0.2201
Epoch 6/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9376 - loss: 0.1992
Epoch 7/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9469 - loss: 0.1735
Epoch 8/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9490 - loss: 0.1671
Epoch 9/20
[1m469/469[0m [32m━━━━━━━━

- good priors for MNIST are
  - small images
  - simple patterns
  - low variation
  - single channels

- I dont need:
  - deep model
  - hight filter count
  - big kernel size

- Model didnt overfit due to the too big of filter size!

In [36]:
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f"test_acc: {test_acc}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9642 - loss: 0.1055
test_acc: 0.9706000089645386
