Custom Built Residual Neural Network

In [28]:
# Setup
import tensorflow as tf
# Report the TensorFlow version in use so the run can be reproduced.
print(f"TensorFlow version {tf.__version__}")

from tensorflow.keras.layers import Dense, Flatten, Conv2D, Input, BatchNormalization, Activation, Dropout, AveragePooling2D
from tensorflow.keras import Model, losses, models, layers, datasets, optimizers
import matplotlib.pyplot as plt


TensorFlow version 2.12.0


In [41]:
# NOTE(review): this handle is named `fashion_mnist`, but it refers to
# `datasets.mnist` -- confirm which dataset is actually intended.
fashion_mnist = datasets.mnist

(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Divide the images by 255, append a trailing channel axis and cast to float32.
x_train = (x_train / 255)[..., tf.newaxis].astype("float32")
x_test = (x_test / 255)[..., tf.newaxis].astype("float32")

# tf.data pipelines: the training set is shuffled (buffer of 10000) and both
# sets are served in batches of 512.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_ds = train_ds.shuffle(10000).batch(512)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(512)

In [29]:
def residual_block(X, num_filters: int, stride: int = 1, kernel_size: int = 3,
                   activation: str = "relu", bn: bool = True, conv_first: bool = True,
                   dropout_rate: float = 0.2, l2_weight: float = 1e-4):
  """
  Build one Conv2D / BatchNormalization / Activation group on top of X.

  Arguments
  X: tensor coming from the previous layer
  num_filters: number of filters of the Conv2D layer
  stride: stride of the Conv2D layer (same value in both spatial dimensions)
  kernel_size: side of the square Conv2D kernel, 3 by default
  activation: name of the activation, "relu" by default; pass None to skip
    the activation layer
  bn: whether to add a batch normalization layer
  conv_first: if True the order is conv -> bn -> activation (-> dropout),
    otherwise bn -> activation -> conv
  dropout_rate: rate of the Dropout layer that follows the activation when
    conv_first is True; 0 (or None) disables it. No dropout is added when
    conv_first is False or when activation is None.
  l2_weight: L2 regularization factor applied to the convolution kernel

  Returns
  The output tensor of the last layer added.
  """

  # "same" padding: the spatial size only changes through the stride.
  conv_layer = Conv2D(num_filters,
                      kernel_size=kernel_size,
                      strides=stride,
                      padding="same",
                      kernel_regularizer=tf.keras.regularizers.L2(l2_weight))

  if conv_first:
    X = conv_layer(X)
    if bn:
      X = BatchNormalization()(X)
    if activation is not None:
      X = Activation(activation)(X)
      # Dropout is only attached behind an activation in the conv-first order.
      if dropout_rate:
        X = Dropout(dropout_rate)(X)
    return X

  # Pre-activation order: normalize and activate before convolving.
  if bn:
    X = BatchNormalization()(X)
  if activation is not None:
    X = Activation(activation)(X)
  return conv_layer(X)



In [30]:
# depth should be 9n+2 (e.g. 56 or 110)
depth = 56
if (depth - 2) % 9 != 0:
    # Fail loudly instead of silently truncating the number of blocks.
    raise ValueError("depth should be 9n+2 (e.g. 56 or 110)")

# Model definition
num_filters_in = 32
# Number of residual units per stage.
num_res_block = (depth - 2) // 9

inputs = Input(shape=(28, 28, 1))

# ResNet V2 performs Conv2D on X before splitting into two paths
X = residual_block(X=inputs, num_filters=num_filters_in, conv_first=True)

# Building the stack of residual units: 3 stages of num_res_block units each
for stage in range(3):
    for unit_res_block in range(num_res_block):
        activation = 'relu'
        bn = True
        stride = 1
        if stage == 0:
            num_filters_out = num_filters_in * 4
            if unit_res_block == 0:
                # First unit of the first stage: X was just produced by the
                # conv-first block above, so skip the leading bn/activation.
                activation = None
                bn = False
        else:
            num_filters_out = num_filters_in * 2
            if unit_res_block == 0:
                # First unit of a later stage: downsample with a stride of 2.
                stride = 2

        # bottleneck residual unit: 1x1 conv -> 3x3 conv -> 1x1 conv
        y = residual_block(X,
                           num_filters=num_filters_in,
                           kernel_size=1,
                           stride=stride,
                           activation=activation,
                           bn=bn,
                           conv_first=False)
        y = residual_block(y,
                           num_filters=num_filters_in,
                           conv_first=False)
        y = residual_block(y,
                           num_filters=num_filters_out,
                           kernel_size=1,
                           conv_first=False)
        if unit_res_block == 0:
            # linear projection residual shortcut connection to match
            # changed dims (a bare 1x1 convolution: no bn, no activation)
            X = residual_block(X=X,
                               num_filters=num_filters_out,
                               kernel_size=1,
                               stride=stride,
                               activation=None,
                               bn=False)
        X = tf.keras.layers.add([X, y])
    # The next stage takes this stage's output width as its input width.
    num_filters_in = num_filters_out

In [35]:
# Classifier head on top of the residual stack.
X = BatchNormalization()(X)
X = Activation('relu')(X)
# Average over the whole spatial extent of the feature map. The previous
# pool_size=1 used a 1x1 window, which returns its input unchanged, so the
# full spatial map was flattened into the Dense layer below.
X = AveragePooling2D(pool_size=(X.shape[1], X.shape[2]))(X)
y = Flatten()(X)
y = Dense(512, activation='relu')(y)
y = BatchNormalization()(y)
y = Dropout(0.5)(y)

# 10-way softmax output, one unit per class.
outputs = Dense(10,
                activation='softmax')(y)

# Instantiate model.
model = Model(inputs=inputs, outputs=outputs)

In [None]:
# Labels are integer class ids, hence the sparse categorical cross-entropy.
# learning_rate was 10, which is far too large for Adam and makes the loss
# diverge; 1e-3 is the Keras default for this optimizer.
model.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
              loss=losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])
history = model.fit(x_train, y_train, batch_size=512, epochs=5)

Epoch 1/5