In [0]:
from tensorflow.keras import layers
from tensorflow.keras.models import Model
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
import numpy as np
tf.__version__

'2.2.0-rc4'

**Check ResNet paper https://arxiv.org/pdf/1512.03385.pdf**

In [0]:
class ConvNormRelU(layers.Layer):
  def __init__(self, num_filters, kernel_size, strides):
    super(ConvNormRelU, self).__init__()
    self.num_filters = num_filters
    self.kernel_size = kernel_size
    self.strides = strides
    self.conv_layer = layers.Conv2D(self.num_filters, self.kernel_size, strides=self.strides, padding='same')
    self.batch_norm_layer = layers.BatchNormalization()
  def call(self, tensor):
    x = self.conv_layer(tensor)
    x = self.batch_norm_layer(x)
    x = layers.ReLU()(x)
    return x

In [0]:
class Projection(layers.Layer):
  def __init__(self, num_filters, strides):
    super(Projection, self).__init__()
    self.num_filters = num_filters
    self.strides = strides
    self.conv_block_1 = ConvNormRelU(self.num_filters, kernel_size=1, strides=self.strides)
    self.conv_block_2 = ConvNormRelU(self.num_filters, kernel_size=3, strides=1)
    self.conv_layer = layers.Conv2D(4*self.num_filters, kernel_size=1, strides=1)
    self.shortcut_conv = layers.Conv2D(4*self.num_filters, kernel_size=1, strides=self.strides)
    self.batch_norm = layers.BatchNormalization()
  def call(self, tensor):
    x = self.conv_block_1(tensor)
    x = self.conv_block_2(x)
    x = self.conv_layer(x)
    shortcut = self.shortcut_conv(tensor)
    shortcut = self.batch_norm(shortcut)
    x = layers.Add()([x, shortcut])
    x = layers.ReLU()(x)
    return x

In [0]:
class Identity(layers.Layer):
  def __init__(self, num_filters):
    super(Identity, self).__init__()
    self.num_filters = num_filters
    self.conv_block_1 = ConvNormRelU(self.num_filters, 1, strides=1)
    self.conv_block_2 = ConvNormRelU(self.num_filters, 3, strides=1)
    self.conv_layer = layers.Conv2D(4*self.num_filters, 1, strides=1)
    self.batch_norm = layers.BatchNormalization()
  def call(self, tensor):
    x = self.conv_block_1(tensor)
    x = self.conv_block_2(x)
    x = self.conv_layer(x)
    x = self.batch_norm(x)
    x = layers.Add()([x, tensor])
    x = layers.ReLU()(x)
    return x

In [0]:
class ResidualBlock(layers.Layer):
  def __init__(self, num_filters, repitions, strides):
    super(ResidualBlock, self).__init__()
    self.num_filters = num_filters
    self.repititions = repitions-1
    self.strides = strides
  def call(self, tensor):
    x = Projection(self.num_filters, self.strides)(tensor)
    for i in range(self.repititions):
      x = Identity(self.num_filters)(x)
    return x

In [0]:
input_layer = layers.Input(shape=(28,28,1))
x = ConvNormRelU(num_filters=64, kernel_size=7, strides=2)(input_layer)
x = layers.MaxPool2D((2,2), strides=2, padding='same')(x)

x = ResidualBlock(64, 3, 1)(x)
x = ResidualBlock(128, 4, 2)(x)
x = ResidualBlock(256, 6, 2)(x)
x = ResidualBlock(512, 3, 2)(x)

x = layers.GlobalAvgPool2D()(x)

output_layer = layers.Dense(10, activation='softmax')(x)

model = Model(input_layer, output_layer)

In [0]:
model.summary()


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv_norm_rel_u (ConvNormRel (None, 14, 14, 64)        3456      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 7, 7, 64)          0         
_________________________________________________________________
residual_block (ResidualBloc (None, 7, 7, 256)         0         
_________________________________________________________________
residual_block_1 (ResidualBl (None, 4, 4, 512)         0         
_________________________________________________________________
residual_block_2 (ResidualBl (None, 2, 2, 1024)        0         
_________________________________________________________________
residual_block_3 (ResidualBl (None, 1, 1, 2048)        0     

In [0]:
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
dataset = tf.data.Dataset.from_tensor_slices((x_train.reshape(60000, 28, 28, 1).astype('float32') / 255, tf.keras.utils.to_categorical(y_train)))
dataset = dataset.shuffle(buffer_size=1024).batch(64)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [0]:
epochs = 50
optimizer = Adam()
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
for epoch, (x, y) in enumerate(dataset):
  with tf.GradientTape() as tape:
    logits = model(x)
    loss = loss_fn(y, logits)
  gradients = tape.gradient(loss, model.trainable_weights)
  optimizer.apply_gradients(zip(gradients, model.trainable_weights))
  if epoch % 10 == 0:
    print(f'Loss: {loss}')