# Model Subclassing with Keras

## 1. Imports and Configuration

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Configure GPU memory growth to be dynamic instead of allocating all memory at once.
# Iterating over the device list (rather than indexing [0]) keeps this cell
# runnable on CPU-only machines, where the list is empty, and applies the same
# setting to every visible GPU (TensorFlow expects memory growth to match
# across GPUs).
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

## 2. Data Loading and Preprocessing

In [2]:
from tensorflow.keras.datasets import mnist

# Fetch the MNIST train/test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Give every image an explicit trailing channel axis -> (N, 28, 28, 1),
# cast to float32 and divide by 255.0.
x_train, x_test = (
    images.reshape(-1, 28, 28, 1).astype("float32") / 255.0
    for images in (x_train, x_test)
)

## 3. Model Definition

So far we have noticed a recurring structure in our models:
`CNN -> BatchNorm -> ReLU`.
If we had to write it out ten times by hand, that would be a lot of repeated code, so we wrap the pattern in a reusable layer instead.

- `layers.Layer` will keep track of everything under the hood for doing backpropagation and gradient descent, etc. Layers are the basic building blocks of neural networks in Keras. A layer encapsulates both a state (the layer's "weights") and a transformation from inputs to outputs (a **call**, the layer's forward pass).

- Keras subclassing is very similar to creating **PyTorch** models: we define the layers in the `__init__` method and the forward pass in the `call` method.

- In the `__init__` we use `super` to call the `__init__` of the parent class (in this case `layers.Layer`).  

In [3]:
class CNNBlock(layers.Layer):
    """Conv2D -> BatchNormalization -> ReLU building block.

    :param out_channels: number of filters of the convolution
    :param kernel_size: size of the convolution kernel (default 3)
    :param kwargs: standard ``layers.Layer`` keyword arguments (e.g. ``name``,
    ``dtype``), forwarded unchanged to the base class
    """

    def __init__(self, out_channels, kernel_size=3, **kwargs):
        super().__init__(**kwargs)
        # Kept so that get_config() can describe how to rebuild this layer
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        # Define layers here
        self.conv = layers.Conv2D(out_channels, kernel_size, padding="same")
        self.bn = layers.BatchNormalization()

    def call(self, input_tensor, training=False):
        """
        call() method is used to define the computation that should be performed on the
        input given to the layer object.

        :param input_tensor: input tensor
        :param training: argument is used to differentiate between training and inference
        because some layers have different behavior during training and inference
        (e.g. Dropout, BatchNorm). When we do model.fit() that will set training=True,
        and when we do model.evaluate() or model.predict() that will set training=False
        :return: the input after convolution, batch normalization and ReLU
        """
        x = self.conv(input_tensor)
        # You can also print the shape here, etc. for debugging
        # print(x.shape)
        x = self.bn(x, training=training)
        x = tf.nn.relu(x)
        return x

    def get_config(self):
        """Return the constructor arguments so Keras can serialize and rebuild the layer."""
        config = super().get_config()
        config.update(
            {"out_channels": self.out_channels, "kernel_size": self.kernel_size}
        )
        return config

# model = keras.Sequential(
#     [
#         CNNBlock(32), 
#         CNNBlock(64), 
#         CNNBlock(128), 
#         layers.Flatten(), 
#         layers.Dense(10),
#     ]
# )

### More Complex Models

- You should use `keras.Model` for your final model. `keras.Model` is a subclass of `keras.layers.Layer`, so it has all the functionality of `keras.layers.Layer` and more.

- For the final model we use `keras.Model` instead of `keras.layers.Layer` because we want the `fit` method of `keras.Model`, which is not available on `keras.layers.Layer`. You can also use `model.layers` to get the list of layers in the model, and `model.summary()` to get a summary of the model. Finally, you can serialize and deserialize the model with `model.save()` and `keras.models.load_model()`, for example saving your model as an `h5` file.

In [4]:
class ResBlock(layers.Layer):
    """Three stacked CNNBlocks with a skip connection, followed by max pooling.

    :param channels: sequence whose first three entries give the number of
    output channels of the first, second and third CNNBlock respectively
    :param kwargs: standard ``layers.Layer`` keyword arguments (e.g. ``name``,
    ``dtype``), forwarded unchanged to the base class
    """

    def __init__(self, channels, **kwargs):
        super().__init__(**kwargs)
        self.channels = channels
        self.cnn1 = CNNBlock(channels[0], 3)
        self.cnn2 = CNNBlock(channels[1], 3)
        self.cnn3 = CNNBlock(channels[2], 3)
        self.pooling = layers.MaxPooling2D()

        # The convolutions use padding="same", so the height and the width of the
        # feature maps do not change inside the block. The number of channels does:
        # after cnn2 the tensor has channels[1] channels, which generally differs
        # from the block input. A 1x1 convolution therefore projects the input to
        # channels[1] channels so it can be added to the output of cnn2.
        self.identity_mapping = layers.Conv2D(channels[1], kernel_size=1, padding="same")

    def call(self, input_tensor, training=False):
        """
        Forward pass of the residual block.

        :param input_tensor: input tensor
        :param training: forwarded to the CNNBlocks so their BatchNorm layers
        behave correctly during training and inference
        :return: pooled output of the third CNNBlock
        """
        x = self.cnn1(input_tensor, training=training)
        x = self.cnn2(x, training=training)
        # Skip connection: add the channel-matched input before the last CNNBlock
        x = self.cnn3(x + self.identity_mapping(input_tensor), training=training)
        x = self.pooling(x)
        return x

    def get_config(self):
        """Return the constructor arguments so Keras can serialize and rebuild the layer."""
        config = super().get_config()
        # list(...) yields a plain list even if the attribute was wrapped or a tuple was given
        config.update({"channels": list(self.channels)})
        return config


class ResNet_Like(keras.Model):
    """Small ResNet-style classifier: three ResBlocks, global average pooling
    and a Dense layer producing ``num_classes`` raw scores (logits).

    :param num_classes: number of units of the final Dense layer (default 10)
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.block1 = ResBlock([32, 32, 64])
        self.block2 = ResBlock([128, 128, 256])
        self.block3 = ResBlock([128, 256, 512])
        # Average pool the height and the width
        # you can also do layers.Flatten()
        self.pool = layers.GlobalAveragePooling2D()
        self.classifier = layers.Dense(num_classes)

    def call(self, input_tensor, training=False):
        """
        Forward pass of the network.

        :param input_tensor: input tensor
        :param training: forwarded to the ResBlocks (and through them to BatchNorm)
        :return: output of the final Dense layer (no activation applied)
        """
        x = self.block1(input_tensor, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)
        # Global average pooling has no training-dependent behavior,
        # so no `training` argument is passed to it.
        x = self.pool(x)
        x = self.classifier(x)
        return x

    # This is used to print the summary of the model
    def model(self, input_shape=(28, 28, 1)):
        """
        Build a functional ``keras.Model`` that exposes the internal layers.

        :param input_shape: shape of one input sample, without the batch axis
        :return: functional model from a fresh ``keras.Input`` to the classifier output
        """
        x = keras.Input(shape=input_shape)
        # Trace with training=None rather than relying on the default
        # training=False: an explicit non-None value passed to a layer while
        # building a functional graph is recorded as a constant, which would keep
        # BatchNorm in inference mode even inside model.fit(). With None, Keras
        # decides the mode at run time (fit -> True, evaluate/predict -> False).
        return keras.Model(inputs=[x], outputs=self.call(x, training=None))


# Wrap the subclassed network in a functional model so that its individual
# layers and their symbolic inputs/outputs can be addressed.
full_model = ResNet_Like().model()

# Reuse the original input and tap the output of the layer at index 2
# (presumably the second ResBlock -- confirm with full_model.summary()).
base_input = full_model.layers[0].input
base_output = full_model.layers[2].output

# Attach a new head on top of the tapped features: flatten, then 10 raw scores.
head = layers.Dense(10)
flattened = layers.Flatten()(base_output)
output = head(flattened)
model = keras.Model(base_input, output)

## 4. Compile Model

In [5]:
# The final Dense layer has no activation, i.e. it emits raw scores, so the
# loss is told to expect logits.
optimizer = keras.optimizers.Adam()
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer=optimizer, loss=loss_fn, metrics=["accuracy"])

## 5. Model Training and Evaluation

In [6]:
# Settings shared by the training and the evaluation run.
run_options = dict(batch_size=64, verbose=2)

print("Training model...")
model.fit(x_train, y_train, epochs=1, **run_options)

print("Evaluating model...")
# Kept as the last expression so the [loss, accuracy] list is shown as cell output.
model.evaluate(x_test, y_test, **run_options)

Training model...
938/938 - 43s - loss: 0.1034 - accuracy: 0.9687
Evaluating model...
157/157 - 7s - loss: 0.0328 - accuracy: 0.9896


[0.03283665329217911, 0.9896000027656555]