# Introduction

This notebook experiments with the TensorFlow (Keras) Subclassing API to define custom neural network layers.

In [2]:
# Import Standard Libraries
import os

# Suppress warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras

# Linear Layer

In [16]:
class Linear(keras.layers.Layer):
    """
    Define a custom Linear Layer in Keras computing `inputs @ weights_params + bias`

    The variables are created in `build()` from the last dimension of the
    input shape, so the input size does not have to be given to the constructor.

    Args:
        units: Integer number of output units (neurons). Defaults to 32
        **kwargs: Standard `keras.layers.Layer` keyword arguments
            (e.g. `name`, `dtype`, `trainable`) forwarded to the parent constructor

    Attributes:
        units: Integer number of output units
        weights_params: tf.Variable of shape (input_shape[-1], units) with the weights of each neuron
        bias: tf.Variable of shape (units,) with the bias of each neuron
    """

    def __init__(self,
                 units=32,
                 **kwargs):

        # Call the parent constructor, forwarding the standard Layer options
        # so the layer can be named / typed like any built-in layer
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):

        # Initialize the weights with random normal values
        self.weights_params = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer=tf.random_normal_initializer(),
            trainable=True,
        )

        # Initialize the bias with zeros
        self.bias = self.add_weight(
            shape=(self.units,),
            initializer=tf.zeros_initializer(),
            trainable=True
        )

    def call(self, inputs):

        # Affine transformation of the inputs
        return tf.matmul(inputs, self.weights_params) + self.bias

    def get_config(self):

        # Expose the constructor arguments so the layer can be re-created from its config
        config = super().get_config()
        config.update({"units": self.units})

        return config

In [18]:
# Smoke test: feed a (2, 2) batch of ones through a 4-unit Linear layer
x = tf.ones(shape=(2, 2))
linear_layer = Linear(units=4)
y = linear_layer(x)
print(y)

tf.Tensor(
[[-0.01872915 -0.06430404 -0.0005488  -0.01024083]
 [-0.01872915 -0.06430404 -0.0005488  -0.01024083]], shape=(2, 4), dtype=float32)


In [10]:
# NOTE: The layer already exposes a built-in "weights" attribute listing
# the variables created through `add_weight`
expected_weights = [linear_layer.weights_params, linear_layer.bias]
print(linear_layer.weights == expected_weights)

True


# Multi-layer Perceptron Block

This section creates a block layer that chains three custom `Linear` layers. The outer layer also tracks the weights of its inner layers.

In [19]:
class MLPBlock(keras.layers.Layer):
    """
    Multi-layer perceptron block chaining three custom Linear layers

    The first two layers have 32 units each and are followed by a ReLU
    activation; the last layer has a single unit and no activation.

    Attributes:
        linear_1: First hidden Linear layer (32 units)
        linear_2: Second hidden Linear layer (32 units)
        linear_3: Output Linear layer (1 unit)
    """

    def __init__(self):

        # Initialize the base Layer
        super().__init__()

        # Two hidden layers followed by the output layer
        self.linear_1 = Linear(32)
        self.linear_2 = Linear(32)
        self.linear_3 = Linear(1)

    def call(self, inputs):

        # First hidden layer + ReLU activation
        hidden_1 = tf.nn.relu(self.linear_1(inputs))

        # Second hidden layer + ReLU activation
        hidden_2 = tf.nn.relu(self.linear_2(hidden_1))

        # Output layer, no activation
        return self.linear_3(hidden_2)

In [22]:
# Test
mlp = MLPBlock()

# The weights are only created by the first call to `mlp`
y = mlp(tf.ones(shape=(3, 64)))

# Each of the 3 Linear layers contributes one weights_params and one bias variable
weight_count = len(mlp.weights)
trainable_weight_count = len(mlp.trainable_weights)

print("weights:", weight_count)
print("trainable weights:", trainable_weight_count)

weights: 6
trainable weights: 6


# Layer Loss

By calling `add_loss()` inside a layer's `call()` method, the layer can register its own loss term, computed from its inputs or outputs.

We are going to create an Activity Regularization Layer that encourages the neural network's activations or outputs to exhibit certain desirable properties. Activity regularization losses are additional terms added to the overall loss function during training to promote specific characteristics in the activations of the network.

In [24]:
class ActivityRegularizationLayer(keras.layers.Layer):
    """
    Pass-through layer that registers an activity regularization loss

    On every call the layer adds `dropout_rate * sum(inputs)` as a layer
    loss and returns the inputs unchanged.

    Args:
        dropout_rate: Scaling factor applied to the sum of the inputs.
            Despite its name it is used as a regularization coefficient,
            not as a dropout probability. Defaults to 1e-2

    Attributes:
        dropout_rate: Scaling factor of the activity loss
    """

    def __init__(self,
                 dropout_rate=1e-2):

        # Initialize the base Layer
        super().__init__()

        self.dropout_rate = dropout_rate

    def call(self, inputs):

        # Register the scaled sum of the activations as a layer loss
        activity_penalty = self.dropout_rate * tf.reduce_sum(inputs)
        self.add_loss(activity_penalty)

        # The activations themselves are left untouched
        return inputs

In [28]:
# Test
class OuterLayer(keras.layers.Layer):
    """
    Wrapper layer holding a nested ActivityRegularizationLayer

    Attributes:
        activity_reg: Inner ActivityRegularizationLayer built with a factor of 1e-2
    """

    def __init__(self):

        # Initialize the base Layer
        super().__init__()

        self.activity_reg = ActivityRegularizationLayer(dropout_rate=1e-2)

    def call(self, inputs):

        # Delegate to the inner layer, which registers the activity loss
        regularized = self.activity_reg(inputs)

        return regularized


layer = OuterLayer()
print(len(layer.losses) == 0)  # No losses yet since the layer has never been called

# NOTE: the shape must be passed as a tuple; `tf.zeros(1, 1)` would pass the
# second `1` as the dtype argument instead of as part of the shape
_ = layer(tf.zeros((1, 1)))
print(len(layer.losses) == 1)  # We created one loss value

# `layer.losses` gets reset at the start of each __call__
_ = layer(tf.zeros((1, 1)))
print(len(layer.losses) == 1)  # This is the loss created during the call above

True
True
True


In [30]:
# It's also possible to use Subclassing to add a Layer Loss in a Dense Layer
class OuterLayerWithKernelRegularizer(keras.layers.Layer):
    """
    Wrapper layer around a Dense layer with an L2 kernel regularizer

    Attributes:
        dense: Dense layer with 32 units and an L2 kernel regularizer of factor 1e-3
    """

    def __init__(self):

        # Initialize the base Layer
        super().__init__()

        # The regularizer penalty is reported through `losses`
        l2_penalty = tf.keras.regularizers.l2(1e-3)
        self.dense = keras.layers.Dense(32, kernel_regularizer=l2_penalty)

    def call(self, inputs):

        outputs = self.dense(inputs)

        return outputs

layer = OuterLayerWithKernelRegularizer()

# Call the layer once on a (1, 1) batch of zeros
sample_input = tf.zeros(shape=(1, 1))
_ = layer(sample_input)

# Show the losses collected by the layer after the call
print(layer.losses)

[<tf.Tensor: shape=(), dtype=float32, numpy=0.0016005059>]


# Layer Metric

The same logic used for the layer loss applies here: `add_metric()` lets a layer track a metric of its own during training.

In [31]:
class LogisticEndpoint(keras.layers.Layer):
    """
    Endpoint layer that registers a binary cross-entropy loss and a binary
    accuracy metric, and returns the predicted probabilities

    Args:
        name: Optional name of the layer, forwarded to the parent constructor

    Attributes:
        loss_function: BinaryCrossentropy loss configured to receive logits
        metric_function: BinaryAccuracy metric
    """

    def __init__(self, name=None):

        # Call the parent constructor
        super().__init__(name=name)

        # Define loss and metric functions
        self.loss_function = keras.losses.BinaryCrossentropy(from_logits=True)
        self.metric_function = keras.metrics.BinaryAccuracy()

    def call(self, targets, logits, sample_weights=None):
        """
        Register the layer loss and metric, then return the predictions

        Args:
            targets: Ground-truth binary labels
            logits: Raw (pre-sigmoid) model outputs
            sample_weights: Optional weights passed to both the loss and the metric

        Returns:
            Sigmoid probabilities computed from `logits`
        """

        # Compute and add the layer loss (the loss expects raw logits)
        loss = self.loss_function(targets, logits, sample_weights)
        self.add_loss(loss)

        # Convert logits to probabilities: BinaryAccuracy thresholds its
        # predictions at 0.5, which is only meaningful on probabilities
        probabilities = tf.nn.sigmoid(logits)

        # Compute and add the layer metric
        metric = self.metric_function(targets, probabilities, sample_weights)
        self.add_metric(metric, name="accuracy")

        # Sigmoid (not softmax) matches the binary cross-entropy loss; softmax
        # would force the outputs to sum to 1 across the last axis
        return probabilities

In [33]:
# Test
layer = LogisticEndpoint()

# Targets and logits are both (2, 2) tensors of ones
targets = tf.ones(shape=(2, 2))
logits = tf.ones(shape=(2, 2))
y = layer(targets, logits)

print("layer.metrics:", layer.metrics)

# Read the value accumulated by the first tracked metric
accuracy_metric = layer.metrics[0]
print("current accuracy value:", float(accuracy_metric.result()))

layer.metrics: [<keras.src.metrics.accuracy_metrics.BinaryAccuracy object at 0x1647c4e20>]
current accuracy value: 1.0
