# Custom Layers

## 1. Imports and Configuration

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Let TensorFlow grow GPU memory on demand instead of reserving all of it at
# start-up. Iterating over the detected devices (rather than indexing [0])
# keeps this cell working on CPU-only machines, where the list is empty, and
# applies the same setting to every GPU on multi-GPU machines.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

2024-01-07 08:21:37.664273: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2024-01-07 08:21:37.665271: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2024-01-07 08:21:37.672039: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-01-07 08:21:37.672247: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1660 Ti computeCapability: 7.5
coreClock: 1.59GHz coreCount: 24 deviceMemorySize: 5.80GiB deviceMemoryBandwidth: 268.26GiB/s
2024-01-07 08:21:37.672264: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2024-01-07 08:21:37.673926: I tensorflow/stream_executor/platform/

## 2. Data Loading and Preprocessing

In [2]:
from tensorflow.keras.datasets import mnist

# Fetch the MNIST train/test splits as (images, labels) pairs.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image into a single 28 * 28 vector, cast to float32 and
# divide by 255.0 to rescale the pixel values. Labels are left untouched.
x_train, x_test = (
    images.reshape(-1, 28 * 28).astype("float32") / 255.0
    for images in (x_train, x_test)
)

## 3. Model Definition

Most of the time it is fine to use the layers provided by Keras, but sometimes you need to implement your own layer and to understand exactly what is happening under the hood. This can be done by subclassing the `Layer` class and implementing some of its methods.

In [9]:
"""
Model WITHOUT custom layers, uses the built-in Keras Dense and ReLU layers.
"""
class MyModel(keras.Model):
    """Baseline two-layer network built only from stock Keras components.

    This version relies on `layers.Dense` and `tf.nn.relu`; it is the
    reference that the hand-written layers later in the notebook replace.

    Args:
        num_classes: Number of output units produced by the final layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Built-in Dense layers: a 64-unit hidden layer followed by the
        # output layer. No input size is given; Keras infers it on first call.
        self.dense1 = layers.Dense(64)
        self.dense2 = layers.Dense(num_classes)

    def call(self, input_tensor):
        """Run the forward pass and return the output of the final layer."""
        hidden = self.dense1(input_tensor)
        # Built-in ReLU op; no activation is applied after the final layer.
        hidden = tf.nn.relu(hidden)
        return self.dense2(hidden)


model = MyModel()

- Special methods:
    - `__init__()`: the constructor of the class, it takes the following arguments:
        - `name`: the name of the layer, **important to save and load models.**
        - `trainable`: whether the layer should be trained or not
        - `dtype`: the data type of the layer
        - `dynamic`: whether the layer is dynamic or not
        - `**kwargs`: other arguments
    - `build()`: will build the layer, it takes the following arguments:
        - `input_shape`: the shape of the input to the layer
    - `call()`: will call the layer, it takes the following arguments:
        - `inputs`: the input to the layer
        - `training`: whether the layer is in training mode or not
    - `compute_output_shape()`: will compute the output shape of the layer, it takes the following arguments:
        - `input_shape`: the shape of the input to the layer
    - `get_config()`: will get the configuration of the layer, it takes no arguments
    - `from_config()`: will create a layer from the configuration of the layer, it takes the following arguments:
        - `config`: the configuration of the layer
    - `get_weights()`: will get the weights of the layer, it takes no arguments
    - `set_weights()`: will set the weights of the layer, it takes the following arguments:
        - `weights`: the weights of the layer
    - `get_updates()`: will get the updates of the layer, it takes the following arguments:
        - `inputs`: the input to the layer
    - `add_update()`: will add an update to the layer, it takes the following arguments:
        - `updates`: the update to be added to the layer
        - `inputs`: the input to the layer
    - `add_loss()`: will add a loss to the layer, it takes the following arguments:
        - `losses`: the loss to be added to the layer
        - `inputs`: the input to the layer
    - `losses`: the losses of the layer
    - `built`: whether the layer is built or not
    - `supports_masking`: whether the layer supports masking or not
    - `compute_mask()`: will compute the mask of the layer, it takes the following arguments:
        - `inputs`: the input to the layer


- `self.add_weight()`: will add a weight variable to the layer. It is a method of the `Layer` class. It takes the following arguments:
    - `name`: the name of the weight variable, **important to save and load models.**
    - `shape`: the shape of the weight variable
    - `dtype`: the data type of the weight variable
    - `initializer`: the initializer to be used to initialize the weight variable, e.g. random_normal, random_uniform, etc.
    - `regularizer`: the regularizer to be used to regularize the weight variable
    - `trainable`: whether the weight variable should be trained or not
    - `constraint`: the constraint to be used to constrain the weight variable

- There are other ways to add weights as well (for example, you can create a `tf.Variable` directly), but `self.add_weight()` is the simplest way to do it.

In [10]:
"""
Model WITH custom layers, uses the custom ReLU and Dense layers.
"""
class Dense(layers.Layer):
    """Hand-written fully connected layer computing `inputs @ w + b`.

    The weights are allocated in the constructor, which is why the size of
    the incoming features has to be supplied explicitly.

    Args:
        units: Number of output features.
        input_dim: Number of input features (rows of the weight matrix).
    """

    def __init__(self, units, input_dim):
        super().__init__()

        # Weight matrix of shape (input_dim, units), drawn from a random
        # normal initializer.
        self.w = self.add_weight(
            name="w", shape=(input_dim, units), initializer="random_normal", trainable=True
        )

        # Bias (offset) vector with one entry per output unit, starting at zero.
        self.b = self.add_weight(
            name="b",
            shape=(units,),
            initializer="zeros",
            trainable=True,
        )

    def call(self, inputs):
        """Apply the affine transform to `inputs`."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b


# Dense with build method to avoid specifying input_dim
class Dense2(layers.Layer):
    """Fully connected layer that defers weight creation to `build()`.

    Because the weights are created from the `input_shape` passed to
    `build()`, the constructor only needs the number of output units.

    Args:
        units: Number of output features.
    """

    def __init__(self, units):
        # Zero-argument super() binds to Dense2 itself. The previous
        # `super(Dense, self)` raised TypeError on instantiation because
        # Dense2 is not a subclass of Dense.
        super().__init__()
        self.units = units

    def build(self, input_shape):
        """Create the weights; the last axis of `input_shape` is the input size."""
        self.w = self.add_weight(
            name="w",
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        # Bias (offset) vector, one entry per output unit.
        self.b = self.add_weight(
            name="b", shape=(self.units,), initializer="random_normal", trainable=True,
        )

    def call(self, inputs):
        """Apply the affine transform `inputs @ w + b`."""
        return tf.matmul(inputs, self.w) + self.b


class MyReLU(layers.Layer):
    """Hand-written ReLU activation: element-wise `max(x, 0)`."""

    def __init__(self):
        super().__init__()

    def call(self, x):
        return tf.math.maximum(x, 0)


class MyModel(keras.Model):
    """Two-layer network assembled from the custom `Dense2` and `MyReLU` layers.

    Args:
        num_classes: Number of output units produced by the final layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Instantiate the layers here.
        #
        # With the fixed-size custom layer the input sizes would have to be
        # spelled out: Dense(64, input_dim=784) and
        # Dense(num_classes, input_dim=64). Dense2 creates its weights in
        # build() from the incoming shape, so, just like layers.Dense, no
        # input_dim is needed.
        self.dense1 = Dense2(64)
        self.dense2 = Dense2(num_classes)
        self.relu = MyReLU()

    def call(self, input_tensor):
        """Run the forward pass and return the output of the final layer."""
        # Custom ReLU layer in place of tf.nn.relu.
        x = self.relu(self.dense1(input_tensor))
        return self.dense2(x)


# Re-bind `model` to the custom-layer version. Without this, the compile and
# fit cells keep using the instance created from the earlier built-in-layer
# MyModel, and the custom layers defined in this cell are never exercised.
model = MyModel()

## 4. Compile Model

In [11]:
# The loss is told to expect raw logits (from_logits=True); labels are integer
# class ids, hence the sparse variant of categorical cross-entropy.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

## 5. Model Training and Evaluation

In [12]:
# Train for two epochs on the training split; verbose=2 logs one line per epoch.
model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=2,
    verbose=2,
)
# Kept as the cell's last expression so the returned [loss, accuracy] pair
# for the test split is displayed.
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/2
1875/1875 - 1s - loss: 0.3007 - accuracy: 0.9160
Epoch 2/2
1875/1875 - 1s - loss: 0.1474 - accuracy: 0.9571
313/313 - 0s - loss: 0.1223 - accuracy: 0.9640


[0.12228093296289444, 0.9639999866485596]