# Model subclassing and custom training loops

> In this section, we'll look at ways to build fully customizable deep learning models and layers, as well as custom training loops. This is a summary of the lecture "Customizing your model with Tensorflow 2" from Coursera.

- toc: true 
- badges: true
- comments: true
- author: Chanseok Kang
- categories: [Python, Coursera, Deep_Learning, Tensorflow]
- image: 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Use a large default figure size (16 x 10 inches) for any plots in the notebook.
plt.rcParams['figure.figsize'] = (16, 10)
# Record the TensorFlow version the notebook was executed with.
print('Tensorflow: v' + tf.__version__)

Tensorflow: v2.3.1


## Model subclassing

### Example

In [3]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense

class MyModel(Model):
    """Minimal subclassed model consisting of a single 16-unit dense layer."""

    def __init__(self, **kwargs):
        # Extra keyword arguments (e.g. `name`) are forwarded to the base class.
        super().__init__(**kwargs)
        self.dense = Dense(units=16)

    def call(self, inputs):
        """Run `inputs` through the dense layer and return its output."""
        outputs = self.dense(inputs)
        return outputs


my_model = MyModel(name='my_model')

In [4]:
class MyModel(Model):
    """Two-layer classifier: a sigmoid hidden layer followed by a softmax output.

    Args:
        num_classes: Number of units in the softmax output layer.
        **kwargs: Forwarded to the Keras `Model` constructor (e.g. `name`).
    """

    def __init__(self, num_classes, **kwargs):
        super().__init__(**kwargs)
        self.dense1 = Dense(units=16, activation='sigmoid')
        self.dense2 = Dense(units=num_classes, activation='softmax')

    def call(self, inputs):
        """Return the softmax output computed from `inputs`."""
        hidden = self.dense1(inputs)
        probabilities = self.dense2(hidden)
        return probabilities


my_model = MyModel(num_classes=10, name='my_model')

In [5]:
from tensorflow.keras.layers import Dropout

class MyModel(Model):
    """Classifier with dropout between the hidden and the output layer.

    Args:
        num_classes: Number of units in the softmax output layer.
        **kwargs: Forwarded to the Keras `Model` constructor (e.g. `name`).
    """

    def __init__(self, num_classes, **kwargs):
        super().__init__(**kwargs)
        self.dense1 = Dense(units=16, activation='sigmoid')
        self.dropout = Dropout(rate=0.5)
        self.dense2 = Dense(units=num_classes, activation='softmax')

    def call(self, inputs, training=False):
        """Compute the output; `training` is passed on to the dropout layer."""
        hidden = self.dense1(inputs)
        regularized = self.dropout(hidden, training=training)
        return self.dense2(regularized)


my_model = MyModel(num_classes=12, name='my_model')

### Coding Tutorial

#### Create a simple model using the model subclassing API

In [10]:
from tensorflow.keras.layers import Softmax, concatenate

# Model with two parallel branches that are concatenated before the softmax
class MyModel(Model):
    """Branching model: a 64-unit ReLU branch and a 10 -> 5 linear branch.

    Both branches read the same input; their outputs are concatenated and
    passed through a softmax.
    """

    def __init__(self):
        super().__init__()
        self.dense_1 = Dense(units=64, activation='relu')
        self.dense_2 = Dense(units=10)
        self.dense_3 = Dense(units=5)
        self.softmax = Softmax()

    def call(self, inputs):
        """Return the softmax over the concatenated branch outputs."""
        wide_branch = self.dense_1(inputs)
        narrow_branch = self.dense_3(self.dense_2(inputs))
        merged = concatenate([wide_branch, narrow_branch])
        return self.softmax(merged)

In [11]:
# Instantiate the model and push one random (1, 10) batch through it so the
# layers create their weights; then print the summary.
model = MyModel()
model(tf.random.uniform(shape=[1, 10]))
model.summary()

Model: "my_model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             multiple                  704       
_________________________________________________________________
dense_11 (Dense)             multiple                  110       
_________________________________________________________________
dense_12 (Dense)             multiple                  55        
_________________________________________________________________
softmax (Softmax)            multiple                  0         
Total params: 869
Trainable params: 869
Non-trainable params: 0
_________________________________________________________________


## Custom layers

### Example

In [12]:
from tensorflow.keras.layers import Layer

class LinearMap(Layer):
    """Bias-free linear map whose weight matrix is a hand-made `tf.Variable`.

    Args:
        input_dim: Size of the last dimension of the inputs.
        units: Size of the last dimension of the outputs.
    """

    def __init__(self, input_dim, units):
        super().__init__()
        initializer = tf.random_normal_initializer()
        initial_weights = initializer(shape=(input_dim, units))
        self.w = tf.Variable(initial_value=initial_weights)

    def call(self, inputs):
        """Return the matrix product of `inputs` and the weight matrix."""
        return tf.linalg.matmul(inputs, self.w)

In [13]:
# Map a single all-ones row of length 3 to 2 outputs.
linear_layer = LinearMap(input_dim=3, units=2)
inputs = tf.ones(shape=(1, 3))
linear_layer(inputs)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[-0.04122614,  0.01486031]], dtype=float32)>

In [14]:
# Inspect the variables tracked by the layer (here only the weight matrix `w`).
linear_layer.weights

[<tf.Variable 'Variable:0' shape=(3, 2) dtype=float32, numpy=
 array([[-0.0536893 , -0.00283721],
        [ 0.05375961, -0.04046727],
        [-0.04129645,  0.05816479]], dtype=float32)>]

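Or, equivalently, create the weight with the layer's `add_weight` method instead of constructing a `tf.Variable` by hand: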

In [15]:
class LinearMap(Layer):
    """Bias-free linear map whose weight matrix is created via `add_weight`.

    Args:
        input_dim: Size of the last dimension of the inputs.
        units: Size of the last dimension of the outputs.
    """

    def __init__(self, input_dim, units):
        super().__init__()
        weight_shape = (input_dim, units)
        self.w = self.add_weight(shape=weight_shape, initializer='random_normal')

    def call(self, inputs):
        """Return the matrix product of `inputs` and the weight matrix."""
        return tf.linalg.matmul(inputs, self.w)

In [16]:
class MyModel(Model):
    """Sigmoid dense layer followed by the custom `LinearMap` layer.

    Args:
        hidden_units: Width of the dense layer (and input size of the linear map).
        outputs: Output size of the linear map.
        **kwargs: Forwarded to the Keras `Model` constructor (e.g. `name`).
    """

    def __init__(self, hidden_units, outputs, **kwargs):
        super().__init__(**kwargs)
        self.dense = Dense(units=hidden_units, activation='sigmoid')
        self.linear = LinearMap(input_dim=hidden_units, units=outputs)

    def call(self, inputs):
        """Return the linear map applied to the hidden activations."""
        hidden = self.dense(inputs)
        projected = self.linear(hidden)
        return projected


my_model = MyModel(hidden_units=64, outputs=12, name='my_custom_model')

### Coding Tutorial

#### Create custom layers

In [17]:
class MyLayer(Layer):
    """Affine layer computing `inputs @ w + b`.

    Args:
        units: Number of output features.
        input_dim: Number of input features.
    """

    def __init__(self, units, input_dim):
        super().__init__()
        kernel_shape = (input_dim, units)
        bias_shape = (units,)
        self.w = self.add_weight(shape=kernel_shape, initializer='random_normal')
        self.b = self.add_weight(shape=bias_shape, initializer='zeros')

    def call(self, inputs):
        """Return the affine transform of `inputs`."""
        projected = tf.linalg.matmul(inputs, self.w)
        return projected + self.b


dense_layer = MyLayer(units=3, input_dim=5)
x = tf.ones(shape=(1, 5))

In [18]:
# Apply the layer to the all-ones input `x`; the result has shape (1, 3).
dense_layer(x)

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[ 0.05008806,  0.11125094, -0.01788366]], dtype=float32)>

In [19]:
# List the layer's variables: the (5, 3) weight matrix followed by the (3,) bias.
dense_layer.weights

[<tf.Variable 'Variable:0' shape=(5, 3) dtype=float32, numpy=
 array([[ 0.04202368,  0.02934473, -0.05145034],
        [-0.02823197, -0.003109  ,  0.04414172],
        [ 0.02957419,  0.00572001, -0.01155856],
        [ 0.02049355,  0.06330845,  0.02127818],
        [-0.01377138,  0.01598675, -0.02029467]], dtype=float32)>,
 <tf.Variable 'Variable:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>]

In [31]:
# Affine layer whose weights can be created as non-trainable
class MyLayer(Layer):
    """Affine layer computing `inputs @ w + b` with optional frozen weights.

    Args:
        units: Number of output features.
        input_dim: Number of input features.
        trainable: Passed to `add_weight` for both the weight matrix and the bias.
    """

    def __init__(self, units, input_dim, trainable=True):
        super().__init__()
        kernel_shape = (input_dim, units)
        bias_shape = (units,)
        self.w = self.add_weight(shape=kernel_shape,
                                 initializer='random_normal',
                                 trainable=trainable)
        self.b = self.add_weight(shape=bias_shape,
                                 initializer='zeros',
                                 trainable=trainable)

    def call(self, inputs):
        """Return the affine transform of `inputs`."""
        projected = tf.linalg.matmul(inputs, self.w)
        return projected + self.b


dense_layer = MyLayer(units=3, input_dim=5, trainable=False)

In [32]:
# Count the variables in each group; with trainable=False both `w` and `b`
# are reported as non-trainable.
print('trainable weights: ', len(dense_layer.trainable_weights))
print('non-trainable weights: ', len(dense_layer.non_trainable_weights))

trainable weights:  0
non-trainable weights:  2


In [33]:
# Create a custom layer to accumulate means of output values
class MyLayerMean(Layer):
    """Affine layer that also tracks the running mean of its activations.

    Two non-trainable variables accumulate state across calls:
    `sum_activation` (per-unit sum of all activations seen so far) and
    `number_call` (total number of samples seen so far).

    Args:
        units: Number of output features.
        input_dim: Number of input features.
    """

    def __init__(self, units, input_dim):
        super(MyLayerMean, self).__init__()
        self.w = self.add_weight(shape=(input_dim, units),
                                 initializer='random_normal')
        self.b = self.add_weight(shape=(units, ),
                                 initializer='zeros')
        self.sum_activation = tf.Variable(initial_value=tf.zeros((units, )),
                                          trainable=False)
        self.number_call = tf.Variable(initial_value=0, trainable=False)

    def call(self, inputs):
        """Return `(activations, running_mean)` for a batch of inputs.

        Returns:
            A tuple of the affine activations for this batch and the per-unit
            mean of all activations accumulated over every call so far.
        """
        activations = tf.matmul(inputs, self.w) + self.b
        self.sum_activation.assign_add(tf.reduce_sum(activations, axis=0))
        # Use the dynamic batch size: the static `inputs.shape[0]` is None when
        # the batch dimension is unknown, which would make assign_add fail.
        self.number_call.assign_add(tf.shape(inputs)[0])
        return activations, self.sum_activation / tf.cast(self.number_call, tf.float32)

dense_layer = MyLayerMean(3, 5)

In [34]:
# First call: the running mean equals the activations of this single sample.
y, activation_means = dense_layer(tf.ones((1, 5)))
print(activation_means)

# Second call with the same input: sum and count both double, so the mean is unchanged.
y, activation_means = dense_layer(tf.ones((1, 5)))
print(activation_means)

tf.Tensor([-0.05055349 -0.05187923  0.06057667], shape=(3,), dtype=float32)
tf.Tensor([-0.05055349 -0.05187923  0.06057667], shape=(3,), dtype=float32)


In [35]:
# Create a Dropout layer as a custom layer
class MyDropout(Layer):
    """Dropout implemented as a custom layer.

    Args:
        rate: Fraction of the input units to drop, passed to `tf.nn.dropout`.
    """

    def __init__(self, rate):
        super(MyDropout, self).__init__()
        self.rate = rate

    def call(self, inputs, training=None):
        """Apply dropout in training mode only.

        Args:
            inputs: Tensor to apply dropout to.
            training: Truthy to drop units; falsy or None (the default) returns
                `inputs` unchanged, so inference is deterministic.

        Returns:
            The dropped-out tensor when training, otherwise `inputs` as given.
        """
        # Dropout is a training-time regularizer; applying it at inference
        # would randomly perturb the predictions.
        if training:
            return tf.nn.dropout(inputs, rate=self.rate)
        return inputs

#### Use the custom layers in a model

In [38]:
class MyModel(Model):
    """Three-layer classifier assembled from the custom layers.

    Args:
        units_1: Output size of the first custom layer.
        input_dim_1: Number of input features of the first custom layer.
        units_2: Output size of the second custom layer.
        units_3: Output size of the third custom layer (fed to the softmax).
    """

    def __init__(self, units_1, input_dim_1, units_2, units_3):
        super().__init__()
        self.layer_1 = MyLayer(units_1, input_dim_1)
        self.dropout_1 = MyDropout(rate=0.5)
        self.layer_2 = MyLayer(units_2, units_1)
        self.dropout_2 = MyDropout(rate=0.5)
        self.layer_3 = MyLayer(units_3, units_2)
        self.softmax = Softmax()

    def call(self, inputs):
        """Return the softmax output for `inputs`."""
        # Each hidden stage is: custom affine layer -> ReLU -> dropout layer.
        hidden_1 = self.dropout_1(tf.nn.relu(self.layer_1(inputs)))
        hidden_2 = self.dropout_2(tf.nn.relu(self.layer_2(hidden_1)))
        logits = self.layer_3(hidden_2)
        return self.softmax(logits)

In [39]:
# Build the model for 10000 input features and 46 output classes, then run a
# single all-ones sample through it.
model = MyModel(units_1=64, input_dim_1=10000, units_2=64, units_3=46)
model(tf.ones(shape=(1, 10000)))

<tf.Tensor: shape=(1, 46), dtype=float32, numpy=
array([[0.02990018, 0.01800405, 0.01508308, 0.01572504, 0.02493499,
        0.008266  , 0.02224776, 0.00740983, 0.02153068, 0.01984571,
        0.03500341, 0.00302157, 0.02685716, 0.03435294, 0.03273074,
        0.00449319, 0.01599347, 0.00997612, 0.02103749, 0.01593344,
        0.01701007, 0.01201108, 0.00700865, 0.00856838, 0.01076013,
        0.01807716, 0.01108341, 0.0129216 , 0.00602711, 0.01187329,
        0.04498018, 0.00603162, 0.07162493, 0.00859119, 0.01896317,
        0.01398949, 0.02509321, 0.0248103 , 0.03002817, 0.02347226,
        0.04321312, 0.05968134, 0.06139896, 0.0202462 , 0.01175775,
        0.03843035]], dtype=float32)>

In [40]:
# Show each layer of the model together with its parameter count.
model.summary()

Model: "my_model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
my_layer_10 (MyLayer)        multiple                  640064    
_________________________________________________________________
my_dropout_4 (MyDropout)     multiple                  0         
_________________________________________________________________
my_layer_11 (MyLayer)        multiple                  4160      
_________________________________________________________________
my_dropout_5 (MyDropout)     multiple                  0         
_________________________________________________________________
my_layer_12 (MyLayer)        multiple                  2990      
_________________________________________________________________
softmax_3 (Softmax)          multiple                  0         
Total params: 647,214
Trainable params: 647,214
Non-trainable params: 0
__________________________________________________