In [1]:
import tensorflow as tf
import numpy as np

## Understanding Model Weight Shapes

model = Sequential()

model.add(Dense(32, input_shape=(16,)))
* now the model will take as input arrays of shape (*, 16) and output arrays of shape (*, 32)

model.add(Dense(32))

* after the first layer, you don't need to specify the size of the input anymore:


In [101]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Dense(4, activation='relu'),  # weights at this layer : 5*4 (number of features, number of layers)
    tf.keras.layers.Dense(3)  # 3 output for each samples  # weights at this layer : 4*3
])
X = tf.random.uniform((2, 5))  ## This has two samples with 5 features
net(X)    # Output : num_samples * number of output -> 2*3

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 0.45988375,  0.44945583, -0.5030286 ],
       [ 0.24884453, -0.01099233,  0.3660769 ]], dtype=float32)>

In [102]:
[w.shape for w in net.get_weights()]

[(5, 4), (4,), (4, 3), (3,)]

In [91]:
net.get_weights() 

[array([[-0.7477323 , -0.5512397 ,  0.28862655, -0.03863358],
        [ 0.45619917,  0.6058912 ,  0.55102146, -0.08232236],
        [ 0.05374765,  0.5406301 , -0.35176423,  0.17109698],
        [ 0.31326377,  0.08617252,  0.05538166, -0.16758734],
        [-0.23829949, -0.22896457, -0.20922929,  0.3343681 ]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32),
 array([[-0.8349323 , -0.36833513, -0.8404136 ],
        [ 0.8526596 , -0.15256757,  0.08835053],
        [-0.64301026,  0.47354722,  0.4408257 ],
        [ 0.85625815,  0.25792944,  0.3647381 ]], dtype=float32),
 array([0., 0., 0.], dtype=float32)]

In [92]:
net.layers[0].weights

[<tf.Variable 'sequential_42/dense_65/kernel:0' shape=(5, 4) dtype=float32, numpy=
 array([[-0.7477323 , -0.5512397 ,  0.28862655, -0.03863358],
        [ 0.45619917,  0.6058912 ,  0.55102146, -0.08232236],
        [ 0.05374765,  0.5406301 , -0.35176423,  0.17109698],
        [ 0.31326377,  0.08617252,  0.05538166, -0.16758734],
        [-0.23829949, -0.22896457, -0.20922929,  0.3343681 ]],
       dtype=float32)>,
 <tf.Variable 'sequential_42/dense_65/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]

In [93]:
print(net.summary())

Model: "sequential_42"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_65 (Dense)             multiple                  24        
_________________________________________________________________
dense_66 (Dense)             multiple                  15        
Total params: 39
Trainable params: 39
Non-trainable params: 0
_________________________________________________________________
None


## Subclassing 

In [84]:
class Net(tf.keras.Model):
    def __init__(self):
        super().__init__() 
        self.hidden = tf.keras.layers.Dense(units = 256, activation=tf.keras.activations.relu)
        self.out = tf.keras.layers.Dense(units = 10)

    def call(self, X):
        return self.out(self.hidden(X))

net = Net()
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[-0.17519699,  0.07823773, -0.09413238, -0.06599735,  0.02433382,
        -0.05878056, -0.06345084, -0.0343364 , -0.01901158, -0.16578159],
       [-0.12553011, -0.03113088, -0.20948812, -0.0360925 ,  0.01026879,
        -0.04221698, -0.07907405, -0.08402479,  0.08492374, -0.18152303]],
      dtype=float32)>

## Nested Blocks

In [87]:
def block1(name):
    return tf.keras.models.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(4, activation='relu')
    ], name = name)

def block2(num):
    net = tf.keras.Sequential()
    for i in range(num):
        net.add(block1(name = f'block_{i}'))
    return net

rgnet = tf.keras.Sequential()
rgnet.add(block2(2))
rgnet(X)

<tf.Tensor: shape=(2, 4), dtype=float32, numpy=
array([[0.        , 0.        , 0.35440812, 0.        ],
       [0.        , 0.        , 0.4273185 , 0.        ]], dtype=float32)>

In [88]:
print(rgnet.summary())

Model: "sequential_40"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_41 (Sequential)   multiple                  44        
Total params: 44
Trainable params: 44
Non-trainable params: 0
_________________________________________________________________
None


## Weight, Bias Initialization

* Weight
    * Uniform
    * Gaussian
    * Xavier/Glorot
    * Constant
    * Custom
* Bias
    * Usually set to zero

In [98]:
# Normal Initialization
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4, activation=tf.nn.relu,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.01),
        bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1)])

net(X)
net.weights[0].numpy(), net.weights[1].numpy()

(array([[-0.00984479,  0.0095035 , -0.00801117,  0.00997174],
        [-0.01140316,  0.00892066, -0.00894017, -0.00260398],
        [ 0.00284238, -0.00238924, -0.01004896, -0.00639841],
        [-0.0104738 ,  0.01300001,  0.00124462,  0.011874  ],
        [-0.00071052,  0.00271565,  0.00456786,  0.00090117]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32))

In [99]:
# Constant Init
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4, activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.Constant(1),
        bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1),
])

net(X)
net.weights[0].numpy(), net.weights[1].numpy()

(array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=float32),
 array([0., 0., 0., 0.], dtype=float32))

In [100]:
# Xavier Init
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4, activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.GlorotNormal(),
        bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1),
])

net(X)
net.weights[0].numpy(), net.weights[1].numpy()

(array([[ 0.2800113 , -1.0012556 ,  0.9648218 , -0.08584899],
        [ 0.16471107, -0.21502799,  0.83380544,  0.04262323],
        [-0.79368573, -0.13869938, -0.07479477, -0.06960519],
        [-0.27690142, -0.36010242,  0.03960536,  0.08371516],
        [ 0.35505927, -0.1824444 , -0.37923357, -0.3211536 ]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32))

## Custom Layers

In [115]:
## Center/Mean Layer
class CenteredLayer(tf.keras.Model):
    def __init__(self):
        super().__init__()
    
    def call(self, inputs):
        return inputs - tf.reduce_mean(inputs)

In [127]:
X = tf.constant([1, 2, 3, 4, 5])
print(f' Mean : {X.numpy().mean()}')
X.numpy()

Mean : 3.0


array([1, 2, 3, 4, 5])

In [128]:
cen_layer = CenteredLayer()
print(f'Centered Mean : {cen_layer(X).numpy().mean()}')
cen_layer(X).numpy()

Centered Mean : 0.0


array([-2, -1,  0,  1,  2])