In [1]:
import tensorflow as tf
import numpy as np

## Understanding Model Weight Shapes

model = Sequential()

model.add(Dense(32, input_shape=(16,)))
* Now the model takes input arrays of shape `(*, 16)` and outputs arrays of shape `(*, 32)`.

model.add(Dense(32))

* After the first layer, you no longer need to specify the input size (as in `model.add(Dense(32))` above): it is inferred from the previous layer's output.


In [2]:
# Two Dense layers stacked one call at a time. No input shape is declared, so
# the first kernel's row count comes from the 5-feature input X below.
net = tf.keras.models.Sequential()
# Hidden layer: kernel shape is (input features, units) = (5, 4), bias shape (4,).
net.add(tf.keras.layers.Dense(4, activation='relu'))
# Output layer: 3 outputs per sample; kernel shape (4, 3), bias shape (3,).
net.add(tf.keras.layers.Dense(3))

# A batch of 2 samples with 5 features each.
X = tf.random.uniform((2, 5))
# Forward pass; the result has shape (num_samples, num_outputs) = (2, 3).
net(X)

<tf.Tensor: id=60, shape=(2, 3), dtype=float32, numpy=
array([[ 0.01671392,  0.90553355,  0.18247415],
       [-0.02877694,  0.00736426, -0.00826675]], dtype=float32)>

In [3]:
# Shape of every weight array, in the order get_weights() returns them
# (kernel then bias for each Dense layer).
list(map(np.shape, net.get_weights()))

[(5, 4), (4,), (4, 3), (3,)]

In [4]:
# Display the model's weights as a list of NumPy arrays; the biases are all
# zeros at this point because no training step has run.
net.get_weights() 

[array([[-0.70231956,  0.33076   ,  0.37125206,  0.46176136],
        [-0.70593864, -0.4999299 , -0.63074696, -0.56508505],
        [-0.08774465,  0.23178315,  0.14967823, -0.16643196],
        [ 0.67740226,  0.21079755,  0.81191397,  0.49080694],
        [ 0.4767325 ,  0.37426507, -0.7815539 ,  0.3904084 ]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32),
 array([[ 0.43081045,  0.83204496, -0.25872368],
        [-0.8179759 ,  0.20932698, -0.23497993],
        [ 0.8340477 , -0.45239016,  0.6053418 ],
        [-0.04227406,  0.86390483,  0.8457637 ]], dtype=float32),
 array([0., 0., 0.], dtype=float32)]

In [5]:
# Inspect only the first Dense layer: its kernel and bias as tf.Variable objects.
net.layers[0].weights

[<tf.Variable 'sequential/dense/kernel:0' shape=(5, 4) dtype=float32, numpy=
 array([[-0.70231956,  0.33076   ,  0.37125206,  0.46176136],
        [-0.70593864, -0.4999299 , -0.63074696, -0.56508505],
        [-0.08774465,  0.23178315,  0.14967823, -0.16643196],
        [ 0.67740226,  0.21079755,  0.81191397,  0.49080694],
        [ 0.4767325 ,  0.37426507, -0.7815539 ,  0.3904084 ]],
       dtype=float32)>,
 <tf.Variable 'sequential/dense/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]

In [6]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line after the table.
net.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  24        
_________________________________________________________________
dense_1 (Dense)              multiple                  15        
Total params: 39
Trainable params: 39
Non-trainable params: 0
_________________________________________________________________
None


## Subclassing 

In [7]:
class Net(tf.keras.Model):
    """Two-layer perceptron written as a `tf.keras.Model` subclass.

    A 256-unit ReLU hidden layer feeds a 10-unit output layer that has no
    activation specified.
    """

    def __init__(self):
        super().__init__()
        relu = tf.keras.activations.relu
        self.hidden = tf.keras.layers.Dense(units=256, activation=relu)
        self.out = tf.keras.layers.Dense(units=10)

    def call(self, X):
        """Pass `X` through the hidden layer, then through the output layer."""
        hidden_activations = self.hidden(X)
        return self.out(hidden_activations)

# Rebind `net` to an instance of the subclassed model and run a forward pass on X.
net = Net()
net(X)

<tf.Tensor: id=123, shape=(2, 10), dtype=float32, numpy=
array([[ 0.19341265,  0.16742884, -0.07801189,  0.00709925,  0.19250943,
        -0.05013669,  0.18435931,  0.070962  , -0.09334707, -0.08752254],
       [ 0.25540304,  0.01825365, -0.05991789,  0.06557041,  0.14278069,
        -0.07365306,  0.08589697,  0.02383904, -0.06201077,  0.06696422]],
      dtype=float32)>

## Nested Blocks

In [8]:
def block1(name):
    """Build a `Sequential` called `name`: a Flatten layer followed by a 4-unit ReLU Dense layer."""
    flatten_then_dense = [
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(4, activation='relu'),
    ]
    return tf.keras.models.Sequential(flatten_then_dense, name=name)

def block2(num):
    """Stack `num` `block1` sub-models, named 'block_0', 'block_1', ..., inside one `Sequential`."""
    # The outer container is created before any sub-block, as in the original ordering.
    stacked = tf.keras.Sequential()
    block_names = [f'block_{i}' for i in range(num)]
    for block_name in block_names:
        stacked.add(block1(name=block_name))
    return stacked

# Outer container; it is created before block2(2) runs, so statement order
# here determines which model is constructed first.
rgnet = tf.keras.Sequential()
# Nest the two-block stack returned by block2 as a single layer of rgnet.
rgnet.add(block2(2))
# Forward pass on X through the nested model.
rgnet(X)

<tf.Tensor: id=181, shape=(2, 4), dtype=float32, numpy=
array([[0.        , 0.10468936, 0.41078112, 0.32752213],
       [0.        , 0.20354982, 0.37920904, 0.3137256 ]], dtype=float32)>

In [9]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line after the table.
rgnet.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_2 (Sequential)    multiple                  44        
Total params: 44
Trainable params: 44
Non-trainable params: 0
_________________________________________________________________
None


## Weight, Bias Initialization

* Weight
    * Uniform
    * Gaussian
    * Xavier/Glorot
    * Constant
    * Custom
* Bias
    * Usually set to zero

In [10]:
# Normal Initialization
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
# Hidden kernel drawn from a normal distribution (mean 0, stddev 0.01); bias starts at zero.
net.add(tf.keras.layers.Dense(
    4,
    activation=tf.nn.relu,
    kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.01),
    bias_initializer=tf.zeros_initializer(),
))
net.add(tf.keras.layers.Dense(1))

# Run a forward pass on X before reading the weights (the model declares no input shape).
net(X)
# Kernel and bias of the hidden Dense layer, as NumPy arrays.
tuple(weight.numpy() for weight in net.weights[:2])

(array([[-0.01548367,  0.00943118, -0.00170447, -0.01062113],
        [ 0.00248973,  0.00634976,  0.01298806,  0.00266542],
        [ 0.01151692,  0.00271542,  0.00158345, -0.00879586],
        [-0.00417596,  0.01616655,  0.00698785,  0.00300117],
        [ 0.01090067,  0.00229237,  0.02077415, -0.00362489]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32))

In [11]:
# Constant Init
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
# Every entry of the hidden kernel is set to the constant 1; bias starts at zero.
net.add(tf.keras.layers.Dense(
    4,
    activation=tf.nn.relu,
    kernel_initializer=tf.keras.initializers.Constant(1),
    bias_initializer=tf.zeros_initializer(),
))
net.add(tf.keras.layers.Dense(1))

# Run a forward pass on X before reading the weights (the model declares no input shape).
net(X)
# Kernel and bias of the hidden Dense layer, as NumPy arrays.
tuple(weight.numpy() for weight in net.weights[:2])

(array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=float32),
 array([0., 0., 0., 0.], dtype=float32))

In [12]:
# Xavier Init
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten())
# Hidden kernel uses the Glorot (Xavier) normal initializer; bias starts at zero.
net.add(tf.keras.layers.Dense(
    4,
    activation=tf.nn.relu,
    kernel_initializer=tf.keras.initializers.GlorotNormal(),
    bias_initializer=tf.zeros_initializer(),
))
net.add(tf.keras.layers.Dense(1))

# Run a forward pass on X before reading the weights (the model declares no input shape).
net(X)
# Kernel and bias of the hidden Dense layer, as NumPy arrays.
tuple(weight.numpy() for weight in net.weights[:2])

(array([[-0.24993627,  0.6882137 , -0.04835684,  0.14106779],
        [-0.6707692 ,  0.07403941,  0.6376924 , -0.5068528 ],
        [-0.7230006 , -0.17344736,  0.06102401,  0.430907  ],
        [-0.0978165 ,  0.95055515, -0.5735844 , -0.99266607],
        [-0.31049836,  0.17081314, -0.20078082,  0.91287506]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32))

## Custom Layers

In [13]:
## Center/Mean Layer
class CenteredLayer(tf.keras.Model):
    """Parameter-free layer that subtracts the mean of its input from every element.

    No axis is passed to `tf.reduce_mean`, so the mean is taken over all
    elements of `inputs` and a single scalar is subtracted.
    """

    def __init__(self):
        super().__init__()

    def call(self, inputs):
        """Return `inputs` shifted by its overall mean."""
        # NOTE(review): tf.reduce_mean appears to keep the input dtype, so an
        # integer tensor would get a truncated integer mean -- confirm, and
        # pass float inputs when exact centering matters.
        overall_mean = tf.reduce_mean(inputs)
        return inputs - overall_mean

In [14]:
# X is rebound here to a small 1-D integer tensor used to demonstrate centering.
X = tf.constant([1, 2, 3, 4, 5])
x_as_numpy = X.numpy()
print(f' Mean : {x_as_numpy.mean()}')
x_as_numpy

Mean : 3.0


array([1, 2, 3, 4, 5])

In [15]:
cen_layer = CenteredLayer()
# Apply the layer once and reuse the NumPy result for both the printed mean and the display.
centered = cen_layer(X).numpy()
print(f'Centered Mean : {centered.mean()}')
centered

Centered Mean : 0.0


array([-2, -1,  0,  1,  2])