 # Gradient monitoring

In [73]:
import tensorflow as tf
import numpy as np
import tensorflow.keras as keras
import tensorflow.keras.layers as layers

from datetime import datetime
from pathlib import Path

In [59]:
n = 10_000
x = np.random.rand(n, 2)
x1, x2 = x[:, 0], x[:, 1]
y = 3*x1 + 2*x2

In [67]:
def get_mlp(layers_n, initializer, act='relu', last_act=None):
    l_first, *ls, l_last = layers_n
    
    model = keras.Sequential(name='MLP')
    
    model.add(layers.Input(shape=(l_first,)))
    for i, l in enumerate(ls):
        model.add(layers.Dense(l, activation=act, kernel_initializer=initializer, name=f'Dense_{i}'))
    
    model.add(layers.Dense(l_last, activation=last_act, kernel_initializer=initializer, name=f'Output'))

    return model

In [68]:
def get_features_extractor(model):
    inputs = model.input
    outputs = {layer.name: layer.output for layer in model.layers}
    return keras.Model(inputs, outputs)

In [69]:
hidden_layers_size = 100
hidden_layers_num = 10

initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.)
layers_n=[2] + [hidden_layers_size]*hidden_layers_num  + [1]

mlp = get_mlp(layers_n, initializer)

In [55]:
lr = 1e-9
mlp.compile(optimizer=keras.optimizers.SGD(learning_rate=lr), loss='mse', run_eagerly=True)

In [56]:
batch_size = 32
inner_folder = datetime.now().strftime('%Y_%m_%d__%H_%M_%S')
tensorboard_log_folder = f'./tensorboard_logs/{inner_folder}'
callbacks = [
    keras.callbacks.TensorBoard(log_dir=tensorboard_log_folder),
    MyMonitoringCallback(tensorboard_log_folder)
]
epochs = 1

history = mlp.fit(x, y, batch_size=batch_size, epochs=epochs, callbacks=callbacks, validation_split=0.1)

NameError: name 'MyMonitoringCallback' is not defined

In [91]:
batch_size = 32
inner_folder = datetime.now().strftime('%Y_%m_%d__%H_%M_%S')
tensorboard_log_folder = f'./tensorboard_logs/{inner_folder}'
num_batches = 10

initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=1.)
mlp = get_mlp(layers_n, initializer)
feature_extractor = get_features_extractor(mlp)
lr = 1e-3

optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

for num_batch, (xbatch, ybatch) in tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size).take(num_batches).enumerate():
    with tf.GradientTape() as tape:
        outputs = feature_extractor(xbatch)
        y_pred = tf.reshape(outputs['Output'], (-1,))
        loss = tf.keras.losses.MSE(ybatch, y_pred)
    grads = tape.gradient(loss, mlp.trainable_variables)
    optimizer.apply_gradients(zip(grads, mlp.trainable_variables))
    with tf.summary.create_file_writer(tensorboard_log_folder).as_default():
        for grad, var in zip(grads, mlp.trainable_variables):
            tf.summary.histogram(name=f'grads/{var.name}', data=grad, step=num_batch)
        for layer_name, output in outputs.items():
            tf.summary.histogram(name=f'output/{layer_name}', data=output, step=num_batch)

In [82]:
layer = mlp.layers[0]

In [87]:
layer.bias.name

'Dense_0/bias:0'