 # Gradient monitoring

In [23]:
import tensorflow as tf
import numpy as np
import tensorflow.keras as keras
import tensorflow.keras.layers as layers

from datetime import datetime
from pathlib import Path

In [24]:
# Synthetic regression data: two uniform features in [0, 1) and a noiseless
# linear target y = 3*x1 + 2*x2.
# A seeded generator makes the data identical on every Restart & Run All.
SEED = 0
n = 10_000
rng = np.random.default_rng(SEED)
x = rng.random((n, 2))
x1, x2 = x[:, 0], x[:, 1]
y = 3*x1 + 2*x2

In [25]:
def get_mlp(layers_n, initializer, act='relu', last_act=None):
    """Build a fully connected Keras ``Sequential`` model named 'MLP'.

    Args:
        layers_n: Sequence of sizes ``[input_dim, hidden_1, ..., hidden_k, output_dim]``.
            Must contain at least two entries (input and output size).
        initializer: Kernel initializer passed to every ``Dense`` layer.
        act: Activation used by the hidden layers.
        last_act: Activation of the output layer (``None`` means linear).

    Returns:
        The assembled, uncompiled ``keras.Sequential`` model.
    """
    input_dim, *hidden_units, output_units = layers_n

    def dense(units, activation):
        # All Dense layers share the same kernel initializer.
        return layers.Dense(units, activation=activation, kernel_initializer=initializer)

    model = keras.Sequential(name='MLP')
    model.add(layers.Input(shape=(input_dim,)))

    for units in hidden_units:
        model.add(dense(units, act))

    model.add(dense(output_units, last_act))
    return model

In [88]:
class MyMonitoringCallback(keras.callbacks.Callback):
    """Log a histogram of every layer's weights to TensorBoard after each batch.

    Summaries are written to ``<folder>/debug``. Keras does not pass gradients
    to callbacks, so the weight values themselves are recorded; their
    distribution drifting or turning into NaN is what to look for.

    Args:
        folder: Root log directory of the run; ``'debug'`` is appended to it.

    Attributes:
        folder: Directory the summaries are written to.
        step: Summary step used for the next batch (starts at 1, +1 per batch).
    """

    def __init__(self, folder):
        super().__init__()  # let the Keras base class set up its own state
        self.folder = str(Path(folder) / 'debug')
        self.step = 1
        self._writer = None

    def _get_writer(self):
        """Return the summary writer, creating it on first use."""
        # One writer for the whole run: creating a writer per layer per batch
        # is very slow and produces a new event file each time.
        if self._writer is None:
            self._writer = tf.summary.create_file_writer(self.folder)
        return self._writer

    def on_train_batch_end(self, batch, logs=None):
        """Write one histogram per weight tensor of every layer."""
        with self._get_writer().as_default():
            for layer in self.model.layers:
                # get_weights() returns NumPy copies in the same order as layer.weights.
                for variable, values in zip(layer.weights, layer.get_weights()):
                    # Keep only the last path component and drop ':' so the
                    # tag reads e.g. 'dense/kernel_0'.
                    short_name = variable.name.split('/')[-1].replace(':', '_')
                    tf.summary.histogram(
                        name=f'{layer.name}/{short_name}',
                        data=values,
                        step=self.step,
                    )
        # Advance once per batch so all layers share the same step axis.
        self.step += 1

    def on_train_end(self, logs=None):
        """Flush and release the summary writer when training finishes."""
        if self._writer is not None:
            self._writer.close()
            self._writer = None

In [89]:
# Network shape: 2 inputs -> `hidden_layers_num` hidden layers of
# `hidden_layers_size` units each -> 1 output.
hidden_layers_size = 100
hidden_layers_num = 10
layers_n = [2, *([hidden_layers_size] * hidden_layers_num), 1]

# NOTE(review): unit-variance normal init across this many wide ReLU layers is
# presumably deliberate (to provoke unstable training for the monitoring demo) - confirm.
initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0)

mlp = get_mlp(layers_n=layers_n, initializer=initializer)

In [90]:
# Plain SGD with a very small learning rate and MSE loss.
# run_eagerly=True makes Keras execute the training step eagerly instead of
# compiling it into a graph.
lr = 1e-9
sgd = keras.optimizers.SGD(learning_rate=lr)
mlp.compile(optimizer=sgd, loss='mse', run_eagerly=True)

In [91]:
batch_size = 32
epochs = 1

# Each run logs into its own timestamped directory under ./tensorboard_logs.
inner_folder = datetime.now().strftime('%Y_%m_%d__%H_%M_%S')
tensorboard_log_folder = f'./tensorboard_logs/{inner_folder}'

# Standard TensorBoard logging plus the custom callback, which writes into a
# 'debug' sub-directory of the same run folder.
callbacks = [
    keras.callbacks.TensorBoard(log_dir=tensorboard_log_folder),
    MyMonitoringCallback(tensorboard_log_folder),
]

# validation_split=0.1 sets aside 10% of the samples for validation.
history = mlp.fit(
    x,
    y,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
    callbacks=callbacks,
)

  5/282 [..............................] - ETA: 6:13 - loss: nan

KeyboardInterrupt: 