# TensorBoard Scalars: Logging Metrics in Keras

This example is a slightly motified tutorial from Tensorflow for using TensorBoard Scalars on Carnegie Clusters sourced from: https://www.tensorflow.org/tensorboard/scalars_and_keras

In this example, Tensorflow uses TensorBoard's Time Series Dashboard in order to visualize key metrics for ML Training using an API (visualize default and custom scalars). This tutorial presents very basic examples to help you learn how to use these APIs with TensorBoard when developing your Keras model.

In [17]:
import tensorflow as tf
import datetime, os
# TensorBoard is provided through an Extension
%load_ext tensorboard

from datetime import datetime
from packaging import version

from tensorflow import keras
from keras import backend as K

import numpy as np
tf.__version__

'2.18.0'

In [8]:
# Define a log directory in your scratch or home folder, change this to whatever you want
os.chdir('/carnegie/nobackup/scratch/tkaminski/container_images/tensorboard_test/logs')
print(os.getcwd())

/carnegie/nobackup/scratch/tkaminski/container_images/tensorboard_test/logs


In [9]:
data_size = 1000
# 80% of the data is for training.
train_pct = 0.8

train_size = int(data_size * train_pct)

# Create some input data between -1 and 1 and randomize it.
x = np.linspace(-1, 1, data_size)
np.random.shuffle(x)

# Generate the output data.
# y = 0.5x + 2 + noise
y = 0.5 * x + 2 + np.random.normal(0, 0.05, (data_size, ))

# Split into test and train pairs.
x_train, y_train = x[:train_size], y[:train_size]
x_test, y_test = x[train_size:], y[train_size:]

In [11]:
# Write the scalar values to TensorBoard logs
logdir = "scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

model = keras.models.Sequential([
    keras.layers.Dense(16, input_dim=1),
    keras.layers.Dense(1),
])

model.compile(
    loss='mse', # keras.losses.mean_squared_error
    optimizer=keras.optimizers.SGD(learning_rate=0.2),
)

print("Training ... With default parameters, this takes less than 10 seconds.")
training_history = model.fit(
    x_train, # input
    y_train, # output
    batch_size=train_size,
    verbose=0, # Suppress chatty output; use Tensorboard instead
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=[tensorboard_callback],
)

print("Average test loss: ", np.average(training_history.history['loss']))

Training ... With default parameters, this takes less than 10 seconds.
Average test loss:  0.05308009155560285


In [None]:
# Launch TensorBoard Dashboard with the log from the created log directory
tensorboard --logdir=/carnegie/nobackup/scratch/tkaminski/container_images/tensorboard_test/logs/scalars --port=8888 --bind_all --path_prefix=/node/memex-2015-017.bsehpc.carnegiescience.edu/8888

In [None]:
# Write the scalar values to TensorBoard logs
logdir = "scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
file_writer = tf.summary.create_file_writer(logdir + "/metrics")
file_writer.set_as_default()

def lr_schedule(epoch):
  """
  Returns a custom learning rate that decreases as epochs progress.
  """
  learning_rate = 0.2
  if epoch > 10:
    learning_rate = 0.02
  if epoch > 20:
    learning_rate = 0.01
  if epoch > 50:
    learning_rate = 0.005

  tf.summary.scalar('learning rate', data=learning_rate, step=epoch)
  return learning_rate

lr_callback = keras.callbacks.LearningRateScheduler(lr_schedule)
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

model = keras.models.Sequential([
    keras.layers.Dense(16, input_dim=1),
    keras.layers.Dense(1),
])

model.compile(
    loss='mse', # keras.losses.mean_squared_error
    optimizer=keras.optimizers.SGD(),
)

training_history = model.fit(
    x_train, # input
    y_train, # output
    batch_size=train_size,
    verbose=0, # Suppress chatty output; use Tensorboard instead
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=[tensorboard_callback, lr_callback],
)

In [16]:
print(model.predict(x=np.array([60, 25, 2])))
# True values to compare predictions against: 
# [[32.0]
#  [14.5]
#  [ 3.0]]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[[31.782534]
 [14.409847]
 [ 2.993511]]
