In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf

In [2]:
# Callbacks are useful to get a view on internal states and statistics of the model 
# during training.
# Defines the Keras model to add callbacks to.
def get_model():
  """Builds and compiles the small Keras model the callback examples train.

  The model is a single `Dense` unit with a linear activation over 784 input
  features, compiled with RMSprop, mean-squared-error loss and the mean
  absolute error ('mae') metric.

  Returns:
      A compiled `tf.keras.Sequential` model.
  """
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.Dense(1, activation='linear', input_dim=784))
  model.compile(
      # `learning_rate` is the supported argument name; `lr` is only a
      # deprecated alias.
      optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.1),
      loss='mean_squared_error',
      metrics=['mae'])
  return model

In [3]:
# Loads the example MNIST data and pre-processes it: every image is flattened
# to a 784-element float32 vector and scaled by 1/255.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# -1 lets NumPy infer the number of samples instead of hard-coding the size of
# each split.
x_train = x_train.reshape(-1, 784).astype('float32') / 255
x_test = x_test.reshape(-1, 784).astype('float32') / 255

In [4]:
import datetime

class MyCustomCallback(tf.keras.callbacks.Callback):
  """Callback that prints a wall-clock timestamp around every batch.

  The `on_train_batch_*` hooks report training batches and the
  `on_test_batch_*` hooks report evaluation batches.
  """

  def on_train_batch_begin(self, batch, logs=None):
    """Prints the time at which training batch `batch` starts."""
    timestamp = datetime.datetime.now().time()
    print('Training: batch {} begins at {}'.format(batch, timestamp))

  def on_train_batch_end(self, batch, logs=None):
    """Prints the time at which training batch `batch` finishes."""
    timestamp = datetime.datetime.now().time()
    print('Training: batch {} ends at {}'.format(batch, timestamp))

  def on_test_batch_begin(self, batch, logs=None):
    """Prints the time at which evaluation batch `batch` starts."""
    timestamp = datetime.datetime.now().time()
    print('Evaluating: batch {} begins at {}'.format(batch, timestamp))

  def on_test_batch_end(self, batch, logs=None):
    """Prints the time at which evaluation batch `batch` finishes."""
    timestamp = datetime.datetime.now().time()
    print('Evaluating: batch {} ends at {}'.format(batch, timestamp))

In [6]:
# Train a fresh model for five batches so that the per-batch timestamps printed
# by MyCustomCallback are easy to read.
model = get_model()
_ = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=1,
    steps_per_epoch=5,
    verbose=0,  # Keep the output limited to what the callback prints.
    callbacks=[MyCustomCallback()],
)

Training: batch 0 begins at 10:39:38.405603


Training: batch 0 ends at 10:39:38.672845
Training: batch 1 begins at 10:39:38.673230
Training: batch 1 ends at 10:39:38.720355
Training: batch 2 begins at 10:39:38.720838
Training: batch 2 ends at 10:39:38.767838
Training: batch 3 begins at 10:39:38.768244
Training: batch 3 ends at 10:39:38.814460
Training: batch 4 begins at 10:39:38.814808
Training: batch 4 ends at 10:39:38.862105


- Users can supply a list of callbacks to the following tf.keras.Model methods:

	- fit(), fit_generator().
	- evaluate(), evaluate_generator().
	- predict(), predict_generator().
    

In [7]:
# Evaluate for five batches; the callback's on_test_batch_* hooks print the
# timestamps.
_ = model.evaluate(
    x_test,
    y_test,
    batch_size=128,
    verbose=0,
    steps=5,
    callbacks=[MyCustomCallback()],
)

Evaluating: batch 0 begins at 10:39:41.612639
Evaluating: batch 0 ends at 10:39:41.680305
Evaluating: batch 1 begins at 10:39:41.681081
Evaluating: batch 1 ends at 10:39:41.686469
Evaluating: batch 2 begins at 10:39:41.686822
Evaluating: batch 2 ends at 10:39:41.691684
Evaluating: batch 3 begins at 10:39:41.692047
Evaluating: batch 3 ends at 10:39:41.697310
Evaluating: batch 4 begins at 10:39:41.698033
Evaluating: batch 4 ends at 10:39:41.703055


- In on_(train|test|predict)_batch_begin(self, batch, logs=None), 'logs' is a dict with the keys 'batch' and 'size', which hold the current batch number and the size of the batch.

- In on_(train|test|predict)_batch_end(self, batch, logs=None), 'logs' is a dict containing the results of the stateful metrics.

In [8]:
class LossAndErrorPrintingCallback(tf.keras.callbacks.Callback):
  """Callback that prints the loss after each batch and a summary per epoch.

  Each hook reads its values from the `logs` dict it is passed: 'loss' for
  the batch hooks, 'loss' and 'mae' for the epoch hook.
  """

  def on_train_batch_end(self, batch, logs=None):
    """Prints the loss reported for training batch `batch`."""
    batch_loss = logs['loss']
    print('For batch {}, loss is {:7.2f}.'.format(batch, batch_loss))

  def on_test_batch_end(self, batch, logs=None):
    """Prints the loss reported for evaluation batch `batch`."""
    batch_loss = logs['loss']
    print('For batch {}, loss is {:7.2f}.'.format(batch, batch_loss))

  def on_epoch_end(self, epoch, logs=None):
    """Prints the loss and mean absolute error reported for `epoch`."""
    epoch_loss = logs['loss']
    epoch_mae = logs['mae']
    summary = 'The average loss for epoch {} is {:7.2f} and mean absolute error is {:7.2f}.'
    print(summary.format(epoch, epoch_loss, epoch_mae))

# Train a fresh model for three short epochs and let the callback report the
# loss after every batch and every epoch.
model = get_model()
_ = model.fit(
    x_train,
    y_train,
    batch_size=64,
    steps_per_epoch=5,
    epochs=3,
    verbose=0,  # Keep the output limited to what the callback prints.
    callbacks=[LossAndErrorPrintingCallback()],
)

For batch 0, loss is   27.99.
For batch 1, loss is  920.77.
For batch 2, loss is   25.12.
For batch 3, loss is    8.60.
For batch 4, loss is    7.08.
The average loss for epoch 0 is  197.91 and mean absolute error is    8.31.


For batch 0, loss is    6.32.
For batch 1, loss is    5.80.
For batch 2, loss is    5.44.
For batch 3, loss is    5.18.


For batch 4, loss is    4.99.
The average loss for epoch 1 is    5.55 and mean absolute error is    1.95.
For batch 0, loss is    4.85.
For batch 1, loss is    4.74.
For batch 2, loss is    4.65.
For batch 3, loss is    4.58.


For batch 4, loss is    4.51.
The average loss for epoch 2 is    4.67 and mean absolute error is    1.75.


In [9]:
# Evaluate for twenty batches; the callback prints the loss after each one.
_ = model.evaluate(
    x_test,
    y_test,
    batch_size=128,
    verbose=0,
    steps=20,
    callbacks=[LossAndErrorPrintingCallback()],
)

For batch 0, loss is    4.21.
For batch 1, loss is    4.21.
For batch 2, loss is    4.21.
For batch 3, loss is    4.21.
For batch 4, loss is    4.21.
For batch 5, loss is    4.21.
For batch 6, loss is    4.21.
For batch 7, loss is    4.21.
For batch 8, loss is    4.21.
For batch 9, loss is    4.21.
For batch 10, loss is    4.21.
For batch 11, loss is    4.21.
For batch 12, loss is    4.21.
For batch 13, loss is    4.21.
For batch 14, loss is    4.21.
For batch 15, loss is    4.21.
For batch 16, loss is    4.21.
For batch 17, loss is    4.21.
For batch 18, loss is    4.21.
For batch 19, loss is    4.21.


In [10]:
import numpy as np

class EarlyStoppingAtMinLoss(tf.keras.callbacks.Callback):
  """Stop training when the loss is at its min, i.e. the loss stops decreasing.

  At the end of every epoch the 'loss' entry of `logs` is compared with the
  lowest value seen so far in the current training run. When it improves, the
  model weights are recorded. Otherwise a counter is incremented, and once the
  counter reaches `patience` training is stopped and the recorded weights are
  put back on the model.

  Arguments:
      patience: Number of epochs to wait after min has been hit. After this
      number of no improvement, training stops.
  """

  def __init__(self, patience=0):
    super(EarlyStoppingAtMinLoss, self).__init__()

    self.patience = patience

    # best_weights to store the weights at which the minimum loss occurs.
    self.best_weights = None

  def on_train_begin(self, logs=None):
    """Resets all per-run state so the instance can be reused across fits."""
    # The number of epoch it has waited when loss is no longer minimum.
    self.wait = 0
    # The epoch the training stops at.
    self.stopped_epoch = 0
    # Initialize the best as infinity. `np.inf` is the canonical spelling; the
    # `np.Inf` alias was removed in NumPy 2.0.
    self.best = np.inf
    # Forget weights recorded by an earlier training run so they can never be
    # restored onto the model during this one.
    self.best_weights = None

  def on_epoch_end(self, epoch, logs=None):
    """Records the weights on improvement, otherwise counts towards stopping."""
    current = logs.get('loss')
    if np.less(current, self.best):
      self.best = current
      self.wait = 0
      # Record the best weights if current results is better (less).
      self.best_weights = self.model.get_weights()
    else:
      self.wait += 1
      if self.wait >= self.patience:
        self.stopped_epoch = epoch
        self.model.stop_training = True
        # best_weights is still None when no epoch has improved on the initial
        # infinity (e.g. the loss was NaN from the first epoch); there is
        # nothing to restore in that case.
        if self.best_weights is not None:
          print('Restoring model weights from the end of the best epoch.')
          self.model.set_weights(self.best_weights)

  def on_train_end(self, logs=None):
    """Reports the (1-based) epoch at which training was stopped early."""
    if self.stopped_epoch > 0:
      print('Epoch %05d: early stopping' % (self.stopped_epoch + 1))

In [11]:
# Train a fresh model for up to 30 epochs; EarlyStoppingAtMinLoss ends the run
# as soon as the epoch loss stops decreasing.
model = get_model()
_ = model.fit(
    x_train,
    y_train,
    batch_size=64,
    steps_per_epoch=5,
    epochs=30,
    verbose=0,  # Keep the output limited to what the callbacks print.
    callbacks=[LossAndErrorPrintingCallback(), EarlyStoppingAtMinLoss()],
)

For batch 0, loss is   34.24.
For batch 1, loss is  883.28.
For batch 2, loss is   29.83.
For batch 3, loss is    9.07.
For batch 4, loss is    7.21.
The average loss for epoch 0 is  192.73 and mean absolute error is    8.41.


For batch 0, loss is    6.37.
For batch 1, loss is    5.81.
For batch 2, loss is    5.43.
For batch 3, loss is    5.16.
For batch 4, loss is    4.97.
The average loss for epoch 1 is    5.55 and mean absolute error is    1.95.


For batch 0, loss is    4.82.
For batch 1, loss is    4.71.
For batch 2, loss is    4.62.
For batch 3, loss is    4.54.
For batch 4, loss is    4.47.
The average loss for epoch 2 is    4.63 and mean absolute error is    1.75.


For batch 0, loss is    4.41.
For batch 1, loss is    4.35.
For batch 2, loss is    4.30.
For batch 3, loss is    4.25.
For batch 4, loss is    4.20.
The average loss for epoch 3 is    4.30 and mean absolute error is    1.66.


For batch 0, loss is    4.16.
For batch 1, loss is    4.13.
For batch 2, loss is    4.13.
For batch 3, loss is    4.35.
For batch 4, loss is    6.08.
The average loss for epoch 4 is    4.57 and mean absolute error is    1.69.
Restoring model weights from the end of the best epoch.
Epoch 00005: early stopping


- The Keras backend exposes the get_value/set_value API, which can be used to get/set the variables of a model.

In [12]:
class LearningRateScheduler(tf.keras.callbacks.Callback):
  """Callback that updates the optimizer's learning rate before each epoch.

  Arguments:
      schedule: a function called as `schedule(epoch, lr)` with the epoch
          index (integer, indexed from 0) and the current learning rate
          (float); it returns the learning rate to use for that epoch.
  """

  def __init__(self, schedule):
    super(LearningRateScheduler, self).__init__()
    self.schedule = schedule

  def on_epoch_begin(self, epoch, logs=None):
    """Applies `schedule` to the optimizer's learning rate and prints it.

    Raises:
        ValueError: if the model's optimizer has no `lr` attribute.
    """
    optimizer = self.model.optimizer
    if not hasattr(optimizer, 'lr'):
      raise ValueError('Optimizer must have a "lr" attribute.')
    backend = tf.keras.backend
    # Read the learning rate currently held by the optimizer.
    current_lr = float(backend.get_value(optimizer.lr))
    # Ask the schedule which learning rate this epoch should use.
    new_lr = self.schedule(epoch, current_lr)
    # Write it back before the epoch's first batch runs.
    backend.set_value(optimizer.lr, new_lr)
    print('\nEpoch %05d: Learning rate is %6.4f.' % (epoch, new_lr))

In [13]:
LR_SCHEDULE = [
    # Each entry is an (epoch to start, learning rate) tuple.
    (3, 0.05),
    (6, 0.01),
    (9, 0.005),
    (12, 0.001),
]

def lr_schedule(epoch, lr):
  """Returns the learning rate to use for `epoch`.

  Arguments:
      epoch: index of the epoch that is about to start.
      lr: the current learning rate.

  Returns:
      The rate listed in LR_SCHEDULE when `epoch` is exactly one of its start
      epochs; otherwise `lr` unchanged.
  """
  first_epoch = LR_SCHEDULE[0][0]
  last_epoch = LR_SCHEDULE[-1][0]
  # Epochs outside the range covered by the schedule keep the current rate.
  if not first_epoch <= epoch <= last_epoch:
    return lr
  # Inside the range only an exact match on a start epoch changes the rate.
  return next((rate for start, rate in LR_SCHEDULE if start == epoch), lr)

# Train a fresh model for 15 epochs so that every entry of the learning rate
# schedule is reached.
model = get_model()
_ = model.fit(
    x_train,
    y_train,
    batch_size=64,
    steps_per_epoch=5,
    epochs=15,
    verbose=0,  # Keep the output limited to what the callbacks print.
    callbacks=[LossAndErrorPrintingCallback(), LearningRateScheduler(lr_schedule)],
)


Epoch 00000: Learning rate is 0.1000.


For batch 0, loss is   31.62.
For batch 1, loss is  905.65.
For batch 2, loss is   28.07.
For batch 3, loss is    9.50.
For batch 4, loss is    7.65.
The average loss for epoch 0 is  196.50 and mean absolute error is    8.40.



Epoch 00001: Learning rate is 0.1000.
For batch 0, loss is    6.72.
For batch 1, loss is    6.09.
For batch 2, loss is    5.65.


For batch 3, loss is    5.34.
For batch 4, loss is    5.12.
The average loss for epoch 1 is    5.78 and mean absolute error is    2.00.

Epoch 00002: Learning rate is 0.1000.
For batch 0, loss is    4.95.
For batch 1, loss is    4.82.


For batch 2, loss is    4.72.
For batch 3, loss is    4.63.
For batch 4, loss is    4.55.
The average loss for epoch 2 is    4.73 and mean absolute error is    1.77.

Epoch 00003: Learning rate is 0.0500.
For batch 0, loss is    4.48.


For batch 1, loss is    4.45.
For batch 2, loss is    4.42.
For batch 3, loss is    4.39.
For batch 4, loss is    4.35.
The average loss for epoch 3 is    4.42 and mean absolute error is    1.69.

Epoch 00004: Learning rate is 0.0500.


For batch 0, loss is    4.32.
For batch 1, loss is    4.29.
For batch 2, loss is    4.27.
For batch 3, loss is    4.24.
For batch 4, loss is    4.21.
The average loss for epoch 4 is    4.27 and mean absolute error is    1.65.



Epoch 00005: Learning rate is 0.0500.
For batch 0, loss is    4.18.
For batch 1, loss is    4.15.
For batch 2, loss is    4.13.


For batch 3, loss is    4.10.
For batch 4, loss is    4.08.
The average loss for epoch 5 is    4.13 and mean absolute error is    1.61.

Epoch 00006: Learning rate is 0.0100.
For batch 0, loss is    4.05.
For batch 1, loss is    4.05.


For batch 2, loss is    4.04.
For batch 3, loss is    4.04.
For batch 4, loss is    4.03.
The average loss for epoch 6 is    4.04 and mean absolute error is    1.59.

Epoch 00007: Learning rate is 0.0100.
For batch 0, loss is    4.03.


For batch 1, loss is    4.02.
For batch 2, loss is    4.02.
For batch 3, loss is    4.01.
For batch 4, loss is    4.01.
The average loss for epoch 7 is    4.02 and mean absolute error is    1.58.

Epoch 00008: Learning rate is 0.0100.


For batch 0, loss is    4.00.
For batch 1, loss is    3.99.
For batch 2, loss is    3.99.
For batch 3, loss is    3.98.
For batch 4, loss is    3.97.
The average loss for epoch 8 is    3.99 and mean absolute error is    1.57.

Epoch 00009: Learning rate is 0.0050.


For batch 0, loss is    3.97.
For batch 1, loss is    3.96.
For batch 2, loss is    3.96.
For batch 3, loss is    3.95.
For batch 4, loss is    3.95.
The average loss for epoch 9 is    3.96 and mean absolute error is    1.56.

Epoch 00010: Learning rate is 0.0050.


For batch 0, loss is    3.95.
For batch 1, loss is    3.94.
For batch 2, loss is    3.94.
For batch 3, loss is    3.93.


For batch 4, loss is    3.93.
The average loss for epoch 10 is    3.94 and mean absolute error is    1.56.

Epoch 00011: Learning rate is 0.0050.
For batch 0, loss is    3.92.
For batch 1, loss is    3.92.
For batch 2, loss is    3.91.
For batch 3, loss is    3.91.


For batch 4, loss is    3.90.
The average loss for epoch 11 is    3.91 and mean absolute error is    1.55.

Epoch 00012: Learning rate is 0.0010.
For batch 0, loss is    3.90.
For batch 1, loss is    3.90.
For batch 2, loss is    3.90.


For batch 3, loss is    3.89.
For batch 4, loss is    3.89.
The average loss for epoch 12 is    3.90 and mean absolute error is    1.54.

Epoch 00013: Learning rate is 0.0010.
For batch 0, loss is    3.89.
For batch 1, loss is    3.89.


For batch 2, loss is    3.89.
For batch 3, loss is    3.89.
For batch 4, loss is    3.89.
The average loss for epoch 13 is    3.89 and mean absolute error is    1.54.

Epoch 00014: Learning rate is 0.0010.
For batch 0, loss is    3.88.


For batch 1, loss is    3.88.
For batch 2, loss is    3.88.
For batch 3, loss is    3.88.
For batch 4, loss is    3.88.
The average loss for epoch 14 is    3.88 and mean absolute error is    1.54.


- Applications of callbacks include logging to CSV, saving the model, visualizing on TensorBoard, etc.