In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Reset Keras' global state before building anything, so that re-running the
# notebook in the same kernel starts from a clean session.
keras.backend.clear_session()

In [3]:
# Build the classifier with the Keras functional API: a flat 784-feature
# input, two 64-unit ReLU hidden layers, and a 10-way softmax output.
inputs = keras.Input(shape=(784,), name='digits')
x = inputs
for hidden_name in ('dense_1', 'dense_2'):
    x = layers.Dense(64, activation='relu', name=hidden_name)(x)
outputs = layers.Dense(10, activation='softmax', name='predictions')(x)
model = keras.Model(inputs=inputs, outputs=outputs)

# Loss function. The output layer already applies softmax, so the loss is fed
# probabilities; `from_logits=False` is the default, spelled out for clarity.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=False)

# Optimizer: plain stochastic gradient descent with a fixed learning rate.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)

In [6]:
# Prepare the training dataset.
# Load the MNIST toy dataset (returned as NumPy arrays) for this example.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Flatten every image into a 784-element float32 vector (matching the model's
# input shape) and scale the pixel values by 1/255.
# `-1` lets NumPy infer the number of samples instead of hard-coding the
# split sizes, so the cell keeps working if a subset of the data is used.
x_train = x_train.reshape(-1, 784).astype('float32') / 255
x_test = x_test.reshape(-1, 784).astype('float32') / 255

# Labels are cast to float32 as well; the sparse categorical cross-entropy
# loss used for training accepts them in this form.
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')

# Wrap the training arrays in a tf.data pipeline: shuffle with a 1024-element
# buffer, then group into mini-batches of `batch_size` samples.
batch_size = 64
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

In [9]:
# Custom training loop: for every epoch, walk over the mini-batches of
# `train_dataset`, compute the loss under a GradientTape, and apply one SGD
# update per batch.
epochs = 3
for epoch in range(epochs):
    print('Start of epoch %d' % (epoch,))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):

        # Open a GradientTape to record the operations run
        # during the forward pass, which enables autodifferentiation.
        with tf.GradientTape() as tape:

            # Run the forward pass of the model. The operations it applies
            # to its inputs are recorded on the GradientTape.
            # The model ends in a softmax layer, so these are class
            # probabilities (not raw logits). `training=True` makes the
            # training-mode forward pass explicit; it has no effect on this
            # Dense-only model but is required once layers such as Dropout
            # or BatchNormalization are added.
            probs = model(x_batch_train, training=True)

            # Compute the loss value for this minibatch.
            loss_value = loss_fn(y_batch_train, probs)

        # Use the gradient tape to automatically retrieve
        # the gradients of the loss with respect to the trainable variables.
        grads = tape.gradient(loss_value, model.trainable_weights)

        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Log every 200 batches.
        if step % 200 == 0:
            print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
            # Use the dataset's actual batch size rather than a hard-coded 64
            # so the sample count stays correct if `batch_size` is changed.
            print('Seen so far: %s samples' % ((step + 1) * batch_size))

Start of epoch 0
Training loss (for one batch) at step 0: 1.0786077976226807
Seen so far: 64 samples
Training loss (for one batch) at step 200: 1.090922236442566
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 0.9967347979545593
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 0.7556844353675842
Seen so far: 38464 samples
Training loss (for one batch) at step 800: 0.8074716329574585
Seen so far: 51264 samples
Start of epoch 1
Training loss (for one batch) at step 0: 0.9331329464912415
Seen so far: 64 samples
Training loss (for one batch) at step 200: 0.6544872522354126
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 0.6331177949905396
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 0.7966447472572327
Seen so far: 38464 samples
Training loss (for one batch) at step 800: 0.7959169149398804
Seen so far: 51264 samples
Start of epoch 2
Training loss (for one batch) at step 0: 0.6346855163574219
Seen so fa