In [18]:
import tensorflow as tf

In [31]:
# Load the MNIST digit images and labels (downloaded/cached by Keras)
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Scale the 8-bit pixel intensities into the [0, 1] range
x_train, x_test = x_train / 255.0, x_test / 255.0

# Add a channels dimension: (N, 28, 28) -> (N, 28, 28, 1)
x_train, x_test = x_train[..., tf.newaxis].astype("float32"), x_test[..., tf.newaxis].astype("float32")
# Cast each label array from its own split. The training labels must come from
# y_train; deriving them from x_test would replace them with test images.
y_train, y_test = y_train.astype("float32"), y_test.astype("float32")

# Every image needs exactly one label, otherwise the datasets cannot be paired
assert len(x_train) == len(y_train), "training images/labels are misaligned"
assert len(x_test) == len(y_test), "test images/labels are misaligned"

In [20]:
# Wrap the arrays in tf.data pipelines. The training pipeline is shuffled with
# a 10000-element buffer; both pipelines yield batches of 32 examples.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)
    .batch(32)
)

test_ds = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(32)
)

In [36]:
# Define the neural network: a fully connected classifier over flattened images
model = tf.keras.models.Sequential([
    # Input layer: reshape each image into a flat vector. The declared shape
    # includes the trailing channels axis that preprocessing adds to the images.
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    # Hidden layers: linear transform followed by ELU, He-uniform initial weights
    tf.keras.layers.Dense(512, activation='elu', kernel_initializer='he_uniform'),
    tf.keras.layers.Dense(512, activation='elu', kernel_initializer='he_uniform'),
    # Dropout: randomly zeroes inputs at a rate of 0.2 during training to reduce overfitting
    tf.keras.layers.Dropout(0.2),
    # Output layer: 10 raw scores (logits); no activation is applied here
    tf.keras.layers.Dense(10),
])

In [27]:
# Cross-entropy loss over integer class labels; from_logits=True tells it that
# the model outputs are raw scores rather than probabilities
cross_loss = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
)

In [23]:
# Define the optimizer, in this case AdaGrad.
# Uses the stable `tf.keras.optimizers.Adagrad` entry point rather than the
# `experimental` namespace, which newer TensorFlow/Keras releases no longer ship.
adagrad = tf.keras.optimizers.Adagrad(learning_rate=0.01, epsilon=1e-4)

In [24]:
# Running metrics: each object accumulates over every batch it is fed until it
# is reset, so one value per epoch can be read from it.
train_loss = tf.keras.metrics.Mean(name="train_loss")
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name="train_accuracy",
)

test_loss = tf.keras.metrics.Mean(name="test_loss")
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name="test_accuracy",
)

In [28]:
# Define our training and testing steps
@tf.function
def train_step(images, labels):
  """Run one optimization step on a batch and record its loss and accuracy.

  Args:
    images: batch of input images passed to `model`.
    labels: class labels for `images`.
  """
  with tf.GradientTape() as tape:
    # training=True runs the model in training mode (e.g. Dropout is active)
    logits = model(images, training=True)
    batch_loss = cross_loss(labels, logits)

  # Back-propagate and let AdaGrad update every trainable weight
  weights = model.trainable_variables
  grads = tape.gradient(batch_loss, weights)
  adagrad.apply_gradients(zip(grads, weights))

  # Fold this batch into the running training metrics
  train_loss.update_state(batch_loss)
  train_accuracy.update_state(labels, logits)

@tf.function
def test_step(images, labels):
  """Evaluate the model on one batch and record its loss and accuracy.

  Args:
    images: batch of input images passed to `model`.
    labels: class labels for `images`.
  """
  # training=False runs the model in inference mode; no weights are updated here
  logits = model(images, training=False)
  batch_loss = cross_loss(labels, logits)

  # Fold this batch into the running test metrics
  test_loss.update_state(batch_loss)
  test_accuracy.update_state(labels, logits)

In [37]:
# Training the model
#
# NOTE(review): run this on a fresh kernel, top to bottom. `train_step` and
# `test_step` are tf.functions that look up the global `model` when they are
# traced, so re-running the model cell without re-running the cell that
# defines them likely keeps training the previous model -- confirm.

EPOCHS = 5

for epoch in range(EPOCHS):
  # Reset the metrics at the start of each epoch so the reported values cover
  # that epoch only. `reset_state()` replaces the deprecated `reset_states()`.
  for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
    metric.reset_state()

  # One full pass over the training batches
  for images, labels in train_ds:
    train_step(images, labels)

  # Evaluate on the test batches after the pass
  for test_images, test_labels in test_ds:
    test_step(test_images, test_labels)

  print(
    f'Epoch {epoch + 1}, '
    f'Loss: {train_loss.result()}, '
    f'Accuracy: {train_accuracy.result() * 100}, '
    f'Test Loss: {test_loss.result()}, '
    f'Test Accuracy: {test_accuracy.result() * 100}'
  )

Epoch 1, Loss: 0.06770087033510208, Accuracy: 98.0183334350586, Test Loss: 0.08478207886219025, Test Accuracy: 97.43999481201172
Epoch 2, Loss: 0.06400670111179352, Accuracy: 98.02667236328125, Test Loss: 0.08204759657382965, Test Accuracy: 97.53999328613281
Epoch 3, Loss: 0.061353616416454315, Accuracy: 98.15833282470703, Test Loss: 0.0816703587770462, Test Accuracy: 97.53999328613281
Epoch 4, Loss: 0.05885535478591919, Accuracy: 98.24500274658203, Test Loss: 0.07855336368083954, Test Accuracy: 97.6199951171875
Epoch 5, Loss: 0.05487972870469093, Accuracy: 98.36666870117188, Test Loss: 0.07714438438415527, Test Accuracy: 97.56999969482422
