## Import modules

In [None]:
import tensorflow as tf
import numpy as np

# Fix TensorFlow's global random seed so that re-running the notebook from a
# fresh kernel starts from the same random state.
tf.random.set_seed(0)

## Loading MNIST

As we did before, we will first load the MNIST dataset.

To use 2D convolutional filters, we keep each image as a 28x28 grid instead of flattening it into a vector, and only add a trailing channel axis.

In [None]:
# Prepare MNIST data.
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()


def _to_model_input(images):
    """Turn raw MNIST images into float32 tensors shaped [-1, 28, 28, 1].

    The uint8 pixel values (0 to 255) are converted to float32 and divided
    by 255 so they lie in [0, 1]; the reshape then appends a single-channel
    axis, which the convolutional layers expect.
    """
    scaled = tf.convert_to_tensor(images, dtype=tf.float32) / 255.
    return tf.reshape(scaled, [-1, 28, 28, 1])


# Apply the same preprocessing to both splits.
x_train = _to_model_input(x_train)
x_test = _to_model_input(x_test)

# Quick sanity report on what was loaded.
print("Image Shape: {}".format(x_train[0].shape))
print("Label Shape: {}".format(y_train[0].shape))
print()
print("Training Set:   {} samples".format(len(x_train)))
print("Test Set:       {} samples".format(len(x_test)))

## Model: Convolutional Neural network

### Building LeNet-5 with Convolution and Pooling operations

![](https://cdnpythonmachinelearning.azureedge.net/wp-content/uploads/2017/09/lenet-5-825x285.png?x64257)

- Conv2D: https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D
- MaxPooling2D: https://www.tensorflow.org/api_docs/python/tf/keras/layers/MaxPool2D

In [None]:
from tensorflow.keras import layers, Sequential

In [None]:
# LeNet-5 layer stack, listed in forward order. The shape comments assume a
# 28x28x1 input image.
lenet_layers = [
    # C1: 5x5 convolution, 6 filters, no padding -> 24x24x6
    layers.Conv2D(6, (5, 5), padding='VALID'),
    layers.Activation('relu'),
    # S2: 2x2 max pooling, stride 2 -> 12x12x6
    layers.MaxPooling2D(pool_size=(2, 2), strides=2),
    # C3: 5x5 convolution, 16 filters, no padding -> 8x8x16
    layers.Conv2D(16, (5, 5), padding='VALID'),
    layers.Activation('relu'),
    # S4: 2x2 max pooling, stride 2 -> 4x4x16
    layers.MaxPooling2D(pool_size=(2, 2), strides=2),
    # Flatten 4x4x16 feature maps into a 256-element vector.
    layers.Flatten(),
    # Fully connected head; the last layer emits 10 raw scores (no softmax).
    layers.Dense(120, activation='relu'),
    layers.Dense(84, activation='relu'),
    layers.Dense(10),
]
lenet = Sequential(lenet_layers)

# Build with batches of 28x28 single-channel images; the batch dimension is
# left unspecified (None).
lenet.build(input_shape=(None, 28, 28, 1))
lenet.summary()

## Loss function

In [None]:
# Cross-Entropy loss function.
def cross_entropy(y_pred, y_true):
    """Return the mean sparse softmax cross-entropy over a batch.

    Args:
        y_pred: unnormalized class scores (logits); softmax is applied
            inside the TensorFlow loss op, not by the caller.
        y_true: integer class labels; cast to int64 before use.

    Returns:
        A scalar tensor: the per-example losses averaged over the batch.
    """
    labels = tf.cast(y_true, tf.int64)
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=y_pred)
    return tf.reduce_mean(per_example_loss)

## Optimizer and Batch Generator

In [None]:
# Parameters for Training
learning_rate = 0.1
batch_size = 256
# NOTE: despite its name, this is the number of full passes (epochs) over the
# training set, because it is handed to `Dataset.repeat` below.
training_steps = 10

# Stochastic gradient descent optimizer.
optimizer = tf.optimizers.SGD(learning_rate)

# Use tf.data API to shuffle and batch data.
# The shuffle buffer covers the whole training set so every pass is a uniform
# permutation. Shuffling before `repeat` keeps passes separate, and by default
# each pass gets a fresh order.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
training_batch = (
    train_data
    .shuffle(buffer_size=len(x_train))
    .batch(batch_size)
    .repeat(training_steps)
)

## Test Metric: Accuracy

In [None]:
# Accuracy metric.
def accuracy(y_pred, y_true):
    """Return the fraction of samples whose top-scoring class matches the label.

    Args:
        y_pred: per-class scores; the predicted class is the argmax along
            axis 1.
        y_true: integer class labels; cast to int64 before comparison.

    Returns:
        A scalar float32 tensor: the mean of the per-sample match indicators.
    """
    predicted_classes = tf.argmax(y_pred, 1)
    expected_classes = tf.cast(y_true, tf.int64)
    matches = tf.equal(predicted_classes, expected_classes)
    return tf.reduce_mean(tf.cast(matches, tf.float32))

## Training

In [None]:
# Training parameters.
# A progress line is printed every `display_step` optimizer steps.
display_step = 500

# Consume every batch yielded by `training_batch`, counting steps from 1.
for step, (batch_x, batch_y) in enumerate(training_batch, start=1):
    # Record the forward pass on a tape so the loss can be differentiated.
    with tf.GradientTape() as g:
        pred = lenet(batch_x)
        loss = cross_entropy(pred, batch_y)

    # Differentiate the loss w.r.t. every trainable weight, then take one
    # optimizer step.
    trainable_variables = lenet.trainable_variables
    gradients = g.gradient(loss, trainable_variables)
    optimizer.apply_gradients(zip(gradients, trainable_variables))

    # Skip reporting except on every `display_step`-th step.
    if step % display_step != 0:
        continue

    # Re-run the forward pass so the reported numbers reflect the weights
    # *after* this step's update.
    pred = lenet(batch_x)
    loss = cross_entropy(pred, batch_y)
    acc = accuracy(pred, batch_y)
    print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

## Test

In [None]:
# Evaluate the trained model on the held-out MNIST test set in a single
# forward pass over all test images.
pred = lenet(x_test)
test_accuracy = accuracy(pred, y_test)
print("Test Accuracy: %f" % test_accuracy)

## Visualize the prediction results

In [None]:
# Visualize predictions.
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Run the model on the first few images of the test set.
n_images = 5
test_images = x_test[:n_images]
predictions = lenet(test_images)

# The predicted digit for each image is the index of its highest score.
predicted_digits = np.argmax(predictions.numpy(), axis=1)

# Show each image with the model's prediction as the title.
for image, digit in zip(test_images, predicted_digits):
    title = "Model prediction: %i" % digit
    # Drop the channel axis so matplotlib receives a plain 28x28 array.
    plt.imshow(np.reshape(image, [28, 28]), cmap='gray')
    plt.title(title)
    plt.show()