## Import pacakages

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

## Data

In [2]:
mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)

In [3]:
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

# either count the # of train samples or use the mnist_info
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

# dataset.map(*function*) applies a custom transformation to a given dataset. It takes as input a function which determines the transformation

def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255.
    return image, label


scaled_train_and_validation_data = mnist_train.map(scale)

test_data = mnist_test.map(scale)

# if BUFFER_SIZE is 1, no shuffling will happen
# if BUFFER_SIZE >= num_samples, shuffling will happen at once (uniformly)
# if 1 < BUFFER_SIZE < num_samples, we will be optimizing the computational power
BUFFER_SIZE = 10000

shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(BUFFER_SIZE)

validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

BATCH_SIZE = 100 # 1 #10000 # 100

train_data = train_data.batch(BATCH_SIZE)
validation_data = validation_data.batch(num_validation_samples)

test_data = test_data.batch(num_test_samples)

validation_inputs, validation_targets = next(iter(validation_data))

## Model

outline the model

In [4]:
input_size = 784
output_size = 10
hidden_layer_size = 200 #100 #50


model = tf.keras.Sequential([
                            tf.keras.layers.Flatten(input_shape=(28,28,1)),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'), #'sigmoid'), #'relu'), # 1st hidden layer
                            tf.keras.layers.Dense(hidden_layer_size, activation='tanh'), # 'tanh'), #'sigmoid'), #'relu'), # 2nd hidden layer
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='tanh'), # 'tanh'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(output_size, activation='softmax')
                            ])

## Choose the optimizer and loss function

In [5]:
# binary_crossentropy: binary encoding
# categorical_crossentropy: expects targets have been one-hot encoded
# sparse_categorical_crossentropy: applies one-hot encoding
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

## Training

In [6]:
# At the beginning of each epoch, the training loss will be set to 0
# The algorithm will iterate over a present number of batches, all from train_data
# The weights and biases will be updates as many times as there are batches
# We will get avalue for the loss function, indicating how the training is going
# We will also see a training accuracy
# At the end of the epoch, the algorithm will forward propagate the whole validation set
# When we reach the maximum number of epochs the training will be over

NUM_EPOCHS = 5
STEPS = num_validation_samples / BATCH_SIZE

model.fit(train_data, epochs = NUM_EPOCHS, validation_data=(validation_inputs, validation_targets), validation_steps=STEPS, verbose=2)

Epoch 1/5
540/540 - 24s - loss: 0.2468 - accuracy: 0.9252 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 18s - loss: 0.1074 - accuracy: 0.9675 - val_loss: 0.0996 - val_accuracy: 0.9705
Epoch 3/5
540/540 - 21s - loss: 0.0772 - accuracy: 0.9761 - val_loss: 0.0731 - val_accuracy: 0.9772
Epoch 4/5
540/540 - 16s - loss: 0.0587 - accuracy: 0.9817 - val_loss: 0.0565 - val_accuracy: 0.9840
Epoch 5/5
540/540 - 15s - loss: 0.0489 - accuracy: 0.9844 - val_loss: 0.0535 - val_accuracy: 0.9842


<tensorflow.python.keras.callbacks.History at 0x211d43613c8>

## Test the model

In [7]:
test_loss, test_accuracy = model.evaluate(test_data)



In [8]:
print('Test Loss: {0:.2f}. Test Accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100))

Test Loss: 0.10. Test Accuracy: 97.48%
