# __MNIST Classification__

## __Importing the relevant libraries__

In [3]:
import numpy as np
import tensorflow as tf

# TensorFlow includes a data provider for MNIST.
# It comes with the 'tensorflow-datasets' module (which includes a large number of other datasets).

# conda install tensorflow-datasets
# conda install -c conda-forge ipywidgets
# jupyter nbextension enable --py widgetsnbextension
# then restart jupyter

import tensorflow_datasets as tfds

# these datasets will be stored in C:\Users\*USERNAME*\tensorflow_datasets\...

## __load and preprocess the data__

In [4]:
# tfds.load loads a dataset, downloading it first if this is the first time we use it.
# We are interested in MNIST; the dataset name is the only mandatory argument.
#
#   with_info=True      -> also return metadata about the dataset; we keep it in mnist_info
#                          and use it below to read the number of examples per split.
#   as_supervised=True  -> each element is a 2-tuple (input, target);
#                          with as_supervised=False each element would be a dictionary.
#                          We prefer to have inputs and targets separated.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)



In [5]:
# the loaded object is indexed by split name, so we pull out each split separately.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

In [7]:
# The dataset ships with train and test splits only, so we carve a validation set
# out of the training split. Its size is 10% of the training examples, and the
# example count comes from mnist_info.
train_example_count = mnist_info.splits['train'].num_examples

# 0.1 * count is a float; cast it to an integer, as a float may cause an error along the way.
num_validation_samples = tf.cast(0.1 * train_example_count, tf.int64)

In [8]:
# keep the number of test samples in a dedicated variable (instead of reaching into mnist_info each time),
# stored as an int64 tensor so it has the same type as num_validation_samples.
test_example_count = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(test_example_count, tf.int64)

In [17]:
# Scaling the inputs tends to make training more numerically stable.
# Here we simply want every input value to lie between 0 and 1.
def scale(image, label):
    """Rescale one MNIST image to the [0, 1] range; the label is passed through unchanged.

    Args:
        image: pixel intensities, expected in the range 0..255 (256 shades of grey).
        label: the target that belongs to the image.

    Returns:
        A tuple (image as float32 divided by 255, label).
    """
    # cast first so the division is a floating-point division
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label


# .map() applies a custom transformation to every element of a dataset.
# The validation data will later be taken from mnist_train, so this scaled
# dataset still contains both the training and the validation samples.
scaled_train_and_validation_data = mnist_train.map(scale)

# the test data gets the same scaling so it has the same magnitude as train and validation.
# (it is batched later, in the batching cell.)
test_data = mnist_test.map(scale)

In [18]:
# let's also shuffle the data

BUFFER_SIZE = 10000
# this BUFFER_SIZE parameter is here for cases when we're dealing with enormous datasets
# then we can't shuffle the whole dataset in one go because we can't fit it all in memory
# so instead TF only stores BUFFER_SIZE samples in memory at a time and shuffles them
# if BUFFER_SIZE=1 => no shuffling will actually happen
# if BUFFER_SIZE >= num samples => shuffling is uniform
# BUFFER_SIZE in between - a computational optimization to approximate uniform shuffling

# fixed seed so the train/validation split is reproducible across kernel restarts
SHUFFLE_SEED = 42

# IMPORTANT: by default .shuffle() reshuffles every time the dataset is iterated.
# .take() / .skip() below would then select a *different* set of samples on every pass,
# so samples used for validation would leak into the training data in later epochs.
# reshuffle_each_iteration=False keeps the order (and therefore the split) fixed.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE,
    seed=SHUFFLE_SEED,
    reshuffle_each_iteration=False,
)

# our validation data would be equal to 10% of the training set.
# we use the .take() method to take that many samples from the front of the shuffled dataset
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)

# the train_data is everything else, so we skip the first num_validation_samples in the dataset.
# the split itself is now fixed, so we add a second shuffle on the training part only;
# this one does reshuffle on every epoch, which is what we want for mini-batch training.
train_data = shuffled_train_and_validation_data.skip(num_validation_samples).shuffle(BUFFER_SIZE)

In [19]:
# mini-batch size used for training
BATCH_SIZE = 100

# Only the training data is split into mini-batches, because that is the only
# set we back-propagate on.
train_data = train_data.batch(BATCH_SIZE)

# Validation and test data are each put into one single batch that holds the whole set;
# batching them gives them the same batched form as the training data.
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

# pull that single validation batch out of the dataset as an (inputs, targets) pair.
validation_batches = iter(validation_data)
validation_inputs, validation_targets = next(validation_batches)

## __Model__

### __Outline the model__

In [20]:
input_size = 784
output_size = 10
hidden_layer_size = 50   # both hidden layers share this width; that is a choice, not a necessity.

# The network is described as an ordered list of layers.
network_layers = [
    # input layer: each observation is a 28x28x1 tensor (rank 3);
    # Flatten unrolls it into a vector of 28*28*1 = 784 values.
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),

    # Dense implements: output = activation(dot(input, weight) + bias).
    # Its most important arguments are the layer width and the activation function.
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),   # 1st hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),   # 2nd hidden layer

    # output layer: another Dense layer, activated with softmax.
    tf.keras.layers.Dense(output_size, activation='softmax'),
]

# stack the layers into the final model
model = tf.keras.Sequential(network_layers)

### __Choose the optimizer and the loss function__

In [21]:
# choose the optimizer and the loss, and list the metrics to report at each iteration.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

### __Training__

In [24]:
NUM_EPOCHS = 5

# train on the mini-batched training data and report on the validation batch after every epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    validation_steps=1,   # how many batches of validation samples to use in one epoch.
    verbose=2,
)

Epoch 1/5
540/540 - 4s - loss: 0.0877 - accuracy: 0.9735 - val_loss: 0.0956 - val_accuracy: 0.9728
Epoch 2/5
540/540 - 4s - loss: 0.0767 - accuracy: 0.9767 - val_loss: 0.0854 - val_accuracy: 0.9738
Epoch 3/5
540/540 - 4s - loss: 0.0687 - accuracy: 0.9791 - val_loss: 0.0869 - val_accuracy: 0.9767
Epoch 4/5
540/540 - 4s - loss: 0.0617 - accuracy: 0.9810 - val_loss: 0.0689 - val_accuracy: 0.9790
Epoch 5/5
540/540 - 4s - loss: 0.0537 - accuracy: 0.9833 - val_loss: 0.0655 - val_accuracy: 0.9810


<tensorflow.python.keras.callbacks.History at 0x259261a4208>

## __Test the model__

In [25]:
# evaluate on the test data; the result unpacks into the loss and the accuracy metric
# that were set in model.compile.
test_metrics = model.evaluate(test_data)
test_loss, test_accuracy = test_metrics

      1/Unknown - 1s 1s/step - loss: 0.1021 - accuracy: 0.9704

In [26]:
# Report the test results with two decimals; the accuracy is shown as a percentage.
report_template = 'Test loss: {0:.2f}. Test accuracy: {1:.2f}%'
print(report_template.format(test_loss, test_accuracy*100.))

Test loss: 0.10. Test accuracy: 97.04%
