# Deep Neural Network Classification on the MNIST Dataset

In [28]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

## Data Preprocessing

In [29]:
# Load MNIST through TensorFlow Datasets. `as_supervised=True` yields (image, label)
# pairs and `with_info=True` additionally returns the dataset metadata.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)

In [30]:
# Inspect the available splits and how many examples each one contains.
mnist_info.splits

{'train': <SplitInfo num_examples=60000, num_shards=1>,
 'test': <SplitInfo num_examples=10000, num_shards=1>}

In [31]:
# Pull the two splits out of the loaded dataset dictionary.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']


We shall take 10% of the training data to be our validation data.

In [32]:
# Size of the validation hold-out: 10% of the training examples, as an int64 tensor.
num_validation = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples,
    tf.int64,
)
num_validation

<tf.Tensor: shape=(), dtype=int64, numpy=6000>

In [33]:
# Number of test examples, as an int64 tensor (used later as the test batch size).
num_test = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

We scale the inputs so that training is numerically stable.\
Here every pixel value is divided by 255, so the inputs lie between 0 and 1.

In [34]:
def scale(image, label):
    """Cast the image to float32 and divide it by 255; the label is passed through unchanged.

    Returns:
        A (scaled_image, label) tuple.
    """
    scaled_image = tf.cast(image, tf.float32) / 255
    return scaled_image, label

In [35]:
# Apply the same scaling to both splits so test inputs match what the model was trained on.
scaled_data_train_and_validation = mnist_train.map(map_func=scale)
scaled_data_test = mnist_test.map(map_func=scale)

In [36]:
BUFFER_SIZE = 10000

# Shuffle ONCE, with a fixed order, before splitting. By default `shuffle` reshuffles
# every time the dataset is iterated, so `take`/`skip` would carve out a different
# subset on each pass and validation examples would leak into the training epochs.
shuffled_data_train_and_validation = scaled_data_train_and_validation.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# The first `num_validation` examples form the validation set; the rest is training data.
validation_data = shuffled_data_train_and_validation.take(num_validation)
train_data = shuffled_data_train_and_validation.skip(num_validation)

# Re-shuffle only the training portion so each epoch still sees a new example order.
train_data = train_data.shuffle(BUFFER_SIZE)

In [37]:
BATCH_SIZE = 100

# Validation and test are each served as one batch holding the whole split.
test_data = scaled_data_test.batch(num_test)
validation_data = validation_data.batch(num_validation)

# Training uses mini-batches of BATCH_SIZE examples.
train_data = train_data.batch(BATCH_SIZE)

# Pull the single validation batch out as (inputs, targets) tensors.
validation_inputs, validation_targets = next(iter(validation_data))

## Model

In [38]:
input_size = 784
output_size = 10
# Both hidden layers share one width here; this is a convenience, not a requirement.
hidden_layer_size = 50

# Feed-forward classifier: flatten -> two ReLU hidden layers -> softmax output.
model = tf.keras.Sequential([
    # Input layer: unroll each 28x28x1 image into a flat vector of 28*28*1 = 784 values
    # so that it can be fed to the dense layers.
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),

    # Hidden layers: each Dense computes activation(dot(input, weight) + bias).
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),

    # Output layer: `output_size` units with a softmax activation.
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

## Choosing Optimizer and Loss Function

In [39]:
# Adam optimizer with sparse categorical cross-entropy loss; accuracy is reported as a metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

## Training

In [41]:
NUM_EPOCHS = 10

# Train on the batched training set and evaluate on the held-out validation tensors
# after every epoch; verbose=2 prints one summary line per epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/10
540/540 - 1s - loss: 0.0846 - accuracy: 0.9744 - val_loss: 0.0946 - val_accuracy: 0.9717 - 1s/epoch - 3ms/step
Epoch 2/10
540/540 - 1s - loss: 0.0752 - accuracy: 0.9766 - val_loss: 0.0825 - val_accuracy: 0.9758 - 954ms/epoch - 2ms/step
Epoch 3/10
540/540 - 1s - loss: 0.0672 - accuracy: 0.9796 - val_loss: 0.0774 - val_accuracy: 0.9775 - 934ms/epoch - 2ms/step
Epoch 4/10
540/540 - 1s - loss: 0.0588 - accuracy: 0.9826 - val_loss: 0.0647 - val_accuracy: 0.9815 - 949ms/epoch - 2ms/step
Epoch 5/10
540/540 - 1s - loss: 0.0533 - accuracy: 0.9829 - val_loss: 0.0615 - val_accuracy: 0.9817 - 934ms/epoch - 2ms/step
Epoch 6/10
540/540 - 1s - loss: 0.0470 - accuracy: 0.9855 - val_loss: 0.0692 - val_accuracy: 0.9792 - 942ms/epoch - 2ms/step
Epoch 7/10
540/540 - 1s - loss: 0.0431 - accuracy: 0.9870 - val_loss: 0.0595 - val_accuracy: 0.9835 - 937ms/epoch - 2ms/step
Epoch 8/10
540/540 - 1s - loss: 0.0387 - accuracy: 0.9885 - val_loss: 0.0458 - val_accuracy: 0.9857 - 943ms/epoch - 2ms/step
Epo

<keras.callbacks.History at 0x2bdc81c7550>