# The MNIST Action Plan
1. Outline the model and choose the activation functions we want to employ
2. Describe the placeholders, variables, and the related operations
3. Choose the appropriate advanced optimizers
4. Split the dataset into batches for faster learning
5. Initialize the variables
6. Make it learn
7. Test the accuracy of the model

# Deep NN for MNIST Classification

### Import relevant packages

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from the 'MNIST_data/' directory. one_hot=True asks for the
# labels as one-hot vectors, which is the format the `targets` placeholder
# and the argmax-based accuracy further down expect.
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
# The dataset has been split into training, validation and test
# (used below as mnist.train, mnist.validation and mnist.test)
# The data has also been preprocessed

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


### Outline the model

In [2]:
# Start from an empty default graph so that variables left over from a
# previous run of this cell do not collide with the ones declared below
tf.compat.v1.reset_default_graph()

# Layer widths: input features, units per hidden layer, output classes
input_size = 784
hidden_size = 100
output_size = 10

# Placeholders are fed at run time through feed_dict; None leaves the
# batch dimension open so any number of examples can be passed in
inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, input_size])
targets = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, output_size])

# --- Hidden layer 1: linear combination followed by ReLU ---
# get_variable(name, shape) declares a variable. With no initializer
# given, the default is Xavier (Glorot)
weights_1 = tf.compat.v1.get_variable(name='weights_1', shape=[input_size, hidden_size])
biases_1 = tf.compat.v1.get_variable(name='biases_1', shape=[hidden_size])
linear_1 = tf.matmul(inputs, weights_1) + biases_1
# tf.nn holds neural network building blocks, including the most
# commonly used activation functions
outputs_1 = tf.nn.relu(linear_1)

# --- Hidden layer 2: same structure, fed by the first hidden layer ---
weights_2 = tf.compat.v1.get_variable(name='weights_2', shape=[hidden_size, hidden_size])
biases_2 = tf.compat.v1.get_variable(name='biases_2', shape=[hidden_size])
linear_2 = tf.matmul(outputs_1, weights_2) + biases_2
outputs_2 = tf.nn.relu(linear_2)

# --- Output layer: linear only ---
weights_3 = tf.compat.v1.get_variable(name='weights_3', shape=[hidden_size, output_size])
biases_3 = tf.compat.v1.get_variable(name='biases_3', shape=[output_size])

# No activation is applied here: it is common practice to fold the final
# activation into the loss, so `outputs` holds the raw scores
outputs = tf.matmul(outputs_2, weights_3) + biases_3

In [3]:
# Objective function
# softmax_cross_entropy_with_logits_v2(labels, logits) applies a softmax
# activation to the logits and calculates the cross-entropy loss against
# the labels. It replaces the deprecated non-v2 function, which emits an
# "Instructions for updating" warning pointing at this variant.
# The v2 op can also backpropagate into `labels`; that makes no difference
# here because `targets` is a placeholder fed with data, so there are no
# trainable variables behind it.
loss = tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(labels=targets, logits=outputs)

# Reduce the loss tensor to a single scalar: the mean over the batch
mean_loss = tf.reduce_mean(loss)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [4]:
# Optimization
# Build an Adam optimizer with a learning rate of 0.001, then ask it for
# the op that minimizes mean_loss. Running `optimize` in a session
# performs one update step.
adam = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001)
optimize = adam.minimize(mean_loss)

In [5]:
# Prediction Accuracy
# Index of the largest score per example (axis=1) on both sides: the
# model's pick and the position of the largest entry in the target row
predicted_class = tf.argmax(outputs, axis=1)
true_class = tf.argmax(targets, axis=1)

# Boolean tensor: True wherever the prediction matches the target
out_equals_target = tf.equal(predicted_class, true_class)

# Cast the booleans to 0./1. and average them to get the share of hits
accuracy = tf.reduce_mean(tf.cast(out_equals_target, tf.float32))

In [6]:
# Prepare for execution
# Open the session that the following cells use through sess.run(...) to
# initialize the variables, train the model and evaluate it.
# NOTE(review): the session is never closed in the visible cells; call
# sess.close() once the notebook is done with it.
sess = tf.compat.v1.InteractiveSession()

In [7]:
# Initializing variables
# Build the op that initializes every variable declared in the model
# cell (weights_* and biases_*), then execute it in the open session.
# Re-running this cell resets the variables, discarding any training
# done so far.
init = tf.compat.v1.global_variables_initializer()
sess.run(init)

In [8]:
# Batching
batch_size = 100

# Number of batches per epoch. Integer division: a trailing partial batch
# (if any) is not counted. Uses the public num_examples property rather
# than the private _num_examples attribute.
num_batches = mnist.train.num_examples // batch_size

# Early stopping
# Upper bound on the number of epochs; training ends sooner if the
# validation loss goes up
max_epochs = 15

# Validation loss of the previous epoch. Starting at +infinity guarantees
# that the first epoch can never trigger early stopping, whatever the
# scale of the loss (no arbitrary "large enough" sentinel needed).
prev_val_loss = float('inf')

In [9]:
# Make it learn

# The validation set is the same in every epoch, so take it once, before
# the loop, from the dataset's public images/labels arrays instead of
# drawing a full-size "batch" with next_batch() at the end of each epoch
val_inputs, val_targets = mnist.validation.images, mnist.validation.labels

# Create a loop for the epochs. e is a variable that automatically
# starts from 0
for e in range(max_epochs):

    # Keep track of sum of batch losses in the epoch
    epoch_loss = 0.

    # Iterate over the batches in this epoch
    for b in range(num_batches):
        input_batch, target_batch = mnist.train.next_batch(batch_size)

        # Run the optimization step and get the mean loss for
        # this batch. Feed it with the inputs and targets we just
        # got from the train set
        _, batch_loss = sess.run([optimize, mean_loss],
                                 feed_dict={inputs: input_batch, targets: target_batch})

        # Increment the sum of the batch losses
        epoch_loss += batch_loss

    # Average batch loss: this is the training loss for the epoch
    epoch_loss /= num_batches

    # At the end of each epoch, get the val loss and accuracy.
    # Run without the optimization step (simply forward propagate)
    val_loss, val_accuracy = sess.run([mean_loss, accuracy],
                                      feed_dict={inputs: val_inputs, targets: val_targets})

    # Print stats for each epoch
    print('Epoch {0}. Mean loss: {1:.3f}. Validation loss: {2:.3f}. '
          'Validation accuracy: {3:.2f}%'.format(e + 1, epoch_loss, val_loss,
                                                 val_accuracy * 100.))

    # Trigger early stopping if val_loss increases. The weights are left
    # as they are after this (slightly worse) epoch; nothing is restored.
    if val_loss > prev_val_loss:
        break

    # Store this epoch's val_loss to be used as prev_val_loss
    prev_val_loss = val_loss


# What does the inside of the 2nd for loop do?
# 1. Loads 100 inputs and targets (batch_size=100)
# 2. Optimizes the algorithm and calculates the batch loss
# 3. Records the loss for the iteration
# 4. Starts with the next 100 inputs and targets
# 5. Stops when the training set is exhausted

print('End of training')

Epoch 1. Mean loss: 0.335. Validation loss: 0.158. Validation accuracy: 95.58%
Epoch 2. Mean loss: 0.141. Validation loss: 0.114. Validation accuracy: 96.80%
Epoch 3. Mean loss: 0.096. Validation loss: 0.098. Validation accuracy: 97.20%
Epoch 4. Mean loss: 0.074. Validation loss: 0.084. Validation accuracy: 97.52%
Epoch 5. Mean loss: 0.059. Validation loss: 0.084. Validation accuracy: 97.36%
Epoch 6. Mean loss: 0.047. Validation loss: 0.087. Validation accuracy: 97.46%
End of training


### Test

In [10]:
# Evaluate on the whole test set in one forward pass. The full set is
# read from the dataset's public images/labels arrays; there is no need
# to go through next_batch() with the private _num_examples attribute.
input_batch, target_batch = mnist.test.images, mnist.test.labels

# Only `accuracy` is fetched, so the optimizer is not run and the
# weights are not updated by this evaluation
test_accuracy = sess.run([accuracy],
                         feed_dict={inputs: input_batch, targets: target_batch})

# sess.run was given a one-element list, so it returns a one-element
# list; take the value out and convert the fraction to a percentage
test_accuracy_percent = test_accuracy[0] * 100

print('Test accuracy: {0:.2f}%'.format(test_accuracy_percent))

Test accuracy: 97.10%
