### Import the relevant packages

In [4]:
import numpy as np
import tensorflow as tf
# Temporarily lower TensorFlow's log verbosity to ERROR while the MNIST helper is
# imported and the data is read, remembering the previous level so it can be restored.
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)

try:
    from tensorflow.examples.tutorials.mnist import input_data

    # Read the dataset from ./MNIST_data/ with one-hot encoded labels.
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    train_data = mnist.train.images  # Returns np.array
    train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
    eval_data = mnist.test.images  # Returns np.array
    eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)
finally:
    # Put the verbosity back even if the import or the read above fails, so a
    # failed load does not leave logging silenced for the rest of the kernel.
    tf.logging.set_verbosity(old_v)


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Outline the model

In [5]:
train_data.shape
# 55,000 training images, each stored as a flat vector of length 784

(55000, 784)

In [6]:
train_labels.shape
# One one-hot row of length 10 per training image (10 classes)

(55000, 10)

In [14]:
input_size = 784
output_size = 10
hidden_layer_size = 50
# Fixed seed so that a fresh "Restart & Run All" repeats the same random draws.
random_seed = 42

# The optimal hyperparameters are not known in advance, so this cell is meant to be
# re-run several times. Close the session left over from a previous run first:
# resetting the default graph while a session is still active is documented as
# undefined behaviour, and an unclosed InteractiveSession keeps holding its resources.
if "sess" in globals():
    sess.close()

# Clear every variable and op left from previous runs (reset the computational graph).
tf.reset_default_graph()

# The graph-level seed belongs to the default graph, so set it after the reset.
tf.set_random_seed(random_seed)
# NOTE(review): presumably mnist.train.next_batch shuffles through NumPy's global RNG,
# in which case this also fixes the mini-batch order - confirm against the MNIST helper.
np.random.seed(random_seed)

# Placeholders fed at run time: a batch of flattened images and their one-hot targets.
# The leading None leaves the batch size open.
inputs = tf.placeholder(tf.float32, [None, input_size])
targets = tf.placeholder(tf.float32, [None, output_size])

# Hidden layer 1: linear combination of the inputs and weights plus a bias,
# passed through ReLU (activation functions live in the tf.nn module).
weights_1 = tf.get_variable("weights_1", shape=[input_size, hidden_layer_size])
biases_1 = tf.get_variable("biases_1", shape=[hidden_layer_size])
outputs_1 = tf.nn.relu(
    tf.matmul(inputs, weights_1) + biases_1
)

# Hidden layer 2: same construction, fed by the first hidden layer.
weights_2 = tf.get_variable("weights_2", shape=[hidden_layer_size, hidden_layer_size])
biases_2 = tf.get_variable("biases_2", shape=[hidden_layer_size])
outputs_2 = tf.nn.relu(
    tf.matmul(outputs_1, weights_2) + biases_2
)

# Output layer: no activation here, so `outputs` holds the logits
# (unscaled scores, one per class).
weights_3 = tf.get_variable("weights_3", shape=[hidden_layer_size, output_size])
biases_3 = tf.get_variable("biases_3", shape=[output_size])
outputs = tf.matmul(outputs_2, weights_3) + biases_3

# Loss: softmax cross-entropy between the logits and the one-hot targets,
# one value per example in the batch.
# NOTE(review): newer TF 1.x releases flag this op as deprecated in favour of
# softmax_cross_entropy_with_logits_v2 - confirm the installed version before switching.
loss = tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=outputs)

# Reduce the per-example losses to a single scalar (their mean) for the optimizer.
mean_loss = tf.reduce_mean(loss)

# Optimization method: Adam with a fixed learning rate.
# Running `optimize` performs one update step on mean_loss.
adam = tf.train.AdamOptimizer(learning_rate=0.001)
optimize = adam.minimize(mean_loss)

# Prediction accuracy: a prediction counts as correct when the index of the largest
# logit equals the index of the largest entry in the target row.
predicted_class = tf.argmax(outputs, 1)
target_class = tf.argmax(targets, 1)
out_equals_target = tf.equal(predicted_class, target_class)
# Cast the booleans to floats so their mean is the fraction of correct predictions.
accuracy = tf.reduce_mean(tf.cast(out_equals_target, tf.float32))

# Declare the session variable; later cells reuse `sess` for evaluation.
sess = tf.InteractiveSession()

# Initialize the variables. tf.get_variable was called without an explicit
# initializer, so TensorFlow's default (Xavier/Glorot) is what gets applied.
initializer = tf.global_variables_initializer()
sess.run(initializer)

# Batching
batche_size = 100
# Number of full batches per epoch. Use the public `num_examples` property
# rather than reaching into the private `_num_examples` attribute.
batches_number = mnist.train.num_examples // batche_size

# Early stopping
max_epochs = 15
# Keep track of the validation loss of the previous epoch.
# If the validation loss starts increasing, we want to trigger early stopping.
# It starts at an arbitrarily high number so the check cannot fire on the first epoch.
prev_validation_loss = 9999999.


# Train for at most max_epochs epochs, stopping early once the validation loss rises.
for epoch_counter in range(max_epochs):
    # Running sum of the batch losses within this epoch
    curr_epoch_loss = 0.

    # Iterate over the batches in this epoch
    for batche_counter in range(batches_number):
        # Next mini-batch of inputs and targets from the training split
        input_batch, target_batch = mnist.train.next_batch(batche_size)

        # Run one optimization step and fetch the mean loss of this batch
        _, batch_loss = sess.run([optimize, mean_loss],
                                 feed_dict={inputs: input_batch, targets: target_batch})
        curr_epoch_loss += batch_loss

    # curr_epoch_loss now holds the sum of all batch losses of the epoch; dividing by
    # the number of batches gives the average batch loss, used as the epoch's loss.
    curr_epoch_loss /= batches_number

    # At the end of each epoch, evaluate on the whole validation split.
    # The full arrays are read through the public `images` / `labels` properties
    # instead of asking next_batch for `_num_examples` items via a private attribute.
    input_batch, target_batch = mnist.validation.images, mnist.validation.labels

    # Forward propagation only: `optimize` is not fetched, so no weights are updated
    validation_loss, validation_accuracy = sess.run(
        [mean_loss, accuracy],
        feed_dict={inputs: input_batch, targets: target_batch})

    # Print statistics for the current epoch:
    #  - epoch_counter + 1 because the counter starts from 0
    #  - losses with 3 digits after the dot
    #  - accuracy as a percentage for easier interpretation
    print('Epoch {0}. Mean loss: {1:.3f}. Validation loss: {2:.3f}. Validation accuracy: {3:.2f}%'.format(
        epoch_counter + 1, curr_epoch_loss, validation_loss, validation_accuracy * 100.))

    # Trigger early stopping if the validation loss begins increasing.
    # Note: no checkpoint of the better previous epoch is kept, so the weights left
    # in the session are the ones from this last (worse) epoch.
    if validation_loss > prev_validation_loss:
        break

    # Store this epoch's validation loss for the comparison in the next iteration.
    prev_validation_loss = validation_loss

# Not essential, but it makes the end of training visible in the output section
# without having to check the kernel state.
print('End of training.')

Epoch 1. Mean loss: 0.422. Validation loss: 0.203. Validation accuracy: 94.12%
Epoch 2. Mean loss: 0.188. Validation loss: 0.151. Validation accuracy: 95.94%
Epoch 3. Mean loss: 0.142. Validation loss: 0.131. Validation accuracy: 96.20%
Epoch 4. Mean loss: 0.115. Validation loss: 0.113. Validation accuracy: 96.68%
Epoch 5. Mean loss: 0.096. Validation loss: 0.101. Validation accuracy: 97.04%
Epoch 6. Mean loss: 0.081. Validation loss: 0.095. Validation accuracy: 97.28%
Epoch 7. Mean loss: 0.070. Validation loss: 0.099. Validation accuracy: 96.92%
End of training.


### Test

In [54]:
# Evaluate once on the complete test split (forward pass only; `optimize` is not run).
# The arrays are read through the public `images` / `labels` properties instead of
# requesting `_num_examples` items from next_batch via a private attribute.
input_batch, target_batch = mnist.test.images, mnist.test.labels
test_accuracy = sess.run([accuracy],
                         feed_dict={inputs: input_batch, targets: target_batch})

# sess.run was given a one-element fetch list, so the result is a one-element list.
test_accuracy_percent = test_accuracy[0] * 100.
print('Test accuracy: '+'{0:.2f}'.format(test_accuracy_percent)+'%')

Test accuracy: 96.83%
