In [1]:
import numpy as np
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST data sets from the MNIST_data/ directory; one_hot=True requests
# one-hot encoded labels. The train / validation / test splits are used by the cells below.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
# Network dimensions: 784 input features per image, 10 output classes (one-hot targets).
input_size = 784
output_size = 10
hidden_layer_size = 500

# Reset the default computational graph so that variables left over from a previous
# run of this cell do not clash with the ones declared below.
tf.reset_default_graph()

# Placeholders are fed at run time; None leaves the batch dimension unspecified.
inputs = tf.placeholder(tf.float32, [None, input_size])
targets = tf.placeholder(tf.float32, [None, output_size])

# --- Hidden layers (3 stacked layers: ReLU -> tanh -> tanh) ---
# tf.get_variable declares a variable; when no initializer is given the default
# is Xavier (Glorot). tf.nn provides the commonly used activation functions.

# Layer 1: weights have shape k x m, i.e. input_size x hidden_layer_size. ReLU activation.
weights_1 = tf.get_variable('weights_1', [input_size, hidden_layer_size])
biases_1 = tf.get_variable('biases_1', [hidden_layer_size])
outputs_1 = tf.nn.relu(tf.matmul(inputs, weights_1) + biases_1)

# Layer 2: tanh activation applied to the linear combination of the previous
# layer's outputs and the weights, plus the biases.
weights_2 = tf.get_variable('weights_2', [hidden_layer_size, hidden_layer_size])
biases_2 = tf.get_variable('biases_2', [hidden_layer_size])
outputs_2 = tf.nn.tanh(tf.matmul(outputs_1, weights_2) + biases_2)

# Layer 3: tanh activation.
weights_3 = tf.get_variable('weights_3', [hidden_layer_size, hidden_layer_size])
biases_3 = tf.get_variable('biases_3', [hidden_layer_size])
outputs_3 = tf.nn.tanh(tf.matmul(outputs_2, weights_3) + biases_3)

# --- Output layer ---
# No activation here: these are raw logits. It is common practice to fold the
# final (softmax) activation into the loss function.
weights_4 = tf.get_variable('weights_4', [hidden_layer_size, output_size])
biases_4 = tf.get_variable('biases_4', [output_size])
outputs = tf.matmul(outputs_3, weights_4) + biases_4

# --- Loss and optimizer ---
# tf.nn.softmax_cross_entropy_with_logits applies a softmax activation and computes
# the cross-entropy loss in one numerically stable step; computing them separately
# can produce very small numbers that jeopardize the model.
loss = tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=targets)
mean_loss = tf.reduce_mean(loss)  # average the per-sample losses over the batch
optimize = tf.train.AdamOptimizer(learning_rate=0.001).minimize(mean_loss)

# --- Accuracy ---
# argmax along axis 1 gives, for every row, the index of the largest value; a
# prediction is correct when that index matches the index of the 1 in the one-hot target.
out_equals_target = tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1))
# Cast booleans to floats so that their mean is the fraction of correct predictions.
accuracy = tf.reduce_mean(tf.cast(out_equals_target, tf.float32))

# Create the session used to run the graph; `sess` is reused by the training
# loop below and by the test-evaluation cell.
sess = tf.InteractiveSession()
# Build the op that initializes all variables declared above, then run it once
# before any training step.
initializer = tf.global_variables_initializer()
sess.run(initializer)

# Mini-batch settings: batch_size = 1 would be SGD, batch_size = number of
# samples would be (full-batch) gradient descent.
batch_size = 1000
# Use the public num_examples property rather than the private _num_examples attribute.
batches_number = mnist.train.num_examples // batch_size

max_epochs = 50
# Start at +inf so early stopping can never be triggered on the first epoch.
prev_validation_loss = float('inf')

# The whole validation set is evaluated in a single pass after every epoch.
# Mean loss and accuracy do not depend on sample order, so the arrays are fed
# directly instead of being re-drawn (and reshuffled) through next_batch().
validation_inputs = mnist.validation.images
validation_targets = mnist.validation.labels

start_time = time.time()

for epoch_counter in range(max_epochs):

    start_time_epoch = time.time()
    curr_epoch_loss = 0.

    # Learning part: one optimization step per mini-batch.
    for batch_counter in range(batches_number):

        # next_batch comes with the MNIST data provider and serves the
        # training batches one after the other.
        input_batch, target_batch = mnist.train.next_batch(batch_size)

        _, batch_loss = sess.run([optimize, mean_loss],
            feed_dict={inputs: input_batch, targets: target_batch})

        curr_epoch_loss += batch_loss

    # Average training loss over the batches of this epoch.
    curr_epoch_loss /= batches_number

    # Validation part: forward pass only (the optimize op is not run).
    validation_loss, validation_accuracy = sess.run([mean_loss, accuracy],
        feed_dict={inputs: validation_inputs, targets: validation_targets})

    epoch_time = time.time() - start_time_epoch

    print('Epoch {0}. Mean loss: {1:.3f}. Validation loss: {2:.3f}. '
          'Validation accuracy: {3:.2f}% Epoch time: {4:.2f}s'.format(
              epoch_counter + 1,
              curr_epoch_loss,
              validation_loss,
              validation_accuracy * 100.,
              epoch_time))

    # Early stopping: stop as soon as the validation loss rises compared with
    # the previous epoch.
    if validation_loss > prev_validation_loss:
        break

    prev_validation_loss = validation_loss

total_training_time = time.time() - start_time

print('End of training. Total training time: {0:.2f}s'.format(total_training_time))

Epoch 1. Mean loss: 0.420. Validation loss: 0.162. Validation accuracy: 95.20% Epoch time: 7.58s
Epoch 2. Mean loss: 0.131. Validation loss: 0.102. Validation accuracy: 96.94% Epoch time: 7.58s
Epoch 3. Mean loss: 0.084. Validation loss: 0.079. Validation accuracy: 97.54% Epoch time: 7.60s
Epoch 4. Mean loss: 0.053. Validation loss: 0.070. Validation accuracy: 98.08% Epoch time: 7.56s
Epoch 5. Mean loss: 0.037. Validation loss: 0.069. Validation accuracy: 97.94% Epoch time: 7.59s
Epoch 6. Mean loss: 0.024. Validation loss: 0.065. Validation accuracy: 98.06% Epoch time: 7.65s
Epoch 7. Mean loss: 0.015. Validation loss: 0.069. Validation accuracy: 98.18% Epoch time: 8.07s
End of training. Total training time: 53.63s


In [3]:
# Evaluate the trained network on the whole test set in a single forward pass.
# The arrays are read through the public images / labels properties instead of
# calling next_batch() with the private _num_examples attribute; accuracy does
# not depend on sample order, so no shuffling is needed.
input_batch, target_batch = mnist.test.images, mnist.test.labels
test_accuracy = sess.run([accuracy],
    feed_dict={inputs: input_batch, targets: target_batch})

# Because a list of ops was passed to sess.run, test_accuracy is a list with
# one value; extract it with [0] and convert it to a percentage.
# Uncomment the print to see how it looks before the manipulation
# print (test_accuracy)
test_accuracy_percent = test_accuracy[0] * 100.

# Print the test accuracy formatted in percentages
print('Test accuracy: {0:.2f}%'.format(test_accuracy_percent))

Test accuracy: 97.96%
