In [8]:
## Import Data

"""A deep MNIST classifier using convolutional layers.

See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf

mnist = input_data.read_data_sets("/Users/Adam/MNIST_data/",one_hot=True)


Extracting /Users/Adam/MNIST_data/train-images-idx3-ubyte.gz
Extracting /Users/Adam/MNIST_data/train-labels-idx1-ubyte.gz
Extracting /Users/Adam/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting /Users/Adam/MNIST_data/t10k-labels-idx1-ubyte.gz


In [9]:
#Define Various models for experimentation

##1. Simple Fully Connected NN with relu acitvation
def SimpleNN(x, training_phase):
    h1 = FullyConnected(x, 100, 'h1')
#    h1_norm = tf.layers.batch_normalization(h1, training=trainingPhase,  name='bn1')
    h1_relu = tf.nn.sigmoid(h1)
    y = FullyConnected(h1_relu, 10, 'y')
    return y 

##2. Fully Connected NN wirth relu activation and batch normalisation to avoid covariance shift
# This is the first model that actually uses the training_phase parameter
def SimpleNN(input, training_phase):
    h1 = FullyConnected(input, 100, 'h1')
  
    h1_norm = tf.layers.batch_normalization(h1, training=trainingPhase,  name='bn1')
    h1_relu = tf.nn.relu(h1_norm)
    
    h2 = FullyConnected(h1_relu, 100, 'h2')
    h2_norm = tf.layers.batch_normalization(h2, training=trainingPhase, name='bn2')
    h2_relu = tf.nn.relu(h2_norm)
    
    h3 = FullyConnected(h2_relu, 100, 'h3')
        
    h4 = FullyConnected(h3, 10, 'h4')
    return h4

##3. Convolutional Neural Network -  5x5 kernel with dropout
def Convolutional_simpleNN(input, trainin_phase, keep_prob):
    rs = tf.reshape(input, [-1, 28, 28, 1])

    # 5x5 kernel
    # operates on one input channel
    # outputs depth of 16 channels (16 kernels)
    # Image will be padded if necessary for kernel
    h1_conv = Convolutional(rs, [5, 5, 1, 32], 'h1_conv', padding='SAME') # 28 x 28 -> 28 x 28
    h1_relu = tf.nn.relu(h1_conv)
    h1_pool = max_pool(h1_relu, 'h1_pool', kernel_size=2, strides=2) # 28 x 28 -> 14 x 14
    
    #2nd conv layer
    h2_conv = Convolutional(h1_pool, [5, 5, 32, 64], 'h2_conv', padding='SAME') # 14 x 14 -> 14 x 14
    h2_relu = tf.nn.relu(h2_conv)
    h2_pool = max_pool(h2_relu, 'h2_pool', kernel_size=2, strides=2) # 14 x 14 -> 7 x 7    
    #reshape before hitting with FC layer
    h_pool2_flat = tf.reshape(h2_pool, [-1, 7*7*64])    
    
    #1st FullyConnected Layer
    FC1 = FullyConnected(h_pool2_flat, 1024, 'FC1')
    # Dropout - controls the complexity of the model, prevents co-adaptation of
    # features.
    FC1_drop = tf.nn.dropout(FC1, keep_prob)
    
    # Map the 1024 features to 10 classes, one for each digit
    FC2 = FullyConnected(FC1_drop,10, 'FC2')
    
    return FC2


## Useful Wrapper functions
def FullyConnected(x, output_size, name, initializer=tf.contrib.layers.variance_scaling_initializer(uniform=True)):
    ''' 
    Wrapper for Fully Connected layer - takes care of variable scoping
    '''
    input_size = int(x.get_shape()[1])
    
    #variable scoping
    with tf.variable_scope(name):
        weights = tf.get_variable('weights', [input_size, output_size], initializer=initializer)
        biases = tf.get_variable("biases", [output_size], initializer=tf.constant_initializer(0.01))
        
        # Add the parameters to Tensorboard so we can visualise them later
        tf.summary.histogram('weights', weights)
        tf.summary.histogram('biases', biases)
        
        return tf.add(tf.matmul(x, weights), biases)


## Wrapper for 2D convolutional layer
def Convolutional(x, kernel_shape, name, 
                 padding='SAME', 
                 strides=1, 
                 initializer=tf.contrib.layers.variance_scaling_initializer(uniform=True)):
    '''
    Wrapper for a 2d convolutional layer
    params: x - The input data, should be a 4D tensor
            kernel_shape - The shape of the convolution kernel. Shape should be as follows:
                            [kernel_width, kernel_height, input_depth, num_kernels]
                            input_depth is number of input channels (i.e 1 for black and white)
                            num_kernels is the number of output channels.
            name - to take care of variable scopring in tf
            padding - padding scheme: Must be the string 'SAME' or 'VALID'. Valid padding will pad the input matrix with zeros 
            the minimum required to ensure dimensions are consitent with convolution of kernels. Same padding will pad the input 
            matrix such that the output of convolution with the kernel has the same dimensions as the input
            strides - How much the kernel should slide in each step. We assume width=height
            initializer - initialisation scheme for our weights
    '''
    bias_shape = kernel_shape[3]
    with tf.variable_scope(name):
        weights = tf.get_variable('weights', kernel_shape, initializer=initializer)
        biases = tf.get_variable("biases", bias_shape,initializer=tf.constant_initializer(0.01))
        
        # Add the parameters to Tensorboard so we can visualise them later
        tf.summary.histogram('weights', weights)
        tf.summary.histogram('biases', biases)
        
        x = tf.nn.conv2d(x, weights, strides=[1, strides, strides, 1], padding=padding)
        return x + biases

## Wrapper for max pooling
def max_pool(x, name, padding='VALID', strides=2, kernel_size=2):
    '''
    Wrapper for a maxpool convolutional layer. Performs pooling as to reduce dimensions of output of conv. layer
    Only using square kernels
    '''
    with tf.variable_scope(name):
        return tf.nn.max_pool(x, ksize=[1, kernel_size, kernel_size, 1], strides=[1, strides, strides, 1], padding=padding)

## Utility function for finding number of params of NN
def count_params():
    total_parameters = 0
    for variable in tf.trainable_variables():
        shape = variable.get_shape()
        variable_parametes = 1
        for dim in shape:
            variable_parametes *= dim.value
        total_parameters += variable_parametes
    print("There are " +str(total_parameters) + " parameters.")


In [10]:
##define data inputs
tf.reset_default_graph()
## size of input images: 28 * 28 = 784 
x_input = tf.placeholder(tf.float32, [None, 784])
## size of number of classes 0, 1, ..., 9
y_input = tf.placeholder(tf.float32, [None, 10])
##trainingPhase - used in caluclation of Batch statistics for batch normalisation
trainingPhase = tf.placeholder(tf.bool, name='phase')
keep_prob = tf.placeholder(tf.float32)

#simpleNN
prediction = SimpleNN(x_input, trainingPhase)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y_input))

correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y_input, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

train_step = tf.train.AdamOptimizer(10e-4).minimize(cost)

#convNN
prediction_conv = Convolutional_simpleNN(x_input, trainingPhase, keep_prob)
cost_conv = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction_conv, labels=y_input))

correct_prediction_conv = tf.equal(tf.argmax(prediction_conv, 1), tf.argmax(y_input, 1))
accuracy_conv = tf.reduce_mean(tf.cast(correct_prediction_conv, tf.float32))

train_step_conv = tf.train.AdamOptimizer(10e-4).minimize(cost_conv)


#summary scalars
training_loss = tf.summary.scalar("test_loss", cost)
training_accuracy = tf.summary.scalar("test_accuracy", accuracy)

training_loss_conv = tf.summary.scalar("test_loss", cost_conv)
training_accuracy_conv = tf.summary.scalar("test_accuracy", accuracy_conv)

count_params()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter("/Users/Adam/Documents/Python/mnist/SimpleNN", tf.get_default_graph())
    writer_conv = tf.summary.FileWriter("/Users/Adam/Documents/Python/mnist/ConvNN", tf.get_default_graph())
    
      # Train
    for i in range(10000):
        batch_xs, batch_ys = mnist.train.next_batch(128)
        sess.run(train_step, feed_dict={x_input: batch_xs, y_input: batch_ys, trainingPhase: True})
        sess.run(train_step_conv, feed_dict={x_input: batch_xs, y_input: batch_ys, trainingPhase: True, keep_prob: 0.5})
        if i % 100 == 0:
            test_x, test_y = mnist.test.next_batch(128)
            test_loss, test_accuracy, accPrint = sess.run([training_loss, training_accuracy, accuracy], feed_dict={x_input: test_x, y_input: test_y, trainingPhase: False})
            test_loss_conv, test_accuracy_conv, accPrint_conv = sess.run([training_loss_conv, training_accuracy_conv, accuracy_conv], feed_dict={x_input: test_x, y_input: test_y, trainingPhase: False, keep_prob: 1})
            writer.add_summary(test_loss, i) 
            writer.add_summary(test_accuracy, i) 
            writer.add_summary(test_loss_conv, i) 
            writer.add_summary(test_accuracy_conv, i) 
            print(' step %6d: - test accuracy simple =  %6.3f, test accuracy conv = %6.3f' % (i, accPrint, accPrint_conv))
    writer.flush()
    writer_conv.flush()
    writer.close()
    writer_conv.close()

# Test trained model
    test_acc = sess.run(accuracy, feed_dict={x_input: mnist.test.images,
                                      y_input: mnist.test.labels,
                                      trainingPhase: False})
    test_acc_conv = sess.run(accuracy_conv, feed_dict={x_input: mnist.test.images,
                                      y_input: mnist.test.labels,
                                      trainingPhase: False, keep_prob:1.0})
    print("SIMPLE NN Test Accuracy: %6.3f  -  CONV NN Test Accuracy: %6.3f"  % (test_acc, test_acc_conv))
    


There are 3374744 parameters.
 step      0: - test accuracy simple =   0.148, test accuracy conv =  0.211
 step    100: - test accuracy simple =   0.695, test accuracy conv =  0.977
 step    200: - test accuracy simple =   0.859, test accuracy conv =  0.977
 step    300: - test accuracy simple =   0.859, test accuracy conv =  0.977
 step    400: - test accuracy simple =   0.883, test accuracy conv =  0.984
 step    500: - test accuracy simple =   0.922, test accuracy conv =  0.953
 step    600: - test accuracy simple =   0.930, test accuracy conv =  0.984
 step    700: - test accuracy simple =   0.891, test accuracy conv =  0.969
 step    800: - test accuracy simple =   0.789, test accuracy conv =  0.977
 step    900: - test accuracy simple =   0.812, test accuracy conv =  0.969
 step   1000: - test accuracy simple =   0.859, test accuracy conv =  0.977
 step   1100: - test accuracy simple =   0.844, test accuracy conv =  0.977
 step   1200: - test accuracy simple =   0.773, test accur