Assignment 1 Part 2 - Hayden Barker

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data


def run_cnn():
    """Train a small CNN on MNIST and print the test accuracy after every epoch.

    Architecture: conv(4 filters, 5x5) -> max-pool -> conv(8 filters, 5x5) ->
    max-pool -> dense(10 logits).

    The weights are updated by Adam minimising the softmax cross-entropy that is
    defined inside the TensorFlow graph. The value returned by ``calcLoss`` is
    computed in NumPy, outside the graph, so no gradient can flow through it: it
    is a monitoring metric only and is what gets printed as "cost". Overriding
    the loss tensor's value through ``feed_dict`` would not change this, because
    the gradients are derived from the ops upstream of the loss tensor, not from
    the loss value itself.

    Returns:
        None. Progress and the final test accuracy are printed to stdout.
    """
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    # Python optimisation variables
    learning_rate = 0.001
    epochs = 10
    batch_size = 100
    # hyper-parameters forwarded to calcLoss (noise scale and step size)
    sigma = 0.1
    alpha = 0.1

    # declare the training data placeholders
    # input x - for 28 x 28 pixels = 784 - this is the flattened image data that is drawn from
    # mnist.train.next_batch()
    x = tf.placeholder(tf.float32, [None, 784])
    # reshape the input data so that it is a 4D tensor.  The first value (-1) tells function to dynamically shape that
    # dimension based on the amount of data passed to it.  The two middle dimensions are set to the image size (i.e. 28
    # x 28).  The final dimension is 1 as there is only a single colour channel i.e. grayscale.  If this was RGB, this
    # dimension would be 3
    x_shaped = tf.reshape(x, [-1, 28, 28, 1])
    # now declare the output data placeholder - 10 digits
    y = tf.placeholder(tf.float32, [None, 10])

    # Convolutional Layer #1
    # Computes 4 features using a 5x5 filter with ReLU activation.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 28, 28, 1]
    # Output Tensor Shape: [batch_size, 28, 28, 4]
    conv1 = tf.layers.conv2d(inputs=x_shaped, filters=4, kernel_size=[5, 5], padding="same", activation=tf.nn.relu)
    # Pooling Layer #1
    # First max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 28, 28, 4]
    # Output Tensor Shape: [batch_size, 14, 14, 4]
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    # Convolutional Layer #2
    # Computes 8 features using a 5x5 filter.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 14, 14, 4]
    # Output Tensor Shape: [batch_size, 14, 14, 8]
    conv2 = tf.layers.conv2d(inputs=pool1, filters=8, kernel_size=[5, 5], padding="same", activation=tf.nn.relu)
    # Pooling Layer #2
    # Second max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 14, 14, 8]
    # Output Tensor Shape: [batch_size, 7, 7, 8]
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    # Flatten tensor into a batch of vectors
    # Input Tensor Shape: [batch_size, 7, 7, 8]
    # Output Tensor Shape: [batch_size, 7 * 7 * 8]
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 8])
    # Logit layer
    # Densely connected layer with 10 neurons
    # Input Tensor Shape: [batch_size, 7 * 7 * 8]
    # Output Tensor Shape: [batch_size, 10]
    logits = tf.layers.dense(inputs=pool2_flat, units=10)
    y_ = tf.nn.softmax(logits)

    # In-graph training objective.  The _v2 op replaces the deprecated
    # softmax_cross_entropy_with_logits; stop_gradient keeps the labels out of
    # backprop, matching the behaviour of the deprecated op.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=tf.stop_gradient(y)))

    # add an optimiser
    optimiser = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy)

    # define an accuracy assessment operation
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # setup the initialisation operator
    init_op = tf.global_variables_initializer()

    with tf.Session() as sess:
        # initialise the variables
        sess.run(init_op)
        total_batch = len(mnist.train.labels) // batch_size
        for epoch in range(epochs):
            avg_cost = 0
            for _ in range(total_batch):
                # create batches
                batch_x, batch_y = mnist.train.next_batch(batch_size=batch_size)
                # One pass: apply the Adam step and fetch the logits of the same
                # forward pass (i.e. computed with the pre-update weights).
                _, batch_logits = sess.run([optimiser, logits], feed_dict={x: batch_x, y: batch_y})
                # calculate the monitoring loss outside of tensorflow
                loss = calcLoss(batch_logits, batch_y, sigma, alpha)
                avg_cost += loss / total_batch

            test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels})
            print("Epoch:", (epoch + 1), "cost =", "{:.3f}".format(avg_cost), " test accuracy: {:.3f}".format(test_acc))

        print("\nTraining complete!")
        print(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}))

def calcLoss(logits, labels, sigma, alpha):
    """Score a batch using one random perturbation of its logits.

    A single Gaussian noise vector (one entry per class) is drawn from NumPy's
    global random state, scaled by ``sigma`` and added to every row of
    ``logits``.  The noise vector is then summed once for every sample whose
    perturbed prediction matches its label, scaled by
    ``alpha / (batch_size * sigma)`` and added to the logits as a proposed
    update.  The result is the Euclidean (Frobenius) norm of the difference
    between the original and the updated logits.

    The batch size and the number of classes are taken from ``logits`` itself,
    so any (batch, classes) shape is accepted.

    Args:
        logits: Array of shape (batch, classes).  Input with more than two
            dimensions is reduced by repeatedly taking index 0 of the leading
            axis; 1-D input is treated as a single sample.
        labels: Per-sample class scores (e.g. one-hot rows), brought to two
            dimensions the same way as ``logits``.  The target class of each
            row is its argmax along axis 1.
        sigma: Scale of the noise added to the logits.  Must be non-zero.
        alpha: Step size applied to the accumulated noise.

    Returns:
        The norm described above as a non-negative NumPy float.

    Raises:
        ValueError: If ``logits`` contains no samples.
    """
    # bring the labels to exactly two dimensions, then get the correct class
    labels = np.atleast_2d(labels)
    while labels.ndim > 2:
        labels = labels[0]
    true_classes = np.argmax(labels, axis=1)

    # bring the logits to exactly two dimensions
    logits = np.atleast_2d(logits)
    while logits.ndim > 2:
        logits = logits[0]

    batch_size, num_classes = logits.shape
    if batch_size == 0:
        raise ValueError("calcLoss requires at least one sample in the batch")

    # get the random numbers for ES: one noise vector shared by the whole
    # batch; broadcasting applies it to every row
    noise = np.random.normal(size=[1, num_classes])

    # add the jitter to the logits and get predictions based on them
    perturbed = logits + (sigma * noise)
    predictions = np.argmax(perturbed, axis=1)
    hits = np.equal(predictions, true_classes).astype(int)

    # keep the noise for every prediction that was correct and sum over the batch
    noise_sum = np.sum(hits[:, np.newaxis] * noise, axis=0)

    # scale the sum: alpha / (batch_size * sigma)
    step = alpha * ((1 / batch_size) / sigma) * noise_sum

    # create the update that would be used as theta t+1
    update = np.add(logits, step)

    # euclidean norm of (old logits - updated logits)
    return np.linalg.norm(np.subtract(logits, update))


# Entry point: build, train and evaluate the CNN when this code is executed
# directly (i.e. when __name__ is "__main__") rather than imported.
if __name__ == "__main__":
    run_cnn()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

Epoch: 1 cost = 26.951  test accuracy: 0.949
Epoch: 2 cost = 29.476  test accuracy: 0.968
Epoch: 3 cost = 29.959  test accuracy: 0.973
Epoch: 4 cost = 29.963  test accuracy: 0.975
Epoch: 5 cost = 29.818  test accuracy: 0.979
Epoch: 6 cost = 29.954  test accuracy: 0.980
Epoch: 7 cost = 30.337  test accuracy: 0.982
Epoch: 8 cost = 30.425  test accuracy: 0.984
Epoch: 9 cost = 30.202  test accuracy: 0.984
Epoch: 10 cost = 29.533  test accuracy: 0.984

Training complete!
0.9842


Results from a good run of the model with the current parameters (there is some fluctuation as expected):
    
Epoch: 1 cost = 26.556  test accuracy: 0.951
Epoch: 2 cost = 29.972  test accuracy: 0.970
Epoch: 3 cost = 29.281  test accuracy: 0.977
Epoch: 4 cost = 29.999  test accuracy: 0.977
Epoch: 5 cost = 30.225  test accuracy: 0.979
Epoch: 6 cost = 30.304  test accuracy: 0.983
Epoch: 7 cost = 30.156  test accuracy: 0.984
Epoch: 8 cost = 30.986  test accuracy: 0.985
Epoch: 9 cost = 30.069  test accuracy: 0.985
Epoch: 10 cost = 30.268  test accuracy: 0.985

Training complete!
0.9853