Input data

In [11]:
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", reshape=False)
X_train, y_train           = mnist.train.images, mnist.train.labels
X_validation, y_validation = mnist.validation.images, mnist.validation.labels
X_test, y_test             = mnist.test.images, mnist.test.labels

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [12]:
from tqdm import tqdm
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from tensorflow.contrib.layers import flatten


Pad image with zeros

In [13]:
X_train      = np.pad(X_train, ((0,0),(2,2),(2,2),(0,0)), 'constant')
X_validation = np.pad(X_validation, ((0,0),(2,2),(2,2),(0,0)), 'constant')
X_test       = np.pad(X_test, ((0,0),(2,2),(2,2),(0,0)), 'constant')

In [14]:
EPOCHS = 10
BATCH_SIZE = 128

In [15]:
def neural(x):
    # Arguments used for tf.truncated_normal, randomly defines variables for the weights and biases for each layer
    mu = 0
    sigma = 0.1

    # TODO: Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.
    w1 = tf.Variable(tf.truncated_normal([5, 5, 1, 6], mean = mu, stddev = sigma))
    b1 = tf.Variable(tf.zeros([6]))
    conv1 = tf.nn.conv2d(x, w1, strides = [1, 1, 1, 1], padding = 'VALID')
    conv1 = tf.nn.bias_add(conv1, b1)

    # TODO: Activation.
    conv1 = tf.nn.relu(conv1)
    conv1 = tf.nn.dropout(conv1, keep_prob = prob)

    # TODO: Pooling. Input = 28x28x6. Output = 14x14x6.
    pool1 = tf.nn.max_pool(conv1, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'VALID')

    # TODO: Flatten. Input = 14x14x6. Output = 1176.
    flat = flatten(pool1)

    # TODO: Layer 3 Input = 1176. Output = 120.
    w3 = tf.Variable(tf.truncated_normal([1176, 120], mean = mu, stddev = sigma))
    b3 = tf.Variable(tf.zeros([120]))
    full1 = tf.matmul(flat, w3)
    full1 = tf.nn.bias_add(full1, b3)

    # TODO: Activation.
    full1 = tf.nn.relu(full1)

    # TODO: Layer 4: Fully Connected. Input = 120. Output = 10.
    w4 = tf.Variable(tf.truncated_normal([120, 10], mean = mu, stddev = sigma))
    b4 = tf.Variable(tf.zeros([10]))
    full2 = tf.matmul(full1, w4)
    full2 = tf.nn.bias_add(full2, b4)

    # TODO: Activation.
    logits = tf.nn.softmax(full2)
    
    return logits

Define placeholders to feed data into the neural network

In [16]:
x = tf.placeholder(tf.float32, (None, 32, 32, 1), 'input')
y = tf.placeholder(tf.int32, (None, ), 'labels')
prob = tf.placeholder(tf.float32, (None), 'dropout_prob')
one_hot_y = tf.one_hot(y, 10)

Define how to train the network

In [17]:
rate = 0.001

logits = neural(x)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = one_hot_y)
loss_operation = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate = rate)
training_operation = optimizer.minimize(loss_operation)

Get the predictions and calculate accuracy

In [18]:
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()

Summarise the data for TensorBoard

In [19]:
rate_summary = tf.summary.scalar('learning_rate', rate)

accuracy_summary = tf.summary.scalar('accuracy', accuracy_operation)

merged = tf.summary.merge([rate_summary, accuracy_summary])

writer = tf.summary.FileWriter('./summary', sess.graph)

Evaluate the model

In [20]:
def evaluate(X_data, y_data):
    num_examples = len(X_data)
    total_accuracy = 0
    sess = tf.get_default_session()
    for offset in range(0, num_examples, BATCH_SIZE):
        batch_x, batch_y = X_data[offset:offset+BATCH_SIZE], y_data[offset:offset+BATCH_SIZE]
        accuracy = sess.run(accuracy_operation, feed_dict={x: batch_x, y: batch_y, prob: 1.})
        total_accuracy += (accuracy * len(batch_x))

        # Update learning rate if needed

    return total_accuracy / num_examples

Note: Use of `with`

In [21]:
pbar = tqdm(total = EPOCHS * (len(X_train) + 1)//BATCH_SIZE)

  0%|          | 0/4296 [00:00<?, ?it/s]

In [22]:
with tf.Session() as sess:

    sess.run(tf.global_variables_initializer())

    num_examples = len(X_train)

    count = 0

    print("Training...")

    for i in range(EPOCHS):
        
        X_train, y_train = shuffle(X_train, y_train)

        for offset in range(0, num_examples, BATCH_SIZE):
            end = offset + BATCH_SIZE
            batch_x, batch_y = X_train[offset:end], y_train[offset:end]
            
            _, summary = sess.run([training_operation, merged], feed_dict={x: batch_x, y: batch_y, prob: 0.5})
            
            writer.add_summary(summary, count)

            count += 1
            
            pbar.update(1)

        validation_accuracy = evaluate(X_validation, y_validation)
        print("EPOCH {} ...".format(i+1))
        print("Validation Accuracy = {:.3f}".format(validation_accuracy))
        print('\n')

    saver.save(sess, './neural')
    print("Model saved")

Training...


 10%|█         | 430/4296 [00:32<04:06, 15.71it/s]

EPOCH 1 ...
Validation Accuracy = 0.942




 20%|██        | 860/4296 [01:05<04:12, 13.60it/s]

EPOCH 2 ...
Validation Accuracy = 0.955




 21%|██        | 882/4296 [01:08<04:42, 12.08it/s]

KeyboardInterrupt: 

Test accuracy

In [None]:
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('.'))
    print("Test Accuracy = {:.3f}".format(test_accuracy))