In [1]:
import tensorflow as tf
import numpy as np
import timeit
from sklearn.utils import shuffle
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)

<h1>Extract MNIST data</h1>

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
#get mnist data, with one_hot encoding, reshape = False (that means images are not flatten)
mnist = input_data.read_data_sets("MNIST_data/",reshape=False,one_hot=True)
#suppress warnings
tf.logging.set_verbosity(old_v)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


<h1>Prepare training, validation and testing data</h1>

In [3]:
x_train, y_train           = mnist.train.images, mnist.train.labels
x_validation, y_validation = mnist.validation.images, mnist.validation.labels
x_test, y_test             = mnist.test.images, mnist.test.labels

#pad images with 0s (28x28 to 32x32)
# did it within lenet5 function


<h1>Define hyperparameter</h1>

In [4]:
lr=0.01
epochs=10
batch_size=256
num_classes=10

In [5]:
tf.reset_default_graph()

<h1>Placeholder</h1>

In [6]:
he_init=tf.contrib.layers.variance_scaling_initializer()

X = tf.placeholder(tf.float32, [None, 28, 28, 1], name="X")
Y = tf.placeholder(tf.int64, [None, num_classes], name="Y")

<h1>Define LeNet-5</h1>

In [7]:
def CNN(X):

    # Reshape input to 4-D vector
    input_layer = tf.reshape(X, [-1, 28, 28, 1])

    # Padding the input to make it 32x32
    padded_input = tf.pad(input_layer, [[0, 0], [2, 2], [2, 2], [0, 0]], "CONSTANT") 

    # Convolutional Layer #1
    # Output: 28 * 28 * 6
    conv1 = tf.layers.conv2d(
      inputs=padded_input,
      filters=6, # Number of filters.
      kernel_size=5, # Size of each filter is 5x5.
      padding="valid", # No padding is applied to the input.
      activation=tf.nn.relu,
      kernel_initializer=he_init)

    # Pooling Layer #1
    # Output: 14 * 14 * 6
    pool1 = tf.layers.average_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional Layer #2
    # Output: 10 * 10 * 16
    conv2 = tf.layers.conv2d(
      inputs=pool1,
      filters=16, # Number of filters
      kernel_size=5, # Size of each filter is 5x5
      padding="valid", # No padding
      activation=tf.nn.relu,
      kernel_initializer=he_init)

    # Pooling Layer #2
    # Output: 5 * 5 * 16
    pool2 = tf.layers.average_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Reshaping output
    pool2_flat = tf.reshape(pool2, [-1, 5 * 5 * 16])

    # Fully connected layer #1
    dense1 = tf.layers.dense(inputs=pool2_flat, units=120, activation=tf.nn.relu,kernel_initializer=he_init)

    # Fully connected layer #2
    dense2 = tf.layers.dense(inputs=dense1, units=84, activation=tf.nn.relu,kernel_initializer=he_init)

    # Output layer
    logits = tf.layers.dense(inputs=dense2, units=10, kernel_initializer=None)

    return logits

<h1>Cost and optimization</h1>

In [8]:
logits = CNN(X)
softmax = tf.nn.softmax(logits)

# Convert our labels into one-hot-vectors
#labels = tf.one_hot(indices=tf.cast(Y, tf.int32), depth=10)

# Compute the cross-entropy loss
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                                 labels=Y))

# Use adam optimizer to reduce cost
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
train_op = optimizer.minimize(cost)


# For testing and prediction
predictions = tf.argmax(softmax, axis=1)
correct_prediction = tf.equal(tf.argmax(logits,1),tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Initialize all the variables
init = tf.global_variables_initializer()

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.average_pooling2d instead.
Instructions for updating:
Use keras.layers.dense instead.


<h1>Training, validating, testing</h1>
<h2>1. Print out validation accuracy after each training epoch</h2>
<h2>2. Print out training time on each epoch</h2>
<h2>3. Print out testing accuracy</h2>

In [9]:
with tf.Session() as sess:
 
    sess.run(init)

    for epoch in range(epochs):
        num_samples = x_train.shape[0]
        num_batches = (num_samples // batch_size) + 1
        epoch_cost = 0.
        i = 0
        while i < num_samples:
            batch_x = x_train[i:i+batch_size,:]
            batch_y = y_train[i:i+batch_size]

            i += batch_size

            # Train on batch and get back cost
            _, c = sess.run([train_op, cost], feed_dict={X:batch_x, Y:batch_y})
            epoch_cost += (c/num_batches)

        # Get accuracy for validation
        valid_accuracy = accuracy.eval(
            feed_dict={X:x_validation, Y:y_validation})

        print ("Epoch {}: Cost: {}".format(epoch+1, epoch_cost))
        print("Validation accuracy: {}".format(valid_accuracy))

    test_accuracy = accuracy.eval(feed_dict={X:x_test, Y:y_test})
    
    print("Testing accuracy: {}".format(test_accuracy))

Epoch 1: Cost: 0.2148139478443839
Validation accuracy: 0.9649999737739563
Epoch 2: Cost: 0.06796196885406969
Validation accuracy: 0.9814000129699707
Epoch 3: Cost: 0.04771079716250995
Validation accuracy: 0.9810000061988831
Epoch 4: Cost: 0.03895494824567768
Validation accuracy: 0.98580002784729
Epoch 5: Cost: 0.03498408783357157
Validation accuracy: 0.9868000149726868
Epoch 6: Cost: 0.029689572844146694
Validation accuracy: 0.9807999730110168
Epoch 7: Cost: 0.024093717408314383
Validation accuracy: 0.9878000020980835
Epoch 8: Cost: 0.025010774639397198
Validation accuracy: 0.9860000014305115
Epoch 9: Cost: 0.025876719384456917
Validation accuracy: 0.9837999939918518
Epoch 10: Cost: 0.02532572560592725
Validation accuracy: 0.9801999926567078
Testing accuracy: 0.9797999858856201
