In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import tensorflow as tf 
from tensorflow.examples.tutorials.mnist import input_data

# Define path to TensorBoard log files
logPath = "./tensorboard_logTB/"

In [3]:
# TensorBoard visualization.  
# From https://www.tensorflow.org/get_started/summaries_and_tensorboard
def variable_summaries(var):
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
    tf.summary.scalar('mean', mean)
    with tf.name_scope('stddev'):
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
    tf.summary.scalar('stddev', stddev)
    tf.summary.scalar('max', tf.reduce_max(var))
    tf.summary.scalar('min', tf.reduce_min(var))
    tf.summary.histogram('histogram', var)

In [4]:
# Fetch MNIST data set and one_hot encode labels. 
# Using TensorFlow example library.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [5]:
# Print to verify the shapes of the data set. 
print('number of train images', mnist.train.images.shape)
print('number of test images', mnist.test.images.shape)

print('train labels', mnist.train.labels.shape)
print('test labels', mnist.test.labels.shape)

number of train images (55000, 784)
number of test images (10000, 784)
train labels (55000, 10)
test labels (10000, 10)


In [6]:
# Implement weight, bias and CNN layers. 
def weights(shape, name=None):
    initialValue = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initialValue, name=name)

def bias(shape, name=None):
    initialValue = tf.constant(0.1, shape=shape)
    return tf.Variable(initialValue, name=name)

def convolutional(x, W, name=None):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME',name=name)

def maxpooling(x, name=None):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME', name=name)



# Jupyter Notebook: Using Interactive session to avoid having to pass session. 
sess = tf.InteractiveSession()

# Define placeholders for MNIST input data and make start a scope for Tensorboard
with tf.name_scope("MNIST_Input"):
    x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
    y = tf.placeholder(tf.float32, [None, 10], name="y")  

# Reshape input data back to greyscale matrix 28x28x1 that is used by the convolutional model. 
# Create next scope
with tf.name_scope("Input_Reshape"):
    xImage = tf.reshape(x, [-1,28,28,1], name="xImage")
    tf.summary.image('input_img', xImage, 5)

# Define the Model
# First Convolution layer with pooling to avoid overfitting
with tf.name_scope('First_Convolutional_Layer'):
    # Creates 32 features for each 5X5 patch of the image
    with tf.name_scope('weights'):
        weightsConv1 = weights([5, 5, 1, 32], name="weight")
        variable_summaries(weightsConv1)
    with tf.name_scope('biases'):
        biasConv1 = bias([32], name="bias")
        variable_summaries(biasConv1)
    # Do convolution on images, add bias and push through RELU activation
    conv1_wx_b = convolutional(xImage, weightsConv1,name="convolutional") + biasConv1
    tf.summary.histogram('conv1_wx_b', conv1_wx_b)
    conv1Relu = tf.nn.relu(conv1_wx_b, name="relu")
    tf.summary.histogram('conv1Relu', conv1Relu)
    # take results and run through max_pool
    conv1maxpooling = maxpooling(conv1Relu, name="pool")

# Second Convolution layer with pooling to avoid overfitting
with tf.name_scope('Second_Convolutional_Layer'):
# Process the 32 features from Convolution layer 1, in 5 X 5 patch.  return 64 features weights and biases
    with tf.name_scope('weights'):
        weightsConv2 = weights([5, 5, 32, 64], name="weight")
        variable_summaries(weightsConv2)
    with tf.name_scope('biases'):
        biasConv2 = bias([64], name="bias")
        variable_summaries(biasConv2)
    # Do convolution of the output of the 1st convolution layer.  Pool results 
    conv2_wx_b = convolutional(conv1maxpooling, weightsConv2, name="convolutional") + biasConv2
    tf.summary.histogram('conv2_wx_b', conv2_wx_b)
    conv2Relu = tf.nn.relu(conv2_wx_b, name="relu")
    tf.summary.histogram('conv2Relu', conv2Relu)
    conv2maxpooling = maxpooling(conv2Relu, name="pool")

# Fully Connected Layer
with tf.name_scope('Fully_Connected_Layer'):
  
    weightFC1 = weights([7 * 7 * 64, 1024], name="weight")
    biasFC1 = bias([1024], name="bias")
    # Connect output of pooling layer 2 as input to full connected layer
    conv2maxpooling_flat = tf.reshape(conv2maxpooling, [-1, 7*7*64])
    fc1 = tf.nn.relu(tf.matmul(conv2maxpooling_flat, weightFC1) + biasFC1, name="relu")

# dropout some neurons to reduce overfitting
keep_prob = tf.placeholder(tf.float32, name="keep_prob")  # get dropout probability as a training input.
fc1_drop = tf.nn.dropout(fc1, keep_prob)

# Readout layer
with tf.name_scope("Readout"):
    weightFC2 = weights([1024, 10], name="weight")
    biasFC2 = bias([10], name="bias")

# Define model
y_cnn_model  = tf.matmul(fc1_drop, weightFC2) + biasFC2

# Loss measurement
with tf.name_scope("cross_entropy"):
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_cnn_model , labels=y))

# loss optimization. Using AdamOptimizer https://arxiv.org/pdf/1412.6980.pdf
# Define training: 
with tf.name_scope("loss_optimizer"):
    train_step = tf.train.AdamOptimizer(1e-3).minimize(cross_entropy)

with tf.name_scope("accuracy"):
    # What is correct
    correct_prediction = tf.equal(tf.argmax(y_cnn_model ,1), tf.argmax(y,1))
    # How accurate is it?
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

tf.summary.scalar("cross_entropy_loss", cross_entropy)
tf.summary.scalar("training_accuracy", accuracy)

# TensorBoard - Merge summaries 
summarize_all = tf.summary.merge_all()

# Initialize all of the variables
sess.run(tf.global_variables_initializer())

# TensorBoard - Write the default graph out so we can view it's structure
tbWriter = tf.summary.FileWriter(logPath, sess.graph)

# Train the model
import time

#  define number of steps and variable when progress is displayed
numberSteps = 2000
progressDisplay = 100

# Start timer
start_time = time.time()
end_time = time.time()
for i in range(numberSteps):
    batch = mnist.train.next_batch(50)
    #print(batch[1].shape)
    _, summary = sess.run([train_step, summarize_all], feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})


    # Periodic status display
    if i%progressDisplay == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x:batch[0], y: batch[1], keep_prob: 1.0})
        end_time = time.time()
        print("step {0}, elapsed time {1:.2f} seconds, training accuracy {2:.3f}%".format(i, end_time-start_time, train_accuracy*100.0))
        # write summary to log
        tbWriter.add_summary(summary,i)


# Display summary 
# Time to train and test accuracy 
end_time = time.time()
print("Total training time for {0} batches: {1:.2f} seconds".format(i+1, end_time-start_time))
# Accuracy on test data
print("Test accuracy {0:.3f}%".format(accuracy.eval(feed_dict={
    x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0})*100.0))


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

step 0, elapsed time 3.06 seconds, training accuracy 22.000%
step 100, elapsed time 170.24 seconds, training accuracy 96.000%
step 200, elapsed time 340.09 seconds, training accuracy 98.000%
step 300, elapsed time 506.76 seconds, training accuracy 100.000%
step 400, elapsed time 669.59 seconds, training accuracy 98.000%
step 500, elapsed time 832.14 seconds, training accuracy 98.000%
step 600, elapsed time 996.46 seconds, training accuracy 98.000%
step 700, elapsed time 1174.50 seconds, training accuracy 100.000%
step 800, elapsed time 1368.83 seconds, training accuracy 100.000%
step 900, elapsed time 1556.97 seconds, training accuracy 100.000%
step 1000, elapsed time 1753.54 seconds, training accuracy 100.000%
step 1100, elapsed time 1964.04 seconds, training accuracy 100.000%
step 1200, elapsed 