## Deep Learning modeling on MNIST data set

We will use the MNIST data set to perform multi-class classification (10 class labels) of handwritten-digit images using TensorFlow.

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import os

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

In [0]:
# Clear the default graph so that re-running the notebook does not stack
# duplicate ops on top of a previous run, then open the session that the
# later cells use to execute the graph.
tf.reset_default_graph()
sess = tf.Session()

In [0]:
# Root directory for TensorBoard event files; the summary writers created
# later in the notebook log under LOGDIR/train and LOGDIR/test.
# Restart TensorBoard pointed at this directory to see the result of a new run.
LOGDIR = './graphs'

In [4]:
# Load the MNIST data set, using /tmp/data as the local data directory.
# one_hot=True requests one-hot encoded *labels* (one column per class),
# matching the [None, NUM_CLASSES] labels placeholder defined later; the
# pixel features themselves are not one-hot encoded.
mnist = input_data.read_data_sets('/tmp/data', one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [0]:
# Number of neurons in each hidden layer
HIDDEN1_SIZE = 500
HIDDEN2_SIZE = 250

# Number of output classes and length of one flattened 28x28 input image
NUM_CLASSES = 10
NUM_PIXELS = 28 * 28

# Experiment with the number of training steps to
# see the effect. Each step trains on one mini-batch of BATCH_SIZE examples.
TRAIN_STEPS = 2000
BATCH_SIZE = 100

# Learning rate handed to the optimizer
LEARNING_RATE = 0.001

In [0]:
# Define inputs: placeholders that are filled through feed_dict at run time.
# The leading None dimension leaves the batch size open, so the same
# placeholders accept both training mini-batches and the full test set.
with tf.name_scope('input'):
    # One row of NUM_PIXELS float values per example
    images = tf.placeholder(tf.float32, [None, NUM_PIXELS],  name="pixels")
    # One row of NUM_CLASSES float values per example (the target labels)
    labels = tf.placeholder(tf.float32, [None, NUM_CLASSES], name="labels")

In [0]:
# Function to create a Fully Connected (FC) layer
def fc_layer(input, size_out, name="fc", activation=None):
    """Build a fully connected layer: ``activation(input @ weights + bias)``.

    Args:
        input: 2-D tensor; its second dimension must be statically known
            because it determines the shape of the weight matrix.
        size_out: number of output units of the layer.
        name: name scope under which the layer's variables and ops are created.
        activation: optional callable applied to the affine output. When it
            is omitted (or falsy) the affine output is returned unchanged.

    Returns:
        The layer's output tensor, with ``size_out`` columns.
    """
    with tf.name_scope(name):
        fan_in = int(input.shape[1])

        # Weights start as small truncated-normal noise, biases at 0.1.
        weights = tf.Variable(
            tf.truncated_normal([fan_in, size_out], stddev=0.1),
            name="weights")
        bias = tf.Variable(tf.constant(0.1, shape=[size_out]), name="bias")

        pre_activation = tf.matmul(input, weights) + bias

        return activation(pre_activation) if activation else pre_activation
      
# The way we initialize variables has an effect on how quickly
# training converges. We may explore different strategies later, e.g.:
# w = tf.Variable(tf.truncated_normal(shape=[size_in, size_out], stddev=1.0 / math.sqrt(float(size_in))))

In [8]:
# Define the model

# Two fully connected hidden layers with ReLU activations
fc1 = fc_layer(images, HIDDEN1_SIZE, "fc1", activation=tf.nn.relu)
fc2 = fc_layer(fc1, HIDDEN2_SIZE, "fc2", activation=tf.nn.relu)

# Next, apply Dropout to the output of the second hidden layer.
# This can help prevent overfitting.
# keep_prob is a scalar with a default value rather than a Python constant:
# when nothing is fed it is 0.9, so every run keeps 90% of the activations;
# feeding {keep_prob: 1.0} in a sess.run call switches dropout off for that
# run (rate becomes 0), which is what evaluation runs should do.
keep_prob = tf.placeholder_with_default(0.9, shape=[], name="keep_prob")
dropped = tf.nn.dropout(fc2, rate=(1 - keep_prob))

# Finally, we calculate the logits: unnormalized per-class scores.
# No softmax is applied here; the logits are the input to the softmax
# that is computed as part of the cross-entropy loss.
y = fc_layer(dropped, NUM_CLASSES, name="output")

Instructions for updating:
Colocations handled automatically by placer.


In [9]:
# Define loss and an optimizer
with tf.name_scope("loss"):
    # Mean softmax cross-entropy between the logits `y` and the target labels.
    # The _v2 op replaces the deprecated softmax_cross_entropy_with_logits.
    # The only difference is that _v2 lets gradients flow into `labels`;
    # `labels` is a fed placeholder with no variables behind it, so the loss
    # value and the training gradients are the same as before.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=y))
    tf.summary.scalar('loss', loss)

with tf.name_scope("optimizer"):
    # minimize() derives the gradients of the loss from the graph and returns
    # the op that applies one Adam update to the variables each time it runs.
    train = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [0]:
# Define evaluation
with tf.name_scope("evaluation"):
    # Per example: index of the largest logit vs. index of the largest
    # entry in the target row.
    predicted_class = tf.argmax(y, axis=1)
    true_class = tf.argmax(labels, axis=1)
    correct_prediction = tf.equal(predicted_class, true_class)

    # Fraction of examples in the fed batch that were classified correctly
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Expose the accuracy as a scalar summary
    tf.summary.scalar('accuracy', accuracy)

In [0]:
# Set up logging.
# Two FileWriters write to separate sub-directories of LOGDIR: one for the
# summaries produced on training batches and a second one for summaries on
# the test set. This will let us display both nicely in TensorBoard.
train_writer = tf.summary.FileWriter(os.path.join(LOGDIR, "train"))
# Store the session's graph in the training log
train_writer.add_graph(sess.graph)

test_writer  = tf.summary.FileWriter(os.path.join(LOGDIR, "test"))
# One op that evaluates all summaries registered in the graph so far
summary_op   = tf.summary.merge_all()

In [0]:
# Assign initial values to every variable in the graph.
# This has to run before the first training step.
sess.run(tf.global_variables_initializer())

In [13]:
# Train for TRAIN_STEPS mini-batches, logging summaries for TensorBoard.
#
# Run metadata is not logged here: the sess.run calls are not asked to
# collect any (no options / run_metadata arguments), so there is nothing to
# record.
#
# NOTE(review): the dropout op in the graph gets no train/eval switch fed in
# this cell, so the test accuracies below are measured with dropout active.

# The full test set is fed unchanged at every evaluation; build the feed once.
test_feed = {images: mnist.test.images, labels: mnist.test.labels}

try:
    for step in range(TRAIN_STEPS):
        batch_xs, batch_ys = mnist.train.next_batch(BATCH_SIZE)

        # One optimizer update; the summaries are computed on the same batch.
        summary_result, _ = sess.run([summary_op, train],
                                     feed_dict={images: batch_xs,
                                                labels: batch_ys})
        train_writer.add_summary(summary_result, step)

        # calculate accuracy on the test set, every 100 steps.
        # we're using the entire test set here, so this will be a bit slow
        if step % 100 == 0:
            summary_result, acc = sess.run([summary_op, accuracy],
                                           feed_dict=test_feed)
            test_writer.add_summary(summary_result, step)
            print("test accuracy: %f at step %d" % (acc, step))

    print("Accuracy %f" % sess.run(accuracy, feed_dict=test_feed))
finally:
    # Flush and close the event files even if training is interrupted.
    train_writer.close()
    test_writer.close()

test accuracy: 0.213500 at step 0
test accuracy: 0.919800 at step 100
test accuracy: 0.938300 at step 200
test accuracy: 0.951400 at step 300
test accuracy: 0.956100 at step 400
test accuracy: 0.959900 at step 500
test accuracy: 0.965800 at step 600
test accuracy: 0.967100 at step 700
test accuracy: 0.965700 at step 800
test accuracy: 0.968600 at step 900
test accuracy: 0.972200 at step 1000
test accuracy: 0.971600 at step 1100
test accuracy: 0.973900 at step 1200
test accuracy: 0.971200 at step 1300
test accuracy: 0.970200 at step 1400
test accuracy: 0.975500 at step 1500
test accuracy: 0.968400 at step 1600
test accuracy: 0.975300 at step 1700
test accuracy: 0.976400 at step 1800
test accuracy: 0.974900 at step 1900
Accuracy 0.974200


Although this is a simple model, it achieves over 97% accuracy on the MNIST test set.