In [11]:
# Load MNIST through the local `input_data` helper. Labels are requested
# one-hot encoded and the dataset files are kept under data/.
import input_data
mnist = input_data.read_data_sets("data/", one_hot=True)

import tensorflow as tf
# NOTE(review): `time` is not referenced by the cells shown here.
import time

# Architecture
# NOTE(review): n_hidden_1 / n_hidden_2 are not read by the convolutional
# model in the cells shown here — confirm whether they are still needed.
n_hidden_1 = 256
n_hidden_2 = 256

# Parameters
# Step size handed to AdamOptimizer inside training().
learning_rate = 0.01
# Number of passes the training loop makes over the training set.
training_epochs = 100
# Number of examples requested from mnist.train.next_batch per step.
# NOTE(review): with batch_size = 1 every epoch performs one optimizer step
# per training example — confirm this is intended.
batch_size = 1
# The training loop reports, logs summaries and checkpoints every
# `display_step` epochs.
display_step = 10

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting data/t10k-labels-idx1-ubyte.gz


In [17]:
##### from the author #####
def loss(output, y):
    """Mean softmax cross-entropy between the network output and the labels.

    Args:
        output: unnormalized class scores (logits) produced by the network;
            the softmax itself is applied inside the TensorFlow op.
        y: target distribution for each example (the training cell feeds
            one-hot MNIST labels).

    Returns:
        Tensor holding the cross-entropy averaged over all examples.
    """
    # Name the arguments explicitly: the logits/labels order of this op is easy
    # to swap by accident and positional use is rejected by later TensorFlow
    # releases.
    xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y)
    # Return the mean directly instead of binding it to a local called `loss`,
    # which shadowed this function's own name.
    return tf.reduce_mean(xentropy)

def training(cost, global_step):
    """Create the op that performs one Adam update on `cost`.

    A scalar summary tagged "cost" is registered as a side effect. The step
    size comes from the module-level `learning_rate`, and `global_step` is
    handed to the optimizer's minimize() call.

    Returns:
        The op produced by AdamOptimizer.minimize.
    """
    tf.scalar_summary("cost", cost)
    adam = tf.train.AdamOptimizer(learning_rate)
    return adam.minimize(cost, global_step=global_step)


def evaluate(output, y):
    """Fraction of examples whose arg-max prediction matches the label.

    The arg-max along axis 1 of `output` is compared with the arg-max along
    axis 1 of `y`; the matches are cast to float32 and averaged. As a side
    effect a scalar summary tagged "validation error" is registered with the
    value 1 - accuracy.

    Returns:
        The accuracy tensor.
    """
    predicted = tf.argmax(output, 1)
    expected = tf.argmax(y, 1)
    hits = tf.cast(tf.equal(predicted, expected), tf.float32)
    accuracy = tf.reduce_mean(hits)
    tf.scalar_summary("validation error", (1.0 - accuracy))
    return accuracy

def layer(input, weight_shape, bias_shape):
    """Fully connected layer followed by ReLU: relu(input . W + b).

    Variables "W" and "b" are obtained with tf.get_variable, so the caller is
    expected to wrap each call in its own variable scope. W is drawn from a
    normal distribution with stddev sqrt(2 / weight_shape[0]); b starts at 0.
    """
    fan_in = weight_shape[0]
    W = tf.get_variable(
        "W", weight_shape,
        initializer=tf.random_normal_initializer(stddev=(2.0/fan_in)**0.5))
    b = tf.get_variable(
        "b", bias_shape,
        initializer=tf.constant_initializer(value=0))
    pre_activation = tf.matmul(input, W) + b
    return tf.nn.relu(pre_activation)
#############################
def conv2d(input,weight_shape,bias_shape):
    """Convolution + bias + ReLU, using stride 1 and 'SAME' padding.

    Variables 'W' (filters, shaped `weight_shape`) and 'b' (biases, shaped
    `bias_shape`) are obtained with tf.get_variable. The filters are drawn
    from a normal distribution with stddev sqrt(2 / fan_in), where fan_in is
    the product of the first three entries of `weight_shape`; biases start
    at 0.
    """
    fan_in = weight_shape[0]*weight_shape[1]*weight_shape[2]

    filters = tf.get_variable(
        'W',weight_shape,
        initializer=tf.random_normal_initializer(stddev=(2.0/fan_in)**0.5))
    offsets = tf.get_variable(
        'b',bias_shape,
        initializer=tf.constant_initializer(value=0))

    # Stride 1 in every dimension together with 'SAME' padding keeps the width
    # and height of the feature maps equal to those of the input.
    feature_maps = tf.nn.conv2d(input,filters,strides=[1,1,1,1],padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(feature_maps,offsets))

def max_pool(input,k=2):
    """Max-pool `input` with k x k windows moved in steps of k ('SAME' padding).

    Because the stride equals the window size the windows do not overlap;
    the default k is 2.
    """
    window = [1,k,k,1]
    return tf.nn.max_pool(input,ksize=window,strides=list(window),padding='SAME')

def inference(x,keep_prob):
    """Build the convolutional MNIST classifier and return its logits.

    Args:
        x: flattened images; reshaped here to N x 28 x 28 x 1.
        keep_prob: probability of keeping a unit in the dropout applied after
            the fully connected layer (the training cell feeds 0.5 while
            training and 1 when evaluating).

    Returns:
        The unnormalized class scores (10 per example). No activation is
        applied to them: the softmax is taken inside the loss, and arg-max
        based evaluation does not need one.
    """
    # Take the flattened input pixels and reshape into a tensor of N x 28 x 28 x 1
    # N = number of examples in a minibatch
    # 28 = width & height
    # 1 = depth (an RGB image would have depth 3)
    x = tf.reshape(x,shape=[-1,28,28,1])

    # First convolutional layer: 32 filters with spatial extent 5,
    # input tensor of depth 1 -> depth 32, followed by pooling to
    # compress the information.
    with tf.variable_scope("conv_1"):
        conv_1 = conv2d(x,[5,5,1,32],[32])
        pool_1 = max_pool(conv_1)

    # Second convolutional layer: 64 filters with spatial extent 5,
    # input tensor of depth 32 -> depth 64.
    with tf.variable_scope("conv_2"):
        conv_2 = conv2d(pool_1,[5,5,32,64],[64])
        pool_2 = max_pool(conv_2)

    # Fully connected layer.
    # Flatten the tensor by computing the full size of each "subtensor":
    # 64 feature maps (depth 64), each with a height and width of 7.
    with tf.variable_scope("fc"):
        pool_2_flat = tf.reshape(pool_2,[-1,7*7*64])
        # compress the flattened representation into a hidden state of size 1024
        fc_1 = layer(pool_2_flat,[7*7*64,1024],[1024])

        # apply dropout; keep_prob is the probability of KEEPING a unit
        fc_1_drop = tf.nn.dropout(fc_1,keep_prob)

    # Output layer with 10 bins. It is deliberately linear: building it with
    # layer() would pass the logits through ReLU, clamping them to >= 0 before
    # the softmax cross-entropy and cutting off the gradient of any class whose
    # logit reaches zero. Variable names, shapes and initializers are the same
    # as the ones layer() would create.
    with tf.variable_scope("output"):
        W = tf.get_variable(
            "W",[1024,10],
            initializer=tf.random_normal_initializer(stddev=(2.0/1024)**0.5))
        b = tf.get_variable(
            "b",[10],
            initializer=tf.constant_initializer(value=0))
        output = tf.matmul(fc_1_drop,W) + b
    return output

In [None]:
# Build the graph, train the convolutional model on MNIST, and report the
# validation error every `display_step` epochs and the test accuracy at the end.
with tf.Graph().as_default():

    with tf.variable_scope("mnist_conv_model"):

        x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784
        y = tf.placeholder("float", [None, 10]) # 0-9 digits recognition => 10 classes
        keep_prob = tf.placeholder(tf.float32) # probability of keeping a unit in dropout

        output = inference(x, keep_prob)
        cost = loss(output, y)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = training(cost, global_step)
        eval_op = evaluate(output, y)

        summary_op = tf.merge_all_summaries()
        saver = tf.train.Saver()

        # Dropout is switched off (keep_prob = 1) whenever the model is evaluated.
        validation_feed = {x: mnist.validation.images, y: mnist.validation.labels, keep_prob: 1}
        test_feed = {x: mnist.test.images, y: mnist.test.labels, keep_prob: 1}

        checkpoint_path = "conv_mnist_logs/model-checkpoint"

        # The context manager releases the session's resources even if training
        # is interrupted.
        with tf.Session() as sess:

            summary_writer = tf.train.SummaryWriter("conv_mnist_logs/", graph_def=sess.graph_def)

            try:
                sess.run(tf.initialize_all_variables())

                # Floor division gives an integer batch count on Python 2 and 3.
                total_batch = mnist.train.num_examples // batch_size

                # Training cycle
                for epoch in range(training_epochs):

                    avg_cost = 0.
                    # Loop over all batches
                    for i in range(total_batch):
                        minibatch_x, minibatch_y = mnist.train.next_batch(batch_size)
                        # Fetch the cost in the same run as the update: this avoids
                        # a second forward pass that would also use a different
                        # dropout mask.
                        _, batch_cost = sess.run(
                            [train_op, cost],
                            feed_dict={x: minibatch_x, y: minibatch_y, keep_prob: 0.5})
                        # Compute average loss
                        avg_cost += batch_cost / total_batch

                    # Display logs per epoch step
                    if epoch % display_step == 0:
                        print("Epoch: %04d cost = %.9f" % (epoch + 1, avg_cost))

                        # Evaluate the summaries on the validation set with dropout
                        # disabled, so that the "validation error" scalar really is
                        # a validation figure (the "cost" scalar in the same merged
                        # summary is therefore the validation cost as well).
                        accuracy, summary_str = sess.run(
                            [eval_op, summary_op], feed_dict=validation_feed)

                        print("Validation Error: %s" % (1 - accuracy))

                        summary_writer.add_summary(summary_str, sess.run(global_step))

                        saver.save(sess, checkpoint_path, global_step=global_step)

                print("Optimization Finished!")

                # Checkpoint the final weights too; the last epoch is usually not
                # a display epoch, so they would otherwise be lost.
                saver.save(sess, checkpoint_path, global_step=global_step)

                accuracy = sess.run(eval_op, feed_dict=test_feed)

                print("Test Accuracy: %s" % accuracy)
            finally:
                # Flush pending events to disk and release the log file.
                summary_writer.close()

