So far we have used a single-layer network (accuracy ~0.92) and a multilayer network with 2 hidden layers (accuracy ~0.95) to classify the MNIST dataset; that level of accuracy is not good enough for real-world problems.

Now we are going to see how to tie the "convolution", "pooling", "ReLU" and "fc" (fully-connected) layers together into a convolutional network.

Some new attributes included below are: dropout, tf.variable_scope 

In [1]:
# import 
import tensorflow as tf
import numpy as np
# single seed shared by NumPy and by every seeded TensorFlow op below
seed = 21
# np.random.seed must be *called*: assigning to it would replace the function
# with an int and leave NumPy's global RNG unseeded
np.random.seed(seed)

# import MNIST data
# The dataset is read from the data/MNIST directory; one_hot=True requests
# one-hot label vectors, matching the [None, NUM_CLASS] labels placeholder
# defined later in this notebook.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data/MNIST", one_hot=True)

Extracting data/MNIST\train-images-idx3-ubyte.gz
Extracting data/MNIST\train-labels-idx1-ubyte.gz
Extracting data/MNIST\t10k-images-idx3-ubyte.gz
Extracting data/MNIST\t10k-labels-idx1-ubyte.gz


In [24]:
# data dimensions
NUM_CLASS = 10                       # number of output classes
INPUT_SIZE = 28                      # images are INPUT_SIZE x INPUT_SIZE pixels
INPUT_SIZE_FLAT = INPUT_SIZE ** 2    # length of one flattened image

# training hyper-parameters
LEARNING_RATE = 1e-3
# fed to the `keep_prob` placeholder during training, i.e. this is the
# probability of *keeping* a unit, not of dropping it
DROPOUT = 0.75
BATCH_SIZE = 128
# the training loop runs while step * BATCH_SIZE < TRAINING_EPOCHS, so this
# bounds the number of training examples processed rather than epochs
TRAINING_EPOCHS = 300000
# report batch loss/accuracy every DISPLAY_STEP steps
DISPLAY_STEP = 50


# graph inputs, fed through feed_dict at run time
# flattened images: one row of INPUT_SIZE_FLAT pixel values per example
images = tf.placeholder(dtype=tf.float32, shape=[None, INPUT_SIZE_FLAT], name="images")
# labels: one row of NUM_CLASS values per example
labels = tf.placeholder(dtype=tf.float32, shape=[None, NUM_CLASS], name="labels")
# keep probability handed to tf.nn.dropout in the fully connected layer
keep_prob = tf.placeholder(dtype=tf.float32, name='dropout')

# trainable weights, one entry per layer.
# `name` is given to tf.Variable (not to the initializer op) so that the
# variable itself carries the readable name in the graph.
weights = {
    # 5x5 conv filter size, 1 in_channel, 32 out_channel
    'conv1_w': tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1, seed=seed), name="conv1_w"),

    # 5x5 conv filter size, 32 in_channel, 64 out_channel
    'conv2_w': tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=seed), name="conv2_w"),

    # fully connected layer, 7*7*64 inputs, 1024 output
    'fc_w': tf.Variable(tf.truncated_normal([7 * 7 * 64, 1024], stddev=0.1, seed=seed), name="fc_w"),

    # output layer(softmax layer), 1024 inputs, 10 outputs [NUM_CLASS]
    'soft_w': tf.Variable(tf.truncated_normal([1024, NUM_CLASS], stddev=0.1, seed=seed), name="softmax_w")
}
# trainable biases, one vector per layer, sized by that layer's output width.
# `name` is given to tf.Variable (not to the initializer op) so that the
# variable itself carries the readable name in the graph.
biases = {
    # number of biases for conv1 layer = out_channels
    'conv1_b': tf.Variable(tf.random_normal([32], stddev=1.0, seed=seed), name="conv1_b"),

    # number of bias for conv2 layer = out_channel
    'conv2_b': tf.Variable(tf.random_normal([64], stddev=1.0, seed=seed), name="conv2_b"),

    # number of bias for fc layer = output
    'fc_b': tf.Variable(tf.random_normal([1024], stddev=1.0, seed=seed), name="fc_b"),

    # number of bias for softmax(output) layer = NUM_CLASS,
    # kept in sync with the second dimension of weights['soft_w']
    'soft_b': tf.Variable(tf.random_normal([NUM_CLASS], stddev=1.0, seed=seed), name="soft_b")
}

## model 
### `conv -> relu -> pool -> conv -> relu -> pool -> fully connected -> softmax`

>tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=None, data_format=None, name=None)

Computes a 2-D convolution given 4-D `input` and `filter` tensors.

* input tensor of shape `[batch, in_height, in_width, in_channels]`
* a filter / kernel tensor of shape `[filter_height, filter_width, in_channels, out_channels]`
* Must have `strides[0] = strides[3] = 1`.  
* For the most common case of the same horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
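* `padding` is either `'SAME'` or `'VALID'`; this notebook uses `'SAME'`, which zero-pads the input so that a stride-1 convolution keeps the input's height and width.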

In [25]:
####### convolutional layer1 #######
# reshape the flat input [batch, INPUT_SIZE*INPUT_SIZE] into the 4-D layout
# [batch, in_height, in_width, in_channels] expected by tf.nn.conv2d;
# -1 lets the batch dimension be inferred and the single channel is grayscale
images_reshaped = tf.reshape(images, shape=[-1, INPUT_SIZE, INPUT_SIZE, 1], name="reshape_input")
# conv (stride 1, SAME padding keeps height and width unchanged)
conv1 = tf.nn.conv2d(input=images_reshaped, filter=weights["conv1_w"], strides=[1,1,1,1], padding='SAME')
# relu (the bias is broadcast over the channel dimension)
conv1 = tf.nn.relu(features=(conv1+biases["conv1_b"]))
# pool (2x2 window, stride 2 halves height and width)
conv1 = tf.nn.max_pool(value=conv1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name="output_of_conv1")
# output dimension => batch x 14 x 14 x 32 (for INPUT_SIZE = 28)

In [26]:
####### convolutional layer2 #######
# convolve the pooled conv1 feature maps with the conv2 filters
# (stride 1, SAME padding keeps height and width unchanged)
conv2_raw = tf.nn.conv2d(conv1, weights["conv2_w"], strides=[1, 1, 1, 1], padding='SAME')
# add the per-channel bias, then apply the ReLU non-linearity
conv2_act = tf.nn.relu(tf.add(conv2_raw, biases["conv2_b"]))
# 2x2 max-pooling with stride 2 halves height and width
conv2 = tf.nn.max_pool(conv2_act,
                       ksize=[1, 2, 2, 1],
                       strides=[1, 2, 2, 1],
                       padding='SAME',
                       name="output_of_conv2")
# resulting shape: batch x 7 x 7 x 64

In [27]:
####### fully connected layer #######
# flatten the 4-D conv2 output into one row per example (2-D); the row width
# is read from the first dimension of the fc weight matrix
fc_input_width = weights["fc_w"].get_shape().as_list()[0]
fc = tf.reshape(conv2, [-1, fc_input_width], name="fc_reshape")

# affine transform followed by ReLU
fc = tf.nn.relu(tf.matmul(fc, weights["fc_w"]) + biases["fc_b"])
# dropout; the keep probability is supplied at run time via the placeholder
fc = tf.nn.dropout(fc, keep_prob=keep_prob, seed=seed, name="output_of_fc")

In [28]:
####### softmax layer #######
# unnormalised class scores; no softmax op is applied to them in this cell
logits = tf.matmul(fc, weights["soft_w"]) + biases["soft_b"]

In [29]:
# per-example softmax cross-entropy between the logits and the labels
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
# scalar loss: mean cross-entropy over the batch
loss = tf.reduce_mean(cross_entropy)
# Adam training op; running `optimizer` performs one update that minimises `loss`
adam = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE, name='Adam')
optimizer = adam.minimize(loss)
# accuracy: fraction of examples whose arg-max logit equals the arg-max label
predicted_class = tf.argmax(logits, 1)
true_class = tf.argmax(labels, 1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [30]:
# Train the network on mini-batches, periodically report loss/accuracy on the
# current batch, then report accuracy over the whole test set.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # despite its name, TRAINING_EPOCHS bounds the number of training
    # examples processed (step * BATCH_SIZE), not the number of epochs
    while step * BATCH_SIZE < TRAINING_EPOCHS:
        batch_images, batch_labels = mnist.train.next_batch(BATCH_SIZE)

        # Run optimization op (backprop) with dropout active
        sess.run(fetches=optimizer, feed_dict={images:batch_images, labels:batch_labels, keep_prob:DROPOUT})

        if step % DISPLAY_STEP == 0:
            # Calculate batch loss and accuracy on the batch just trained on;
            # keep_prob of 1.0 disables dropout for the measurement
            los, acc = sess.run([loss, accuracy], feed_dict={images:batch_images, labels:batch_labels, keep_prob:1.0})

            print("iter " + str(step*BATCH_SIZE) + ", batch_loss " + "{:.4f}".format(los) + ", acc "+ "{:.4}".format(acc))

        step += 1

    print("optimization finished")

    # time to calculate test accuracy: evaluate the *whole* test set in
    # BATCH_SIZE chunks (keeps memory bounded) instead of only its first 128
    # images, weighting each chunk's accuracy by its size so that a shorter
    # final chunk is counted correctly
    num_test = len(mnist.test.images)
    num_correct = 0.0
    for start in range(0, num_test, BATCH_SIZE):
        test_images = mnist.test.images[start:start + BATCH_SIZE]
        test_labels = mnist.test.labels[start:start + BATCH_SIZE]
        chunk_acc = sess.run(fetches=accuracy, feed_dict={images:test_images,
                                                          labels:test_labels,
                                                          keep_prob:1.0})
        num_correct += chunk_acc * len(test_images)

    print("Testing accuracy ", num_correct / num_test)
            

iter 6400, batch_loss 1.5000, acc 0.625
iter 12800, batch_loss 0.3078, acc 0.875
iter 19200, batch_loss 0.2291, acc 0.9297
iter 25600, batch_loss 0.1396, acc 0.9609
iter 32000, batch_loss 0.0941, acc 0.9766
iter 38400, batch_loss 0.0823, acc 0.9766
iter 44800, batch_loss 0.0873, acc 0.9688
iter 51200, batch_loss 0.1185, acc 0.9688
iter 57600, batch_loss 0.0461, acc 0.9844
iter 64000, batch_loss 0.1045, acc 0.9844
iter 70400, batch_loss 0.0346, acc 0.9844
iter 76800, batch_loss 0.0962, acc 0.9531
iter 83200, batch_loss 0.0429, acc 0.9922
iter 89600, batch_loss 0.0246, acc 0.9922
iter 96000, batch_loss 0.0390, acc 0.9844
iter 102400, batch_loss 0.0139, acc 1.0
iter 108800, batch_loss 0.0211, acc 0.9844
iter 115200, batch_loss 0.0340, acc 0.9844
iter 121600, batch_loss 0.0517, acc 0.9609
iter 128000, batch_loss 0.0616, acc 0.9766
iter 134400, batch_loss 0.0330, acc 0.9844
iter 140800, batch_loss 0.0215, acc 0.9844
iter 147200, batch_loss 0.0067, acc 1.0
iter 153600, batch_loss 0.0446, acc