In [1]:
#2 Layer CNN for MNIST classification

In [2]:
import tensorflow as tf                                     #For Machine Learning
from tensorflow.examples.tutorials.mnist import input_data  #MNIST data object

In [3]:
import os
import time

In [4]:
MNIST = input_data.read_data_sets('MNIST_data/', one_hot = True)    #Load MNIST dataset

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [5]:
#Size of batch to process at a time
batch_size = 128
#Learning rate for optimization
learning_rate = 1e-3
#Number of times to run the model
n_epochs = 1
#Number o fully connected neurons
n_fc_units = 1024
#Dropout regularization strength
dropout_strength = 0.5
#Number of classes for classification
num_classes = 10

In [6]:
#Dropout strength is variable at training and test time, so we define at first a placeholder for dropout
dropout = tf.placeholder(dtype = tf.float32, name = 'dropout_strength')

In [7]:
#Placeholders for batch input data
with tf.name_scope('data'):
    X = tf.placeholder(dtype = tf.float32, shape = [None, 784], name = 'X_placeholder')
    Y = tf.placeholder(dtype = tf.float32, shape = [None, 10], name = 'Y_placeholder')

In [8]:
#Variable scope binding for the first conv layer
with tf.variable_scope('conv1') as scope:
    #Reshape images from one hot vector back to matrices of 28x28
    images = tf.reshape(X, shape = [-1,28,28,1], name = 'images')
    #Shape of kernels = 5x5, no.of input channels = 1, no. of output channels = 32 
    kernels = tf.get_variable(name = 'kernels', shape = [5,5,1,32],
                             initializer = tf.truncated_normal_initializer())
    #Biases for 32 kernels
    biases = tf.get_variable(name = 'biases', shape = [32],
                            initializer = tf.random_normal_initializer())
    #First Conv layer  , padding = 'SAME' prevents dimensions of the input i.e 28x28
    conv = tf.nn.conv2d(images, kernels, strides = [1,1,1,1], padding = 'SAME')
    #Activation at first conv layer
    conv1 = tf.nn.relu(conv + biases, name = scope.name)

In [9]:
#Max Pooling after first activation layer, output dim: 14*14,no.of channels = 32 
with tf.variable_scope('pool1') as scope:
    pool1 = tf.nn.max_pool(conv1, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'SAME', name = 'pool1')

In [10]:
#Variable scope binfding for Second conv layer
with tf.variable_scope('conv2') as scope:
    #Kernels of shape 5x5, no.of input channels = 32, no.of output channels = 64
    kernels = tf.get_variable(name = 'kernels', shape = [5,5,32,64],
                             initializer = tf.truncated_normal_initializer())
    #Biases for 64 kernels
    biases = tf.get_variable(name = 'biases', shape = [64],
                            initializer = tf.random_normal_initializer())
    #Second conv layer, padding = 'SAME' prevents dimensions of 14x14
    conv = tf.nn.conv2d(pool1, kernels, strides = [1,1,1,1], padding = 'SAME')
    #Activation at second conv layer
    conv2 = tf.nn.relu(conv + biases, name = scope.name)

In [11]:
#Max pooling after second activation layer, gives output of dim: 7x7, no.of channels = 64
with tf.variable_scope('pool2') as scope:
    pool2 = tf.nn.max_pool(conv2, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'SAME', name = 'pool2')

In [12]:
#Fully connected layer
with tf.variable_scope('fc') as scope:
    #set of inputs feeding to the FC layer
    input_features = 7*7*64
    #shape of weight matrix = 7*7*64 x 1024
    w = tf.get_variable(name = 'weights', shape = [input_features, n_fc_units],
                       initializer = tf.truncated_normal_initializer())
    #biases for each 1024 FC layer units
    b = tf.get_variable(name = 'biases', shape = [n_fc_units], 
                       initializer = tf.constant_initializer(0.0))
    #Reshape pool2 to shape 1x (7*7*64) ro matrix multiplication
    pool2 = tf.reshape(pool2, shape = [-1, input_features])
    #Activation layer
    fc = tf.nn.relu(tf.matmul(pool2,w) + b, name = 'relu')
    #dropout of FC units
    fc = tf.nn.dropout(fc, keep_prob = dropout, name = 'relu_dropout')

In [13]:
#Final Softmax layer
with tf.variable_scope('softmax_layer') as scope:
    #weight matrix of shape 1024x10
    w = tf.get_variable(name = 'weights', shape = [n_fc_units, num_classes],
                       initializer = tf.truncated_normal_initializer())
    #biases for each 10 output units
    b = tf.get_variable(name = 'biases', shape = [num_classes], 
                       initializer = tf.constant_initializer(0.0))
    #Output for each neuron
    logits = tf.matmul(fc, w) + b

In [14]:
#Loss Definition
with tf.name_scope('loss'):
    #Softmax entropy of class scores
    entropy = tf.nn.softmax_cross_entropy_with_logits(labels = Y, logits = logits)
    #loss
    loss = tf.reduce_mean(entropy, name = 'loss')

In [15]:
with tf.name_scope('summaries'):
    tf.summary.scalar('loss',loss)
    tf.summary.histogram('loss', loss)
    summary_op = tf.summary.merge_all()
    

In [16]:
#Global step to keep count of number of updates made
global_step = tf.Variable(0, dtype = tf.int32, trainable = False, name = 'global_step')

In [17]:
#Optimizer for the NN
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss, global_step = global_step)

In [27]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    #For tensorboard visualizations
    writer = tf.summary.FileWriter('graphs/convnet', sess.graph)
    #Check if checkpoint present 
    ckpt = tf.train.get_checkpoint_state(os.path.dirname('/checkpoints/convnet_mnist/checkpoint'))
    #Restore the latest checkpoint if present
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        
    #Training Time
    n_batches = int(MNIST.train.num_examples/batch_size)
    loss_sum = 0.0
    start = time.time()
    for i in range(0,n_epochs*n_batches):
        X_batch, Y_batch = MNIST.train.next_batch(batch_size)
        opt, loss_batch, summary = sess.run([optimizer, loss, summary_op], feed_dict = {X: X_batch,
                                                                                       Y: Y_batch,
                                                                                       dropout: dropout_strength})
        loss_sum += loss_batch
        #Adding data to tensorboard summary
        writer.add_summary(summary, global_step=i)
        if (i+1)%10 == 0:
            print( "Loss at step {}: {:5.1f} ".format(i+1, loss_sum/i))
            saver.save(sess, '/checkpoints/convnet_mnist/mnist-convnet', i)
            loss_sum = 0.0
    print('Optimization finished!')
    print('Time taken: {}'.format(time.time() - start))
    
    #Testing time
    n_batches = int(MNIST.test.num_examples/batch_size)
    total_correct_preds = 0
    for i in range(0, n_batches):
        X_batch, Y_batch = MNIST.test.next_batch(batch_size)
        opt, loss_batch, logits_batch = sess.run([optimizer, loss, logits], feed_dict = {X: X_batch,
                                                                                        Y: Y_batch,
                                                                                        dropout: 1.0})
        preds = tf.nn.softmax(logits_batch)
        correct_preds = tf.equal(tf.argmax(preds,1), tf.argmax(Y_batch,1))
        accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
        total_correct_preds += sess.run(accuracy)
        
    print("Accuracy = {}". format(total_correct_preds/MNIST.test.num_examples))
        
    
writer.close()  
    

INFO:tensorflow:Restoring parameters from /home/aeros/GitHubRepos/tensorzone/checkpoints/convnet_mnist/mnist-convnet-419
Loss at step 10: 137.3 
Loss at step 20:  69.5 
Loss at step 30:  51.2 
Loss at step 40:  33.3 
Loss at step 50:  27.7 
Loss at step 60:  19.3 
Loss at step 70:  21.3 
Loss at step 80:  15.9 
Loss at step 90:  12.8 
Loss at step 100:  12.8 
Loss at step 110:  10.6 
Loss at step 120:  10.3 
Loss at step 130:   9.7 
Loss at step 140:   8.4 
Loss at step 150:   6.9 
Loss at step 160:   8.0 
Loss at step 170:   6.2 
Loss at step 180:   7.1 
Loss at step 190:   6.9 
Loss at step 200:   6.7 
Loss at step 210:   4.6 
Loss at step 220:   5.4 
Loss at step 230:   4.9 
Loss at step 240:   5.7 
Loss at step 250:   4.5 
Loss at step 260:   4.6 
Loss at step 270:   3.8 
Loss at step 280:   4.4 
Loss at step 290:   3.4 
Loss at step 300:   3.3 
Loss at step 310:   3.3 
Loss at step 320:   2.8 
Loss at step 330:   3.4 
Loss at step 340:   2.8 
Loss at step 350:   3.1 
Loss at step 