# MNIST data with Convolutional Neural Nets

In [1]:
import tensorflow as tf

## Importing the data

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST dataset from the MNIST_data/ directory; one_hot=True requests
# one-hot encoded labels (matching the 10-wide y_true placeholder used later).
# NOTE: the cell output below shows this loader is deprecated in favour of tf.data.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


## Helper functions

### Initializing the weights

In [3]:
def init_weights(shape):
    """Create a trainable weight variable of the given shape.

    Initial values are drawn from a truncated normal distribution with
    stddev=0.1; samples further than two standard deviations from the mean
    are discarded and re-drawn.

    Args:
        shape: shape of the variable to create (list of ints).

    Returns:
        A tf.Variable holding the randomly initialised weights.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

### Initializing the Bias

In [4]:
def init_bias(shape):
    """Create a trainable bias variable of the given shape.

    Every element starts at the constant 0.1.

    Args:
        shape: shape of the variable to create (list of ints).

    Returns:
        A tf.Variable filled with 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

### 2D Convolution

Convenient function that creates a 2d convolution

TensorFlow already provides `tf.nn.conv2d`, which builds the convolution from an input tensor, a kernel/filter tensor, the strides, and a padding mode.

I'm creating a wrapper around it

In [5]:
def conv2D(x, W):
    """Thin wrapper around tf.nn.conv2d with stride 1 and SAME padding.

    Args:
        x: input tensor laid out as [batch, height, width, channels]
           (channels is 1 for grayscale images).
        W: filter weights laid out as
           [filterHeight, filterWidth, channelsIN, channelsOUT].

    Returns:
        The convolved tensor. With a stride of 1 and SAME padding the output
        keeps the height and width of x; its channel count is channelsOUT.
    """
    unit_strides = [1, 1, 1, 1]  # move one step at a time along every dimension
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

### Pooling 

In [6]:
def max_pool_2by2(x):
    """Apply 2x2 max pooling with a stride of 2 and SAME padding.

    Args:
        x: input tensor laid out as [batch, height, width, channels].

    Returns:
        The pooled tensor; height and width are halved (rounded up, because
        the padding is SAME), batch and channel dimensions are untouched.
    """
    # [1, 2, 2, 1]: a 2x2 spatial window, pooling neither across the batch
    # nor across channels. The same layout is used for the window and stride.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

## Convolutional Layer

In [7]:
def convolutional_layer(input_x, shape):
    """Build one convolution + bias + ReLU layer.

    Args:
        input_x: input tensor, [batch, height, width, channelsIN].
        shape: filter shape [filterHeight, filterWidth, channelsIN, channelsOUT].

    Returns:
        The ReLU-activated output of the convolution.
    """
    kernel = init_weights(shape)      # tf.Variable with the filter weights
    offset = init_bias([shape[3]])    # one bias per output channel
    pre_activation = conv2D(input_x, kernel) + offset
    return tf.nn.relu(pre_activation)

## Dense Layer

In [8]:
def normal_full_layer(input_layer, size):
    """Build a densely connected (fully connected) layer without activation.

    Args:
        input_layer: 2-D tensor [batch, features]. Dimension 1 is read as the
            number of input features, so convolutional outputs must be
            flattened before being passed in.
        size: number of output units.

    Returns:
        The tensor input_layer @ W + b with shape [batch, size]. No
        non-linearity is applied; callers add one where needed.
    """
    n_inputs = int(input_layer.get_shape()[1])
    weights = init_weights([n_inputs, size])
    biases = init_bias([size])
    return tf.matmul(input_layer, weights) + biases

## Placeholders

In [9]:
# Graph inputs; their values are supplied through feed_dict at run time.
# The leading None leaves the batch dimension unspecified.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])      # flattened 28x28 images (784 pixels)
y_true = tf.placeholder(dtype=tf.float32, shape=[None, 10])  # one-hot labels for the digits 0 to 9

## Layers

In [10]:
# INPUT LAYER
# Turn each flat 784-vector back into an image: [batch, 28, 28, 1].
# -1 lets TensorFlow infer the batch size; the trailing 1 is the single colour channel.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

# CONVOLUTIONAL BLOCK 1
# Filter shape is [patchH, patchW, channelsIN, channelsOUT]:
# 32 features are computed for each 5x5 patch of the 1-channel input.
convo_1 = convolutional_layer(x_image, [5, 5, 1, 32])
# 2x2 max pooling on the output of the first convolution
convo1_pooling = max_pool_2by2(convo_1)

# CONVOLUTIONAL BLOCK 2
# Takes the 32 feature maps from block 1 and produces 64 feature maps.
convo_2 = convolutional_layer(convo1_pooling, [5, 5, 32, 64])
convo_2_pooling = max_pool_2by2(convo_2)

# Flatten for the dense layer. The two 2x2 poolings shrink 28 -> 14 -> 7,
# so every image is now 7 * 7 * 64 values.
convo_2_flat = tf.reshape(convo_2_pooling, shape=[-1, 7 * 7 * 64])

# FULLY CONNECTED LAYER 1
# 1024 units followed by a ReLU
full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat, size=1024))

Instructions for updating:
Colocations handled automatically by placer.


## Dropout to prevent overfitting

In [11]:
# Probability of keeping a unit; fed at run time
# (0.5 while training, 1.0 when evaluating).
hold_prob = tf.placeholder(dtype=tf.float32)
# The keep_prob argument of tf.nn.dropout is deprecated. Its replacement, rate,
# is the fraction of units to drop, i.e. 1 - keep probability.
drop_rate = 1 - hold_prob
full_one_dropout = tf.nn.dropout(full_layer_one, rate=drop_rate)

## Making y_pred, i.e. the output layer

In [12]:
# Output layer: 10 units, one per digit class.
# No activation is applied here; y_pred holds the raw scores that are passed
# as `logits` to the softmax cross-entropy loss.
y_pred = normal_full_layer(full_one_dropout, 10) # 10 output classes

## Defining the Loss Function

In [13]:
# Softmax cross-entropy between the one-hot labels and the logits, computed
# per example and then averaged into a single scalar loss.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true,
                                                              logits=y_pred)
cross_entropy = tf.reduce_mean(per_example_loss)

## Defining the Optimizer and the train object

In [14]:
# Adam optimizer, using the same hyper-parameters as the theory notes of the
# NPTEL Deep Learning course by Prof. Mitesh Khapra.
optimizer = tf.train.AdamOptimizer(
    learning_rate=0.001,
    beta1=0.9,
    beta2=0.999,
    epsilon=1e-8,
)
# Running `train` once performs one optimisation step on the cross-entropy loss.
train = optimizer.minimize(cross_entropy)

## Initializing the Variables

In [15]:
# Op that initialises all global variables defined so far (layer weights and
# biases). It is only built here; it must be run inside the session before
# any training step.
init = tf.global_variables_initializer()

# Running the TensorFlow session for 1000 training steps

In [None]:

steps = 1000       # number of mini-batch updates (training steps, not epochs)
batch_size = 50    # examples per training step
eval_every = 100   # report test accuracy every this many steps

# Build the accuracy ops ONCE, before the loop. Creating them inside the loop
# adds a fresh set of nodes to the graph on every evaluation, which makes the
# graph grow for no benefit.
# argmax over axis=1 gives the index of the largest entry in each row, i.e.
# the predicted / true class; `matches` has one boolean per example fed in.
matches = tf.equal(tf.argmax(y_pred, axis=1), tf.argmax(y_true, axis=1))
# The mean of the 0/1 matches is the accuracy.
acc = tf.reduce_mean(tf.cast(matches, tf.float32))

# Evaluation feeds the full test set with dropout disabled (keep probability 1.0).
# Each evaluation therefore pushes every test image through the network.
test_feed = {
    x: mnist.test.images,
    y_true: mnist.test.labels,
    hold_prob: 1.0,
}

with tf.Session() as sess:
    # Initialise the variables inside this session
    sess.run(init)

    for i in range(steps):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # One optimisation step, keeping each unit with probability 0.5
        sess.run(train, feed_dict={x: batch_x, y_true: batch_y, hold_prob: 0.5})

        if i % eval_every == 0:
            # `acc` is a graph op, so its value has to be fetched with sess.run
            print("On step {0}, accuracy = {1}".format(i, sess.run(acc, feed_dict=test_feed)))


Taking too much time

I think the kernel is getting stuck

Excuse me for running TF on a potato PC