In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters


In [3]:
# Display the installed TensorFlow version; the cells below use the TF 1.x
# graph API (tf.placeholder, tf.Session).
tf.__version__

'1.11.0'

## Reading the MNIST Data

In [4]:
# Load MNIST from ./MNIST_data/ (downloaded on first use) with one-hot labels.
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets(train_dir="MNIST_data/", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use urllib or similar directly.
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py fr

The architecture of our network is as follows (`?` denotes the batch size):
    
- (Input) -> [?, 28, 28, 1]  >> apply 32 filters of size [5x5]
- (Convolutional layer 1)  -> [?, 28, 28, 32]
- (ReLU 1)  -> [?, 28, 28, 32]
- (Max pooling 1) -> [?, 14, 14, 32]
- (Convolutional layer 2)  -> [?, 14, 14, 64] 
- (ReLU 2)  -> [?, 14, 14, 64] 
- (Max pooling 2)  -> [?, 7, 7, 64] 
- [Fully connected layer 3] -> [?, 1024]
- [ReLU 3]  -> [?, 1024]
- [Dropout]  -> [?, 1024]
- [Fully connected layer 4] -> [?, 10]


The next cells will explore this new architecture.

In [19]:
## General parameters for the model
height = 28  # image height in pixels
width = 28  # image width in pixels
class_output = 10  # number of possible classes for the problem
flat = width * height  # number of pixels in one (flattened) image


In [10]:
## Placeholders for the data fed at run time.
## The leading dimension is None so that any batch size can be supplied.
x_in = tf.placeholder(dtype=tf.float32, shape=[None, flat])  # flattened images
y_in = tf.placeholder(dtype=tf.float32, shape=[None, class_output])  # label vectors

In [13]:
## Each input row is a flattened 28x28 single-channel (grayscale) image.
## Reshape it into the 4-D NHWC layout that tf.nn.conv2d expects by default:
##   [batch, height, width, channels]
## The batch dimension can be of any size, so it is inferred with -1.
## Height comes before width; with a square 28x28 image the result is the
## same either way, but this order stays correct for non-square images.
x_image = tf.reshape(x_in, [-1, height, width, 1])
x_image

<tf.Tensor 'Reshape_1:0' shape=(?, 28, 28, 1) dtype=float32>

## First Convolutional Layer

In [27]:
## Hyperparameters of the first convolutional layer
n_filter_conv1 = 32  # number of filters (output channels)
filter_conv1_size = (5, 5)  # spatial size of each filter
stride1 = [1, 1, 1, 1]  # convolution stride per [batch, height, width, channels]
maxpoolfilter1 = [1, 2, 2, 1]  # pooling window per [batch, height, width, channels]
maxpoolstride1 = [1, 2, 2, 1]  # stride equals the 2x2 window, so pooling windows don't overlap

In [28]:
## Filter bank for layer 1, shaped [filter_height, filter_width, in_channels, out_channels];
## the input has a single (grayscale) channel.
w_conv1 = tf.Variable(tf.truncated_normal([filter_conv1_size[0], filter_conv1_size[1], 1, n_filter_conv1], stddev=0.1))
## One bias per filter; sized from n_filter_conv1 so it always matches w_conv1.
b_conv1 = tf.Variable(tf.constant(0.1, shape=[n_filter_conv1]))

In [29]:
## Convolve the input images with the layer-1 filters and add the per-filter bias
convolve1 = (
    tf.nn.conv2d(x_image, w_conv1, strides=stride1, padding='SAME')
    + b_conv1
)

In [30]:
## ReLU activation on the layer-1 convolution output
h_conv1 = tf.nn.relu(features=convolve1)

In [34]:
## Max-pool the activated layer-1 feature maps
conv1_out = tf.nn.max_pool(
    h_conv1,
    ksize=maxpoolfilter1,
    strides=maxpoolstride1,
    padding='SAME',
)
conv1_out
## size of conv1_out = [batch_size, 14, 14, 32]

<tf.Tensor 'MaxPool_2:0' shape=(?, 14, 14, 32) dtype=float32>

## Second Convolution Layer

We apply convolution again at this layer:

- Filter size = (5, 5)
- Number of input channels = 32 (the previous layer applied 32 filters)
- Number of filters applied in this layer = 64

<b>Notice:</b> here the input image is [14x14x32] and each filter is [5x5x32]. We use 64 such filters, so the output of the convolutional layer is 64 convolved images, i.e. [14x14x64].

<b>Notice:</b> applying a single filter of size [5x5x32] to an image of size [14x14x32] yields an image of size [14x14x1]; that is, the convolution operates on the whole input volume (all 32 channels at once).

In [37]:
## Hyperparameters of the second convolutional layer
n_channels_2 = 32  # input channels, i.e. the number of filters in the previous layer
n_filter_conv2 = 64  # number of filters (output channels) of the second convolutional layer
filter_conv2_size = (5, 5)  # spatial size of each second-layer filter
stride2 = [1, 1, 1, 1]  # convolution stride per [batch, height, width, channels]
maxpoolfilter2 = [1, 2, 2, 1]  # pooling window per [batch, height, width, channels]
maxpoolstride2 = [1, 2, 2, 1]  # stride equals the 2x2 window, so pooling windows don't overlap

In [44]:
## Layer-2 filter bank: [filter_height, filter_width, in_channels, out_channels]
w_conv2 = tf.Variable(
    tf.truncated_normal(
        [filter_conv2_size[0], filter_conv2_size[1], n_channels_2, n_filter_conv2],
        stddev=0.1,
    )
)
## One bias per layer-2 filter
b_conv2 = tf.Variable(tf.constant(0.1, shape=[n_filter_conv2]))
print(w_conv2, b_conv2)

<tf.Variable 'Variable_8:0' shape=(5, 5, 32, 64) dtype=float32_ref> <tf.Variable 'Variable_9:0' shape=(64,) dtype=float32_ref>


In [45]:
## Convolve the pooled layer-1 output with the layer-2 filters and add the bias
convolve2 = (
    tf.nn.conv2d(conv1_out, w_conv2, strides=stride2, padding='SAME')
    + b_conv2
)
convolve2

<tf.Tensor 'add_4:0' shape=(?, 14, 14, 64) dtype=float32>

In [43]:
## ReLU activation on the layer-2 convolution output
h_conv2 = tf.nn.relu(features=convolve2)

In [47]:
## Max-pool the activated layer-2 feature maps
conv2_out = tf.nn.max_pool(
    h_conv2,
    ksize=maxpoolfilter2,
    strides=maxpoolstride2,
    padding='SAME',
)
conv2_out

<tf.Tensor 'MaxPool_3:0' shape=(?, 7, 7, 64) dtype=float32>

## Fully connected Layers

In [48]:
## Flatten each [7, 7, 64] feature volume in conv2_out into one row vector
flatten_matrix = tf.reshape(conv2_out, shape=[-1, 7 * 7 * 64])

In [49]:
## First fully connected layer: 7*7*64 flattened inputs -> 1024 hidden units
w_fc1 = tf.Variable(
    tf.truncated_normal([7 * 7 * 64, 1024], stddev=0.1)
)
## One bias for each of the 1024 outputs
b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))

In [50]:
## Affine transform of the flattened features, followed by ReLU
fc1 = tf.matmul(flatten_matrix, w_fc1) + b_fc1
h_fc1 = tf.nn.relu(features=fc1)

In [51]:
## Dropout layer: keep_prob is fed at run time
## (0.5 while training, 1.0 while evaluating in the session cell below)
keep_prob = tf.placeholder(dtype=tf.float32)
layer_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)
layer_drop

<tf.Tensor 'dropout/mul:0' shape=(?, 1024) dtype=float32>

In [52]:
## Output layer: maps the 1024 hidden units to one score per class.
## Sized from class_output so it always matches the label placeholder y_in.
w_fc2 = tf.Variable(tf.truncated_normal([1024, class_output], stddev=0.1))  # 1024 input neurons
b_fc2 = tf.Variable(tf.constant(0.1, shape=[class_output]))  # one bias per digit class (0-9)

In [53]:
## Class scores (logits) and their softmax probabilities
fc2 = tf.matmul(layer_drop, w_fc2) + b_fc2
y_out = tf.nn.softmax(logits=fc2)

### Training 

In [55]:
## Loss function: mean softmax cross-entropy over the batch.
## It is computed from the logits (fc2) with the fused TensorFlow op instead of
## -sum(y_in * tf.log(y_out)): the manual form yields NaN/inf as soon as a
## softmax probability underflows to 0, whereas the fused op is numerically
## stable and mathematically equivalent.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_in, logits=fc2)
)

In [56]:
## Training op: Adam with learning rate 1e-4, minimizing the cross-entropy loss
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cross_entropy)

In [57]:
## Per-example flag: True where the predicted class matches the labelled class
correct_prediction = tf.equal(
    tf.argmax(y_out, axis=1),
    tf.argmax(y_in, axis=1),
)

In [58]:
## Accuracy: fraction of correct predictions in the fed batch
accuracy = tf.reduce_mean(
    tf.cast(correct_prediction, dtype=tf.float32)
)

In [62]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    ### Train on mini-batches of 50 images for 10000 steps
    for i in range(10000):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            # Report accuracy on the current batch with dropout disabled (keep_prob = 1.0)
            train_accuracy = accuracy.eval(feed_dict={x_in: batch[0], y_in: batch[1], keep_prob: 1.0})
            print("step %d, training accuracy %g"%(i, float(train_accuracy)))
        # One optimisation step with dropout enabled (keep_prob = 0.5)
        optimizer.run(feed_dict={x_in: batch[0], y_in: batch[1], keep_prob: 0.5})

    ## Finding Testing Accuracy
    # Evaluate in batches to avoid out-of-memory issues. The test arrays are
    # sliced directly (instead of calling mnist.test.next_batch, which shuffles)
    # so every test example is visited exactly once, and the mean is weighted by
    # batch size so it stays correct even if the last batch is smaller.
    test_images = mnist.test.images
    test_labels = mnist.test.labels
    n_test = test_images.shape[0]
    n_correct = 0.0
    for start in range(0, n_test, 50):
        batch = (test_images[start:start + 50], test_labels[start:start + 50])
        batch_accuracy = accuracy.eval(feed_dict={x_in: batch[0], y_in: batch[1], keep_prob: 1.0})
        n_correct += float(batch_accuracy) * batch[0].shape[0]
    print("test accuracy {}".format(n_correct / n_test))

step 0, training accuracy 0.06
step 100, training accuracy 0.8
step 200, training accuracy 0.92
step 300, training accuracy 0.94
step 400, training accuracy 0.9
step 500, training accuracy 0.96
step 600, training accuracy 0.94
step 700, training accuracy 0.98
step 800, training accuracy 0.98
step 900, training accuracy 0.92
step 1000, training accuracy 0.92
step 1100, training accuracy 0.92
step 1200, training accuracy 1
step 1300, training accuracy 0.94
step 1400, training accuracy 0.96
step 1500, training accuracy 0.96
step 1600, training accuracy 0.98
step 1700, training accuracy 1
step 1800, training accuracy 0.96
step 1900, training accuracy 0.96
step 2000, training accuracy 0.92
step 2100, training accuracy 0.98
step 2200, training accuracy 0.98
step 2300, training accuracy 0.98
step 2400, training accuracy 0.98
step 2500, training accuracy 0.94
step 2600, training accuracy 0.98
step 2700, training accuracy 0.98
step 2800, training accuracy 1
step 2900, training accuracy 0.98
ste