# Convolutional Neural Network for MNIST dataset

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

%matplotlib inline

## Load dataset

In [2]:
# Read the MNIST data sets from the local "MNIST_data/" directory.
# one_hot=True requests one-hot encoded label vectors, which is what the
# 10-column `y_true` placeholder defined further down expects.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Helper Functions

In [3]:
def init_weights(shape):
    """Create a weight variable of the requested shape.

    The initial values are sampled from a truncated normal distribution
    with a standard deviation of 0.1.

    Args:
        shape: list/tuple of ints describing the variable's shape.

    Returns:
        A `tf.Variable` wrapping the random initial values.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

In [4]:
def init_bias(shape):
    """Create a bias variable of the requested shape.

    Every element starts at the constant value 0.1.

    Args:
        shape: list/tuple of ints describing the variable's shape.

    Returns:
        A `tf.Variable` wrapping the constant initial values.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

In [5]:
def conv2d(x, W):
    """Apply a 2-D convolution with stride 1 and 'SAME' padding.

    Args:
        x: input tensor laid out as [batch, H, W, channels].
        W: filter tensor laid out as
           [filter H, filter W, channels in, channels out].

    Returns:
        The result of `tf.nn.conv2d` for the given input and filters.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

In [6]:
def max_pool_2by2(x):
    """Apply 2x2 max pooling with a stride of 2 and 'SAME' padding.

    Args:
        x: input tensor laid out as [batch, h, w, c].

    Returns:
        The result of `tf.nn.max_pool` over 2x2 windows of `x`.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding="SAME")

In [7]:
def convolutional_layer(input_x, shape):
    """Build a convolution + bias + ReLU layer on top of `input_x`.

    Args:
        input_x: input tensor passed to `conv2d`.
        shape: filter shape; its fourth entry (`shape[3]`) is also used as
            the length of the bias vector.

    Returns:
        The ReLU activation of the biased convolution output.
    """
    # Weights are created before the bias, matching the original order.
    kernel = init_weights(shape)
    out_channels = shape[3]
    offset = init_bias([out_channels])
    pre_activation = conv2d(input_x, kernel) + offset
    return tf.nn.relu(pre_activation)

In [8]:
def normal_full_layer(input_layer, size):
    """Build a fully connected (affine) layer with `size` output units.

    No activation is applied here; callers wrap the result themselves.

    Args:
        input_layer: 2-D tensor whose second dimension is the number of
            input features (read via `get_shape()[1]`).
        size: number of output units.

    Returns:
        `tf.matmul(input_layer, W) + b` for freshly created W and b.
    """
    n_inputs = int(input_layer.get_shape()[1])
    # Weights are created before the bias, matching the original order.
    weights = init_weights([n_inputs, size])
    bias = init_bias([size])
    projected = tf.matmul(input_layer, weights)
    return projected + bias

# TF GRAPH

### Inputs

In [9]:
# Placeholders fed at run time through feed_dict.
with tf.name_scope("inputs"):
    # x: one row of 784 float values per image; the batch size is left open (None)
    x = tf.placeholder(tf.float32, shape=[None,784])
    # view each 784-value row as a 28x28 image with a single channel for the conv layers
    x_image = tf.reshape(x, [-1,28,28,1])
    # y_true: one row of 10 values per image (labels are loaded with one_hot=True)
    y_true = tf.placeholder(tf.float32, shape=[None, 10])
    
    # holding probability used for dropout in our fully connected layers
    # (passed as keep_prob to tf.nn.dropout; fed 0.9 for training and 1.0 for evaluation)
    hold_prob = tf.placeholder(tf.float32)

### Convolution layers with maxpool

In [10]:
# First convolution block: 5x5 filters, 1 input channel -> 32 output channels,
# followed by 2x2 max pooling with stride 2 (28x28 -> 14x14 feature maps).
with tf.name_scope("first_conv_layer"):
    convo_1 = convolutional_layer(x_image, shape=[5,5,1,32])
    convo_1_pool = max_pool_2by2(convo_1)

In [11]:
# Second convolution block: 5x5 filters, 32 input channels -> 64 output channels,
# followed by 2x2 max pooling with stride 2 (14x14 -> 7x7 feature maps,
# which is the 7*7*64 size flattened in the first fully connected layer).
with tf.name_scope("second_conv_layer"):
    convo_2 = convolutional_layer(convo_1_pool, shape=[5,5,32,64])
    convo_2_pool = max_pool_2by2(convo_2)

### Fully Connected Layers

In [12]:
# First fully connected block: flatten -> dense(1024) -> ReLU -> dropout.
with tf.name_scope("first_full_layer"):
    # reshape convolution layers outputs into a matrix of shape=[batch_size, num_features]
    # (num_features = 7*7*64: 7x7 feature maps with 64 channels)
    convo_2_flat = tf.reshape(convo_2_pool, [-1, 7*7*64])
    
    # Fully connected layer with relu activation function
    full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat, 1024))
    
    # add dropout; the keep probability is supplied through the hold_prob placeholder
    full_one_dropout = tf.nn.dropout(full_layer_one,keep_prob=hold_prob)

In [13]:
# Second fully connected block: dense(1024) -> sigmoid -> dropout.
# NOTE(review): this layer uses a sigmoid activation while the first fully
# connected layer uses ReLU -- confirm the mix is intentional.
with tf.name_scope("second_full_layer"):
    full_layer_two = tf.nn.sigmoid(normal_full_layer(full_one_dropout,1024))
    
    # add dropout; the keep probability is supplied through the hold_prob placeholder
    full_second_dropout = tf.nn.dropout(full_layer_two,keep_prob=hold_prob)

In [14]:
# Output layer: 10 units, one per class. No activation is applied here;
# y_pred is passed as `logits` to the softmax cross-entropy loss below.
with tf.name_scope("output_layer"):
    y_pred = normal_full_layer(full_second_dropout,10)

### Cross-entropy loss and optimizer

In [15]:
# Loss: softmax cross-entropy between the labels (y_true) and the logits
# (y_pred), averaged over the batch with tf.reduce_mean.
with tf.name_scope("loss"):
    ce_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred))

In [16]:
# Adam optimizer with learning rate 0.001; `train` is the op that performs
# one minimization step on ce_loss each time it is run.
with tf.name_scope("optimizer"):
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train = optimizer.minimize(ce_loss)

In [17]:
# init: op run at the start of training to initialize the graph's variables.
init = tf.global_variables_initializer()
# saver: used to write the checkpoint after training and to restore it for evaluation.
saver = tf.train.Saver()

# Train

In [18]:
def train_model(steps, batch_size, train_data, val_data, test_data,
                keep_prob=0.9, save_path="./models/mnist_test_cnn.ckpt",
                report_loss=False):
    """Train the graph defined above and write a checkpoint.

    Runs `steps` optimizer updates on mini-batches drawn from `train_data`,
    printing the step counter roughly ten times over the run, then saves
    the session's variables to `save_path`.

    Args:
        steps: number of training steps (int).
        batch_size: number of examples per mini-batch.
        train_data: data set providing `next_batch(batch_size=...)`; its
            `images` / `labels` are also read when `report_loss` is True.
        val_data: data set whose `images` / `labels` are read only when
            `report_loss` is True.
        test_data: accepted for interface compatibility; not used here.
        keep_prob: dropout keep probability fed to `hold_prob` for the
            training steps (defaults to the previously hard-coded 0.9).
        save_path: checkpoint path handed to `saver.save`.
        report_loss: when True, also print the loss over the full training
            and validation sets at every progress report. This is slow,
            which is why it is off by default.
    """
    import os  # local import: only needed to prepare the checkpoint directory

    # Integer interval between progress reports. `steps / 10` is a float in
    # Python 3 (so `i % interval == 0` misfires when steps is not a multiple
    # of 10) and is 0 in Python 2 when steps < 10 (ZeroDivisionError).
    report_every = max(1, steps // 10)

    with tf.Session() as sess:
        sess.run(init)

        for i in range(steps):

            # get next training batch
            batch_x, batch_y = train_data.next_batch(batch_size=batch_size)

            feed_dict = {x: batch_x, y_true: batch_y, hold_prob: keep_prob}

            if i % report_every == 0:
                if report_loss:
                    # Dropout is disabled (hold_prob 1.0) when measuring the loss.
                    train_loss = sess.run(ce_loss, feed_dict={x: train_data.images, y_true: train_data.labels, hold_prob: 1.0})
                    val_loss = sess.run(ce_loss, feed_dict={x: val_data.images, y_true: val_data.labels, hold_prob: 1.0})
                    print("train_loss: {}  val_loss: {}".format(train_loss, val_loss))
                print("step: {}".format(i))

            sess.run(train, feed_dict=feed_dict)

        # Make sure the checkpoint's parent directory exists before saving;
        # saving can fail when it is missing.
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.isdir(save_dir):
            os.makedirs(save_dir)
        saver.save(sess, save_path)

    print("finshed training")

In [25]:
# Train for 2000 steps with mini-batches of 10 images; the resulting
# checkpoint is restored by the evaluation cell below.
train_model(2000, 10, mnist.train, mnist.validation, mnist.test)

step: 0
step: 200
step: 400
step: 600
step: 800
step: 1000
step: 1200
step: 1400
step: 1600
step: 1800
finshed training


In [22]:
# Sanity check: shape of the 5000-image test subset used for evaluation below.
mnist.test.images[:5000].shape


(5000, 784)

## Evaluate


In [28]:
# Accuracy ops: a prediction counts as correct when the arg-max of the
# logits equals the arg-max of the one-hot label.
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))

# Mean of the 0/1 match indicators = fraction classified correctly.
acc = tf.reduce_mean(tf.cast(matches, tf.float32))

with tf.Session() as sess:
    # Load the weights written by train_model.
    saver.restore(sess, "./models/mnist_test_cnn.ckpt")

    # Evaluate on the first 5000 test images with dropout disabled.
    test_accuracy = sess.run(
        acc,
        feed_dict={
            x: mnist.test.images[:5000],
            y_true: mnist.test.labels[:5000],
            hold_prob: 1.0,
        },
    )
    print(test_accuracy)
    

INFO:tensorflow:Restoring parameters from ./models/mnist_test_cnn.ckpt
0.9632


The convolutional neural network reaches about 96% accuracy on the first 5000 test images. Accuracy could probably be improved by training for more steps, but training is slow.