In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from time import time
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline

In [2]:
# Load the MNIST data sets from the local data directory; labels are
# requested in one-hot form.
mnist_dir = "datasets/MNIST_data/"
mnist = input_data.read_data_sets(mnist_dir, one_hot=True)

Extracting datasets/MNIST_data/train-images-idx3-ubyte.gz
Extracting datasets/MNIST_data/train-labels-idx1-ubyte.gz
Extracting datasets/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting datasets/MNIST_data/t10k-labels-idx1-ubyte.gz


In [None]:
class ConvolutionalNeuralNetwork:
    """Small convolutional network for 28x28 single-channel images,
    written against the TensorFlow 1.x graph API.

    Layers built by ``forward_pass``:
    conv 5x5 (32 maps) + ReLU -> 2x2 max-pool ->
    conv 5x5 (64 maps) + ReLU -> 2x2 max-pool ->
    dense (1024) + ReLU -> dropout -> dense (10 logits).
    """

    def __init__(self):
        """ Creates the placeholders that are fed at run time

        Attributes
        ----------
        X : float32 placeholder with dimensions:
            [batch, 784] (flattened images; reshaped to
            [batch, 28, 28, 1] inside ``forward_pass``)

        y : float32 placeholder with dimensions:
            [batch, 10] (one column per class)

        dropout_prob : float32 placeholder. NOTE: despite its name it
            is passed to ``tf.nn.dropout`` as ``keep_prob``, i.e. it is
            the probability of *keeping* a unit (1.0 disables dropout).
        """
        self.X = tf.placeholder(tf.float32, shape=[None, 784])
        self.y = tf.placeholder(tf.float32, shape=[None, 10])
        self.dropout_prob = tf.placeholder(tf.float32)

    def init_weights_and_bias(self, weights_shape, bias_shape):
        """ Randomly initializes weights and sets biases to a constant

        Parameters
        ----------
        weights_shape : shape of the weights Tensor.

        bias_shape : shape of the bias Tensor.

        Returns
        -------
        (W, b) : tuple of tf.Variable. W is drawn from a truncated
            normal distribution (stddev 0.1); b is filled with 0.1.
        """
        rand_dist = tf.truncated_normal(weights_shape, stddev=0.1)
        W = tf.Variable(rand_dist)
        consts = tf.constant(0.1, shape=bias_shape)
        b = tf.Variable(consts)
        return W, b

    def conv2d(self, X, kernel):
        """Performs 2D convolution with stride 1 and "SAME" padding

        Parameters
        ----------
        X : Input tensor with dimensions:
            [batch, height, width, channels]

        kernel : Kernel tensor with dimensions:
            [filter height, filter width, channels in,
                channels out]

        Returns
        -------
        Tensor output of the convolution.
        """
        # strides holds the step of the sliding window for every
        # dimension of the input; 1 everywhere means the kernel
        # moves one position at a time.
        strides = [1, 1, 1, 1]
        return tf.nn.conv2d(X, kernel, strides, padding="SAME")

    def max_pooling(self, X):
        """Performs 2x2 Max Pooling with stride 2 and "SAME" padding

        Parameters
        ----------
        X : Input tensor with dimensions:
            [batch, height, width, channels]

        Returns
        -------
        Tensor output of the pooling operation.
        """
        # ksize is the size of the window for each dimension of the
        # input. Since we are performing 2x2 pooling and our
        # dimensions are [batch, height, width, channels] we need 2
        # for height and width.
        ksize = [1, 2, 2, 1]

        # The same holds for strides, the only difference is that
        # strides is the step: with 2x2 pooling the window advances
        # by 2 in height and width after every pooling step.
        strides = [1, 2, 2, 1]
        return tf.nn.max_pool(X, ksize=ksize, strides=strides,
                              padding="SAME")

    def convolution_layer(self, X, shape):
        """Creates a Convolutional Layer (convolution + bias + ReLU)

        Parameters
        ----------
        X : Input tensor with dimensions:
            [batch, height, width, channels]

        shape : Shape of the Kernel with dimensions:
            [filter height, filter width, channels in,
                channels out]

        Returns
        -------
        Tensor output of the ReLU activation.
        """
        # One bias per output channel.
        W, b = self.init_weights_and_bias(weights_shape=shape,
                                          bias_shape=[shape[3]])
        return tf.nn.relu(self.conv2d(X, W) + b)

    def dense_layer(self, input_layer, size):
        """ Creates a Fully Connected Layer (no activation applied)

        Parameters
        ----------
        input_layer : Tensor of the previous layer; its second
            dimension must be statically known.

        size : output size of the fully connected layer.

        Returns
        -------
        Tensor output of the fully connected layer.
        """
        input_layer_size = int(input_layer.get_shape()[1])
        W, b = self.init_weights_and_bias(
            weights_shape=[input_layer_size, size],
            bias_shape=[size])
        return tf.matmul(input_layer, W) + b

    def forward_pass(self):
        """ Builds the network graph and returns its output

        Every call creates a fresh set of variables.

        Returns
        -------
        Tensor of unnormalized class scores (logits) with dimensions
        [batch, 10]. No softmax is applied here; ``train`` passes the
        result to ``softmax_cross_entropy_with_logits``.
        """
        # Reshape the image (None, 784) back to (-1, 28, 28, 1)
        X_image = tf.reshape(self.X, (-1, 28, 28, 1))

        ################### Layer 1 (CNN) ###########################

        cnn_layer_1 = self.convolution_layer(X_image, shape=[5, 5, 1, 32])
        cnn_layer_1_pooled = self.max_pooling(cnn_layer_1)

        ################### Layer 2 (CNN) ###########################

        cnn_layer_2 = self.convolution_layer(cnn_layer_1_pooled,
                                             shape=[5, 5, 32, 64])
        cnn_layer_2_pooled = self.max_pooling(cnn_layer_2)

        ################### Layer 3 (Dense) ##########################

        # Two stride-2 poolings with "SAME" padding shrink
        # 28x28 -> 14x14 -> 7x7, with 64 feature maps.
        cnn_layer_2_flat = tf.reshape(cnn_layer_2_pooled, (-1, 7*7*64))
        dense_layer_1 = tf.nn.relu(self.dense_layer(cnn_layer_2_flat, 1024))

        ######################## Dropout #############################

        dense_layer_1_drop = tf.nn.dropout(dense_layer_1,
                                           keep_prob=self.dropout_prob)

        ######################## Predict #############################

        return self.dense_layer(dense_layer_1_drop, 10)

    def train(self, steps=2001, batch_size=50, learning_rate=0.001,
              log_every=100, dataset=None):
        """ Trains the CNN using AdamOptimizer

        Parameters
        ----------
        steps : number of training steps (mini-batches) to run.

        batch_size : number of examples per training step.

        learning_rate : learning rate passed to AdamOptimizer.

        log_every : test accuracy is evaluated and printed every
            `log_every` steps. A falsy value (0 / None) disables
            evaluation and logging.

        dataset : object exposing `train.next_batch(n)`,
            `test.images` and `test.labels`. Defaults to the
            notebook-level `mnist` data set.

        Returns
        -------
        None. Progress is printed to stdout.
        """
        if dataset is None:
            # Fall back to the data set loaded at notebook level.
            dataset = mnist

        y_pred = self.forward_pass()
        cross_entropy_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=self.y,
                                                    logits=y_pred))
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_cnn = optimizer.minimize(cross_entropy_loss)

        # Build the accuracy ops once, before the loop. Creating them
        # inside the loop would add new nodes to the graph at every step.
        matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(self.y, 1))
        accuracy = tf.reduce_mean(tf.cast(matches, tf.float32))

        # Must come after minimize() so the optimizer's own variables
        # are covered by the initializer.
        init = tf.global_variables_initializer()

        with tf.Session() as sess:
            sess.run(init)
            for step in range(steps):
                start_time = time()
                X_batch, y_batch = dataset.train.next_batch(batch_size)
                # keep probability 0.5 while training
                sess.run(train_cnn, feed_dict={self.X: X_batch,
                                               self.y: y_batch,
                                               self.dropout_prob: 0.5})
                # Only evaluate on the test set when the result is
                # actually reported; dropout is disabled (keep
                # probability 1.0) for evaluation.
                if log_every and step % log_every == 0:
                    result = sess.run(
                        accuracy,
                        feed_dict={self.X: dataset.test.images,
                                   self.y: dataset.test.labels,
                                   self.dropout_prob: 1.0})
                    print("Step: {0} Accuracy: {1:.2f}% Time: {2:.2f} secs/step"
                          .format(step, result * 100, time() - start_time))

In [None]:
# Warning: this might take several hours without GPU
cnn = ConvolutionalNeuralNetwork()
cnn.train()

Step: 0 Accuracy: 18.97% Time: 0.38 secs/step
Step: 100 Accuracy: 94.42% Time: 0.39 secs/step
Step: 200 Accuracy: 95.74% Time: 0.39 secs/step
Step: 300 Accuracy: 96.69% Time: 0.39 secs/step
Step: 400 Accuracy: 97.52% Time: 0.39 secs/step
Step: 500 Accuracy: 98.05% Time: 0.40 secs/step
Step: 600 Accuracy: 97.87% Time: 0.55 secs/step
Step: 700 Accuracy: 98.24% Time: 0.42 secs/step
Step: 800 Accuracy: 97.71% Time: 0.48 secs/step
Step: 900 Accuracy: 98.14% Time: 0.48 secs/step
Step: 1000 Accuracy: 98.46% Time: 0.47 secs/step
Step: 1100 Accuracy: 98.58% Time: 0.47 secs/step
Step: 1200 Accuracy: 98.56% Time: 0.48 secs/step
Step: 1300 Accuracy: 98.55% Time: 0.52 secs/step
Step: 1400 Accuracy: 98.26% Time: 0.49 secs/step
Step: 1500 Accuracy: 98.58% Time: 0.55 secs/step
Step: 1600 Accuracy: 98.73% Time: 0.56 secs/step
Step: 1700 Accuracy: 98.86% Time: 0.60 secs/step
Step: 1900 Accuracy: 98.76% Time: 0.60 secs/step
