# Convolutional Neural Network with TensorFlow

## Imports

In [1]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
from tensorflow.python.framework import ops

# This is just to make TensorFlow use only one of my GPUs.
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

## Load the Data

In [2]:
mnist_data = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


## Create the Model
This model architecture is inspired by LeNet-5

1. A convolutional layer with 6 filters, a filter size of 5x5, same padding, relu activation.
2. A max pool layer with a size of 2x2 and a stride of 2.
3. A convulutional layer with 16 filters, a filter size of 5x5, same padding, relu activation.
4. A max pool layer with a size of 2x2 and a stride of 2.
5. A Fully connected layer with 120 hidden units and relu activation.
6. A Fully connected layer with 84 hidden units and relu activation.
7. The output fully connected layer with 10 units and softmax activation.

In [3]:
def build_model(x_tensor):
    
    # The first conv layer should have 6 filters, a filter size of 5x5, same padding,
    # and the relu activation function.
    with tf.variable_scope('Conv1'):
        w_tensor = tf.get_variable('w', 
                                   initializer=tf.truncated_normal([5, 5, 1, 6], stddev=0.1))
        
        b_tensor = tf.get_variable('b', initializer=tf.constant(0.1, shape=[6]))
        
        conv_tensor = tf.nn.conv2d(x_tensor, 
                                   w_tensor, 
                                   strides=[1, 1, 1, 1], 
                                   padding='SAME', 
                                   name='conv')
        
        a_tensor = tf.nn.relu(conv_tensor + b_tensor, name='a')
        
    # The first pool layer should have a size of 2x2.
    with tf.variable_scope('Pool1'):
        pool_tensor = tf.nn.max_pool(a_tensor, 
                                     ksize=[1, 2, 2, 1], 
                                     strides=[1, 2, 2, 1], 
                                     padding='SAME', 
                                     name='pool')
        
    # The second conv layer should have 16 filters, a filter size of 5x5, valid padding,
    # and the relu activation function.
    with tf.variable_scope('Conv2'):
        w_tensor = tf.get_variable('w', 
                                   initializer=tf.truncated_normal([5, 5, 6, 16], stddev=0.1))
        
        b_tensor = tf.get_variable('b', initializer=tf.constant(0.1, shape=[16]))
        
        conv_tensor = tf.nn.conv2d(pool_tensor, 
                                   w_tensor, 
                                   strides=[1, 1, 1, 1], 
                                   padding='SAME', 
                                   name='conv')
        
        a_tensor = tf.nn.relu(conv_tensor + b_tensor, name='a')
        
    # The second pool layer should have a size of 2x2.
    with tf.variable_scope('Pool2'):
        pool_tensor = tf.nn.max_pool(a_tensor, 
                                     ksize=[1, 2, 2, 1], 
                                     strides=[1, 2, 2, 1], 
                                     padding='SAME', 
                                     name='pool')
        
    # Flatten the tensor in preparation for the fully connected layer.
    # We started with 28x28, but each pool layer halved the dimensions.
    # Also, we ended with 16 filters, so the flattened tensor should
    # have a shape of (n, 7*7*16)
    with tf.variable_scope('Flatten'):
        flatten_tensor = tf.reshape(pool_tensor, [-1, 7*7*16], name='flatten')
        
    # 120 hidden units in the first layer.
    with tf.variable_scope('Fc1'):
        w_tensor = tf.get_variable('w', initializer=tf.truncated_normal([7*7*16, 120], stddev=0.1))
        b_tensor = tf.get_variable('b', initializer=tf.constant(0.1, shape=[120]))
        a_tensor = tf.nn.relu(tf.matmul(flatten_tensor, w_tensor) + b_tensor, name='a')
    
    # 84 hidden units in the second layer.
    with tf.variable_scope('Fc2'):
        w_tensor = tf.get_variable('w', initializer=tf.truncated_normal([120, 10], stddev=0.1))
        b_tensor = tf.get_variable('b', initializer=tf.constant(0.1, shape=[10]))
        z_tensor = tf.matmul(a_tensor, w_tensor) + b_tensor
   
    return z_tensor

## Define the Cost
The measurement of how how well the parameters fit the training values during training.  The goal is the minimize this difference.


In TensorFlow, the last activation function is built into the cost.  Here, we are using the softmax activation which gives a probability of each output class being true.  All the probabilities sum to 1 for each example.


The cost function we are using is cross entropy, which measures the distance between the tensor of probabilities from the output of the softmax and the actual values, y.

In [4]:
def build_cost(z_tensor, y_tensor):
    with tf.variable_scope('CostFunction'):
        cost_tensor = tf.nn.softmax_cross_entropy_with_logits(logits=z_tensor, labels=y_tensor)
        cost_tensor = tf.reduce_mean(cost_tensor)
        
    return cost_tensor

## Define the Optimizer
Chose which optimization algorithm to use.  This is the algorithm that adjusts the weights and biases each execution to bring the cost down.  TensorFlow comes with a good selection of pre-built optimizers.  We'll use Adam here.

In [5]:
def build_optimizer(cost_tensor, learning_rate):
    with tf.variable_scope('Optimizer'):
        optimizer_tensor = tf.train.AdamOptimizer(learning_rate).minimize(cost_tensor)
        
    return optimizer_tensor

## Define the Accuracy Measurement
The measurement of accurate the model is at predicting outcomes.

In [6]:
def build_accuracy(y_hat_tensor, y_tensor):
    with tf.variable_scope('AccuracyFunction'):
        # A bool tensor of where the predictions matched the labels.
        correct_predictions_tensor = tf.equal(tf.argmax(y_hat_tensor, axis=1), 
                                              tf.argmax(y_tensor, axis=1))
        
        # Convert the true/false values into 0 or 1.
        correct_predictions_tensor = tf.cast(correct_predictions_tensor, tf.float32)
        
        # The mean of the correct_preditions_tensor will now give us the accuracy.
        accuracy_tensor = tf.reduce_mean(correct_predictions_tensor)
        
    return accuracy_tensor

## Train Function
Put the pieces together to build the model, input features, and train it.

In [7]:
def train(learning_rate=0.0001, iterations=1000, batch_size=100):
    ops.reset_default_graph()
    
    x_tensor = tf.placeholder(tf.float32, [None, 28, 28, 1])
    y_tensor = tf.placeholder(tf.float32, [None, 10])
    
    z_tensor = build_model(x_tensor)
    cost_tensor = build_cost(z_tensor, y_tensor)
    optimizer_tensor = build_optimizer(cost_tensor, learning_rate)
    accuracy_tensor = build_accuracy(z_tensor, y_tensor)
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        for i in range(1, iterations + 1):
            x, y = mnist_data.train.next_batch(batch_size)
            x = x.reshape(x.shape[0], 28, 28, 1)
            accuracy, cost, _ = sess.run([accuracy_tensor, cost_tensor, optimizer_tensor], 
                                         feed_dict={x_tensor:x, y_tensor:y})
            
            if i % 100 == 0:
                print('Iteration {0} cost: {1}, accuracy: {2}'.format(i, cost, accuracy))

## Try it out!
With a convolutional neural network the train accuracy is often 100% for batches.  Keep in mind that test accuracy will be a bit lower.

In [8]:
%%time
train(learning_rate = 0.01, iterations=5000)

Iteration 100 cost: 0.15039171278476715, accuracy: 0.949999988079071
Iteration 200 cost: 0.14551039040088654, accuracy: 0.9399999976158142
Iteration 300 cost: 0.08272869884967804, accuracy: 0.9700000286102295
Iteration 400 cost: 0.11444041132926941, accuracy: 0.9599999785423279
Iteration 500 cost: 0.07795067876577377, accuracy: 0.9599999785423279
Iteration 600 cost: 0.06593916565179825, accuracy: 0.9700000286102295
Iteration 700 cost: 0.025510864332318306, accuracy: 0.9900000095367432
Iteration 800 cost: 0.0504082553088665, accuracy: 0.9800000190734863
Iteration 900 cost: 0.07934822887182236, accuracy: 0.9800000190734863
Iteration 1000 cost: 0.061871692538261414, accuracy: 0.9900000095367432
Iteration 1100 cost: 0.06026923656463623, accuracy: 0.9800000190734863
Iteration 1200 cost: 0.056168485432863235, accuracy: 0.9700000286102295
Iteration 1300 cost: 0.09596701711416245, accuracy: 0.9900000095367432
Iteration 1400 cost: 0.012660035863518715, accuracy: 1.0
Iteration 1500 cost: 0.02708