In [1]:
# Imports
import numpy as np
import tensorflow as tf

%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt



In [2]:
# MNIST data
import tensorflow.examples.tutorials.mnist.input_data as data
mnist = data.read_data_sets('MNIST_data/', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# Create a placeholders for input images and input image labels
n_input = mnist.train.images.shape[1]
n_output = 10
x = tf.placeholder(tf.float32, [None, n_input]) # First dim = None for mini-batch
y = tf.placeholder(tf.float32, [None, n_output])

In [4]:
# CNN requires 4D tensor: [N, H, W, C]
# Shape of x is 2D: [batch, height * width]

side = int(np.sqrt(mnist.train.images.shape[1]))

# Tensor shape "SPECIAL" value: -1 (CHECK: help(tf.reshape))
x_tensor = tf.reshape(x, [-1, side, side, 1]) # -1 makes total size constant

In [5]:
# Weight and bias initializer
def W_variable(shape):
    init = tf.random_normal(shape, mean=0.0, stddev=0.01)
    return tf.Variable(init)

def b_variable(shape):
    init = tf.random_normal(shape, mean=0.0, stddev=0.01)
    return tf.Variable(init)

**NOTE:** TensorFlow reqires image kernels/filters to be 4 dimensional (Filter Height x Filter Width x Number of Input Channels x Number of Output Channels)

In [6]:
# First convolutional layer
filter_size = 5
n_filters_1 = 16
C = 1
W_conv_1 = W_variable([filter_size, filter_size, C, n_filters_1])
b_conv_1 = b_variable([n_filters_1])

In [7]:
# Create computational graph to perform first layer of convolution
z_conv_1 = tf.add(tf.nn.conv2d(input=x_tensor, filter=W_conv_1, 
                               strides=[1, 2, 2, 1], padding='SAME'), b_conv_1)
a_conv_1 = tf.nn.relu(z_conv_1)

In [8]:
# Second convolutional layer
n_filters_2 = 8
W_conv_2 = W_variable([filter_size, filter_size, n_filters_1, n_filters_2])
b_conv_2 = b_variable([n_filters_2])

In [9]:
# Create computational graph to perform second layer of convolution
z_conv_2 = tf.add(tf.nn.conv2d(input=a_conv_1, filter=W_conv_2, 
                               strides=[1, 2, 2, 1], padding='SAME'), b_conv_2) 
a_conv_2 = tf.nn.relu(z_conv_2)

In [10]:
# Reshape activation output of second convolution layer to connect to a fully connected layer
a_conv_2_flat = tf.reshape(a_conv_2, [-1, 7 * 7 * n_filters_2])

In [11]:
# Create a fully connected layer
n_fc = 256
W_fc_1 = W_variable([7 * 7 * n_filters_2, n_fc])
b_fc_1 = b_variable([n_fc])
z_fc_1 = tf.add(tf.matmul(a_conv_2_flat, W_fc_1), b_fc_1)
a_fc_1 = tf.nn.relu(z_fc_1)

In [12]:
# Add dropout for regularize 
keep_prob = tf.placeholder(tf.float32)
a_fc_1_drop = tf.nn.dropout(a_fc_1, keep_prob=keep_prob)

In [13]:
# Softmax layer
W_fc_2 = W_variable([n_fc, n_output])
b_fc_2 = b_variable([n_output])
z_fc_2 = tf.add(tf.matmul(a_fc_1_drop, W_fc_2), b_fc_2)
y_pred = tf.nn.softmax(z_fc_2)

In [14]:
# Cross entropy loss and ADAM optimizer
loss = -tf.reduce_sum(y * tf.log(y_pred))
optimizer = tf.train.AdamOptimizer().minimize(loss)

In [15]:
# Accuracy
y_hat = tf.argmax(y_pred, dimension=1)
y_true = tf.argmax(y, dimension=1)
correct_pred = tf.equal(y_hat, y_true)
accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

In [16]:
# Create a session to use the graph
ses = tf.Session()

# Initialize all variables
ses.run(tf.initialize_all_variables())

In [17]:
# Train
batch_size = 100
n_epochs = 10
batches = mnist.train.num_examples//batch_size

for epoch in range(n_epochs):
    for batch in range(batches):
        batch_x, batch_y = mnist.train.next_batch(batch_size=batch_size)
        ses.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob:0.5})
    acc = ses.run(accuracy, feed_dict={x: mnist.validation.images,
                                       y: mnist.validation.labels,
                                       keep_prob: 1.0})
    print 'Epoch-{} Validation Accuracy: {}'.format(epoch, acc)
    
print 'Test Accuracy: ', ses.run(accuracy, feed_dict={x: mnist.test.images,
                                                      y: mnist.test.labels,
                                                      keep_prob: 1.0})

Epoch-0 Validation Accuracy: 0.944199979305
Epoch-1 Validation Accuracy: 0.967999994755
Epoch-2 Validation Accuracy: 0.975600004196
Epoch-3 Validation Accuracy: 0.0957999974489
Epoch-4 Validation Accuracy: 0.0957999974489
Epoch-5 Validation Accuracy: 0.0957999974489
Epoch-6 Validation Accuracy: 0.0957999974489
Epoch-7 Validation Accuracy: 0.0957999974489
Epoch-8 Validation Accuracy: 0.0957999974489
Epoch-9 Validation Accuracy: 0.0957999974489
Test Accuracy:  0.098


In [18]:
# Visualize first layer filters
W = ses.run(W_conv_1)