# MNIST Classifier
##### MNIST classifier using Convolutional Neural Network
##### Link to official tutorial: https://www.tensorflow.org/versions/r1.2/get_started/mnist/pros

In [2]:
# load mnist data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# import tensorflow for obvious reasons
import tensorflow as tf

In [4]:
# create a placeholder for input data
# data will be fed into tensorflow placeholder once a session is run
x = tf.placeholder(tf.float32, [None, 784])

In [5]:
# create weights ad biases with a small amount of noise
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

In [6]:
# convolution and pooling
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

In [7]:
# initialize weights and biases
W_conv1 = weight_variable([5,5,1,32])
b_conv1 = bias_variable([32])
W_conv2 = weight_variable([5,5,32,64])
b_conv2 = bias_variable([64])
W_fc1 = weight_variable([3136,1024])
b_fc1 = bias_variable([1024])
W_fc2 = weight_variable([1024,512])
b_fc2 = bias_variable([512])
W_fc3 = weight_variable([512,10])
b_fc3 = bias_variable([10])

In [8]:
# reshape x
x_images = tf.reshape(x, shape=[-1, 28, 28, 1])

In [9]:
# add dropout to conv layers
keep_prob_conv = tf.placeholder(tf.float32)

In [10]:
# conv layer 1
h_conv1 = conv2d(x_images, W_conv1)
h_conv1_activated = tf.nn.relu(h_conv1 + b_conv1)
h_pool1 = max_pool_2x2(h_conv1_activated)
h_pool1_dropout = tf.nn.dropout(h_pool1, keep_prob_conv)
# conv layer 2
h_conv2 = conv2d(h_pool1_dropout, W_conv2)
h_conv2_activated = tf.nn.relu(h_conv2 + b_conv2)
h_pool2 = max_pool_2x2(h_conv2_activated)
h_pool2_dropout = tf.nn.dropout(h_pool2, keep_prob_conv)

In [11]:
# flatten output from conv layers
x_fc = tf.reshape(h_pool2, shape=[-1, 7*7*64])

In [12]:
# add dropout to fc layers
keep_prob = tf.placeholder(tf.float32)

In [13]:
# fully conected layer 1
h_fc1 = tf.matmul(x_fc, W_fc1) + b_fc1
h_fc1_activated = tf.nn.relu(h_fc1)
h_fc1_dropout = tf.nn.dropout(h_fc1_activated, keep_prob)
# fully conected layer 2
h_fc2 = tf.matmul(h_fc1_dropout, W_fc2)
h_fc2_activated = tf.nn.relu(h_fc2)
h_fc2_dropout = tf.nn.dropout(h_fc2_activated, keep_prob)
# fully conected layer 3
y = tf.matmul(h_fc2_dropout, W_fc3)

In [14]:
# create a placeholder to input correct answers
y_ = tf.placeholder(tf.float32, [None, 10])

In [15]:
# define the cost function 
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y, labels=y_))

In [16]:
# tell the tensorflow computational graph to minimize the cost using gradient descent
train = tf.train.AdamOptimizer().minimize(cost)

In [17]:
# initialize tensorflow interactive session
sess = tf.InteractiveSession()

In [18]:
# initialize variables inside the graph
tf.global_variables_initializer().run()

In [19]:
# train the model! (uses mini-batch)
num_epochs = 30
batch_size = 100
for epoch in range(num_epochs):
    epoch_loss = 0
    for _ in range(int(mnist.train.num_examples/batch_size)):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        t, c = sess.run([train, cost], feed_dict={x: batch_xs, y_: batch_ys, keep_prob: 0.5, keep_prob_conv: 0.25})
        epoch_loss += c
    print(f"Epoch {epoch+1} out of {num_epochs}, loss: {epoch_loss}")

Epoch 1 out of 30, loss: 1099.911477714777
Epoch 2 out of 30, loss: 199.05607546120882
Epoch 3 out of 30, loss: 126.77534003928304
Epoch 4 out of 30, loss: 96.00221067667007
Epoch 5 out of 30, loss: 78.03182378411293
Epoch 6 out of 30, loss: 69.88709651771933
Epoch 7 out of 30, loss: 60.6324864840135
Epoch 8 out of 30, loss: 54.239332656841725
Epoch 9 out of 30, loss: 47.37119508301839
Epoch 10 out of 30, loss: 45.24328855331987
Epoch 11 out of 30, loss: 42.27774349111132
Epoch 12 out of 30, loss: 39.34553468413651
Epoch 13 out of 30, loss: 36.46979158301838
Epoch 14 out of 30, loss: 34.59762412670534
Epoch 15 out of 30, loss: 33.16444252850488
Epoch 16 out of 30, loss: 31.264316155808046
Epoch 17 out of 30, loss: 31.819046836462803
Epoch 18 out of 30, loss: 29.074158981675282
Epoch 19 out of 30, loss: 30.246673419489525
Epoch 20 out of 30, loss: 26.465616556815803
Epoch 21 out of 30, loss: 26.42076426348649
Epoch 22 out of 30, loss: 25.39834358112421
Epoch 23 out of 30, loss: 26.45360

In [20]:
# predict output using our model
# returns a boolean array
predictions = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))

In [21]:
# cast the boolean array to float and calculate accuracy
accuracy = tf.reduce_mean(tf.cast(predictions, tf.float32))

In [22]:
# print accuracy
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0, keep_prob_conv: 0.25}))

0.9889


###### 98.89% accuracy is ot that bad. Best models can get to over 99.7% accuracy!