In [1]:
import numpy as np
import tensorflow as tf
from PIL import Image

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets from MNIST_data/; one_hot=True gives the labels as one-hot vectors.
mnist = input_data.read_data_sets(train_dir="MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# x is a placeholder: its value is supplied (through feed_dict) whenever we ask
# TensorFlow to run a computation.
# Each image is 28x28 pixels, flattened into a vector of 784 values.
# The first dimension is None so that any number of input images can be fed at once.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])

In [4]:
# Shapes are chosen so that x * W + b is well defined:
# [None, 784] x [784, 10] + [10] -> [None, 10] (one score per digit class).
W = tf.Variable(tf.zeros(shape=[784, 10]))
b = tf.Variable(tf.zeros(shape=[10]))

In [5]:
# The cost function will be cross entropy, which compares predictions with true labels.
# y_ -> REAL VALUES (true labels, fed in as one-hot encoded vectors of length 10)
# y  -> PREDICTED VALUES
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10])

In [6]:
# y holds the raw (unnormalised) class scores. The softmax is applied inside the
# loss op, which is more numerically stable than computing it separately.
y = tf.matmul(x, W) + b
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)
)

In [7]:
# TensorFlow knows the graph of the whole model, so it can work out the
# backpropagation updates itself. Here: gradient descent with learning rate 0.5.
train_step = tf.train.GradientDescentOptimizer(learning_rate=0.5).minimize(cross_entropy)

In [33]:
# Open an interactive session and give every variable defined so far its initial value.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [34]:
# 1000 gradient-descent steps, each on a fresh mini-batch of 100 training examples.
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    # Operation.run() executes in the default session, i.e. the InteractiveSession `sess`.
    train_step.run(feed_dict={x: batch_xs, y_: batch_ys})

In [35]:
# Per-example check: does the highest-scoring predicted class equal the true class?
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# This prints the symbolic tensor, not its values; nothing is evaluated here.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(correct_prediction)

Tensor("Equal_1:0", shape=(?,), dtype=bool)


In [36]:
# Cast the booleans to 0.0 / 1.0 so that their mean is the fraction of correct predictions.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

In [37]:
# Evaluate the accuracy of the softmax model on the full MNIST test set.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

0.9149


We will create a small CNN from here on

In [52]:
# Start a fresh session for the CNN. An InteractiveSession keeps its resources
# until close() is called, so release the session opened for the softmax model
# first instead of leaving two sessions alive.
try:
    sess.close()
except NameError:
    pass  # fresh kernel: there is no earlier session to release
sess = tf.InteractiveSession()

In [53]:
def weight_variable(shape):
    """Create a trainable weight variable filled with small random noise.

    The initial values are drawn from a truncated normal distribution with
    standard deviation 0.1. The mean is left at the TensorFlow default, so
    the weights are NOT forced to be positive; it is the bias (see
    bias_variable) that gets a positive start for the ReLU units.

    Args:
        shape: shape of the weight tensor, e.g. [5, 5, 1, 32].

    Returns:
        A tf.Variable with the given shape.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a trainable bias variable with every element set to 0.1.

    Args:
        shape: shape of the bias tensor, e.g. [32].

    Returns:
        A tf.Variable with the given shape, initialised to the constant 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

### Convolution and Pooling

In [54]:
def conv2d(x, W):
    """Convolve x with the filter W using stride 1 and 'SAME' padding.

    Args:
        x: input tensor passed straight to tf.nn.conv2d.
        W: filter tensor passed straight to tf.nn.conv2d.

    Returns:
        The result of tf.nn.conv2d.
    """
    unit_strides = [1, 1, 1, 1]  # step of 1 along every dimension
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool x over 2x2 windows with stride 2 and 'SAME' padding.

    Args:
        x: input tensor passed straight to tf.nn.max_pool.

    Returns:
        The result of tf.nn.max_pool.
    """
    window = [1, 2, 2, 1]  # the same value is used for window size and stride
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [55]:
# First convolutional layer: 32 features, each computed from a 5x5 patch
# (width = height = 5) of the single-channel input image.
W_conv1 = weight_variable(shape=[5, 5, 1, 32])
b_conv1 = bias_variable(shape=[32])

In [56]:
# Turn the flat 784-vectors back into 4-D image tensors. -1 lets TensorFlow infer
# the batch size; with the default NHWC layout used by tf.nn.conv2d the
# 2nd dim = height, 3rd dim = width, 4th dim = number of channels.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

In [57]:
# Convolve x_image with the weight tensor, add the bias, apply ReLU, then max-pool the result.
# The 2x2 pooling halves the 28x28 image, so h_pool1 is 14x14.
h_conv1 = tf.nn.relu(features=conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

In [58]:
# Second convolutional layer: 64 features for each 5x5 patch, taking the
# 32 feature maps of the first layer as input.
W_conv2 = weight_variable(shape=[5, 5, 32, 64])
b_conv2 = bias_variable(shape=[64])

In [59]:
# Same pattern as the first layer: convolution + bias, ReLU, then 2x2 max-pooling.
# Pooling halves the 14x14 maps again, so the image is now 7x7.
h_conv2 = tf.nn.relu(features=conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

In [60]:
# Densely (fully) connected layer: maps the 7x7x64 pooled features onto 1024 neurons.
W_fc1 = weight_variable(shape=[7 * 7 * 64, 1024])
b_fc1 = bias_variable(shape=[1024])

In [61]:
# Flatten each example's 7x7x64 feature maps into one vector, then apply the
# fully connected layer followed by ReLU.
h_pool2_flat = tf.reshape(h_pool2, shape=[-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(features=tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

In [62]:
# Dropout layer. keep_prob is a placeholder so that dropout can be switched on
# for training (fed 0.5 below) and off for evaluation (fed 1.0 below).
keep_prob = tf.placeholder(dtype=tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

In [63]:
# Readout (final) layer: maps the 1024 dropout-regularised features onto 10 class scores.
W_fc2 = weight_variable(shape=[1024, 10])
b_fc2 = bias_variable(shape=[10])

# y_conv holds raw scores (logits); the softmax is applied inside the loss op.
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

* We will use the Adam optimizer
* Every 100th iteration will be logged

In [66]:
# Loss, optimizer and accuracy metric for the CNN. These rebind the names that
# were used for the softmax model earlier in the notebook.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_)
)
train_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, axis=1), tf.argmax(y_, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))
# Initialise after minimize() so that variables created by the optimizer are covered too.
sess.run(tf.global_variables_initializer())

# Train the CNN for 2000 steps on mini-batches of 50 examples.
for i in range(2000):
    batch = mnist.train.next_batch(50)  # batch[0] = images, batch[1] = one-hot labels
    if i % 100 == 0:
        # Log every 100th step. Accuracy is measured on the current batch before
        # training on it, with dropout disabled (keep_prob = 1.0).
        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        # print(...) with a single argument behaves the same on Python 2 and Python 3.
        print("step %d, training accuracy %g" % (i, train_accuracy))
    # Training step with dropout enabled: each unit is kept with probability 0.5.
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# Final evaluation on the full MNIST test set with dropout disabled (keep_prob = 1.0).
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print("Test accuracy: %g" % (accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})))

step 0, training accuracy 0.14
step 100, training accuracy 0.78
step 200, training accuracy 0.9
step 300, training accuracy 0.88
step 400, training accuracy 0.92
step 500, training accuracy 0.9
step 600, training accuracy 0.92
step 700, training accuracy 0.98
step 800, training accuracy 0.96
step 900, training accuracy 0.96
step 1000, training accuracy 0.92
step 1100, training accuracy 0.96
step 1200, training accuracy 0.98
step 1300, training accuracy 0.94
step 1400, training accuracy 0.96
step 1500, training accuracy 1
step 1600, training accuracy 0.96
step 1700, training accuracy 0.94
step 1800, training accuracy 0.98
step 1900, training accuracy 0.94
Test accuracy: 0.975
