In [1]:
import tensorflow as tf
import numpy as np

### LeNet Architecture
<img src='../pics/lenet0.png'>
<img src='../pics/lenet6.png' width='1000px'>

### LeNet First Two Layers
<img src='../pics/lenet_first_two_convs.png'>

* In the above diagram, you can see how the input image is processed in the first convolutional layer using the filter weights. 
* This results in 32 new images, one for each filter in the convolutional layer. 
* The images are also down-sampled with the pooling operation, so the image resolution is decreased from 28×28 to 14×14. 
* These 32 smaller images are then processed in the second convolutional layer. 
* We need filter weights again for each of these 32 images and we need filter weights for each output channel of this layer. 
* The images are again down-sampled with a pooling operation, so that the image resolution is decreased from 14×14 to 7×7. 
* The total number of features for this convolutional layer is 64.

### LeNet Last Three Convolutional Layers
<img src='../pics/lenet_last_three.jpg'>

* The 64 resulting images are filtered again by a (3×3) third convolutional layer. No pooling operation is applied to this layer. 
* The output of the third convolutional layer is 128 7×7-pixel images. 
* These images are then flattened to become a single vector, of length 4×4×128 = 2048, which is used as input to a fully connected layer.
* The output layer has 625 neurons as input (that is, the output of the fully connected layer), and 10 neurons as output, since there are 10 classes of images that we are deciding between.

### LeNet Architecture Again
<img src='../pics/lenet3.png'>

In [2]:
def model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden):

    conv1 = tf.nn.conv2d(X, w,\
                         strides=[1, 1, 1, 1],\
                         padding='SAME')

    conv1_a = tf.nn.relu(conv1)
    conv1 = tf.nn.max_pool(conv1_a, ksize=[1, 2, 2, 1]\
                        ,strides=[1, 2, 2, 1],\
                        padding='SAME')
    conv1 = tf.nn.dropout(conv1, p_keep_conv)

    conv2 = tf.nn.conv2d(conv1, w2,\
                         strides=[1, 1, 1, 1],\
                         padding='SAME')
    conv2_a = tf.nn.relu(conv2)
    conv2 = tf.nn.max_pool(conv2_a, ksize=[1, 2, 2, 1],\
                        strides=[1, 2, 2, 1],\
                        padding='SAME')
    conv2 = tf.nn.dropout(conv2, p_keep_conv)

    conv3=tf.nn.conv2d(conv2, w3,\
                       strides=[1, 1, 1, 1]\
                       ,padding='SAME')

    conv3 = tf.nn.relu(conv3)


    FC_layer = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1],\
                        strides=[1, 2, 2, 1],\
                        padding='SAME')
    
    FC_layer = tf.reshape(FC_layer,\
                          [-1, w4.get_shape().as_list()[0]])    
    FC_layer = tf.nn.dropout(FC_layer, p_keep_conv)


    output_layer = tf.nn.relu(tf.matmul(FC_layer, w4))
    output_layer = tf.nn.dropout(output_layer, p_keep_hidden)

    result = tf.matmul(output_layer, w_o)
    return result

In [3]:
from mnist_loader import read_data_sets
mnist = read_data_sets('../data/mnist', one_hot=True)

trX, trY, teX, teY = mnist.train.images,\
                     mnist.train.labels, \
                     mnist.test.images, \
                     mnist.test.labels

batch_size = 128
test_size = 256
img_size = 28
num_classes = 10
            
trX = trX.reshape(-1, img_size, img_size, 1)  # 28x28x1 input img
teX = teX.reshape(-1, img_size, img_size, 1)  # 28x28x1 input img

X = tf.placeholder("float", [None, img_size, img_size, 1])
Y = tf.placeholder("float", [None, num_classes])

# The init_weights function builds new variables in the shape provided 
# and initializes the network's weights with random values:
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

# Each neuron of the first convolutional layer is convoluted to 
# a small subset of the input tensor, with the dimensions 3×3×1. 
# The value 32 is just the number of feature maps we are considering 
# for this first layer. The weight, w, is then defined:
w = init_weights([3, 3, 1, 32])       # 3x3x1 conv, 32 outputs


w2 = init_weights([3, 3, 32, 64])     # 3x3x32 conv, 64 outputs
w3 = init_weights([3, 3, 64, 128])    # 3x3x32 conv, 128 outputs
w4 = init_weights([128 * 4 * 4, 625]) # FC 128 * 4 * 4 inputs, 625 outputs
w_o = init_weights([625, num_classes])# FC 625 inputs, 10 outputs (labels)

p_keep_conv = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
py_x = model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden)

Y_ = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y,logits=py_x)

cost = tf.reduce_mean(Y_)
optimizer  = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
predict_op = tf.argmax(py_x, 1)

Extracting ../data/mnist/train-images-idx3-ubyte.gz
Extracting ../data/mnist/train-labels-idx1-ubyte.gz
Extracting ../data/mnist/t10k-images-idx3-ubyte.gz
Extracting ../data/mnist/t10k-labels-idx1-ubyte.gz


In [4]:
print_every = 2
num_epochs = 20
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,log_device_placement=True))
#with tf.Session() as sess:
with sess.as_default():
    tf.global_variables_initializer().run()
    for i in range(num_epochs):
        training_batch = \
                       zip(range(0, len(trX), \
                                 batch_size),
                             range(batch_size, \
                                   len(trX)+1, \
                                   batch_size))
        for start, end in training_batch:
            sess.run(optimizer , feed_dict={X: trX[start:end],\
                                          Y: trY[start:end],\
                                          p_keep_conv: 0.8,\
                                          p_keep_hidden: 0.5})

        test_indices = np.arange(len(teX)) # Get A Test Batch
        np.random.shuffle(test_indices)
        test_indices = test_indices[0:test_size]

        if (i+1) % print_every == 0:
            ACCURACY = np.mean(np.argmax(teY[test_indices], axis=1) ==\
                             sess.run(predict_op,\
                              feed_dict={X: teX[test_indices],\
                                         Y: teY[test_indices], \
                                         p_keep_conv: 1.0,\
                                         p_keep_hidden: 1.0}))

            print("Epoch: ",i," Accuracy: ",ACCURACY)

Epoch:  1  Accuracy:  0.984375
Epoch:  3  Accuracy:  0.984375


KeyboardInterrupt: 