In [14]:
import tensorflow as tf
import numpy as np
import time
from sklearn.utils import shuffle
# Remember the current TensorFlow log verbosity so it can be restored later,
# then raise the logging threshold to ERROR for the cells that follow.
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)


<h1>Extract MNIST data</h1>

In [15]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST with one-hot encoded labels; reshape=False keeps the images unflattened
mnist = input_data.read_data_sets("MNIST_data/",reshape=False,one_hot=True)
# Restore the log verbosity that was saved in old_v before it was set to ERROR
tf.logging.set_verbosity(old_v)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


<h1>Prepare training, validation and testing data</h1>

In [16]:
# Pull the image and label arrays for each split out of the MNIST loader.
x_train, y_train = mnist.train.images, mnist.train.labels
x_validation, y_validation = mnist.validation.images, mnist.validation.labels
x_test, y_test = mnist.test.images, mnist.test.labels

# Zero-pad two pixels on every side of axes 1 and 2 (28x28 -> 32x32);
# the first and last axes are left untouched.
_pad_width = ((0, 0), (2, 2), (2, 2), (0, 0))
x_train, x_validation, x_test = (
    np.pad(images, _pad_width, 'constant')
    for images in (x_train, x_validation, x_test)
)


<h1>Define hyperparameter</h1>

In [17]:
# --- Optimisation settings ---
lr = 0.01           # learning rate handed to the optimizer
num_steps = 10      # number of passes (epochs) over the training set
batch_size = 256    # examples per mini-batch

# --- Network dimensions ---
n_hidden_1_filters = 6     # filters in the first convolution
n_hidden_2_filters = 16    # filters in the second convolution
n_hidden_3 = 120           # units in the first fully connected layer
n_hidden_4 = 84            # units in the second fully connected layer
num_input = 32             # height and width of the (padded) input images
num_classes = 10           # number of output classes

In [18]:
# Reset TensorFlow's global default graph before the model is defined below
# (presumably so that re-running the notebook does not pile up duplicate graph nodes — confirm).
tf.reset_default_graph()

LeNet-5 Architecture
Input:32 by 32

Convolution Layer 1: # of filters nc=6, filter size f=5, padding p=0, stride s=1. Thus the output is (n^(l-1)+2p-f)/s+1=(32-5)/1+1=28. So 28 by 28 by 6.

Pooling Layer 1: # of filter nc=6, filter size f=2, padding p=0, stride s=2 The output is (28-2)/2+1=14. So 14 by 14 by 6

Convolutional Layer 2: # of filters nc=16, filter size f=5, padding p=0, stride s=1. Thus the output is (n^(l-1)+2p-f)/s+1=(14-5)/1+1=10. So 10 by 10 by 16.

Pooling Layer 2: # of filters nc=16, filter size f=2, padding p=0, stride s=2 The output is (10-2)/2+1=5. So 5 by 5 by 16.

Fully Connected Layer 3
We flatten the 5 by 5 by 16 pooling output into 5x5x16=400 nodes and connect them to 120 nodes

Fully Connected Layer 4
It will be 84 nodes

Output Layer 5
It will be 10 since we have 10 possible values


<h1>Placeholder</h1>

In [19]:
# Placeholders for a batch of inputs and labels; the leading (batch) dimension is left unspecified.
# X: num_input x num_input single-channel images; Y: one row of width num_classes per image.
X = tf.placeholder(tf.float32,[None,num_input,num_input,1],name='X')
Y = tf.placeholder(tf.int32,[None,num_classes],name='Y')

# Shape conventions
#   each filter:  f x f x nc(l-1)
#   conv weights: f x f x nc(l-1) x nc(l)
#   bias:         nc(l)

# Width of the flattened 5 x 5 x n_hidden_2_filters feature map that feeds the first dense layer.
n_flatten = 5 * 5 * n_hidden_2_filters

# Layer weights. Every dimension comes from the hyperparameters (as the biases already do),
# so each layer's input width always equals the previous layer's output width.
weights = {
    'W1_conv1': tf.Variable(tf.random_normal([5,5,1,n_hidden_1_filters]),name='W1_conv1'),
    'W2_conv2': tf.Variable(tf.random_normal([5,5,n_hidden_1_filters,n_hidden_2_filters]),name='W2_conv2'),
    'W3': tf.Variable(tf.random_normal([n_flatten, n_hidden_3]),name='W3'),
    'W4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4]),name='W4'),
    'Wout': tf.Variable(tf.random_normal([n_hidden_4, num_classes]),name='Wout')
}

# Layer biases, one zero-initialised value per filter / unit.
biases = {
    'b1_conv1': tf.Variable(tf.zeros(shape=[n_hidden_1_filters]),name='b1_conv1'),
    'b2_conv2': tf.Variable(tf.zeros(shape=[n_hidden_2_filters]),name='b2_conv2'),
    'b3': tf.Variable(tf.zeros(shape=[n_hidden_3]),name='b3'),
    'b4': tf.Variable(tf.zeros(shape=[n_hidden_4]),name='b4'),
    'bout': tf.Variable(tf.zeros(shape=[num_classes]),name='bout')
}

In [20]:
# Inspect the leading (batch) dimension of placeholder X; it was declared as None,
# so this displays Dimension(None).
np.shape(X)[0]

Dimension(None)

<h1>Define LeNet-5</h1>

In [21]:
#define a neural net model
def lenet(x):
    """Build the LeNet-5 forward pass on top of the module-level weights and biases.

    Two blocks of (5x5 VALID convolution -> ReLU -> 2x2 average pooling) are
    followed by two ReLU dense layers and a linear output layer.

    Args:
        x: image batch tensor; expected to be [batch, 32, 32, 1] so that the
            second pooling output can be reshaped to 5*5*16 values per image.

    Returns:
        The output-layer tensor (no softmax applied), one row per image.
    """
    unit_stride = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]

    # Convolution block 1
    conv_1 = tf.nn.conv2d(x, weights['W1_conv1'], strides=unit_stride, padding='VALID')
    act_1 = tf.nn.relu(tf.add(conv_1, biases['b1_conv1']))
    pool_1 = tf.nn.avg_pool(act_1, ksize=pool_window, strides=pool_window, padding='VALID')

    # Convolution block 2
    conv_2 = tf.nn.conv2d(pool_1, weights['W2_conv2'], strides=unit_stride, padding='VALID')
    act_2 = tf.nn.relu(tf.add(conv_2, biases['b2_conv2']))
    pool_2 = tf.nn.avg_pool(act_2, ksize=pool_window, strides=pool_window, padding='VALID')

    # Flatten each image's feature map into a single row for the dense layers
    flat = tf.reshape(pool_2, [-1, 5*5*16])

    # Fully connected layers
    dense_3 = tf.nn.relu(tf.add(tf.matmul(flat, weights['W3']), biases['b3']))
    dense_4 = tf.nn.relu(tf.add(tf.matmul(dense_3, weights['W4']), biases['b4']))

    # Linear output layer
    return tf.add(tf.matmul(dense_4, weights['Wout']), biases['bout'])

<h1>Cost and optimization</h1>

In [22]:
# Network output (unnormalised class scores) for the input placeholder
logits = lenet(X)

# Loss: softmax cross-entropy of each example, averaged over the batch
per_example_xent = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits)
loss = tf.reduce_mean(per_example_xent, name='loss')

# Adam optimizer and the op that applies one update step to minimise the loss
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
train_op = optimizer.minimize(loss)

# A prediction is correct when the arg-max of the logits matches the arg-max of the one-hot label
predicted_class = tf.argmax(logits, 1)
true_class = tf.argmax(Y, 1)
correct_pred = tf.equal(predicted_class, true_class)

# Accuracy: fraction of correct predictions in the batch
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

# Op that initialises all variables
init = tf.global_variables_initializer()


# loss_summary=tf.summary.scalar('loss',loss)
# accuracy_summary=tf.summary.scalar('accuracy',accuracy)
# #file_writer=tf.summary.FileWriter(logdir,tf.get_default_graph())

<h1>Training, validating, testing</h1>
<h2>1. Print out validation accuracy after each training epoch</h2>
<h2>2. Print out training time on each epoch</h2>
<h2>3. Print out testing accuracy</h2>

In [23]:
# Train for num_steps epochs, reporting per-epoch time, training accuracy and
# validation accuracy, then evaluate once on the test split.
with tf.Session() as sess:
    sess.run(init)
    num_train = len(x_train)
    for epoch in range(num_steps):
        # Shuffle into epoch-local arrays so the notebook-level x_train / y_train
        # are not overwritten by running this cell.
        x_epoch, y_epoch = shuffle(x_train, y_train)
        batch_accs, batch_sizes = [], []
        epoch_start = time.time()
        for start in range(0, num_train, batch_size):
            stop = start + batch_size
            batch_x, batch_y = x_epoch[start:stop], y_epoch[start:stop]
            feed = {X: batch_x, Y: batch_y}
            # One optimisation step on this mini-batch
            sess.run(train_op, feed_dict=feed)
            # Accuracy on the same mini-batch, measured after the update
            batch_accs.append(sess.run(accuracy, feed_dict=feed))
            batch_sizes.append(len(batch_x))
        elapsed = time.time() - epoch_start

        # The last mini-batch can be smaller than batch_size, so weight each
        # batch accuracy by its size to get the true per-example mean.
        train_acc = np.average(batch_accs, weights=batch_sizes)
        print("Epoch "+str(epoch)+", Time:{} ".format(elapsed)+", Training Accuracy= {:.3f}".format(train_acc))

        val_acc = sess.run(accuracy, feed_dict={X: x_validation, Y: y_validation})
        print("Epoch "+str(epoch)+", Validation Accuracy= {:.3f}".format(val_acc))

    print("Training finished!")
    # Evaluate on the held-out test split, using the y_test labels that pair with x_test
    tes = sess.run(accuracy, feed_dict={X: x_test, Y: y_test})
    print("Testing Accuracy:", tes)

Epoch 0, Time:101.93583416938782 , Training Accuracy= 0.819
Epoch 0, Validation Accuracy= 0.909
Epoch 1, Time:111.10178709030151 , Training Accuracy= 0.924
Epoch 1, Validation Accuracy= 0.931
Epoch 2, Time:96.2876603603363 , Training Accuracy= 0.943
Epoch 2, Validation Accuracy= 0.941
Epoch 3, Time:107.33812761306763 , Training Accuracy= 0.954
Epoch 3, Validation Accuracy= 0.945
Epoch 4, Time:106.95116114616394 , Training Accuracy= 0.960
Epoch 4, Validation Accuracy= 0.952
Epoch 5, Time:107.67722177505493 , Training Accuracy= 0.966
Epoch 5, Validation Accuracy= 0.954
Epoch 6, Time:107.37203526496887 , Training Accuracy= 0.971
Epoch 6, Validation Accuracy= 0.956
Epoch 7, Time:76.16144943237305 , Training Accuracy= 0.975
Epoch 7, Validation Accuracy= 0.958
Epoch 8, Time:68.68642592430115 , Training Accuracy= 0.979
Epoch 8, Validation Accuracy= 0.960
Epoch 9, Time:68.46801114082336 , Training Accuracy= 0.982
Epoch 9, Validation Accuracy= 0.959
Training finished!
Testing Accuracy: 0.9578


# Insights

Here we implemented the LeNet-5 model in TensorFlow. With a learning rate of 0.01 we reached a testing accuracy of about 96% (0.9578). We should have been able to get to around 99% accuracy, but since the homework focused on building the LeNet architecture itself, I didn't worry as much about the testing accuracy.