In [1]:
## This notebook trains a CNN on the MNIST data set with TensorFlow


import os
from datetime import datetime

import numpy as np
import tensorflow as tf

In [2]:
## Import data
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets stored under this directory (the cell output lists
# the four archives that get extracted from it).
data_dir = '/tmp/data/'
mnist = input_data.read_data_sets(data_dir)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
## Placeholders for the input data

# Batch of images: any batch size, 28x28 pixels, one channel in the last axis.
X = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name='X')
# One integer class id per image. Declaring the rank-1 shape lets TensorFlow
# reject a wrongly shaped label feed (e.g. one-hot rows) at the placeholder
# instead of failing later inside the loss or accuracy ops.
Y = tf.placeholder(tf.int64, shape=[None], name='Y')

In [4]:
## Create a directory for the log files, which TensorBoard reads for visualization

from datetime import datetime

# Parent directory shared by every run of this notebook.
root_logdir = 'CNN_Mnist2'
# UTC timestamp (YYYYmmddHHMMSS) so each run writes into its own sub-directory.
now = datetime.utcnow().strftime('%Y%m%d%H%M%S')
# Note: the resulting path ends with a trailing '/'.
logdir = '{}/run-{}/'.format(root_logdir, now)

In [5]:
# Define convolution layer
def con_layer(X,size_out,name,kernel_size=3,stride=2,padding='SAME'):
    """Build a 2-D convolution + bias + ReLU layer inside the name scope `name`.

    Args:
        X: 4-D input tensor; its static dimension at index 3 is used as the
            number of input channels.
        size_out: number of output channels (filters).
        name: name scope under which the layer's ops and variables are created.
        kernel_size: side length of the square filter (default 3, the value
            that used to be hard-coded).
        stride: step applied along both spatial axes (default 2, as before).
        padding: padding mode handed to tf.nn.conv2d (default 'SAME', as before).

    Returns:
        The ReLU activation tensor of the layer.

    Histogram summaries are recorded for the weights, the biases and the
    activations so their distributions can be inspected in TensorBoard.
    """
    with tf.name_scope(name):
        size_in = int(X.get_shape()[3])
        filter_shape = (kernel_size, kernel_size, size_in, size_out)
        w = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name='weights')
        # Pass the shape as a list, the documented form for tf.zeros.
        b = tf.Variable(tf.zeros([size_out]), name='bias')
        conv = tf.nn.conv2d(X, w, strides=[1, stride, stride, 1], padding=padding)
        act = tf.nn.relu(conv + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        # Lets us visualize the distribution of activations coming off this layer.
        tf.summary.histogram("activations", act)
        return act

In [6]:
# Define Max_pooling layer
def pooling(X,height,width,name):
    """Apply max pooling to `X` inside the name scope `name`.

    The pooling window spans `height` x `width` positions; the stride is fixed
    at 2 along both of those axes and padding is 'VALID'.
    Returns the tensor produced by tf.nn.max_pool.
    """
    window = [1, height, width, 1]
    step = [1, 2, 2, 1]
    with tf.name_scope(name):
        pooled = tf.nn.max_pool(X, ksize=window, strides=step, padding='VALID')
    return pooled

In [7]:
# Define Fully_connected layer
def fully_connect(X,n_neurons,name,activation=None):
    """Build a fully connected layer inside the name scope `name`.

    Args:
        X: 2-D input tensor; its static dimension at index 1 is used as the
            number of input features.
        n_neurons: number of output units.
        name: name scope under which the layer's ops and variables are created.
        activation: 'relu' applies a ReLU to the affine output. Any other value
            (including the default None) returns the affine output unchanged.

    Returns:
        The ReLU activation when activation == 'relu', otherwise X*w + b.

    Histogram summaries are recorded for the weights and biases, and for the
    activations when the ReLU is applied.
    """
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        # Initial weight spread shrinks with the fan-in of the layer.
        stddev = 2 / np.sqrt(n_inputs)
        w = tf.Variable(tf.truncated_normal([n_inputs, n_neurons], stddev=stddev), name='weights')
        b = tf.Variable(tf.constant(0.1, shape=(n_neurons,)), name='bias')
        z = tf.matmul(X, w) + b
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        if activation == 'relu':
            # Create the ReLU op once, so the histogram tracks the very tensor
            # that is returned instead of a second, duplicate op in the graph.
            act = tf.nn.relu(z)
            tf.summary.histogram("activations", act)
            return act
        return z

In [8]:
# Building the CNN
with tf.name_scope('CNN'):
    # Convolution -> max pooling -> two more convolutions.
    hidden1 = con_layer(X, 12, 'Conv_1')
    hidden2 = pooling(hidden1, 3, 3, 'Pooling_1')
    hidden3 = con_layer(hidden2, 36, 'Conv_2')
    hidden4 = con_layer(hidden3, 64, 'Conv_3')
    # Flatten every non-batch dimension of the last feature map so it can feed
    # the fully connected layers.
    out_h, out_w, out_c = hidden4.get_shape().as_list()[1:]
    fc_input = tf.reshape(hidden4, [-1, out_h * out_w * out_c])
    hidden5 = fully_connect(fc_input, 84, 'fc_1', activation='relu')
    # The last layer has no activation: its 10 outputs are used as logits.
    hidden6 = fully_connect(hidden5, 10, 'fc_2')

In [9]:
## Define loss function

with tf.name_scope('Loss'):
    # Per-example cross entropy between the integer labels fed through Y and
    # the logits produced by the last layer.
    entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=hidden6,
        labels=Y,
    )
    # Scalar objective: the mean over the batch.
    loss = tf.reduce_mean(entropy, name='loss')

In [10]:
## Define optimizer and training step
with tf.name_scope('Train'):
    # Adam with an explicit learning rate and epsilon.
    optimizer = tf.train.AdamOptimizer(
        epsilon=0.1,
        learning_rate=0.01,
    )
    # Op that performs one optimization step on the loss.
    train_op = optimizer.minimize(loss)

In [11]:
## Define evaluation metrics
with tf.name_scope('eval'):
    # Boolean per example: is the label the top-1 entry of the logits?
    correct = tf.nn.in_top_k(hidden6, Y, 1)
    # Accuracy is the mean of those hits, cast to float.
    correct_as_float = tf.cast(correct, tf.float32)
    accuracy = tf.reduce_mean(correct_as_float)

In [12]:
## Create the scalar summaries shown in TensorBoard
acc_train_tag = 'train_acc'
loss_summary = tf.summary.scalar('Loss', loss)
acc_train_summary = tf.summary.scalar(acc_train_tag, accuracy)

In [13]:
# Single op that evaluates every summary defined up to this point.
merged = tf.summary.merge_all()
# One writer per data split so the two appear as separate runs in TensorBoard.
# os.path.join yields a clean path whether or not logdir ends with a separator
# (plain concatenation with '/train' produced a doubled '/').
train_file_writer = tf.summary.FileWriter(os.path.join(logdir, 'train'), tf.get_default_graph())
test_file_writer = tf.summary.FileWriter(os.path.join(logdir, 'test'), tf.get_default_graph())

In [14]:
# Scalar summary of the accuracy tensor under its own 'test_acc' tag; the
# training loop evaluates it separately on the test set.
acc_test_summary = tf.summary.scalar(
    'test_acc',
    accuracy,
)

In [16]:
## Train the network, log summaries once per epoch and save the final weights
saver = tf.train.Saver()
init = tf.global_variables_initializer()
n_epochs = 500
batch_size = 50
n_batches = mnist.train.num_examples // batch_size
# The test feed is identical for every epoch, so reshape the images only once.
test_feed = {X: np.reshape(mnist.test.images, (-1, 28, 28, 1)), Y: mnist.test.labels}
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for iteration in range(n_batches):
            x_batch, y_batch = mnist.train.next_batch(batch_size)
            train_feed = {X: np.reshape(x_batch, (-1, 28, 28, 1)), Y: y_batch}
            sess.run(train_op, feed_dict=train_feed)
        # Training summaries are computed on the last mini-batch of the epoch;
        # the test accuracy is computed on the whole test set.
        summary_str_train = merged.eval(feed_dict=train_feed)
        summary_str_test = acc_test_summary.eval(feed_dict=test_feed)
        train_file_writer.add_summary(summary_str_train, epoch)
        test_file_writer.add_summary(summary_str_test, epoch)
    # Persist the trained variables: they live only in this session and would
    # otherwise be lost when the `with` block closes it.
    save_path = saver.save(sess, os.path.join(logdir, 'model.ckpt'))
    # Push any buffered events to disk in case the writers are never closed.
    train_file_writer.flush()
    test_file_writer.flush()
    

In [None]:
# Close both TensorBoard event writers.
for summary_writer in (train_file_writer, test_file_writer):
    summary_writer.close()