In [3]:
# Automatically reload changes to external code
%load_ext autoreload
%autoreload 2


#A Convolutional Network implementation example using TensorFlow library.
#This example is using the MNIST database of handwritten digits
#(http://yann.lecun.com/exdb/mnist/)

#Author: Aymeric Damien
#Project: https://github.com/aymericdamien/TensorFlow-Examples/

# Based on above project, modified by James Chan

In [4]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from mnist import loader #loader for mnist dataset
from matplotlib import pyplot as plt
import numpy as np
import pdb, time, cv2

# Mnist dataset

The MNIST database of handwritten digits. [[website]](http://yann.lecun.com/exdb/mnist/)<br>
There are **60,000** training images and **10,000** testing images in this dataset.<br>
Each digit is a one-channel image. Size of image = 28*28 = 784.

![](imgs/mnist_ex.png)

TensorFlow ships some built-in helper functions for loading MNIST.

Ex.<br>
from tensorflow.examples.tutorials.mnist import input_data<br>
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Instead of using these functions, this notebook loads the original dataset files manually.<br>
That makes the data processing easier to trace.

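When we load the data, every image is scaled by 1/255 and the mean image of the loaded set is computed (see `load_mnist_data` in the next cell).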


In [5]:
# Load the MNIST data manually with load_mnist_data(flag), defined below.
# flag selects the 'train' or 'test' split,
# e.g. load_mnist_data('train')
# It returns the images, the labels and the mean of all loaded images.
# (Only the mean of the training data is used later on.)
# ims: [N * 784]
# labels: [N]
# ims_mean: [784]

# TensorFlow's built-in MNIST reader.
# NOTE(review): `mnist` is not referenced by any other cell visible in this notebook --
# presumably the call is kept so that the dataset files exist under 'MNIST_data';
# confirm before removing it.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

def load_mnist_data(flag, data_path='data'):
    """Load one MNIST split and scale its pixel values by 1/255.

    Args:
        flag: 'train' for the training split, 'test' for the testing split.
        data_path: directory handed to loader.MNIST.

    Returns:
        (ims, labels, ims_mean): the images divided by 255.0, the labels exactly
        as the loader returned them, and the mean of the scaled images over axis 0.

    Raises:
        ValueError: if flag is neither 'train' nor 'test'.
    """
    mnist_reader = loader.MNIST(data_path)

    # Pick the reader method first, call it once afterwards.
    if flag == 'train':
        fetch_split = mnist_reader.load_training
    elif flag == 'test':
        fetch_split = mnist_reader.load_testing
    else:
        raise ValueError("Error. Only training or testing data.")

    raw_ims, labels = fetch_split()
    scaled_ims = raw_ims / 255.0
    return scaled_ims, labels, np.mean(scaled_ims, axis=0)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [39]:
# Training / evaluation parameters (read as globals by the cells below)
learning_rate = 0.001  # step size handed to the optimizer, default = 0.001
training_epochs = 1 # number of passes over the training set, default = 1 (also the x-axis limit in plot_fcn)
batch_size = 64   # training mini-batch size, default = 64
test_batch_size = 100  # mini-batch size used when evaluating the test set
display_step = 50  # evaluate and log every `display_step` training steps, default = 50

# Network Parameters
n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
stddev=0.01    # standard deviation of the truncated-normal weight initialization in the layer wrappers


# Functions of Convolutional Neural Network

In [42]:
# Conv2D wrapper, with bias and given activation
def convolution_layer(x, kernel_shape, bias_shape, activation_function=None, strides=1):
    """2-D convolution ('SAME' padding) plus bias, optionally followed by an activation.

    Args:
        x: input tensor handed to tf.nn.conv2d.
        kernel_shape: [kernel_height, kernel_width, input_channel, output_channel]
        bias_shape: shape of the bias variable added to the convolution output.
        activation_function: callable applied to the biased output. When None,
            the biased output is returned as is.
        strides: value used for the two middle entries of the conv2d strides
            list, i.e. [1, strides, strides, 1].

    Returns:
        The biased convolution output, passed through activation_function when
        one is given.
    """
    # Kernel: truncated normal with the module-level `stddev`; bias: tf.random_normal defaults.
    weight = tf.Variable(tf.truncated_normal(kernel_shape, mean=0, stddev=stddev))
    bias = tf.Variable(tf.random_normal(bias_shape))
    output = tf.add(tf.nn.conv2d(x, weight, strides=[1, strides, strides, 1], padding='SAME'), bias)
    # Identity test on purpose: `== None` would dispatch to a custom __eq__ of
    # whatever object was passed in and could silently skip the activation.
    if activation_function is None:
        return output
    return activation_function(output)

# MaxPool2D wrapper
def maxpooling_layer(x, k=2):
    """Max-pool x with ksize and strides both set to [1, k, k, 1] and 'SAME' padding."""
    pool_window = [1, k, k, 1]
    pool_strides = [1, k, k, 1]
    pooled = tf.nn.max_pool(x, ksize=pool_window, strides=pool_strides, padding='SAME')
    return pooled

# Fully-connected wrapper, with bias and given activation
def fully_connected_layer(x, kernel_shape, bias_shape, activation_function=None):
    """Fully-connected layer: matmul(x, weight) + bias, optionally followed by an activation.

    Args:
        x: input tensor handed to tf.matmul as the left operand.
        kernel_shape: shape of the weight variable (right operand of the matmul).
        bias_shape: shape of the bias variable added to the product.
        activation_function: callable applied to the biased output. When None,
            the biased output is returned as is.

    Returns:
        The biased product, passed through activation_function when one is given.
    """
    # Weight: truncated normal with the module-level `stddev`; bias: tf.random_normal defaults.
    weight = tf.Variable(tf.truncated_normal(kernel_shape, mean=0, stddev=stddev))
    bias = tf.Variable(tf.random_normal(bias_shape))
    output = tf.add(tf.matmul(x, weight), bias)
    # Identity test on purpose: `== None` would dispatch to a custom __eq__ of
    # whatever object was passed in and could silently skip the activation.
    if activation_function is None:
        return output
    return activation_function(output)

                 
def lenet():
    """Build the LeNet-style graph for MNIST and return its input and output tensors.

    The layer definitions in PART I / PART II are exercise placeholders (`???`);
    the function is not valid Python until they are filled in.

    Returns:
        (x, y, dropout, loss, probs, accuracy, conv1, conv2) where
        x, y, dropout are the input placeholders (images, integer labels, dropout ratio),
        loss is the mean softmax cross-entropy between `logits` and the one-hot labels,
        probs is softmax(logits),
        accuracy is the fraction of samples whose arg-max prediction equals the label,
        conv1 / conv2 are the outputs of the two convolution layers.
    """
    
    # tf Graph input
    x = tf.placeholder(tf.float32, [None, n_input])  # mnist input images, [batch_size x 784]
    y = tf.placeholder(tf.int32,[None])              # label, [batch_size]
    # Dropout ratio; the training loop feeds 0.5 and eval_model feeds 0.0.
    # NOTE(review): no visible line of this function consumes `dropout` -- presumably
    # it is meant to be applied in PART II; confirm when filling in the blanks.
    dropout = tf.placeholder(tf.float32)  #dropout ratio
    
    # Construct model
    x_reshape = tf.reshape(x, shape=[-1, 28, 28, 1]) # Transfer shape. Prepare for convolution

    ######## PART I #########
    # TODO(exercise): replace every `???` below with a call to the layer wrappers defined above.
    # Convolution Layer (5x5 conv, 1 input, 32 outputs)
    conv1 = ???
    # Max Pooling (down-sampling)
    pool1 = ???

    # Convolution Layer (5x5 conv, 32 inputs, 64 outputs)
    conv2 = ???
    # Max Pooling (down-sampling)
    pool2 = ???

    ######## PART II #########
    # Fully connected layer (7*7*64 inputs, 100 outputs)
    # Reshape the second pooling output (pool2) to fit the fully connected layer input
    fc1 = tf.reshape(pool2, [-1, 7*7*64])
    fc1 = ???

    # Fully connected output layer (n_classes = 10 outputs, one per class).
    # NOTE(review): this comment used to say "1024 inputs" while the layer above is
    # described as having 100 outputs; the input size here must equal fc1's output size.
    # Map the fc1 to the number of class prediction                        
    logits = ???
    probs = tf.nn.softmax(logits)

    # Define loss
    # Labels arrive as integer class ids; convert them to float one-hot rows for the loss.
    one_hot_y = tf.one_hot(y, n_classes, on_value=1, off_value=0, axis=-1)
    one_hot_y = tf.cast(one_hot_y, tf.float32)
    # The optimizer is not created here; the main cell builds it from the returned loss.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_y))
        
    # Evaluate model
    correct_pred = tf.equal(tf.argmax(probs, 1), tf.argmax(one_hot_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))


    return x, y, dropout, loss, probs, accuracy, conv1, conv2


In [11]:
# Evaluate whole dataset

def eval_model(sess, x, y, dropout, ims, labels, ims_test, labels_test, ims_mean, iter_per_epoch, test_iter):
    """Evaluate loss and accuracy over the training and the testing set, batch by batch.

    Besides its arguments the function reads these module-level names, which
    must exist before it is called: `cost`, `accuracy` (graph tensors),
    `order_list` (index order of the training samples), `batch_size` and
    `test_batch_size`.

    Args:
        sess: session used to run `cost` and `accuracy`.
        x, y, dropout: placeholders fed with the images, the labels and a
            dropout value of 0.0.
        ims, labels: training images / labels.
        ims_test, labels_test: testing images / labels.
        ims_mean: mean image subtracted from every batch (training and testing).
        iter_per_epoch: number of training mini-batches to evaluate.
        test_iter: number of testing mini-batches to evaluate.

    Returns:
        (Train_Loss, Train_Acc, Test_Loss, Test_Acc): SUMS over the evaluated
        batches, not averages -- callers divide by iter_per_epoch / test_iter.
        Each loss term is the batch cost divided by the batch size.
    """
    def _run_batch(batch_xs, batch_ys):
        # Evaluation always runs with dropout 0.0.
        return sess.run([cost, accuracy], feed_dict={x: batch_xs, y: batch_ys, dropout: 0.0})

    Train_Loss = 0
    Test_Loss = 0
    Train_Acc = 0
    Test_Acc = 0

    # Eval training dataset: batches follow the (possibly permuted) training order.
    for idx in range(iter_per_epoch):
        picks = order_list[idx*batch_size:(idx+1)*batch_size]
        C, A = _run_batch(ims[picks] - ims_mean, labels[picks])
        Train_Loss += C/batch_size   # calculate the loss in average (per image).
        Train_Acc += A

    # Eval testing dataset: plain sequential slices. `order_list` indexes the
    # TRAINING set, so routing test batches through it is only correct while it
    # happens to be the identity order.
    for idx in range(test_iter):
        first, last = idx*test_batch_size, (idx+1)*test_batch_size
        C, A = _run_batch(ims_test[first:last] - ims_mean, labels_test[first:last])
        Test_Loss += C/test_batch_size
        Test_Acc += A

    return Train_Loss, Train_Acc, Test_Loss, Test_Acc

# Main function



In [43]:
# Main training script: load the data, build the graph, train, and log/save the curves.

# Load the training and testing data. Only the training-set mean is kept;
# the mean returned for the test split is discarded via `_`.
ims, labels, ims_mean = load_mnist_data('train', data_path='MNIST_data')
#ims_mean = np.zeros((28*28))
ims_test, labels_test, _ = load_mnist_data('test', data_path='MNIST_data')

# Index order used to pick mini-batches. It is the identity order here (no shuffling)
# and eval_model reads it as a global.
order_list = range(len(ims))

# parameters related to mnist dataset 
# NOTE: this notebook is Python 2 (print statements), so `/` on two ints floors;
# samples beyond the last full mini-batch are never visited.
test_iter = len(ims_test)/test_batch_size # number of testing mini-batches.

iter_per_epoch = len(ims)/batch_size      # number of training mini-batches.

# Launch the graph
with tf.Session() as sess:
#with tf.device("/gpu:0"):
#    config = tf.ConfigProto(allow_soft_placement = True)
#    sess = tf.Session(config = config)

    # Build the network. `cost` and `accuracy` are also read as globals by eval_model.
    x, y, dropout, cost, pred, accuracy, conv1, conv2= lenet()
    # NOTE(review): `cost` is already a mean over the batch (tf.reduce_mean in lenet),
    # so this division only rescales the objective by 1/batch_size.
    train_loss = cost/batch_size # loss per image
    #optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.9).minimize(train_loss)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(train_loss)



    # initialize all variables
    init = tf.global_variables_initializer()
    sess.run(init)
    # Curves to plot later: 'x' is the position in epochs, '*_y' the matching metric values.
    Loss_plt = {'x':[], 'train_y':[], 'test_y':[]}
    Acc_plt  = {'x':[], 'train_y':[], 'test_y':[]} 
    # Before Training (Random initialization), Evaluate the model one-time.
    begin = time.time()
    # eval_model returns sums over batches; they are divided by the batch counts below.
    Train_Loss, Train_Acc, Test_Loss, Test_Acc=eval_model(sess, x, y, dropout, ims, labels, ims_test, labels_test, ims_mean, iter_per_epoch, test_iter)
    print "------After Random Initialization------"
    print "Training: loss=%f, acc=%f.\t\tTesting: loss=%f, acc=%f" %(Train_Loss/iter_per_epoch, Train_Acc/iter_per_epoch,
                                                                     Test_Loss/test_iter, Test_Acc/test_iter)
    
    epoch = 0     # overwritten by the `for epoch` loop below
    step = 1      # 1-based count of training steps
    
    # NOTE(review): step is 1 here, so the pre-training point is recorded at
    # x = 1/iter_per_epoch rather than at x = 0.
    Loss_plt['x'].append(float(step)/iter_per_epoch)
    Loss_plt['train_y'].append(Train_Loss/iter_per_epoch)
    Loss_plt['test_y'].append(Test_Loss/test_iter)
    Acc_plt['x'].append(float(step)/iter_per_epoch)
    Acc_plt['train_y'].append(Train_Acc/iter_per_epoch)
    Acc_plt['test_y'].append(Test_Acc/test_iter)
    
    duration = time.time()-begin
    print " %f seconds"%(duration)
    
    print "------Start Training------"

    for epoch in xrange(training_epochs):
        begin = time.time()
        Train_Loss = 0
        Test_Loss = 0
        Train_Acc = 0
        Test_Acc = 0
        for idx in xrange(iter_per_epoch):
            # Mean-subtracted mini-batch, picked in `order_list` order.
            batch_xs = ims[order_list[idx*batch_size:(idx+1)*batch_size]] - ims_mean
            batch_ys = labels[order_list[idx*batch_size:(idx+1)*batch_size]]
            # Run optimization op (backprop)
            #print batch_ys.shape
            sess.run([optimizer], feed_dict={x: batch_xs, y: batch_ys, dropout: 0.5})
            # Every `display_step` steps: evaluate on the full training and testing sets and log the curves.
            if step % display_step == 0:
                Train_Loss, Train_Acc, Test_Loss, Test_Acc=eval_model(sess, x, y, dropout, ims, labels, ims_test, labels_test, ims_mean, iter_per_epoch, test_iter)
                print "Epoch %.2f, Training: loss=%f, acc=%f.\t\tTesting: loss=%f, acc=%f"%(float(step)/iter_per_epoch, Train_Loss/iter_per_epoch, Train_Acc/iter_per_epoch, 
                                                                                         Test_Loss/test_iter, Test_Acc/test_iter)
                Loss_plt['x'].append(float(step)/iter_per_epoch)
                Loss_plt['train_y'].append(Train_Loss/iter_per_epoch)
                Loss_plt['test_y'].append(Test_Loss/test_iter)
                Acc_plt['x'].append(float(step)/iter_per_epoch)
                Acc_plt['train_y'].append(Train_Acc/iter_per_epoch)
                Acc_plt['test_y'].append(Test_Acc/test_iter)
            step += 1
        
        # Evaluate after each epoch finished.
        Train_Loss, Train_Acc, Test_Loss, Test_Acc=eval_model(sess, x, y, dropout, ims, labels, ims_test, labels_test, ims_mean, iter_per_epoch, test_iter)
        print "Epoch %d, Training: loss=%f, acc=%f.\t\tTesting: loss=%f, acc=%f"%(epoch+1, Train_Loss/iter_per_epoch, Train_Acc/iter_per_epoch, 
                                                                                         Test_Loss/test_iter, Test_Acc/test_iter)
        # NOTE(review): `step` was already incremented after the last training step, so this
        # point is recorded one step past the end of the epoch.
        Loss_plt['x'].append(float(step)/iter_per_epoch)
        Loss_plt['train_y'].append(Train_Loss/iter_per_epoch)
        Loss_plt['test_y'].append(Test_Loss/test_iter)
        Acc_plt['x'].append(float(step)/iter_per_epoch)
        Acc_plt['train_y'].append(Train_Acc/iter_per_epoch)
        Acc_plt['test_y'].append(Test_Acc/test_iter)

        duration = time.time()-begin
        print "Cost %f seconds"%(duration)
        
        # Save results to npz, change the filename by yourself.
        # The two dicts are stored under the keys 'loss' and 'acc'; plot_fcn reads them back
        # from 'npzfiles/<name>.npz' with .item(). The file is rewritten after every epoch.
        np.savez('npzfiles/outfilename', loss=Loss_plt, acc=Acc_plt)
        
        # Visualization of the conv1 / conv2 feature maps for one random training image.
        Vis=False
        # Vis
        if Vis == True:
            index = np.random.randint(len(ims))
            batch_ys = np.zeros((1))
            batch_ys[0] = labels[index]
            # NOTE(review): the image is fed without subtracting ims_mean, unlike the
            # training and evaluation batches -- confirm whether that is intended.
            C1, C2 = sess.run([conv1, conv2], feed_dict={x: ims[index][np.newaxis,:], y:batch_ys, dropout: 0.0})
            print C1.shape, C2.shape
            # conv1 maps fill the 10x10 grid from the first cell onwards ...
            for i in xrange(32):
                plt.subplot(10,10,i+1),plt.imshow(C1[0,:,:,i],cmap='Greys_r'),plt.title('CONV1')
            # ... conv2 maps fill it backwards from the last cell.
            for i in xrange(64):
                plt.subplot(10,10,100-i),plt.imshow(C2[0,:,:,i],cmap='Greys_r'),plt.title('CONV2')
            plt.show()                                 
    print("Optimization Finished!")
    

------After Random Initialization------
Training: loss=0.042481, acc=0.099253.		Testing: loss=0.027142, acc=0.103200
 2.440453 seconds
------Start Training------
Epoch 0.05, Training: loss=0.036073, acc=0.112377.		Testing: loss=0.023081, acc=0.113500
Epoch 0.11, Training: loss=0.035291, acc=0.140675.		Testing: loss=0.022583, acc=0.148400
Epoch 0.16, Training: loss=0.014542, acc=0.683398.		Testing: loss=0.009204, acc=0.686500
Epoch 0.21, Training: loss=0.007848, acc=0.842266.		Testing: loss=0.004826, acc=0.851800
Epoch 0.27, Training: loss=0.005334, acc=0.892326.		Testing: loss=0.003160, acc=0.901400
Epoch 0.32, Training: loss=0.003923, acc=0.925494.		Testing: loss=0.002321, acc=0.928000
Epoch 0.37, Training: loss=0.003527, acc=0.929763.		Testing: loss=0.002045, acc=0.934000
Epoch 0.43, Training: loss=0.003007, acc=0.940068.		Testing: loss=0.001760, acc=0.942900
Epoch 0.48, Training: loss=0.003110, acc=0.941152.		Testing: loss=0.001801, acc=0.943700
Epoch 0.53, Training: loss=0.002817, 

In [12]:
# plot the figure to compare different results.
# loading npzfile (saved by main function)
# Usage : plot_fcn([npz1,npz2,npz3], tag='train'/'test')
# Ex.
# types=['batchsize5', 'batchsize50', 'batchsize500']
# plot_fcn(types, tag='train')  
def plot_fcn(types, tag='train'):
    """Plot the loss and accuracy curves of several saved runs in one figure.

    Args:
        types: run names; run `name` is loaded from 'npzfiles/<name>.npz'.
            At most as many names as there are line styles (7) are accepted.
        tag: 'train' or 'test' -- selects the '<tag>_y' series of each run.

    Returns:
        None. Prints a message and returns early when too many runs are
        given; otherwise shows the figure.
    """
    colors = ['b-', 'r-', 'k-', 'g-', 'y-', 'c-', 'm-']
    if len(types) > len(colors):
        # This used to reference the undefined name `color` and raised NameError.
        print("only accept %d types" % len(colors))
        return

    # The archives hold pickled dicts (written by np.savez in the training cell),
    # which current NumPy only loads with allow_pickle=True. Unpickling can run
    # arbitrary code, so only load files you produced yourself.
    npzfiles = [np.load('npzfiles/' + name + '.npz', allow_pickle=True) for name in types]
    print(npzfiles)

    plt.figure(1)

    # Upper panel: loss.
    plt.subplot(211)
    plt.title('Loss.')
    plt.xlabel('Epochs')
    plt.axis([0, training_epochs, 0, 5.0])
    for line_style, name, npz in zip(colors, types, npzfiles):
        loss_curve = npz['loss'].item()  # 0-d object array -> the saved dict
        plt.plot(loss_curve['x'], loss_curve[tag + '_y'], line_style, label=name)
    plt.legend(loc='upper left')

    # Lower panel: accuracy.
    plt.subplot(212)
    plt.title('Accuracy.')
    plt.xlabel('Epochs')
    plt.ylabel('(%)')
    plt.axis([0, training_epochs, 0, 1.0])
    for line_style, name, npz in zip(colors, types, npzfiles):
        acc_curve = npz['acc'].item()
        plt.plot(acc_curve['x'], acc_curve[tag + '_y'], line_style, label=name)
    plt.show()

In [34]:
# See results
# Each name must match a saved file 'npzfiles/<name>.npz' (the path plot_fcn loads from).
types=['batchsize5', 'batchsize50', 'batchsize500']
plot_fcn(types, tag='train')

[<numpy.lib.npyio.NpzFile object at 0x7f28699eb790>, <numpy.lib.npyio.NpzFile object at 0x7f28699eb650>, <numpy.lib.npyio.NpzFile object at 0x7f28699eb690>]


# Possible Results 


![](imgs/train_test.png)

# Tensorflow python API

### tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=None, data_format=None, name=None)

Computes a 2-D convolution given 4-D input and filter tensors.

Given an **input tensor of shape [batch, in_height, in_width, in_channels]** and a **filter / kernel tensor of shape [filter_height, filter_width, in_channels, out_channels]**, this op performs the following:

Flattens the filter to a 2-D matrix with shape [filter_height * filter_width * in_channels, output_channels].

Extracts image patches from the input tensor to form a virtual tensor of shape [batch, out_height, out_width, filter_height * filter_width * in_channels].
For each patch, right-multiplies the filter matrix and the image patch vector.

In detail, with the default NHWC format,

output[b, i, j, k] = sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] * filter[di, dj, q, k]


Must have strides[0] = strides[3] = 1. For the most common case of the same horizontal and vertical strides, strides = [1, stride, stride, 1].

**Args:**

> input: A Tensor. Must be one of the following types: half, float32, float64.

> filter: A Tensor. Must have the same type as input.

> strides: A list of ints. 1-D of length 4. The stride of the sliding window for each dimension of input. Must be in the same order as the dimension specified with format.

> padding: A string from: "SAME", "VALID". The type of padding algorithm to use.

> use_cudnn_on_gpu: An optional bool. Defaults to True.

> data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". Specify the data format of the input and output data. With the default format "NHWC", the data is stored in the order of: [batch, in_height, in_width,
in_channels]. Alternatively, the format could be "NCHW", the data storage order of: [batch, in_channels, in_height, in_width].

> name: A name for the operation (optional).


**Returns:**

> A Tensor. Has the same type as input.
