In [1]:
#
# Dropout Convolutional Neural Network MNIST
#
# @author becxer
# @email becxer87@gmail.com
# @reference https://github.com/sjchoi86/Tensorflow-101
#

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline  

print ("packages are loaded")

packages are loaded


In [2]:
# Get MNIST data set from tensorflow
from tensorflow.examples.tutorials.mnist import input_data
mnist_set = input_data.read_data_sets('images/MNIST/', one_hot = True)

train_img = mnist_set.train.images
train_label = mnist_set.train.labels
test_img = mnist_set.test.images
test_label = mnist_set.test.labels

Extracting mnist/train-images-idx3-ubyte.gz
Extracting mnist/train-labels-idx1-ubyte.gz
Extracting mnist/t10k-images-idx3-ubyte.gz
Extracting mnist/t10k-labels-idx1-ubyte.gz


In [3]:
# Options for training
learning_rate = 0.001
training_epochs = 100
batch_size = 100
display_step = 10

# Options for Convolution
x_conv_shape =  [-1,28,28,1] # because MNIST data is oneline

n_conv_shapes = [[3,3,1,64],[3,3,64,128]]
n_conv_strides = [[1,1,1,1],[1,1,1,1]]
n_maxp_shapes = [[1,2,2,1],[1,2,2,1]]
n_maxp_strides = [[1,2,2,1],[1,2,2,1]]

# Options for Dense layer
x_dense_size = 7 * 7 * 128
n_dense = [1024, 100]
w_dev = 0.1

# Options for dropout
drop_out_ratio = 0.7

In [4]:
# Build Graph of Convolution Neural Network

# Define placeholder & Variables
x = tf.placeholder("float", [None, train_img.shape[1]])
y = tf.placeholder("float", [None, train_label.shape[1]])
drop_out_prob = tf.placeholder("float")

def one_cnn_layer(_x, _weight_C, _stride_C, _bias_C, _shape_MP, _stride_MP, _dop):
    _conv1 = tf.nn.conv2d(_x, _weight_C, strides=_stride_C, padding='SAME')
    _conv2 = tf.nn.batch_normalization(_conv1, 0.001, 1.0, 0, 1, 0.0001)
    _conv3 = tf.nn.bias_add(_conv2, _bias_C)
    _conv4 = tf.nn.relu(_conv3)
    _pool = tf.nn.max_pool(_conv4, ksize=_shape_MP, strides=_stride_MP, padding='SAME')
    _out = tf.nn.dropout(_pool, _dop)
    return {'conv1':_conv1, 'conv2':_conv2, 'conv3':_conv3, 'conv4':_conv4, 'pool':_pool, 'out':_out}

def one_dense_layer(_x, _W, _b, _dop):
    return tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(_x, _W),_b)), _dop)

WS = {}
BS = {}

last_input_layer = tf.reshape(x, shape = x_conv_shape)
for idx in range(len(n_conv_shapes)):
    _weight_C = tf.Variable(tf.random_normal(n_conv_shapes[idx], stddev=w_dev))
    _stride_C = n_conv_strides[idx]
    _bias_C = tf.Variable(tf.random_normal([n_conv_shapes[idx][-1]], stddev=w_dev))
    _shape_MP = n_maxp_shapes[idx]
    _stride_MP = n_maxp_strides[idx]
    layer = one_cnn_layer(last_input_layer, _weight_C, _stride_C, _bias_C, _shape_MP, _stride_MP, drop_out_prob)
    last_input_layer = layer['out']
    WS['wc_' + str(idx)] = _weight_C
    BS['bc_' + str(idx)] = _bias_C

last_input_layer_size = x_dense_size
last_input_layer = tf.reshape(last_input_layer, [-1, x_dense_size])

for idx, hl_size in enumerate(n_dense):
    _W = tf.Variable(tf.random_normal([last_input_layer_size, hl_size], stddev=w_dev))
    _b = tf.Variable(tf.random_normal([hl_size])) 
    last_input_layer = one_dense_layer(last_input_layer, _W, _b, drop_out_prob)
    last_input_layer_size = hl_size
    WS['wd_' + str(idx)] = _W
    BS['bd_' + str(idx)] = _b

WS['out'] = tf.Variable(tf.random_normal([last_input_layer_size, train_label.shape[1]], stddev=w_dev))
BS['out'] = tf.Variable(tf.random_normal([train_label.shape[1]], stddev=w_dev))

# Define operators
out = one_dense_layer(last_input_layer, WS['out'], BS['out'], 1.0)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(out, y))
optm = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

pred = tf.argmax(out, 1)
accr = tf.reduce_mean(tf.cast(tf.equal(pred, tf.argmax(y, 1)),"float"))

init = tf.initialize_all_variables()
print ("Graph build")

Graph build


In [5]:
# Training Graph
sess = tf.Session()
sess.run(init)

for epoch in range(training_epochs):
    avg_cost = 0.
    num_batch = int(train_img.shape[0]/batch_size)
    for i in range(num_batch):
        randidx = np.random.randint(train_img.shape[0], size=batch_size)
        batch_xs = train_img[randidx, :]
        batch_ys = train_label[randidx, :]                
            
        sess.run(optm, feed_dict={x: batch_xs, y: batch_ys, drop_out_prob : drop_out_ratio})
        avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, drop_out_prob : 1.})/num_batch
            
    if epoch % display_step == 0:
        train_img_acc = sess.run(accr , ({x: batch_xs, y: batch_ys, drop_out_prob : 1.}))
        print ("epoch: %03d/%03d  ,  cost: %.6f  ,  train_img_acc: %.3f" \
               % (epoch, training_epochs, avg_cost, train_img_acc))

avg_acc = 0.
num_batch_test = int(test_img.shape[0]/batch_size)
for i in range(num_batch_test):
    batch_xs_test = test_img[i * batch_size : (i+1) * batch_size ]
    batch_ys_test = test_label[i * batch_size : (i+1) * batch_size ]            
    avg_acc += sess.run(accr, feed_dict={x : batch_xs_test, y : batch_ys_test, drop_out_prob : 1.})
    
print ("Training complete, Accuracy : %.6f" \
       % (avg_acc / num_batch_test,))


epoch: 000/100  ,  cost: 0.871719  ,  train_img_acc: 1.000
epoch: 010/100  ,  cost: 0.008190  ,  train_img_acc: 1.000
epoch: 020/100  ,  cost: 0.002319  ,  train_img_acc: 1.000
epoch: 030/100  ,  cost: 0.001057  ,  train_img_acc: 1.000
epoch: 040/100  ,  cost: 0.000615  ,  train_img_acc: 1.000
epoch: 050/100  ,  cost: 0.000132  ,  train_img_acc: 1.000
epoch: 060/100  ,  cost: 0.000197  ,  train_img_acc: 1.000
epoch: 070/100  ,  cost: 0.000100  ,  train_img_acc: 1.000
epoch: 080/100  ,  cost: 0.000108  ,  train_img_acc: 1.000
epoch: 090/100  ,  cost: 0.000136  ,  train_img_acc: 1.000
Training complete, Accuracy : 0.992000
