In [None]:
"""Tutorial on how to build a convnet w/ modern changes, e.g.
Batch Normalization, Leaky rectifiers, and strided convolution.

Parag K. Mital, Jan 2016.
"""

In [1]:
# %%
%matplotlib notebook
import tensorflow as tf
from libs.batch_norm import batch_norm
from libs.activations import lrelu
from libs.connections import conv2d, linear
from libs.datasets import MNIST


In [2]:
# Fraction of GPU memory this process is allowed to allocate.  Capping it
# (here at roughly one third) leaves the remainder of the device free.
GPU_MEMORY_FRACTION = 0.333

gpu_options = tf.GPUOptions(
    per_process_gpu_memory_fraction=GPU_MEMORY_FRACTION)
# Session configuration; passed to tf.Session when the session is created.
config = tf.ConfigProto(gpu_options=gpu_options)

In [3]:
# %% Load the MNIST dataset.  The training split (`mnist.train`) is used
# for the minibatch updates below and the validation split
# (`mnist.validation`) for the accuracy reported after each epoch.
mnist = MNIST()


Extracting ../../mnist/train-images-idx3-ubyte.gz
Extracting ../../mnist/train-labels-idx1-ubyte.gz
Extracting ../../mnist/t10k-images-idx3-ubyte.gz
Extracting ../../mnist/t10k-labels-idx1-ubyte.gz


In [4]:
# %% Setup input to the network and true output label.  These are
# simply placeholders which we'll fill in later.
# Start from an empty default graph so that re-running this cell does not
# stack a second copy of the network on top of the first.
tf.reset_default_graph()
# Input images as flat vectors of 784 values (reshaped to 28 x 28 x 1 below).
x = tf.placeholder(tf.float32, [None, 784])
# True labels, 10 values per example (presumably one-hot: the accuracy
# computation takes an argmax over this axis).
y = tf.placeholder(tf.float32, [None, 10])
# We add a new type of placeholder to denote when we are training.
# This will be used to change the way we compute the network during
# training/testing.  Only the batch-norm layers take it as an argument.
is_training = tf.placeholder(tf.bool, name='is_training')

In [5]:
# %% We'll convert our MNIST vector data to a 4-D tensor:
# N x H x W x C  (batch, height, width, channels)
# This is the layout TensorFlow's convolution ops use by default; it is
# assumed that libs.connections.conv2d expects the same -- TODO confirm.
# The -1 lets the batch dimension be inferred from the size of the input.
x_tensor = tf.reshape(x, [-1, 28, 28, 1])

In [6]:
# %% Build the network: three convolutional layers, each followed by a
# leaky rectifier, then a linear layer with 10 outputs and a softmax.
#
# Optionally we use a method called batch normalization.
# This process attempts to "reduce internal covariate shift"
# which is a fancy way of saying that it will normalize updates for each
# batch using a smoothed version of the batch mean and variance.
# The original paper proposes using this before any nonlinearities, which
# is where it is inserted below.
#
# Set `use_batch_norm = True` to enable it.  After changing the flag,
# re-run from the `tf.reset_default_graph()` cell onwards and create a new
# session, so that the two variants are never mixed in one graph.
use_batch_norm = False


def conv_block(inputs, n_filters, index):
    """Apply conv -> (optional batch norm) -> leaky ReLU.

    Parameters
    ----------
    inputs : Tensor
        Input to the convolution.
    n_filters : int
        Number of output filters of the convolution.
    index : int
        Layer number; used to build the op names `conv<index>`,
        `bn<index>` and `lrelu<index>`.

    Returns
    -------
    Tensor
        Output of the leaky rectifier.
    """
    h = conv2d(inputs, n_filters, name='conv%d' % index)
    if use_batch_norm:
        h = batch_norm(h, is_training, scope='bn%d' % index)
    return lrelu(h, name='lrelu%d' % index)


h_1 = conv_block(x_tensor, 32, 1)
h_2 = conv_block(h_1, 64, 2)
h_3 = conv_block(h_2, 64, 3)
# Flatten the last feature map for the linear layer.  The 4 * 4 spatial
# size assumes each conv2d halves the resolution (28 -> 14 -> 7 -> 4)
# -- TODO confirm against libs.connections.conv2d.
h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4])
# h_4 holds the unnormalized class scores (logits); y_pred the probabilities.
h_4 = linear(h_3_flat, 10)
y_pred = tf.nn.softmax(h_4)

In [7]:
# %% Define loss/eval/training functions
# Cross entropy between the labels and the predictions, summed over the
# batch.  It is computed from the logits (h_4) instead of as
# -sum(y * log(y_pred)): if a softmax output underflows to 0, log gives
# -inf and 0 * -inf is NaN, which would then poison every weight update.
# The fused op computes the same quantity in a numerically stable way.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h_4))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

# Accuracy: fraction of examples whose highest-scoring predicted class
# equals the highest-scoring entry of the label vector.
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [8]:
# %% We now create a new session to actually perform the initialization the
# variables:
sess = tf.Session(config=config)
init = tf.global_variables_initializer()
sess.run(init)

In [None]:
# %% Train in minibatches; after every epoch print the accuracy on the
# validation set.
# results with batch norm
n_epochs = 10
batch_size = 100
for epoch_i in range(n_epochs):
    # One epoch = as many full minibatches as fit in the training set.
    n_batches = mnist.train.num_examples // batch_size
    for batch_i in range(n_batches):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        train_feed = {x: batch_xs, y: batch_ys, is_training: True}
        sess.run(train_step, feed_dict=train_feed)

    # Evaluate with is_training switched off.
    validation_feed = {
        x: mnist.validation.images,
        y: mnist.validation.labels,
        is_training: False,
    }
    validation_accuracy = sess.run(accuracy, feed_dict=validation_feed)
    print(validation_accuracy)

In [9]:
# %% Minibatch training with the validation accuracy printed once per
# epoch.
# results without batch norm
n_epochs = 10
batch_size = 100
for epoch_i in range(n_epochs):
    batches_per_epoch = mnist.train.num_examples // batch_size
    for batch_i in range(batches_per_epoch):
        # Draw the next minibatch and take one optimizer step on it.
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed = {x: batch_xs, y: batch_ys, is_training: True}
        sess.run(train_step, feed_dict=feed)

    # End of epoch: measure accuracy on the validation set.
    eval_feed = {
        x: mnist.validation.images,
        y: mnist.validation.labels,
        is_training: False,
    }
    epoch_accuracy = sess.run(accuracy, feed_dict=eval_feed)
    print(epoch_accuracy)

0.9672
0.9812
0.985
0.984
0.9854
0.9836
0.9876
0.9878
0.9882
0.988


In [None]:
# The performance is slightly better with batch normalization.
# Note: restart the session before training the other variant of the
# network; without restarting it, the results will be wrong.