# Simple LeNet-5 convolutional MNIST model example

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import gzip
import os
import sys
import time

import numpy
from six.moves import urllib
from six.moves import xrange
import tensorflow as tf

SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
WORK_DIRECTORY = 'data'
IMAGE_SIZE = 28
NUM_CHANNELS = 1
PIXEL_DEPTH = 255
NUM_LABELS = 10
VALIDATION_SIZE = 5000
SEED = 66478 # set to None for random seed
BATCH_SIZE = 64
NUM_EPOCHS = 10
EVAL_BATCH_SIZE = 64
EVAL_FREQUENCY = 100

#tf.app.flags.DEFINE_boolean("self_test", False, "True if running a self test.")
FLAGS = tf.app.flags.FLAGS

## Download the MNIST dataset

In [2]:

def maybe_download(filename):
  """Download the data from Yann's website, unless it's already here."""
  if not tf.gfile.Exists(WORK_DIRECTORY):
    tf.gfile.MakeDirs(WORK_DIRECTORY)
  filepath = os.path.join(WORK_DIRECTORY, filename)
  if not tf.gfile.Exists(filepath):
    filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath)
    with tf.gfile.GFile(filepath) as f:
      size = f.Size()
    print('Successfully downloaded', filename, size, 'bytes.')
  return filepath


In [3]:

train_data_filename = maybe_download('train-images-idx3-ubyte.gz')
train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')
test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')
test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')


## Extract data

In [4]:
def extract_data(filename, num_images):
  """Extract the images into a 4D tensor [image index, y, x, channels].
  Values are rescaled from [0, 255] down to [-0.5, 0.5].
  """
  print('Extracting', filename)
  with gzip.open(filename) as bytestream:
    bytestream.read(16)
    buf = bytestream.read(IMAGE_SIZE * IMAGE_SIZE * num_images)
    data = numpy.frombuffer(buf, dtype=numpy.uint8).astype(numpy.float32)
    data = (data - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH
    data = data.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE, 1)
    return data

In [5]:
def extract_labels(filename, num_images):
  """Extract the labels into a vector of int64 label IDs."""
  print('Extracting', filename)
  with gzip.open(filename) as bytestream:
    bytestream.read(8)
    buf = bytestream.read(1 * num_images)
    labels = numpy.frombuffer(buf, dtype=numpy.uint8).astype(numpy.int64)
  return labels

In [6]:
def dense_to_one_hot(labels_dense, num_classes):
    num_labels = labels_dense.shape[0]
    index_offset = numpy.arange(num_labels)*num_classes
    labels_one_hot = numpy.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot

In [7]:
train_data = extract_data(train_data_filename, 60000)
train_labels = extract_labels(train_labels_filename, 60000)
#train_labels = dense_to_one_hot(train_labels, NUM_LABELS)
test_data = extract_data(test_data_filename, 10000)
test_labels = extract_labels(test_labels_filename, 10000)
#test_labels = dense_to_one_hot(test_labels, NUM_LABELS)

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


## Generate training and validation set

In [8]:
validation_data = train_data[:VALIDATION_SIZE, ...]
validation_labels = train_labels[:VALIDATION_SIZE]
train_data = train_data[VALIDATION_SIZE:, ...]
train_labels = train_labels[VALIDATION_SIZE:]
train_labels = dense_to_one_hot(train_labels, NUM_LABELS)
num_epochs = NUM_EPOCHS
train_size = train_labels.shape[0]

## Define Place holders for inputs and outputs

In [9]:
train_data_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
train_labels_node = tf.placeholder(tf.int64, shape= (BATCH_SIZE,NUM_LABELS))
eval_data = tf.placeholder(tf.float32, shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

## Define Variables

In [None]:
conv1_weights = tf.Variable(tf.truncated_normal([5,5,NUM_CHANNELS, 32], # 5x5 filter, depth 32
                                               stddev=0.1, seed=SEED))
conv1_biases = tf.Variable(tf.zeros([32]))
conv2_weights = tf.Variable(tf.truncated_normal([5,5,32,64], stddev=0.1, seed=SEED))
conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))
fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64, 512], stddev=0.1, seed=SEED))
fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
fc2_weights = tf.Variable(tf.truncated_normal([512, NUM_LABELS], stddev=0.1, seed=SEED))
fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))


## Define Model

In [None]:
def model(data, train=False):
    conv = tf.nn.conv2d(data, conv1_weights, strides=[1,1,1,1], padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
    pool = tf.nn.max_pool(relu, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
    conv = tf.nn.conv2d(pool, conv2_weights, strides=[1,1,1,1], padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
    pool = tf.nn.max_pool(relu, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
    pool_shape = pool.get_shape().as_list()
    reshape = tf.reshape(pool, [pool_shape[0], pool_shape[1]*pool_shape[2]*pool_shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights)+fc1_biases)
    if train:
        hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
    return tf.matmul(hidden, fc2_weights)+fc2_biases

logits = model(train_data_node, True)
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, train_labels_node))
#### L2 regularization for fully connected parameters
regularizers = (tf.nn.l2_loss(fc1_weights)+tf.nn.l2_loss(fc1_biases)
                +tf.nn.l2_loss(fc2_weights)+tf.nn.l2_loss(fc2_biases))
loss += 5e-4 * regularizers

## Define Model using TensorFlow-Slim

In [10]:
import tensorflow.contrib.slim as slim
def model_slim(data, train=False):
    if train:
        reuse = None
    else:
        reuse = True
    with slim.arg_scope([slim.conv2d], padding='SAME', activation_fn=tf.nn.relu,
                      weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                      weights_regularizer=slim.l2_regularizer(0.0005)):
        net = slim.layers.conv2d(data, 32, [5, 5], 1,scope='conv1', reuse=reuse)
        net = slim.layers.max_pool2d(net, [2,2], scope='pool1')
        net = slim.layers.conv2d(net, 64, [5, 5],scope='conv2', reuse=reuse)
        net = slim.layers.max_pool2d(net, [2,2], scope='pool2')
    net = slim.layers.flatten(net, scope='flatten3')
    net = slim.layers.fully_connected(net, 512, scope='fc1', reuse=reuse)
    if train:
        net = tf.nn.dropout(net, 0.5, seed=SEED)
    net = slim.layers.fully_connected(net, NUM_LABELS, activation_fn=None, scope='fc2', reuse=reuse)
    return net

logits = model_slim(train_data_node, True)
loss = slim.losses.softmax_cross_entropy(logits, train_labels_node)

## Optimizer

In [11]:
batch = tf.Variable(0)
learning_rate = tf.train.exponential_decay(0.01, batch*BATCH_SIZE, train_size, 0.95, staircase=True)
optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(loss, global_step=batch)

train_prediction = tf.nn.softmax(logits)
eval_prediction = tf.nn.softmax(model_slim(eval_data))

## Evaluating function

In [12]:
def eval_in_batches(data, sess):
    size = data.shape[0]
    if size < EVAL_BATCH_SIZE:
        raise ValueErro('batch size for evals larger than dataset: %d' %size)
    predictions = numpy.ndarray(shape=(size, NUM_LABELS), dtype=numpy.float32)
    for begin in xrange(0, size, EVAL_BATCH_SIZE):
        end = begin + EVAL_BATCH_SIZE
        if end <= size:
            predictions[begin:end,:] = sess.run(eval_prediction, feed_dict={eval_data:data[begin:end,...]})
        else:
            batch_predictions = sess.run(eval_prediction, feed_dict={eval_data:data[-EVAL_BATCH_SIZE:,...]})
    return predictions

### Error rate function

In [13]:
def error_rate(predictions, labels):
    return 100.0 - (100.0 * numpy.sum(numpy.argmax(predictions,1)== labels) / predictions.shape[0])

## Session to run training

In [14]:
start_time = time.time()
with tf.Session() as sess:
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in xrange(int(num_epochs*train_size) // BATCH_SIZE):
        offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
        batch_data = train_data[offset:(offset + BATCH_SIZE),...]
        batch_labels = train_labels[offset:(offset + BATCH_SIZE), ...]
        feed_dict = {train_data_node: batch_data, train_labels_node: batch_labels}
        _, l, lr, predictions = sess.run([optimizer, loss, learning_rate, train_prediction], feed_dict=feed_dict)
        if step % EVAL_FREQUENCY == 0:
            elapsed_time = time.time() - start_time
            start_time = time.time()
            print('Step %d (epoch %.2f), %.1f ms' % (step, float(step) % BATCH_SIZE / train_size, 
                                                     1000 * elapsed_time / EVAL_FREQUENCY))
            print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
            #print('Minibatch error: %.1f%%' % error_rate(predictions, batch_labels))
            print('Validation error: %.1f%%' % error_rate(eval_in_batches(validation_data, sess), validation_labels))
            sys.stdout.flush()
    test_error = error_rate(eval_in_batches(test_data, sess), test_labels)
    print('Test error: %.1f%%' % test_error)     

Initialized
Step 0 (epoch 0.00), 5.8 ms
Minibatch loss: 2.303, learning rate: 0.010000
Validation error: 88.4%
Step 100 (epoch 0.00), 235.4 ms
Minibatch loss: 0.999, learning rate: 0.010000
Validation error: 25.0%
Step 200 (epoch 0.00), 267.5 ms
Minibatch loss: 0.438, learning rate: 0.010000
Validation error: 6.5%
Step 300 (epoch 0.00), 263.0 ms
Minibatch loss: 0.270, learning rate: 0.010000
Validation error: 4.3%
Step 400 (epoch 0.00), 266.9 ms
Minibatch loss: 0.369, learning rate: 0.010000
Validation error: 3.5%
Step 500 (epoch 0.00), 253.8 ms
Minibatch loss: 0.272, learning rate: 0.010000
Validation error: 3.2%
Step 600 (epoch 0.00), 246.1 ms
Minibatch loss: 0.088, learning rate: 0.010000
Validation error: 3.7%
Step 700 (epoch 0.00), 248.9 ms
Minibatch loss: 0.069, learning rate: 0.010000
Validation error: 2.5%
Step 800 (epoch 0.00), 288.0 ms
Minibatch loss: 0.109, learning rate: 0.010000
Validation error: 2.9%
Step 900 (epoch 0.00), 297.8 ms
Minibatch loss: 0.058, learning rate: 0.