original: https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10

In [None]:
import os
import re
import sys
import tarfile

In [None]:
from datetime import datetime
import time

In [None]:
from six.moves import urllib
import tensorflow as tf
import numpy as np

In [None]:
# Process images of this size. Note that this differs from the original CIFAR
# image size of 32 x 32. If one alters this number, then the entire model
# architecture will change and any model would need to be retrained.
# The value is used as both the height and the width of the crop produced by
# train_inputs() and test_inputs().
IMAGE_SIZE = 24

In [None]:
# Global constants describing the CIFAR-10 data set.
# Number of output units of the final linear layer built by inference().
NUM_CLASSES = 10
# Used by train_inputs() to size the shuffle queue.
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
# Used by test_inputs() to size its queue and by evaluate() to decide how many
# batches make up one evaluation pass.
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000

In [None]:
# Constants describing the training process.
# Passed as learning_rate to the Adam optimizer created in train().
INITIAL_LEARNING_RATE = 0.001       # Initial learning rate.

In [None]:
# Location of the gzip-compressed CIFAR-10 binary archive; this is the default
# download source of maybe_download_and_extract().
DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'

## Download and Extract (if not already done)

In [None]:
def maybe_download_and_extract(dest_directory='./', DATA_URL=DATA_URL):
    """Download the CIFAR-10 tarball and unpack it, skipping work already done.

    The archive is downloaded only when no file with its name exists in
    ``dest_directory``, and it is extracted only when the
    ``cifar-10-batches-bin`` folder is not present there yet.

    Args:
        dest_directory: Directory that receives both the downloaded archive
            and the extracted folder. Created if it does not exist.
        DATA_URL: URL of the gzip-compressed tarball. Its last path component
            is used as the local file name.
    """
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # Report hook for urlretrieve: rewrite one console line in place
            # with the percentage downloaded so far.
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
                    float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()
        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
        # Terminate the progress line before printing the summary.
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
    if not os.path.exists(extracted_dir_path):
        # Open the archive as a context manager so its file handle is closed
        # once extraction finishes, including when extraction raises.
        with tarfile.open(filepath, 'r:gz') as archive:
            archive.extractall(dest_directory)

In [None]:
# Fetch and unpack the data set into the current directory (the function's
# defaults); nothing is downloaded or extracted again if already present.
maybe_download_and_extract()

## Input Data (Definition)

In [None]:
def read_cifar10(filename_queue):
    """Build the ops that read and decode one CIFAR-10 record.

    Args:
        filename_queue: Queue of data-file names handed to the record reader.

    Returns:
        An object carrying three attributes:
            depth: number of colour channels (3).
            label: int32 tensor cast from the leading label byte.
            uint8image: uint8 tensor arranged as [height, width, depth].
    """

    class CIFAR10Record(object):
        pass

    # Record layout, see http://www.cs.toronto.edu/~kriz/cifar.html:
    # the label byte(s) come first, followed by the raw image bytes.
    label_bytes = 1  # 2 for CIFAR-100
    height, width, depth = 32, 32, 3
    image_bytes = height * width * depth
    bytes_per_record = label_bytes + image_bytes

    record = CIFAR10Record()
    record.depth = depth

    # The files carry no header or footer, so the reader's header_bytes and
    # footer_bytes stay at their default of 0.
    reader = tf.FixedLengthRecordReader(record_bytes=bytes_per_record)
    _key, raw_record = reader.read(filename_queue)

    # Turn the record string into a flat uint8 vector.
    raw_bytes = tf.decode_raw(raw_record, tf.uint8)

    # Leading byte(s): the label, widened from uint8 to int32.
    label_slice = tf.strided_slice(raw_bytes, [0], [label_bytes])
    record.label = tf.cast(label_slice, tf.int32)

    # Everything after the label: the image, stored as
    # [depth, height, width] and transposed to [height, width, depth].
    image_slice = tf.strided_slice(raw_bytes, [label_bytes], [bytes_per_record])
    channels_first = tf.reshape(image_slice, [depth, height, width])
    record.uint8image = tf.transpose(channels_first, [1, 2, 0])

    return record

In [None]:
def _generate_image_and_label_batch(image, label, min_queue_examples, batch_size, shuffle):
    """Queue up single examples and return them in batches.

    Args:
        image: Tensor holding one image.
        label: Tensor holding that image's label.
        min_queue_examples: Number of examples kept in the queue; also the
            base of the queue capacity.
        batch_size: Number of examples per returned batch.
        shuffle: Whether to draw the batch from a shuffling queue.

    Returns:
        A pair (images, labels) where labels is reshaped to [batch_size].
    """
    example = [image, label]
    # Settings shared by the shuffling and the non-shuffling queue.
    queue_kwargs = dict(
        batch_size=batch_size,
        num_threads=16,
        capacity=min_queue_examples + 3 * batch_size)

    if not shuffle:
        image_batch, labels = tf.train.batch(example, **queue_kwargs)
    else:
        image_batch, labels = tf.train.shuffle_batch(
            example, min_after_dequeue=min_queue_examples, **queue_kwargs)

    # Make the batched images available to the visualizer.
    tf.summary.image('images', image_batch)

    return image_batch, tf.reshape(labels, [batch_size])

In [None]:
def train_inputs(data_dir, batch_size):
    """Build the randomly distorted training input pipeline.

    Args:
        data_dir: Directory containing data_batch_1.bin ... data_batch_5.bin.
        batch_size: Number of examples per batch.

    Returns:
        The (images, labels) pair produced by
        _generate_image_and_label_batch with shuffling enabled.
    """
    batch_files = [os.path.join(data_dir, 'data_batch_{0:d}.bin'.format(idx))
                   for idx in range(1, 6)]

    # Queue of file names feeding the record reader.
    file_queue = tf.train.string_input_producer(batch_files)

    record = read_cifar10(file_queue)
    image = tf.cast(record.uint8image, tf.float32)

    crop_shape = [IMAGE_SIZE, IMAGE_SIZE, 3]

    # Random distortions: crop, horizontal flip, brightness, contrast.
    # The operations do not commute, so randomizing their order is an option.
    # NOTE: per_image_standardization zeros the mean and makes the stddev
    # unit, so brightness/contrast likely have no effect, see tensorflow#1458.
    image = tf.random_crop(image, crop_shape)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=63)
    image = tf.image.random_contrast(image, lower=0.2, upper=1.8)

    # Zero mean, unit stddev per image.
    standardized = tf.image.per_image_standardization(image)

    # Pin the static shapes.
    standardized.set_shape(crop_shape)
    record.label.set_shape([1])

    # Keep 40% of an epoch in the queue so shuffling mixes well.
    queue_floor = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN * 0.4)
    print ('Filling queue with %d CIFAR images before starting to train. '
           'This will take a few minutes.' % queue_floor)

    return _generate_image_and_label_batch(
        standardized, record.label, queue_floor, batch_size, shuffle=True)

In [None]:
def test_inputs(data_dir, batch_size, train=False):
    """Build the undistorted input pipeline used for evaluation.

    Args:
        data_dir: Directory containing the CIFAR-10 binary files.
        batch_size: Number of examples per batch.
        train: When true, read the five training batch files; otherwise read
            test_batch.bin.

    Returns:
        The (images, labels) pair produced by
        _generate_image_and_label_batch with shuffling disabled.
    """
    if train:
        names = ['data_batch_{0:d}.bin'.format(idx) for idx in range(1, 6)]
    else:
        names = ['test_batch.bin']
    paths = [os.path.join(data_dir, name) for name in names]

    # Queue of file names feeding the record reader.
    file_queue = tf.train.string_input_producer(paths)

    record = read_cifar10(file_queue)
    image = tf.cast(record.uint8image, tf.float32)

    # Central crop to IMAGE_SIZE x IMAGE_SIZE, then zero mean / unit stddev.
    cropped = tf.image.resize_image_with_crop_or_pad(image, IMAGE_SIZE, IMAGE_SIZE)
    standardized = tf.image.per_image_standardization(cropped)

    # Pin the static shapes.
    standardized.set_shape([IMAGE_SIZE, IMAGE_SIZE, 3])
    record.label.set_shape([1])

    # The queue is sized from the evaluation-set constant for both values of
    # `train`.
    queue_floor = int(NUM_EXAMPLES_PER_EPOCH_FOR_EVAL * 0.4)

    return _generate_image_and_label_batch(
        standardized, record.label, queue_floor, batch_size, shuffle=False)

## Training

In [None]:
def _activation_summary(x):
    """Attach a histogram and a sparsity summary to an activation tensor.

    Args:
        x: Tensor whose op name is used as the prefix of both summary tags.
    """
    prefix = x.op.name
    tf.summary.histogram('{}/activations'.format(prefix), x)
    # Sparsity is measured as the fraction of zero entries.
    tf.summary.scalar('{}/sparsity'.format(prefix), tf.nn.zero_fraction(x))

In [None]:
def _variable_on_cpu(name, shape, initializer):
    """Create (or fetch) a float32 variable placed under the '/cpu:0' device.

    Args:
        name: Variable name.
        shape: Variable shape.
        initializer: Initializer used for the variable.

    Returns:
        The variable returned by tf.get_variable.
    """
    with tf.device('/cpu:0'):
        return tf.get_variable(
            name, shape, initializer=initializer, dtype=tf.float32)

In [None]:
def _variable_with_stddev(name, shape, stddev, wd=None):
    """Create a truncated-normal-initialized variable with optional L2 decay.

    Args:
        name: Variable name.
        shape: Variable shape.
        stddev: Standard deviation of the truncated normal initializer.
        wd: Weight-decay factor. When not None, the variable's L2 loss times
            this factor is added to the 'losses' collection.

    Returns:
        The created variable.
    """
    var = _variable_on_cpu(
        name, shape,
        tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32))
    if wd is None:
        return var
    tf.add_to_collection(
        'losses', tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss'))
    return var

In [None]:
def inference(images, batch_size=None):
    """Build the CIFAR-10 network and return its unscaled logits.

    Layers, in order: conv1, pool1, norm1, conv2, norm2, pool2, local3,
    local4, softmax_linear.

    Args:
        images: Batch of input images.
        batch_size: Leading dimension used when flattening for local3. When
            None it is read from the static shape of `images`.

    Returns:
        The output of the final linear layer (no softmax applied).
    """
    if batch_size is None:
        batch_size = images.get_shape()[0].value

    # Settings shared by the layers below.
    unit_stride = [1, 1, 1, 1]
    pool_window = [1, 3, 3, 1]
    pool_stride = [1, 2, 2, 1]
    lrn_args = dict(bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    # conv1
    with tf.variable_scope('conv1') as scope:
        filters = _variable_with_stddev('weights', shape=[5, 5, 3, 64], stddev=5e-2)
        convolved = tf.nn.conv2d(images, filters, unit_stride, padding='SAME')
        offsets = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
        conv1 = tf.nn.relu(tf.nn.bias_add(convolved, offsets), name=scope.name)
        _activation_summary(conv1)

    # pool1, then norm1
    pool1 = tf.nn.max_pool(conv1, ksize=pool_window, strides=pool_stride,
                           padding='SAME', name='pool1')
    norm1 = tf.nn.lrn(pool1, 4, name='norm1', **lrn_args)

    # conv2
    with tf.variable_scope('conv2') as scope:
        filters = _variable_with_stddev('weights', shape=[5, 5, 64, 64], stddev=5e-2)
        convolved = tf.nn.conv2d(norm1, filters, unit_stride, padding='SAME')
        offsets = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
        conv2 = tf.nn.relu(tf.nn.bias_add(convolved, offsets), name=scope.name)
        _activation_summary(conv2)

    # norm2, then pool2 (normalization comes first this time)
    norm2 = tf.nn.lrn(conv2, 4, name='norm2', **lrn_args)
    pool2 = tf.nn.max_pool(norm2, ksize=pool_window, strides=pool_stride,
                           padding='SAME', name='pool2')

    # local3
    with tf.variable_scope('local3') as scope:
        # Flatten each example so one matrix multiply covers the layer.
        flat = tf.reshape(pool2, [batch_size, -1])
        flat_width = flat.get_shape()[1].value
        weights = _variable_with_stddev('weights', shape=[flat_width, 384],
                                        stddev=0.04, wd=0.004)
        offsets = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(flat, weights) + offsets, name=scope.name)
        _activation_summary(local3)

    # local4
    with tf.variable_scope('local4') as scope:
        weights = _variable_with_stddev('weights', shape=[384, 192],
                                        stddev=0.04, wd=0.004)
        offsets = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + offsets, name=scope.name)
        _activation_summary(local4)

    # Final linear layer (WX + b). Softmax is left out on purpose:
    # tf.nn.sparse_softmax_cross_entropy_with_logits takes unscaled logits
    # and applies the softmax internally for efficiency.
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_stddev('weights', [192, NUM_CLASSES], stddev=1/192.0)
        offsets = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0))
        logits = tf.add(tf.matmul(local4, weights), offsets, name=scope.name)
        _activation_summary(logits)

    return logits

In [None]:
def loss(logits, labels):
    """Register the mean cross entropy and return the sum of all losses.

    Args:
        logits: Unscaled logits from inference().
        labels: Class labels; cast to int64 before use.

    Returns:
        The sum of every tensor in the 'losses' collection, i.e. the mean
        cross entropy added here plus whatever was already collected (such as
        the weight-decay terms).
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.cast(labels, tf.int64),
        logits=logits,
        name='cross_entropy_per_example')
    # Average over the batch and record it alongside the other loss terms.
    tf.add_to_collection(
        'losses', tf.reduce_mean(per_example, name='cross_entropy'))

    return tf.add_n(tf.get_collection('losses'), name='total_loss')

In [None]:
def _add_loss_summaries(total_loss):
    """Track moving averages of the losses and add scalar summaries for them.

    Args:
        total_loss: The total loss tensor, tracked in addition to every
            tensor in the 'losses' collection.

    Returns:
        The op that updates the moving averages.
    """
    averager = tf.train.ExponentialMovingAverage(0.9, name='avg')
    tracked = tf.get_collection('losses') + [total_loss]
    update_op = averager.apply(tracked)

    for tensor in tracked:
        # The unsmoothed value gets a '__raw_' suffix; the moving average is
        # reported under the loss's own op name.
        tf.summary.scalar(tensor.op.name + '__raw_', tensor)
        tf.summary.scalar(tensor.op.name, averager.average(tensor))

    return update_op

In [None]:
def train(data_dir, batch_size=128):
    """Build the training part of the graph.

    Args:
        data_dir: Directory containing the CIFAR-10 training batch files.
        batch_size: Number of examples per training batch.

    Returns:
        A tuple (train_op, train_logits, total_loss).
    """
    images, labels = train_inputs(data_dir, batch_size)
    with tf.variable_scope("logits"), tf.name_scope("train") as name_prefix:
        step_counter = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        logits = inference(images, batch_size=batch_size)

        # Called for its side effect of adding the cross entropy to the
        # 'losses' collection; the sum it returns is not used.
        loss(logits, labels)

        # Sum only the losses collected under this name scope.
        scoped_losses = tf.get_collection('losses', name_prefix)
        total_loss = tf.add_n(scoped_losses, name='total_loss')

        # Moving averages and summaries for every loss term.
        averages_op = _add_loss_summaries(total_loss)

        # Gradients are computed only after the loss averages are updated.
        with tf.control_dependencies([averages_op]):
            optimizer = tf.train.AdamOptimizer(learning_rate=INITIAL_LEARNING_RATE)
            grads_and_vars = optimizer.compute_gradients(total_loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=step_counter)

        # Histograms of the trainable variables ...
        for variable in tf.trainable_variables():
            tf.summary.histogram(variable.op.name, variable)

        # ... and of every gradient that exists.
        for gradient, variable in grads_and_vars:
            if gradient is None:
                continue
            tf.summary.histogram(variable.op.name + '/gradients', gradient)

    return train_op, logits, total_loss

In [None]:
def validate(data_dir, batch_size=128, train=True):
    """Build an evaluation branch that reuses the "logits" variable scope.

    Args:
        data_dir: Directory containing the CIFAR-10 binary files.
        batch_size: Number of examples per evaluation batch.
        train: Forwarded to test_inputs(); selects the training files when
            true and test_batch.bin otherwise.

    Returns:
        A tuple (test_op, test_logits) where test_op is the in_top_k (k=1)
        result for the batch.
    """
    images, labels = test_inputs(data_dir, batch_size, train=train)
    with tf.variable_scope("logits", reuse=True), tf.name_scope("test"):
        logits = inference(images, batch_size=batch_size)
    top1_hits = tf.nn.in_top_k(logits, labels, 1)

    return top1_hits, logits

In [None]:
def evaluate(sess, test_op, batch_size):
    """Run `test_op` over one evaluation pass and return the hit fraction.

    Args:
        sess: Object whose run() executes `test_op`.
        test_op: Op yielding per-example correctness flags for one batch.
        batch_size: Number of examples each run of `test_op` covers.

    Returns:
        Correct predictions divided by (number of batches * batch_size),
        where the number of batches is NUM_EXAMPLES_PER_EPOCH_FOR_EVAL /
        batch_size rounded up.
    """
    num_batches = np.ceil(NUM_EXAMPLES_PER_EPOCH_FOR_EVAL / batch_size)
    sample_total = num_batches * batch_size

    hits = 0  # Running count of correct predictions.
    for _ in range(int(num_batches)):
        hits += np.sum(sess.run([test_op]))

    return hits / sample_total

In [None]:
# Directory that holds the model checkpoints (see checkpoint_path below).
train_dir = './train'
# Number of iterations of the training loop.
max_steps = 1000

In [None]:
# Folder with the extracted CIFAR-10 binary files; matches the directory name
# that maybe_download_and_extract() checks for.
data_dir = os.path.join('./', 'cifar-10-batches-bin')
# Training batch size; also used for the examples/sec report in the loop.
batch_size = 128

In [None]:
# Build inference Graph for Training / Test / Evaluation.
with tf.Graph().as_default():
    train_op, train_logits, total_loss = train(data_dir, batch_size=batch_size)
    # Retain the summaries and Build the summary operation.
    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
    train_summary_op = tf.summary.merge(summaries)
    # Build an initialization operation to run below.
    init = tf.global_variables_initializer()
    
    test_op, test_logits = validate(data_dir, batch_size=1000, train=True)
    
    eval_op, eval_logits = validate(data_dir, batch_size=1000, train=False)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

In [None]:
# Nothing else in this file creates train_dir, and saving a checkpoint into a
# missing directory can fail, so make sure it exists up front.
if not os.path.exists(train_dir):
    os.makedirs(train_dir)
# Path prefix handed to saver.save() in the training loop.
checkpoint_path = os.path.join(train_dir, 'model.ckpt')

In [None]:
# sess = tf.Session()
# Open an interactive session bound to the graph that owns train_op (the one
# built above), then run the initializer created in that graph.
sess = tf.InteractiveSession(graph=train_op.graph)
sess.run(init)

In [None]:
# Start the queue runners.
# The input pipelines above are queue based, so this must run before the first
# sess.run() that needs a batch.
# NOTE(review): no coordinator is passed and the returned threads are not
# kept, so nothing in this file stops them explicitly.
tf.train.start_queue_runners(sess=sess)

In [None]:
# Root directory for summary event files; training and evaluation summaries go
# to separate sub-directories.
summary_dir = './summary'
summary_train_dir = os.path.join(summary_dir, 'train')
# Writer for the training summaries; the session's graph is handed over too.
summary_writer = tf.summary.FileWriter(summary_train_dir, sess.graph)

In [None]:
# Separate writer for the evaluation accuracy, using the same 'Accuracy' tag
# as the training writer but a different directory.
summary_eval_dir = os.path.join(summary_dir, 'eval')
eval_summary_writer = tf.summary.FileWriter(summary_eval_dir, sess.graph)

In [None]:
# Main training loop: one optimizer step per iteration, with progress output
# every 10 steps and accuracy measurement, summaries and a checkpoint every
# 100 steps.
for step in range(max_steps):
    # Time a single training step.
    start_time = time.time()
    _, loss_value = sess.run([train_op, total_loss])
    duration = time.time() - start_time
    # Stop immediately if the loss is NaN.
    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

    if step % 10 == 0:
        # Throughput figures derived from the timing of this step only.
        num_examples_per_step = batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = duration

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                    'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                                                 examples_per_sec, sec_per_batch))

    if step % 100 == 0:
        # The batch size passed to evaluate() (1000) matches the batch size the
        # two validate() ops were built with.
        test_prec = evaluate(sess, test_op, 1000)
        print('%s: accuracy (train) = %.3f' % (datetime.now(), test_prec))

        eval_prec = evaluate(sess, eval_op, 1000)
        print('%s: accuracy (eval) = %.3f' % (datetime.now(), eval_prec))

        # The merged training summaries are fetched in their own run() call,
        # separate from the training step above.
        summary_str = sess.run(train_summary_op)
        # Parse the serialized summary so the accuracy measured on the
        # training files can be appended before it is written.
        summary = tf.Summary()
        summary.ParseFromString(summary_str)
        summary.value.add(tag='Accuracy', simple_value=test_prec)
        summary_writer.add_summary(summary, step)
        # The evaluation writer only receives the accuracy value.
        eval_summary = tf.Summary()
        eval_summary.value.add(tag='Accuracy', simple_value=eval_prec)
        eval_summary_writer.add_summary(eval_summary, step)

    # Save the model checkpoint periodically.
    # Also saves on the final step so the last state is always kept.
    if step % 100 == 0 or (step + 1) == max_steps:
        saver.save(sess, checkpoint_path, global_step=step)


In [None]:
# Close both summary writers now that training has finished.
summary_writer.close()
eval_summary_writer.close()