In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import time
import read_data
import read_data_val
import model_cnn
import tensorflow as tf

from os.path import join
import tensorflow as tf

# TFRecord file name for each data split; these must agree with the names
# written by convert_to_records.
TRAIN_FILE = 'train.tfrecords'
VALIDATION_FILE = 'validation.tfrecords'
TEST_FILE = 'test.tfrecords'

# Directory that holds the TFRecord files.
# DATA_DIR = 'data/'                    # Local CPU
DATA_DIR = '/data1/ankur/CatVsDog/'     # Berkeley GPU

# Label space and fixed image geometry assumed by the decoding code.
NUM_CLASSES = 2
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = 128, 128, 3
# Length of one image once flattened to a vector.
IMG_PIXELS = IMG_HEIGHT * IMG_WIDTH * IMG_CHANNELS


def dense_to_one_hot(labels_dense, num_classes):
    """Convert integer class labels to one-hot row vectors.

    Args:
        labels_dense: Array-like of integer class indices, either 1-D with
            shape (n,) or a column with shape (n, 1).
        num_classes: Number of classes, i.e. the width of each one-hot row.

    Returns:
        A float64 array of shape (n, num_classes) with a single 1 per row.

    Raises:
        ValueError: if any label lies outside [0, num_classes).
    """
    labels_arr = np.asarray(labels_dense)
    num_labels = labels_arr.shape[0]
    labels = labels_arr.ravel()
    labels_one_hot = np.zeros((num_labels, num_classes))
    if labels.size == 0:
        return labels_one_hot
    # Validate up front: with flat-index arithmetic an out-of-range label would
    # silently set a cell belonging to a different row instead of failing.
    if labels.min() < 0 or labels.max() >= num_classes:
        raise ValueError(
            'labels must lie in [0, %d), got values in [%d, %d]'
            % (num_classes, labels.min(), labels.max()))
    labels_one_hot[np.arange(num_labels), labels] = 1
    return labels_one_hot


def read_and_decode(filename_queue):
    """Read one serialized example from a TFRecord queue and decode it.

    Args:
        filename_queue: Queue of TFRecord file names, as produced by
            tf.train.string_input_producer.

    Returns:
        A tuple (image, label):
        * image: float32 tensor of shape [IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS]
          scaled to [-0.5, 0.5].
        * label: int32 scalar tensor.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # No defaults are given, so every key below must be present in each
        # record. 'height', 'width' and 'depth' are parsed but not used: the
        # image geometry is fixed by the IMG_* module constants.
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'depth': tf.FixedLenFeature([], tf.int64)
        })

    # The raw bytes decode to a flat uint8 vector of unknown static length;
    # declare the expected length before reshaping to an image.
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image.set_shape([IMG_PIXELS])
    image = tf.reshape(image, [IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS])

    # Convert from [0, 255] -> [-0.5, 0.5] floats.
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5

    # The label is parsed as an int64 scalar; hand it on as int32.
    label = tf.cast(features['label'], tf.int32)

    return image, label


def inputs(data_set, batch_size, num_epochs):
    """Build the queue-based input pipeline for one split of the data.

    Args:
        data_set: Which split to read: 'train', 'validation' or 'test'.
        batch_size: Number of examples per returned batch.
        num_epochs: Number of times to read the input data, or 0/None to
            cycle through it forever.

    Returns:
        A tuple (images, labels), where:
        * images is a float32 tensor with shape
          [batch_size, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS] in the range
          [-0.5, 0.5].
        * labels is an int32 tensor with shape [batch_size] holding the class
          index of each image (presumably in [0, NUM_CLASSES) -- this depends
          on how the records were written).

    Raises:
        ValueError: if data_set is not one of the three split names.

    Note that tf.train.QueueRunner objects are added to the graph, which
    must be run using e.g. tf.train.start_queue_runners().
    """
    if not num_epochs:
        num_epochs = None
    if data_set == 'train':
        record_file = TRAIN_FILE
    elif data_set == 'validation':
        record_file = VALIDATION_FILE
    elif data_set == 'test':
        record_file = TEST_FILE
    else:
        raise ValueError('data_set should be one of \'train\', \'validation\' or \'test\'')
    # DATA_DIR is looked up at call time, so the most recent assignment wins.
    filename = join(DATA_DIR, record_file)

    with tf.name_scope('input'):
        filename_queue = tf.train.string_input_producer(
            [filename], num_epochs=num_epochs)

    # Even when reading in multiple threads, share the filename
    # queue.
    image, label = read_and_decode(filename_queue)

    # Shuffle the examples and collect them into batch_size batches.
    # (Internally uses a RandomShuffleQueue.)
    # We run this in two threads to avoid being a bottleneck.
    images, sparse_labels = tf.train.shuffle_batch(
        [image, label], batch_size=batch_size, num_threads=2,
        capacity=1000 + 3 * batch_size,
        # Ensures a minimum amount of shuffling of examples.
        min_after_dequeue=1000)

    return images, sparse_labels


In [2]:
BATCH_SIZE = 100
NUM_EPOCHS = 100
LEARNING_RATE = 1e-3
DROP_PROB = 0.5
# L2 weight-decay coefficient applied to the fully connected layers in
# inference(). The name was referenced there but never defined in the
# notebook, so a fresh kernel failed with NameError.
# NOTE(review): 5e-3 is an assumed value -- TODO confirm against the
# original experiment.
REG_STRENGTH = 5e-3
# This assignment replaces the DATA_DIR set in the first cell.
DATA_DIR = 'data/'                     # Local CPU
#DATA_DIR = '/data1/ankur/CatVsDog/'      # Berkeley GPU

NUM_ITER = 10000

def _get_or_reuse_variable(name, shape, initializer):
    """Fetch `name` from the current variable scope, creating it on first use.

    Returns:
        A tuple (variable, created); `created` is True when this call made
        the variable and False when an already existing one was returned.
    """
    try:
        return tf.get_variable(name, shape=shape, initializer=initializer), True
    except ValueError:
        # tf.get_variable refuses to re-create an existing variable unless the
        # scope is in reuse mode. Re-enter the current scope with reuse=True
        # so the existing variable is shared; if that lookup fails as well,
        # its ValueError propagates to the caller.
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            return tf.get_variable(name, shape=shape, initializer=initializer), False


def _variable_with_weight_decay(name, shape, stddev, wd):
    """Return a truncated-normal weight variable, registering its L2 penalty.

    Args:
        name: Variable name inside the current variable scope.
        shape: Shape of the variable.
        stddev: Standard deviation of the truncated-normal initializer.
        wd: L2 coefficient; when not None, `wd * l2_loss(var)` is added to the
            'losses' collection.

    Returns:
        The variable. A second request for the same scoped name returns the
        existing variable and does not register the penalty again.
    """
    var, created = _get_or_reuse_variable(
        name, shape, tf.truncated_normal_initializer(stddev=stddev))
    if wd is not None and created:
        weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='reg_loss')
        tf.add_to_collection('losses', weight_decay)
    return var


def _bias_variable(shape):
    """Return the zero-initialised 'biases' variable of the current scope."""
    return _get_or_reuse_variable('biases', shape, tf.constant_initializer(0.0))[0]


def _conv_relu(scope_name, inputs, kernel_shape, wd=0.0):
    """Stride-1 SAME convolution + bias + ReLU inside its own variable scope.

    `kernel_shape` is [height, width, in_channels, out_channels].
    """
    fan_in = kernel_shape[0] * kernel_shape[1] * kernel_shape[2]
    with tf.variable_scope(scope_name) as scope:
        weights = _variable_with_weight_decay(
            'weights', shape=kernel_shape, stddev=1 / np.sqrt(fan_in), wd=wd)
        biases = _bias_variable([kernel_shape[3]])
        conv = tf.nn.conv2d(inputs, weights, [1, 1, 1, 1], padding='SAME')
        return tf.nn.relu(tf.nn.bias_add(conv, biases), name=scope.name)


def _max_pool_2x2(scope_name, inputs):
    """2x2 max pooling with stride 2 and SAME padding."""
    with tf.variable_scope(scope_name):
        return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


def inference(images, keep_prob=None):
    """Build the CNN forward pass and return unnormalised class scores.

    Architecture: conv5x5(32) -> conv5x5(64) -> pool -> 2 x conv3x3(64) ->
    pool -> 2 x conv3x3(64) -> pool -> fc(384) -> fc(192) -> dropout ->
    linear(NUM_CLASSES).

    Variables are created on the first call in a graph. Later calls in the
    same graph share them, so a second tower (e.g. for validation) evaluates
    the weights that are being trained rather than a fresh random copy.

    Args:
        images: Float image batch with a statically known batch size and 3
            channels (the first kernel is 5x5x3).
        keep_prob: Probability of keeping a unit in the dropout layer.
            Defaults to 1 - DROP_PROB. Pass 1.0 to disable dropout, e.g. when
            building an evaluation tower.

    Returns:
        Logits tensor of shape [batch_size, NUM_CLASSES].
    """
    if keep_prob is None:
        # tf.nn.dropout expects the probability of *keeping* a unit.
        keep_prob = 1.0 - DROP_PROB

    conv1 = _conv_relu('conv1', images, [5, 5, 3, 32])
    conv2 = _conv_relu('conv2', conv1, [5, 5, 32, 64])
    pool1 = _max_pool_2x2('pool1', conv2)

    conv3 = _conv_relu('conv3', pool1, [3, 3, 64, 64])
    conv4 = _conv_relu('conv4', conv3, [3, 3, 64, 64])
    pool2 = _max_pool_2x2('pool2', conv4)

    conv5 = _conv_relu('conv5', pool2, [3, 3, 64, 64])
    conv6 = _conv_relu('conv6', conv5, [3, 3, 64, 64])
    pool3 = _max_pool_2x2('pool3', conv6)

    # fully connected 1
    with tf.variable_scope('fc1') as scope:
        # Flatten each example; the static batch size lets TF infer `dim`.
        batch_size = images.get_shape()[0].value
        pool3_flat = tf.reshape(pool3, [batch_size, -1])
        dim = pool3_flat.get_shape()[1].value
        weights = _variable_with_weight_decay('weights', shape=[dim, 384], stddev=1/np.sqrt(dim), wd=REG_STRENGTH)
        biases = _bias_variable([384])
        fc1 = tf.nn.relu(tf.matmul(pool3_flat, weights) + biases, name=scope.name)

    # fully connected 2
    with tf.variable_scope('fc2') as scope:
        weights = _variable_with_weight_decay('weights', shape=[384, 192], stddev=1/np.sqrt(384), wd=REG_STRENGTH)
        biases = _bias_variable([192])
        fc2 = tf.nn.relu(tf.matmul(fc1, weights) + biases, name=scope.name)

    # dropout
    fc2_drop = tf.nn.dropout(fc2, keep_prob)

    # Linear read-out. Raw logits are returned on purpose: the softmax is
    # applied inside the loss, so applying it here as well would be a bug.
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_weight_decay('weights', shape=[192, NUM_CLASSES], stddev=1/np.sqrt(192), wd=0.000)
        biases = _bias_variable([NUM_CLASSES])
        logits = tf.add(tf.matmul(fc2_drop, weights), biases, name=scope.name)

    return logits


def lossfn(logits, labels):
    """Return the total training loss for a batch.

    The mean softmax cross-entropy between `logits` and the integer `labels`
    is added to the 'losses' collection, and the sum of everything currently
    in that collection is returned as 'total_loss'.
    """
    int64_labels = tf.cast(labels, tf.int64)
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, int64_labels, name='xentropy')
    tf.add_to_collection('losses', tf.reduce_mean(per_example, name='xentropy_mean'))
    return tf.add_n(tf.get_collection('losses'), name='total_loss')


def training(total_loss, learning_rate):
    """Return the op that applies one Adam update minimising `total_loss`.

    A non-trainable 'global_step' variable is created and handed to
    minimize() as its step counter.
    """
    adam = tf.train.AdamOptimizer(learning_rate)
    step_counter = tf.Variable(0, name='global_step', trainable=False)
    return adam.minimize(total_loss, global_step=step_counter)


def evaluation(logits, true_labels):
    """Return the number (not the fraction) of top-1 correct predictions.

    The result is an int32 scalar tensor counting the rows of `logits` whose
    highest-scoring class equals the corresponding entry of `true_labels`.
    """
    is_hit = tf.nn.in_top_k(logits, true_labels, 1)
    num_correct = tf.reduce_sum(tf.cast(is_hit, tf.int32))
    return num_correct

In [3]:
def run_training():
    """Build the training and validation graphs and train until the data ends.

    Each step runs one optimiser update and evaluates one training batch and
    one validation batch, printing the loss and both accuracies (in percent).
    Training stops when the training input queue has delivered NUM_EPOCHS
    epochs.

    NOTE(review): the validation tower is built by a second inference() call
    with default arguments. Its accuracy is only meaningful if inference()
    reuses the variables created by the first call, and any dropout that
    inference() applies by default is active on the validation tower too.
    """
    with tf.Graph().as_default():
        images, labels = inputs(data_set='train', batch_size=BATCH_SIZE, num_epochs=NUM_EPOCHS)

        logits = inference(images)

        loss = lossfn(logits, labels)

        train_op = training(loss, learning_rate=LEARNING_RATE)

        # evaluation() yields the number of correct predictions in the batch.
        train_correct = evaluation(logits, labels)

        # Don't specify number of epochs in validation set, otherwise that limits the training duration as the
        # validation set is 10 times smaller than the training set
        val_images, val_labels = inputs(data_set='validation', batch_size=BATCH_SIZE, num_epochs=None)
        val_logits = inference(val_images)
        val_correct = evaluation(val_logits, val_labels)

        init_op = tf.initialize_all_variables()

        sess = tf.Session()
        coord = tf.train.Coordinator()
        threads = []
        step = 0

        try:
            sess.run(init_op)
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            while not coord.should_stop():
                start_time = time.time()

                _, loss_value, train_hits, val_hits = sess.run([train_op, loss, train_correct, val_correct])

                duration = time.time() - start_time
                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                # Turn the per-batch hit counts into percentages so the
                # printed numbers stay meaningful for any BATCH_SIZE.
                train_acc_val = 100.0 * train_hits / BATCH_SIZE
                valid_acc_val = 100.0 * val_hits / BATCH_SIZE

                print('Step %d : loss = %.5f , training accuracy = %.1f, validation accuracy = %.1f (%.3f sec)'
                      % (step, loss_value, train_acc_val, valid_acc_val, duration))
                step += 1
        except tf.errors.OutOfRangeError:
            print('Done training for %d epochs, %d steps' % (NUM_EPOCHS, step))
        finally:
            # Always stop the queue threads and release the session, including
            # when an unexpected error (e.g. the NaN assertion) aborts the loop.
            try:
                coord.request_stop()
                coord.join(threads)
            finally:
                sess.close()



In [None]:
# Build the graph and train; progress is printed as training runs.
run_training()

Step 0 : loss = 1.93766 , training accuracy = 100.0, validation accuracy = 100.0 (44.571 sec)
Step 1 : loss = 1.72173 , training accuracy = 48.0, validation accuracy = 100.0 (39.994 sec)
