In [2]:
import tensorflow as tf
import numpy as np
from scipy.misc import imread, imresize, imsave
import scipy.io
from scipy.io import savemat, loadmat
from os import listdir, walk
from os.path import isfile, join
#import PIL
import matplotlib.pyplot as plt
import time

# Input geometry: every image is resized to IMG_HEIGHT x IMG_WIDTH with
# IMG_CHANNELS colour channels before being fed to the network.
NUM_CLASSES = 2
IMG_HEIGHT = 64
IMG_WIDTH = 64
IMG_CHANNELS = 3
# Number of scalar values in one resized image.
IMG_PIXELS = IMG_HEIGHT * IMG_WIDTH * IMG_CHANNELS

# Wall-clock reference used to report how long data loading takes.
start_time = time.time()

# Directories scanned for image files (only files directly inside are used).
trainPath = '../data/train/'
testPath = '../data/test'

# Class names; the position in this list is the integer label, and files are
# matched to a class by the first three characters of their file name.
imgClasses = ['dog', 'cat']
numClasses = len(imgClasses)
# Number of files directly under trainPath (index 2 of os.walk's first tuple).
numTrain = len(next(walk(trainPath))[2])

def readData(path):
    """Load every image file directly under `path` into memory.

    Each image is resized to (IMG_HEIGHT, IMG_WIDTH) and scaled from the
    0..255 range to roughly [-0.5, 0.5].

    Args:
        path: directory to read. Only files at the top level of the
            directory are used; sub-directories are not descended into.

    Returns:
        (images, labels) where `images` is a float32 array of shape
        (num_files, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS) and `labels` is an
        int32 array of shape (num_files,). A label is the index in
        `imgClasses` of the class name the file name starts with; files that
        match no class keep label 0.
    """
    # Read from the directory that was asked for. The previous version always
    # read `trainPath`, so the "test" split was a copy of the training data.
    fileNames = next(walk(path))[2]
    numImages = len(fileNames)
    images = np.zeros((numImages, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.float32)
    labels = np.zeros((numImages), dtype=np.int32)
    for i, fileName in enumerate(fileNames):
        img = imread(join(path, fileName))
        images[i, :, :, :] = imresize(img, (IMG_HEIGHT, IMG_WIDTH))

        # Label = index of the class whose name prefixes the file name.
        # Unmatched files stay at the zero-initialised label.
        for classIdx, className in enumerate(imgClasses):
            if fileName.startswith(className):
                labels[i] = classIdx
                break

    # Map pixel values from [0, 255] to [-0.5, 0.5].
    images = (images) * (1. / 255) - 0.5
    return images, labels

# Load both splits fully into memory as (images, labels) array pairs.
train_images, train_labels = readData(trainPath)
test_images, test_labels = readData(testPath)

# start_time was taken before the paths were defined, so this covers the
# directory scan as well as both readData calls.
print('Time taken to load data = ', time.time()-start_time)

Time taken to load data =  225.58342790603638


In [12]:
# Number of examples per step; also fixes the placeholder batch dimension.
BATCH_SIZE = 100
# NOTE(review): not referenced anywhere in the visible code; training length
# is controlled by NUM_ITER below.
NUM_EPOCHS = 100
# Step size handed to the Adam optimizer.
LEARNING_RATE = 1e-3
# L2 weight-decay coefficient applied to the two fully connected layers.
REG_STRENGTH = 0.001
# Passed as the second argument of tf.nn.dropout, i.e. it is the probability
# of KEEPING a unit, despite the name.
DROP_PROB = 0.5
# NOTE(review): DATA_DIR is not referenced in the visible code.
DATA_DIR = 'data/'                     # Local CPU
#DATA_DIR = '/data1/ankur/CatVsDog/'      # Berkeley GPU

# Number of minibatch training steps performed by run_training().
NUM_ITER = 10000

def _variable_with_weight_decay(name, shape, stddev, wd):
    """Create a truncated-normal initialised variable with optional L2 decay.

    Args:
        name: name given to the created variable (prefixed by the enclosing
            variable/name scope).
        shape: list of ints, shape of the variable.
        stddev: standard deviation of the truncated normal initialiser.
        wd: L2 weight-decay coefficient, or None to add no decay term. When
            not None, `wd * l2_loss(var)` is added to the 'losses' collection.

    Returns:
        The created tf.Variable.
    """
    initial_value = tf.truncated_normal(shape=shape, stddev=stddev)
    # Attach the name to the variable itself. Previously it was given to the
    # initialiser op, leaving every variable auto-named ("Variable_N").
    var = tf.Variable(initial_value, name=name)
    if wd is not None:
        weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='reg_loss')
        tf.add_to_collection('losses', weight_decay)
    return var

def _conv_relu(inputs, scope_name, kernel_shape):
    """Stride-1 'SAME' convolution + bias + ReLU inside its own scope.

    `kernel_shape` is [height, width, in_channels, out_channels]; the weight
    stddev is 1/sqrt(fan_in) and no weight decay is applied (wd=0.00).
    """
    with tf.variable_scope(scope_name) as scope:
        fan_in = kernel_shape[0] * kernel_shape[1] * kernel_shape[2]
        weights = _variable_with_weight_decay('weights', shape=kernel_shape,
                                              stddev=1/np.sqrt(fan_in), wd=0.00)
        biases = tf.Variable(tf.constant(0.0, shape=[kernel_shape[3]]))
        conv = tf.nn.conv2d(inputs, weights, [1, 1, 1, 1], padding='SAME')
        return tf.nn.relu(tf.nn.bias_add(conv, biases), name=scope.name)


def _max_pool_2x2(inputs, scope_name):
    """2x2 max pooling with stride 2 and 'SAME' padding inside its own scope."""
    with tf.variable_scope(scope_name):
        return tf.nn.max_pool(inputs, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')


def inference(images, keep_prob=DROP_PROB):
    """Build the CNN and return unnormalised class scores.

    Architecture: (conv-conv-pool) x 3, two fully connected ReLU layers,
    dropout, and a final linear layer with NUM_CLASSES outputs.

    Args:
        images: float tensor of shape [batch, height, width, 3]. Height and
            width must be statically known; the batch dimension may be
            unknown.
        keep_prob: probability of keeping a unit in the dropout layer. It
            defaults to DROP_PROB, which reproduces the previous behaviour.
            Pass 1.0 (or feed a placeholder) to disable dropout when
            evaluating.

    Returns:
        Logits tensor of shape [batch, NUM_CLASSES]. No softmax is applied;
        the loss is expected to apply it.
    """
    conv1 = _conv_relu(images, 'conv1', [5, 5, 3, 32])
    conv2 = _conv_relu(conv1, 'conv2', [5, 5, 32, 64])
    pool1 = _max_pool_2x2(conv2, 'pool1')

    conv3 = _conv_relu(pool1, 'conv3', [3, 3, 64, 64])
    conv4 = _conv_relu(conv3, 'conv4', [3, 3, 64, 64])
    pool2 = _max_pool_2x2(conv4, 'pool2')

    conv5 = _conv_relu(pool2, 'conv5', [3, 3, 64, 64])
    conv6 = _conv_relu(conv5, 'conv6', [3, 3, 64, 64])
    pool3 = _max_pool_2x2(conv6, 'pool3')

    # fully connected 1
    with tf.variable_scope('fc1') as scope:
        # Derive the flattened size from the static feature-map shape so the
        # graph no longer requires a statically known batch size.
        dim = int(np.prod(pool3.get_shape().as_list()[1:]))
        pool3_flat = tf.reshape(pool3, [-1, dim])
        weights = _variable_with_weight_decay('weights', shape=[dim, 384], stddev=1/np.sqrt(dim), wd=REG_STRENGTH)
        biases = tf.Variable(tf.constant(0.0, shape=[384]))
        fc1 = tf.nn.relu(tf.matmul(pool3_flat, weights) + biases, name=scope.name)

    # fully connected 2
    with tf.variable_scope('fc2') as scope:
        weights = _variable_with_weight_decay('weights', shape=[384, 192], stddev=1/np.sqrt(384), wd=REG_STRENGTH)
        biases = tf.Variable(tf.constant(0.0, shape=[192]))
        fc2 = tf.nn.relu(tf.matmul(fc1, weights) + biases, name=scope.name)

    # dropout (now outside the fc2 scope it was accidentally indented into)
    fc2_drop = tf.nn.dropout(fc2, keep_prob)

    # Final linear layer. Softmax is deliberately NOT applied here because the
    # cross-entropy loss applies it internally.
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_weight_decay('weights', shape=[192, NUM_CLASSES], stddev=1/np.sqrt(192), wd=0.000)
        biases = tf.Variable(tf.constant(0.0, shape=[NUM_CLASSES]))
        logits = tf.add(tf.matmul(fc2_drop, weights), biases, name=scope.name)

    return logits


def loss(logits, labels):
    """Return the total loss: mean cross-entropy plus collected penalties.

    The mean sparse softmax cross-entropy between `logits` and the integer
    `labels` (cast to int64) is appended to the 'losses' collection, and the
    sum of everything in that collection is returned.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.cast(labels, tf.int64), name='xentropy')
    tf.add_to_collection('losses', tf.reduce_mean(per_example, name='xentropy_mean'))
    return tf.add_n(tf.get_collection('losses'), name='total_loss')


def training(total_loss, learning_rate):
    """Return an op that performs one Adam update minimising `total_loss`.

    A non-trainable 'global_step' variable is created and incremented by the
    returned op on every run.
    """
    step_counter = tf.Variable(0, name='global_step', trainable=False)
    adam = tf.train.AdamOptimizer(learning_rate)
    return adam.minimize(total_loss, global_step=step_counter)


def evaluation(logits, true_labels):
    """Return top-1 accuracy of `logits` against `true_labels`, in percent."""
    hits = tf.cast(tf.nn.in_top_k(logits, true_labels, 1), tf.float32)
    return tf.reduce_mean(hits) * 100

In [13]:
def run_training():
    """Build the graph and train for NUM_ITER minibatch steps.

    Each step samples BATCH_SIZE training examples (with replacement) and
    runs one optimizer update. Every 10 steps the loss and accuracy on that
    minibatch, and the accuracy on the first BATCH_SIZE test examples, are
    printed together with the duration of the training step.

    Reads the module-level arrays train_images, train_labels, test_images
    and test_labels.

    NOTE(review): the reported accuracies come from the same `logits` tensor
    that is trained on, so any stochastic layer built by inference() (such as
    dropout) is also active while evaluating.
    """
    with tf.Graph().as_default():
        X = tf.placeholder(tf.float32, [BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS])
        y = tf.placeholder(tf.int32, [BATCH_SIZE])

        logits = inference(X)
        total_loss = loss(logits, y)
        train_op = training(total_loss, learning_rate=LEARNING_RATE)
        accuracy = evaluation(logits, y)

        init_op = tf.initialize_all_variables()

        # Sample from the array actually being indexed rather than relying on
        # a separately computed global file count.
        num_train = train_images.shape[0]
        # The evaluation batch never changes, so build its feed once.
        test_feed = {X: test_images[:BATCH_SIZE], y: test_labels[:BATCH_SIZE]}

        # The context manager closes the session (releasing its resources)
        # even when training is interrupted, e.g. by KeyboardInterrupt.
        with tf.Session() as sess:
            sess.run(init_op)

            for i in range(NUM_ITER):
                step_start = time.time()
                sampleIndices = np.random.choice(num_train, BATCH_SIZE)
                train_feed = {X: train_images[sampleIndices], y: train_labels[sampleIndices]}
                sess.run(train_op, feed_dict=train_feed)
                duration = time.time() - step_start
                if i%10 == 0:
                    # One forward pass yields both training metrics.
                    batch_loss, train_acc = sess.run([total_loss, accuracy], feed_dict=train_feed)
                    test_acc = sess.run(accuracy, feed_dict=test_feed)
                    print("Iteration = ", i, "Loss = ", batch_loss,
                          "Train Accuracy = ", train_acc,
                          "Test Accuracy = ", test_acc,
                          "Duration = %.1f sec" % duration)

In [14]:
# Start training; progress is printed every 10 iterations by run_training().
print()
run_training()


Iteration =  0 Loss =  0.897286 Train Accuracy =  56.0 Test Accuracy =  49.0 Duration = 3.2 sec
Iteration =  10 Loss =  0.804559 Train Accuracy =  50.0 Test Accuracy =  49.0 Duration = 5.7 sec
Iteration =  20 Loss =  0.765104 Train Accuracy =  46.0 Test Accuracy =  49.0 Duration = 7.4 sec
Iteration =  30 Loss =  0.733826 Train Accuracy =  53.0 Test Accuracy =  58.0 Duration = 9.4 sec
Iteration =  40 Loss =  0.702011 Train Accuracy =  56.0 Test Accuracy =  50.0 Duration = 5.8 sec
Iteration =  50 Loss =  0.712868 Train Accuracy =  50.0 Test Accuracy =  48.0 Duration = 6.3 sec
Iteration =  60 Loss =  0.712052 Train Accuracy =  40.0 Test Accuracy =  67.0 Duration = 5.8 sec
Iteration =  70 Loss =  0.706997 Train Accuracy =  48.0 Test Accuracy =  46.0 Duration = 5.7 sec
Iteration =  80 Loss =  0.69991 Train Accuracy =  59.0 Test Accuracy =  51.0 Duration = 7.5 sec
Iteration =  90 Loss =  0.674792 Train Accuracy =  58.0 Test Accuracy =  59.0 Duration = 6.0 sec
Iteration =  100 Loss =  0.6989

KeyboardInterrupt: 