Setup
=====

In [None]:
import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt
%matplotlib inline

from cs231n.data_utils import load_CIFAR10

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """
    Read CIFAR-10 from disk, carve out training / validation / test subsets
    and centre each subset on the mean training image.

    The validation subset is the `num_validation` training images that come
    immediately after the first `num_training` ones; the test subset is the
    first `num_test` test images.

    Returns:
        Tuple (X_train, y_train, X_val, y_val, X_test, y_test).
    """
    # Read the raw arrays
    data_root = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(data_root)

    # Explicit integer index arrays: indexing with them yields copies, so the
    # in-place mean subtraction below never touches the loaded arrays.
    train_idx = np.arange(num_training)
    val_idx = np.arange(num_training, num_training + num_validation)
    test_idx = np.arange(num_test)

    # Validation must be sliced before X_train / y_train are rebound.
    X_val, y_val = X_train[val_idx], y_train[val_idx]
    X_train, y_train = X_train[train_idx], y_train[train_idx]
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    # Centre all three subsets on the per-pixel mean of the training subset
    mean_image = np.mean(X_train, axis=0)
    for subset in (X_train, X_val, X_test):
        subset -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test


# Load the preprocessed splits and report the shape of each array.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
for label, array in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape: ', y_val),
    ('Test data shape: ', X_test),
    ('Test labels shape: ', y_test),
):
    print(label, array.shape)

In [None]:
def run_model(session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    """Run `epochs` passes over (Xd, yd) in minibatches, optionally training.

    Relies on the notebook-level placeholders `X`, `y` and `is_training`
    already being defined: they are used to build the accuracy op and the
    feed dictionary.

    Args:
        session: object whose `run(fetches, feed_dict=...)` evaluates the graph.
        predict: tensor of class scores; argmax over axis 1 is compared to `y`.
        loss_val: loss tensor fetched for every minibatch.
        Xd: input data, indexed along its first axis.
        yd: labels matching `Xd`.
        epochs: number of passes over the data.
        batch_size: maximum number of examples per minibatch.
        print_every: when training, print minibatch stats every this many
            iterations.
        training: optional training op; when given it is run on every
            minibatch and `is_training` is fed as True.
        plot_losses: if True, plot the recorded per-minibatch values after
            each epoch.

    Returns:
        (total_loss, total_correct) for the last epoch: the example-weighted
        mean loss and the fraction of correctly classified examples.
    """
    # have tensorflow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict,1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # shuffle indices (done once; every epoch walks the same permutation)
    train_indices = np.arange(Xd.shape[0])
    np.random.shuffle(train_indices)

    training_now = training is not None

    # Fetch the caller-supplied loss tensor rather than a notebook global.
    # When training, the accuracy fetch is swapped for the training op so
    # that each session.run also performs an optimisation step.
    variables = [loss_val,correct_prediction,accuracy]
    if training_now:
        variables[-1] = training

    # counter
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(int(math.ceil(Xd.shape[0]/batch_size))):
            # generate indices for the batch
            start_idx = (i*batch_size)%Xd.shape[0]
            idx = train_indices[start_idx:start_idx+batch_size]

            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx,:],
                         y: yd[idx],
                         is_training: training_now }
            # the last batch may be smaller than batch_size
            actual_batch_size = yd[idx].shape[0]

            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables,feed_dict=feed_dict)

            # aggregate performance stats; the loss is weighted by batch size
            # so the epoch total can be normalised by the dataset size
            losses.append(loss*actual_batch_size)
            correct += np.sum(corr)

            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"\
                      .format(iter_cnt,loss,np.sum(corr)/actual_batch_size))
            iter_cnt += 1
        total_correct = correct/Xd.shape[0]
        total_loss = np.sum(losses)/Xd.shape[0]
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(total_loss,total_correct,e+1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e+1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss,total_correct

In [None]:
def run_model_short(session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    """Quiet variant of the run loop: prints one summary line per epoch.

    Relies on the notebook-level placeholders `X`, `y` and `is_training`
    already being defined: they are used to build the accuracy op and the
    feed dictionary.

    Args:
        session: object whose `run(fetches, feed_dict=...)` evaluates the graph.
        predict: tensor of class scores; argmax over axis 1 is compared to `y`.
        loss_val: loss tensor fetched for every minibatch.
        Xd: input data, indexed along its first axis.
        yd: labels matching `Xd`.
        epochs: number of passes over the data.
        batch_size: maximum number of examples per minibatch.
        print_every: accepted for signature compatibility; not used here
            because per-iteration printing is disabled.
        training: optional training op; when given it is run on every
            minibatch and `is_training` is fed as True.
        plot_losses: if True, plot the per-epoch accuracy once all epochs
            have finished.

    Returns:
        (total_loss, total_correct) for the last epoch: the example-weighted
        mean loss and the fraction of correctly classified examples.
    """
    # have tensorflow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict,1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # shuffle indices (done once; every epoch walks the same permutation)
    train_indices = np.arange(Xd.shape[0])
    np.random.shuffle(train_indices)

    training_now = training is not None

    # Fetch the caller-supplied loss tensor rather than a notebook global.
    # When training, the accuracy fetch is swapped for the training op so
    # that each session.run also performs an optimisation step.
    variables = [loss_val,correct_prediction,accuracy]
    if training_now:
        variables[-1] = training

    accuracies = []
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(int(math.ceil(Xd.shape[0]/batch_size))):
            # generate indices for the batch
            start_idx = (i*batch_size)%Xd.shape[0]
            idx = train_indices[start_idx:start_idx+batch_size]

            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx,:],
                         y: yd[idx],
                         is_training: training_now }
            # the last batch may be smaller than batch_size
            actual_batch_size = yd[idx].shape[0]

            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables,feed_dict=feed_dict)

            # aggregate performance stats; the loss is weighted by batch size
            # so the epoch total can be normalised by the dataset size
            losses.append(loss*actual_batch_size)
            correct += np.sum(corr)
        total_correct = correct/Xd.shape[0]
        accuracies.append(total_correct)
        total_loss = np.sum(losses)/Xd.shape[0]
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(total_loss,total_correct,e+1))
    if plot_losses:
        plt.plot(accuracies)
        plt.grid(True)
        plt.title('Accuracies')
        plt.xlabel('epoch')
        plt.ylabel('accuracy')
        plt.show()
    return total_loss,total_correct

Starting model
============

* 7x7 conv 32 filters stride 1x1
* batch normalization
* max_pooling 2x2
* affine layer 5408->1024
* affine layer 1024->10

*Training*

Epoch 10, Overall loss = 0.129 and accuracy of 0.959

*Validation*

Epoch 1, Overall loss = 2.47 and accuracy of 0.653

In [None]:
def my_model(X,y,is_training):
    """Build the baseline graph: conv 7x7x32 -> ReLU -> batch norm ->
    2x2 max pool -> affine 5408->1024 -> ReLU -> affine 1024->10.

    `y` is accepted but not used by the model itself. Returns the tensor of
    unnormalised class scores, one row of 10 values per example.
    """
    num_filters = 32
    hidden_units = 1024
    num_classes = 10
    # 13 x 13 spatial positions times 32 channels after pooling
    flat_size = 5408

    # Convolution: 32 filters of 7x7 over 3 input channels, stride 1, no padding.
    # For a 32x32 input the result has shape (?, 26, 26, 32).
    conv_kernel = tf.get_variable("Wconv1", shape=[7, 7, 3, num_filters])
    conv_bias = tf.get_variable("bconv1", shape=[num_filters])
    conv_out = tf.nn.conv2d(X, conv_kernel, strides=[1,1,1,1], padding='VALID') + conv_bias
    conv_act = tf.nn.relu(conv_out)

    # Batch normalisation over the channel axis
    normed = tf.layers.batch_normalization(conv_act, axis=3, training=is_training)

    # 2x2 max pooling with stride 2 -> (?, 13, 13, 32)
    pooled = tf.nn.max_pool(normed, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                            padding='VALID')

    # First affine layer (5408 -> 1024) followed by ReLU
    fc1_weights = tf.get_variable("W1", shape=[flat_size, hidden_units])
    fc1_bias = tf.get_variable("b1", shape=[hidden_units])
    pooled_flat = tf.reshape(pooled, [-1, flat_size])
    hidden = tf.nn.relu(tf.matmul(pooled_flat, fc1_weights) + fc1_bias)

    # Second affine layer (1024 -> 10) producing the class scores
    fc2_weights = tf.get_variable("W2", shape=[hidden_units, num_classes])
    fc2_bias = tf.get_variable("b2", shape=[num_classes])
    return tf.matmul(hidden, fc2_weights) + fc2_bias

Sandbox
=======

```
out_height = ceil(float(in_height - filter_height + 1) / float(strides[1]))
out_width  = ceil(float(in_width - filter_width + 1) / float(strides[2]))
```

In [None]:
# Tabulate the unpadded conv output size for square filters of size 1..9
# applied to a 32x32 input with stride 2.
input_height = input_width = 32
strides = 2


def _valid_out_size(in_size, filter_size, stride):
    """Output length along one axis: ceil((in - filter + 1) / stride)."""
    return math.ceil(float(in_size - filter_size + 1) / float(stride))


for i in range(1, 10):
    out_height = _valid_out_size(input_height, i, strides)
    out_width = _valid_out_size(input_width, i, strides)
    print("Filter: {}\t{}, {}".format(i, out_width, out_height))

In [None]:
tf.reset_default_graph()

def sandbox_model(X,y,is_training, filter_size):
    """Variant of the baseline model with a configurable square conv filter.

    Layers: conv (32 filters, filter_size x filter_size, stride 1) + ReLU ->
    batch norm -> 2x2 max pool (stride 2) -> dense 1024 + ReLU -> dense 10.
    `y` is accepted but not used. Returns the tensor of class scores.
    """
    # Convolution with 32 filters of the requested size, ReLU applied inline
    conv_act = tf.layers.conv2d(inputs=X,
                                filters=32,
                                kernel_size=[filter_size, filter_size],
                                strides=(1, 1),
                                activation=tf.nn.relu)

    # Batch normalisation over the channel axis
    normed = tf.layers.batch_normalization(conv_act, axis=3, training=is_training)

    # 2x2 max pooling with stride 2; the spatial size depends on filter_size
    pooled = tf.layers.max_pooling2d(normed, pool_size=[2, 2], strides=[2, 2])

    # Flatten: feature count is height * width * channels of the pooled map
    flat_size = pooled.shape[1] * pooled.shape[2] * pooled.shape[3]
    pooled_flat = tf.reshape(pooled, [-1, flat_size])

    # Hidden dense layer with 1024 units and ReLU
    hidden = tf.layers.dense(pooled_flat, units=1024, activation=tf.nn.relu)

    # Output dense layer: 10 class scores, no activation
    return tf.layers.dense(hidden, units=10, activation=None)

Test model
==========

In [None]:
model_func = sandbox_model
banner = "=============================================================="

# Build, train and validate a fresh model for every filter size from 1 to 11.
for filter_size in range(1, 12):
    print(banner)
    print("Filter size: " + str(filter_size))
    print(banner)

    # Start from an empty graph so each filter size gets its own variables
    tf.reset_default_graph()

    # The run helpers look these placeholders up as notebook globals, so
    # they must keep exactly these names.
    X = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
    y = tf.placeholder(tf.int64, shape=[None])
    is_training = tf.placeholder(tf.bool)

    y_out = model_func(X, y, is_training, filter_size)
    total_loss = tf.losses.softmax_cross_entropy(onehot_labels=tf.one_hot(y, 10),
                                                 logits=y_out)
    mean_loss = tf.reduce_mean(total_loss)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-3)

    # batch normalization in tensorflow requires this extra dependency
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(extra_update_ops):
        train_step = optimizer.minimize(mean_loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        print('Training')
        run_model_short(sess, y_out, mean_loss, X_train, y_train,
                        epochs=10, batch_size=64, print_every=100,
                        training=train_step, plot_losses=True)

        print('Validation')
        run_model(sess, y_out, mean_loss, X_val, y_val,
                  epochs=1, batch_size=64)