In [None]:
import tensorflow as tf
import numpy as np
import math
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from data_utils import load_CIFAR10

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """
    Load CIFAR-10 from the 'cifar-10-batches-py' directory and return
    mean-centred train / validation / test splits.

    The first `num_training` training examples form the training split, the
    next `num_validation` training examples form the validation split, and the
    first `num_test` test examples form the test split. The per-pixel mean of
    the training split is subtracted from all three image arrays.

    Returns:
        (X_train, y_train, X_val, y_val, X_test, y_test)
    """
    # Raw arrays exactly as produced by the loader.
    raw_train_X, raw_train_y, raw_test_X, raw_test_y = load_CIFAR10('cifar-10-batches-py')

    # Index with integer arrays (not slices) so every split is an independent
    # copy that can be centred in place below.
    val_idx = np.arange(num_training, num_training + num_validation)
    train_idx = np.arange(num_training)
    test_idx = np.arange(num_test)

    X_val, y_val = raw_train_X[val_idx], raw_train_y[val_idx]
    X_train, y_train = raw_train_X[train_idx], raw_train_y[train_idx]
    X_test, y_test = raw_test_X[test_idx], raw_test_y[test_idx]

    # Centre every split on the mean image of the training split only.
    mean_image = np.mean(X_train, axis=0)
    for split in (X_train, X_val, X_test):
        split -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test


# Build the preprocessed splits once and report the shape of every array.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
for _caption, _array in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape: ', y_val),
    ('Test data shape: ', X_test),
    ('Test labels shape: ', y_test),
):
    print(_caption, _array.shape)

In [None]:
def ConvLayer(a, filters, strides = (1, 1)):
    """
    3x3 convolution with "same" padding.

    Args:
        a: input tensor handed to tf.layers.conv2d.
        filters: number of output channels.
        strides: convolution strides, (1, 1) by default.

    Returns:
        The convolution output (no normalisation or activation applied).
    """
    conv_options = dict(
        filters=filters,
        kernel_size=(3, 3),
        strides=strides,
        padding="same",
        # Xavier-initialised kernel, zero bias, L2 (1e-4) kernel regulariser.
        kernel_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.initializers.zeros(),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(0.0001),
    )
    return tf.layers.conv2d(a, **conv_options)

In [None]:
def NormRelu(a, training=None):
    """
    Apply batch normalisation followed by ReLU.

    Args:
        a: input tensor.
        training: Python bool or scalar boolean tensor selecting batch-norm
            training mode (normalise with the statistics of the current batch
            and register moving-average update ops) versus inference mode
            (normalise with the moving statistics). When omitted, the
            notebook-level `is_training` placeholder is used if it has been
            defined; otherwise inference mode is used.

    Returns:
        relu(batch_normalization(a)).
    """
    if training is None:
        # tf.layers.batch_normalization defaults to training=False. Calling it
        # without the flag keeps the layer in inference mode forever: batch
        # statistics are never used and no ops are added to UPDATE_OPS, so the
        # `is_training` feed and the UPDATE_OPS control dependency do nothing.
        training = globals().get('is_training', False)
    return tf.nn.relu(tf.layers.batch_normalization(a, training=training))

In [None]:
def ConvRelu(a, filters, strides = (1, 1)):
    """Convolve `a` with ConvLayer, then apply NormRelu (batch norm + ReLU)."""
    conv_out = ConvLayer(a, filters, strides)
    return NormRelu(conv_out)

In [None]:
def ResBlock(a, filters):
    """
    Residual block: two ConvRelu layers and one plain ConvLayer, whose output
    is added to the block input before a final NormRelu.

    `filters` is expected to equal the channel count of `a` so that the
    shortcut addition lines up.
    """
    branch = a
    # Two conv + batch-norm + ReLU stages ...
    for _ in range(2):
        branch = ConvRelu(branch, filters)
    # ... then a bare convolution; normalisation happens after the shortcut.
    branch = ConvLayer(branch, filters)
    return NormRelu(branch + a)

In [None]:
def ResNetModel(x, num_classes):
    """
    Build the classifier graph and return the unnormalised class scores.

    Layout: a 16-filter ConvRelu stem and residual block, then two further
    stages (32 and 64 filters) that each start with a stride-2 ConvRelu
    followed by a residual block, then 2x2 average pooling, flatten and a
    dense layer with `num_classes` outputs.
    """
    net = ConvRelu(x, 16)
    net = ResBlock(net, 16)

    # Each later stage halves the spatial size with a strided convolution.
    for width in (32, 64):
        net = ConvRelu(net, width, (2, 2))
        net = ResBlock(net, width)

    net = tf.layers.average_pooling2d(net, pool_size=(2, 2), strides=(2, 2))
    net = tf.layers.flatten(net)

    dense_options = dict(
        units=num_classes,
        kernel_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.initializers.zeros(),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(0.0001),
    )
    return tf.layers.dense(net, **dense_options)

In [None]:
def run_model(session, predict, loss_val, Xd, yd, lr,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    """
    Run `epochs` passes over (Xd, yd) in minibatches and report loss/accuracy.

    Relies on the notebook-level placeholders `X`, `y`, `is_training` and
    `learning_rate` for feeding data.

    Args:
        session: tf.Session used to evaluate the graph.
        predict: tensor of class scores; argmax over axis 1 is the prediction.
        loss_val: scalar loss tensor evaluated (and reported) for every batch.
        Xd, yd: input examples and integer labels, indexed along axis 0.
        lr: value fed to the `learning_rate` placeholder; pass None to leave
            the placeholder unfed (evaluation only).
        epochs: number of passes over the data.
        batch_size: minibatch size; the last batch may be smaller.
        print_every: when training, print batch statistics every this many
            iterations.
        training: optional training op; when given it is run on every batch
            and `is_training` is fed as True.
        plot_losses: plot the per-batch (size-weighted) losses after each epoch.

    Returns:
        (total_loss, total_correct): mean loss and accuracy of the last epoch.
    """
    # have tensorflow compute accuracy
    # NOTE: these ops are created on every call, so repeated calls keep adding
    # nodes to the default graph.
    correct_prediction = tf.equal(tf.cast(tf.argmax(predict,1), tf.int32), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # shuffle indices (once per call; every epoch reuses the same order)
    num_examples = Xd.shape[0]
    train_indices = np.arange(num_examples)
    np.random.shuffle(train_indices)

    training_now = training is not None

    # setting up variables we want to compute (and optimizing)
    # if we have a training function, add that to things we compute
    # Use the loss tensor passed by the caller rather than a notebook global.
    variables = [loss_val, correct_prediction, accuracy]
    if training_now:
        variables[-1] = training

    # float() keeps the ceil correct even under integer division semantics
    num_batches = int(math.ceil(num_examples / float(batch_size)))

    # counter
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(num_batches):
            # generate indices for the batch
            start_idx = (i*batch_size) % num_examples
            idx = train_indices[start_idx:start_idx+batch_size]

            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx,:],
                         y: yd[idx],
                         is_training: training_now}
            # Only feed a learning rate when one was supplied; evaluation
            # callers pass None and nothing they fetch depends on it.
            if lr is not None:
                feed_dict[learning_rate] = lr
            # get batch size
            actual_batch_size = yd[idx].shape[0]

            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables,feed_dict=feed_dict)

            # aggregate performance stats (loss weighted by batch size so the
            # epoch average is exact even when the last batch is smaller)
            losses.append(loss*actual_batch_size)
            correct += np.sum(corr)

            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"\
                      .format(iter_cnt,loss,np.sum(corr)/float(actual_batch_size)))
            iter_cnt += 1
        total_correct = float(correct)/num_examples
        total_loss = np.sum(losses, dtype=float)/num_examples
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(total_loss,total_correct,e+1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e+1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss,total_correct

In [None]:
tf.reset_default_graph()

# Placeholders that run_model feeds on every session.run call.
X = tf.placeholder(tf.float32, [None, 32, 32, 3])
y = tf.placeholder(tf.int32, [None])
is_training = tf.placeholder(tf.bool)
# The learning rate is a placeholder, so the schedule is chosen by the caller.
learning_rate = tf.placeholder(tf.float32)

num_classes = 10

y_out = ResNetModel(X, num_classes)

# Data term only: this is the loss that run_model reports, so training and
# validation numbers stay directly comparable.
total_loss = tf.losses.softmax_cross_entropy(tf.one_hot(y,num_classes),logits=y_out)
mean_loss = tf.reduce_mean(total_loss)

# The kernel_regularizer arguments used by the layers only register their
# penalties in the REGULARIZATION_LOSSES collection; they affect training only
# if they are added to the objective that is minimised.
train_loss = mean_loss + tf.losses.get_regularization_loss()

optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)

# Run any collected update ops (e.g. batch-norm moving averages) with each
# training step.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    train_step = optimizer.minimize(train_loss)


saver = tf.train.Saver()

config = tf.ConfigProto()
config.gpu_options.allocator_type = 'BFC'

sess = tf.Session(config = config)

sess.run(tf.global_variables_initializer())

In [None]:
def train(t_accs, v_accs, max_v_acc, num_ephocs, batch_size, lr,
          decay_every=15, decay_factor=10):
    """
    Train for `num_ephocs` epochs, validating and checkpointing after each one.

    Uses the notebook-level `sess`, `y_out`, `mean_loss`, `train_step`, `saver`
    and the CIFAR-10 splits.

    Args:
        t_accs: list that receives the training accuracy of every epoch
            (appended to in place).
        v_accs: list that receives the validation accuracy of every epoch
            (appended to in place).
        max_v_acc: best validation accuracy seen so far; a checkpoint is
            written to './model_tensorflow.ckpt' only when it is exceeded.
        num_ephocs: number of epochs to run.
        batch_size: minibatch size for both training and validation.
        lr: initial learning rate.
        decay_every: divide the learning rate after every this many epochs
            (counted within this call); 0 or None disables the decay.
        decay_factor: divisor applied to the learning rate at each decay.

    Returns:
        (train_accs, val_accs, max_val_acc, lr): the two history lists, the
        updated best validation accuracy and the final learning rate.
    """
    train_accs = t_accs
    val_accs = v_accs
    max_val_acc = max_v_acc
    for i in range(num_ephocs):
        print('Training')
        _, acc = run_model(sess,y_out,mean_loss,X_train,y_train, lr, 1,batch_size,100,train_step,True)
        train_accs.append(acc)
        print('Validation')
        _, acc = run_model(sess,y_out,mean_loss,X_val,y_val, lr, 1,batch_size)
        val_accs.append(acc)
        if acc > max_val_acc:
            max_val_acc = acc
            saver.save(sess, './model_tensorflow.ckpt')
        # Step decay keyed on the epoch counter. The previous test
        # `lr % 15 == 0` looked at the learning rate itself and never fired
        # for a small positive rate.
        if decay_every and (i + 1) % decay_every == 0:
            lr /= decay_factor
    return train_accs, val_accs, max_val_acc, lr

In [None]:
# Per-epoch accuracy histories, filled in by train().
train_accs, val_accs = [], []
# Checkpoints are written only when validation accuracy beats this value.
# NOTE(review): 0.692 is presumably the best score of an earlier run - confirm.
max_val_acc = 0.692
# Initial learning rate handed to train().
lr = 0.001

In [None]:
# 100 epochs at batch size 256, continuing from the current histories and
# learning rate.
train_accs, val_accs, max_val_acc, lr = train(
    train_accs, val_accs, max_val_acc,
    num_ephocs=100, batch_size=256, lr=lr,
)

In [None]:
# Keep the weights from the end of training under a separate name from the
# best-validation checkpoint written by train().
saver.save(sess, save_path='./model-overfitted.ckpt')

In [None]:
# Training vs. validation accuracy per epoch; the figure is also written to
# 'train_val_acc2.jpg'.
t = range(len(train_accs))
for _series, _fmt, _label in ((train_accs, 'r--', "train acc"),
                              (val_accs, 'b--', "val acc")):
    plt.plot(t, _series, _fmt, label=_label)
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(
    bbox_to_anchor=(0., 1.02, 1., .102),
    loc=3,
    ncol=2,
    mode="expand",
    borderaxespad=0.,
)
plt.savefig('train_val_acc2.jpg')
plt.show()

In [None]:
# Load the checkpoint written by train() when validation accuracy improved.
saver.restore(sess, save_path="model_tensorflow.ckpt")

In [None]:
# Evaluation only: no training op is passed, so run_model just measures loss
# and accuracy on the validation split.
print('Validation')
run_model(sess, y_out, mean_loss, X_val, y_val, lr=None, epochs=1, batch_size=40)

In [None]:
# Evaluation only: no training op is passed, so run_model just measures loss
# and accuracy on the test split.
print('Test')
run_model(sess, y_out, mean_loss, X_test, y_test, lr=None, epochs=1, batch_size=40)