# Homework 6
In this homework we will build deeper networks with 20 or more layers.

Development notes: 

v1: Deep net without batch norm: hard and long to train, bad accuracy on test set
v2: Deep net with batch norm: much faster to train; got a best accuracy of 97.1 by throwing compute at it
v3: Deep net with resnet structure : TBD

## Part 0: Setup

In [1]:
import tensorflow as tf
import numpy as np
import util

# Load the data we are giving you
def load(filename, W=64, H=64):
    """
    Read a flat uint8 record file and split it into images and labels.

    Each record is W*H*3 pixel bytes followed by a single label byte.

    Arguments:
    filename -- path of the binary file to read
    W -- image width in pixels
    H -- image height in pixels
    Return:
    images -- uint8 array of shape (N, H, W, 3)
    labels -- uint8 array of shape (N,)
    """
    pixels_per_image = W * H * 3
    records = np.fromfile(filename, dtype=np.uint8)
    # One row per record: the pixel bytes plus the trailing label byte
    records = records.reshape((-1, pixels_per_image + 1))
    images = records[:, :pixels_per_image].reshape((-1, H, W, 3))
    labels = records[:, pixels_per_image]
    return images, labels

# Read the training split and report the array shapes that were loaded
image_data, label_data = load('tux_train.dat')

print('Input shape: {}'.format(image_data.shape))
print('Labels shape: {}'.format(label_data.shape))

# Presumably the number of label classes in the dataset
num_classes = 6

Input shape: (12257, 64, 64, 3)
Labels shape: (12257,)


## Part 1: Define your convnet

In [2]:
def resnet_layer(x, n_out):
    """
    Build one residual unit on top of x.

    Two 3x3, stride-1 convolutions are applied to the same input: a residual branch that keeps
    the default activation of tf.contrib.layers.conv2d, and a shortcut branch with the activation
    disabled, which acts as a linear projection so both branches end up with n_out channels.
    Both convolutions register tf.nn.l2_loss as their weights regularizer.

    Arguments:
    x -- input tensor
    n_out -- number of output channels of the layer
    Return:
    H_x -- residual branch plus projected shortcut (F(x) + x in the ResNet paper by Kaiming He)
    """
    # Settings shared by the residual branch and the projection shortcut
    conv_args = dict(num_outputs=n_out, kernel_size=[3, 3],
                     weights_regularizer=tf.nn.l2_loss, stride=1)
    residual = tf.contrib.layers.conv2d(inputs=x, **conv_args)
    # activation_fn=None keeps the shortcut linear
    shortcut = tf.contrib.layers.conv2d(inputs=x, activation_fn=None, **conv_args)
    return residual + shortcut

In [3]:
# Start from an empty default graph so this cell can be re-run after changing the
# network without restarting the notebook
tf.reset_default_graph()

# Image batch placeholder: any batch size, 64x64 pixels, 3 channels
inputs = tf.placeholder(dtype=tf.float32, shape=(None, 64, 64, 3))

# Scalar training-mode flag; it stays False unless a feed_dict overrides it
training = tf.placeholder_with_default(input=False, shape=(), name='training')

# Step 1: Augment the training data
def data_augmentation(I):
    """Return image I after a random left-right flip."""
    return tf.image.random_flip_left_right(I)

# map_fn applies data_augmentation independently to each image in the batch. Since we are not
# cropping, the augmentation can run before whitening, which keeps evaluation simple.
aug_input = tf.map_fn(data_augmentation, inputs)

# During evaluation we don't want data augmentation: the validation loops feed this tensor
# directly (feed_dict={eval_inputs: ...}), which bypasses the augmentation above
eval_inputs = tf.identity(aug_input, name='inputs')

# Whenever you deal with image data it's important to mean center it first and divide by the
# standard deviation (100 and 72 are presumably the dataset pixel mean and std)
white_inputs = (eval_inputs - 100.) / 72.

# Set up your label placeholders: a 1-D batch of int64 class ids.
# The shape must be the tuple (None,); a bare (None) is just None, i.e. a fully unknown shape.
labels = tf.placeholder(tf.int64, (None,), name='labels')
# lrate= tf.placeholder(tf.float32,name='learning_rate')

# Network layout, one entry per stage: (number of residual layers, channels, pooling kernel).
# Every stage is followed by a stride-2 max pool scoped 'pool<stage>'. The last stage uses a 3x3
# kernel, which is expected to collapse the remaining feature map to 1x1 (see the shape note
# before the flatten below).
stage_specs = [
    (4, 15, [2, 2]),
    (2, 20, [2, 2]),
    (3, 25, [2, 2]),
    (5, 35, [2, 2]),
    (5, 60, [3, 3]),
]

with tf.name_scope('model'), tf.variable_scope('model'):
    # Step 4: Add residual connections
    #  For simplicity you do not need to add a residual connection to every layer, but add them to at least
    #      half of your layers
    #  Train your model (you should see it converge even faster now).

    h = white_inputs
    for stage, (n_layers, n_channels, pool_kernel) in enumerate(stage_specs, start=1):
        for _ in range(n_layers):
            # Residual layer followed by batch norm; `training` switches the batch-norm mode
            h = resnet_layer(h, n_channels)
            h = tf.layers.batch_normalization(h, training=training)
            print(h)
        h = tf.contrib.layers.max_pool2d(inputs=h, kernel_size=pool_kernel, stride=2,
                                         scope='pool%d' % stage)
        print(h)

    # Final 1x1 convolution without activation produces the 6 class logits
    h = tf.contrib.layers.conv2d(inputs=h, num_outputs=6, kernel_size=[1, 1],
        weights_regularizer=tf.nn.l2_loss, stride=1, activation_fn=None, scope='conv20')
    print(h)
    # The input here should be a   None x 1 x 1 x 6   tensor
    h = tf.contrib.layers.flatten(h, scope='out')

    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=h, labels=labels))

# Expose the logits under a fixed tensor name
output = tf.identity(h, name='output')

# Combined regularization term collected from the layers that registered a weights_regularizer
regularization_loss = tf.losses.get_regularization_loss()

# The regularization term is weighted down heavily, otherwise it hurts model performance.
# This weight can be tuned.
total_loss = loss + 1e-6 * regularization_loss

# Adam optimizer.
# NOTE: the learning rate may need tuning when switching architectures
# (0.001 might work well without BN, 0.1 with, 0.001 for resnets)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999)

# Tie the train op to the UPDATE_OPS collection so that batch-norm statistics are
# refreshed whenever a training step runs
batch_norm_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(batch_norm_updates):
    opt = optimizer.minimize(total_loss)

# Per-example correctness and its mean over the batch
correct = tf.equal(tf.argmax(output, 1), labels)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# Report the trainable parameter count next to 500000 (presumably the assignment's budget)
param_counts = [v.get_shape().num_elements() for v in tf.trainable_variables()]
print( "Total number of variables used ", np.sum(param_counts), '/', 500000 )


Tensor("model/model/batch_normalization/batchnorm/add_1:0", shape=(?, 64, 64, 15), dtype=float32)
Tensor("model/model/batch_normalization_2/batchnorm/add_1:0", shape=(?, 64, 64, 15), dtype=float32)
Tensor("model/model/batch_normalization_3/batchnorm/add_1:0", shape=(?, 64, 64, 15), dtype=float32)
Tensor("model/model/batch_normalization_4/batchnorm/add_1:0", shape=(?, 64, 64, 15), dtype=float32)
Tensor("model/model/pool1/MaxPool:0", shape=(?, 32, 32, 15), dtype=float32)
Tensor("model/model/batch_normalization_5/batchnorm/add_1:0", shape=(?, 32, 32, 20), dtype=float32)
Tensor("model/model/batch_normalization_6/batchnorm/add_1:0", shape=(?, 32, 32, 20), dtype=float32)
Tensor("model/model/pool2/MaxPool:0", shape=(?, 16, 16, 20), dtype=float32)
Tensor("model/model/batch_normalization_7/batchnorm/add_1:0", shape=(?, 16, 16, 25), dtype=float32)
Tensor("model/model/batch_normalization_8/batchnorm/add_1:0", shape=(?, 16, 16, 25), dtype=float32)
Tensor("model/model/batch_normalization_9/batchnor

## Part 2: Training

Training might take up to 20 min depending on your architecture (and if you have a GPU or not). A network without BN will train much slower, but try it first anyway.

In [4]:
image_val, label_val = load('tux_val.dat')
# Batch size
BS = 32

# Start a session
sess = tf.Session()

# Set up training
sess.run(tf.global_variables_initializer())

# Only referenced by the commented-out checkpointing code at the end of the epoch loop
creation = 0
num_train = image_data.shape[0]

# An epoch is a single pass over the training data
for epoch in range(20):
    # Draw one permutation per epoch and index both arrays with it, so images and labels stay
    # paired by construction. A RandomState seeded with the epoch keeps the order reproducible
    # without reseeding the global NumPy RNG or shuffling the dataset in place.
    order = np.random.RandomState(epoch).permutation(num_train)

    # Go through the entire dataset once (full batches only; a trailing partial batch is skipped)
    accuracy_vals, loss_vals = [], []
    for i in range(0, num_train - BS + 1, BS):
        # Train a single batch
        batch_idx = order[i:i+BS]
        batch_images, batch_labels = image_data[batch_idx], label_data[batch_idx]
        accuracy_val, loss_val, _ = sess.run([accuracy, total_loss, opt], feed_dict={inputs: batch_images, labels: batch_labels, training: True})
        accuracy_vals.append(accuracy_val)
        loss_vals.append(loss_val)

    # Validation feeds eval_inputs directly (skipping augmentation) and leaves `training`
    # at its default of False
    val_correct = []
    for i in range(0, image_val.shape[0], BS):
        batch_images, batch_labels = image_val[i:i+BS], label_val[i:i+BS]
        val_correct.extend( sess.run(correct, feed_dict={eval_inputs: batch_images, labels: batch_labels}) )
    val_accuracy = np.mean(val_correct)
    print('[%3d] Accuracy: %0.3f  \t  Loss: %0.3f  \t  validation accuracy: %0.3f'%(epoch, np.mean(accuracy_vals), np.mean(loss_vals), val_accuracy))
    
#     if creation==0:
#         present_cal_acc= np.mean(val_correct)
#         creation+=1
#     if present_cal_acc+0.005 < np.mean(val_correct):
#         present_cal_acc= np.mean(val_correct)
#         print("saving")
#         util.save('assignment6_v3_best.tfg', session=sess)
        

[  0] Accuracy: 0.886  	  Loss: 0.353  	  validation accuracy: 0.825
[  1] Accuracy: 0.933  	  Loss: 0.204  	  validation accuracy: 0.913
[  2] Accuracy: 0.948  	  Loss: 0.153  	  validation accuracy: 0.889
[  3] Accuracy: 0.957  	  Loss: 0.133  	  validation accuracy: 0.898
[  4] Accuracy: 0.960  	  Loss: 0.116  	  validation accuracy: 0.925
[  5] Accuracy: 0.965  	  Loss: 0.105  	  validation accuracy: 0.931
[  6] Accuracy: 0.968  	  Loss: 0.097  	  validation accuracy: 0.915
[  7] Accuracy: 0.972  	  Loss: 0.083  	  validation accuracy: 0.937
[  8] Accuracy: 0.972  	  Loss: 0.082  	  validation accuracy: 0.900
[  9] Accuracy: 0.974  	  Loss: 0.071  	  validation accuracy: 0.938
[ 10] Accuracy: 0.980  	  Loss: 0.061  	  validation accuracy: 0.934
[ 11] Accuracy: 0.977  	  Loss: 0.067  	  validation accuracy: 0.953
[ 12] Accuracy: 0.978  	  Loss: 0.069  	  validation accuracy: 0.944
[ 13] Accuracy: 0.981  	  Loss: 0.059  	  validation accuracy: 0.930
[ 14] Accuracy: 0.978  	  Loss: 0.

## Part 3: Evaluation

### Compute the validation accuracy

In [5]:
# Reload the validation split and report its shapes
image_val, label_val = load('tux_val.dat')

print('Input shape: {}'.format(image_val.shape))
print('Labels shape: {}'.format(label_val.shape))

# Collect per-example correctness batch by batch. Feeding eval_inputs skips the
# augmentation step, and `training` keeps its default of False.
val_correct = []
for start in range(0, image_val.shape[0], BS):
    stop = start + BS
    feed = {eval_inputs: image_val[start:stop], labels: label_val[start:stop]}
    val_correct.extend(sess.run(correct, feed_dict=feed))
print("ConvNet Validation Accuracy: ", np.mean(val_correct))

Input shape: (3912, 64, 64, 3)
Labels shape: (3912,)
ConvNet Validation Accuracy:  0.955010224949


## Part 4: Save Model
Please note that we also want you to turn in your ipynb for this assignment.  Zip up the ipynb along with the tfg for your submission.

In [6]:
# util.save('assignment6_v3.tfg', session=sess)

### Part 5 (optional): See your model

In [7]:
# Show the current graph. In the recorded run the notebook server stopped sending
# output for this cell because the IOPub data rate limit was exceeded.
graph_def = tf.get_default_graph().as_graph_def()
util.show_graph(graph_def)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.
