In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


# Homework 7
This homework focuses on fully convolutional networks.

## Part 0: Setup

In [2]:
import tensorflow as tf
import numpy as np
import util

# Colors to visualize the labeling
COLORS = np.array([(0,0,0), (255,0,0), (0,255,0), (255,255,0), (0,0,255), (255,255,255)], dtype=np.uint8)
CROP_SIZE = 64

def parser(record):
    # Parse the TF record
    parsed = tf.parse_single_example(record, features={
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label_raw': tf.FixedLenFeature([], tf.string)
    })
    # Load the data and format it
    H = tf.cast(parsed['height'], tf.int32)
    W = tf.cast(parsed['width'], tf.int32)
    image = tf.reshape(tf.decode_raw(parsed["image_raw"], tf.uint8), [H,W,3])
    label = tf.reshape(tf.decode_raw(parsed["label_raw"], tf.uint8), [H,W])
    
    ## Data augmentation
    # Stack the image and labels to make sure the same operations are applied
    data = tf.concat([image, label[:,:,None]], axis=-1)
    
    # TODO: Apply the data augmentation (you should both crop the images randomly and flip them)
    data= tf.image.random_flip_left_right(data)
    
    return data[:,:,:-1], data[:,:,-1]

def load_dataset(tfrecord):
    # Load the dataset
    dataset = tf.contrib.data.TFRecordDataset(tfrecord)

    # Parse the tf record entries
    dataset = dataset.map(parser, num_threads=8, output_buffer_size=1024)

    # Shuffle the data, batch it and run this for multiple epochs
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.batch(32) # note make this 32, my gpu did not work with 32 
    dataset = dataset.repeat()
    return dataset

# We still have 6 classes
num_classes = 6

## Part 1: Define your convnet
Important note. The label frequency is horribly inbalanced for this task. On the training set
```[ 0.66839117, 0.00382957, 0.00092516, 0.00345217, 0.00339063, 0.3200113 ]```
On the validation set
```[ 0.68367316, 0.00392016, 0.00165766, 0.00194697, 0.0034067, 0.30539535]```
Tux, bonus, objects and enemies make up less than 1.5% of all labels overall.
You should reweight the loss to address this, if you don't your model will likely ignore all but background and tile labels.

In [15]:
# Create a new log directory (if you run low on disk space you can either disable this or delete old logs)
# run: `tensorboard --logdir log` to see all the nice summaries
for n_model in range(1000):
    LOG_DIR = 'log/model_%d'%n_model
    from os import path
    if not path.exists(LOG_DIR):
        break

# Lets clear the tensorflow graph, so that you don't have to restart the notebook every time you change the network
tf.reset_default_graph()

TF_COLORS = tf.constant(COLORS)

train_data = load_dataset('train.tfrecord')
valid_data = load_dataset('valid.tfrecord')

# Create an iterator for the datasets
# The iterator allows us to quickly switch between training and validataion
iterator = tf.contrib.data.Iterator.from_structure(train_data.output_types, ((None,None,None,3), (None,None,None)))

# and fetch the next images from the dataset (every time next_image is evaluated a new image set of 32 images is returned)
next_image, next_label = iterator.get_next()

# Define operations that switch between train and valid
switch_train_op = iterator.make_initializer(train_data)
switch_valid_op = iterator.make_initializer(valid_data)

# Convert the input
image = tf.cast(next_image, tf.float32)
label = tf.cast(next_label, tf.int32)

# Whiten the input
inputs = tf.identity(image, name='inputs')
white_inputs = (inputs - 100.) / 72.


In [20]:
# TODO: Define your convnet here
# white_inputs = tf.placeholder(tf.float32, shape=(None,32,32,3))
h1 = tf.contrib.layers.conv2d(white_inputs, num_outputs=10, kernel_size=(5, 5), stride=(2, 2))
print(h1)
h2 = tf.contrib.layers.conv2d(h1, num_outputs=25, kernel_size=(5, 5), stride=(2, 2))
print(h2)

h3 = tf.contrib.layers.conv2d(h2, num_outputs=25, kernel_size=(3, 3), stride=(2, 2))
print(h3)

h4 = tf.contrib.layers.conv2d(h3, num_outputs=50, kernel_size=(3, 3), stride=(2, 2))
print(h4)

h1t = tf.contrib.layers.conv2d_transpose(h4, num_outputs=25, kernel_size=(5, 5), stride=(2, 2))
h1t_skip = tf.contrib.layers.conv2d_transpose(h4, num_outputs=10, kernel_size=(5, 5), stride=(16, 16))
print(h1t)
print(h1t_skip)

h2t = tf.contrib.layers.conv2d_transpose(h1t, num_outputs=50,kernel_size=(5, 5), stride=(2, 2))
h2t_skip = tf.contrib.layers.conv2d_transpose(h3, num_outputs=10, kernel_size=(5, 5), stride=(8, 8))
print(h2t)
print(h2t_skip)

h3t = tf.contrib.layers.conv2d_transpose(h2t, num_outputs=50,kernel_size=(5, 5), stride=(2, 2))
h3t_skip = tf.contrib.layers.conv2d_transpose(h1, num_outputs=10,kernel_size=(5, 5), stride=(2, 2))
print(h3t)

h4t = tf.contrib.layers.conv2d_transpose(h3t, num_outputs=10, kernel_size=(5, 5), stride=(2, 2))
print(h4t)


filter_concat = tf.concat([h1t_skip, h2t_skip, h3t_skip, h4t], axis=-1)
print('filter_concat: ' + str(filter_concat))

logit = tf.contrib.layers.conv2d(filter_concat, num_outputs=6, kernel_size=(1, 1), stride=(1, 1))
print('output: ' + str(logit))

Tensor("Conv/Relu:0", shape=(?, ?, ?, 10), dtype=float32)
Tensor("Conv_1/Relu:0", shape=(?, ?, ?, 25), dtype=float32)
Tensor("Conv_2/Relu:0", shape=(?, ?, ?, 25), dtype=float32)
Tensor("Conv_3/Relu:0", shape=(?, ?, ?, 50), dtype=float32)
Tensor("Conv2d_transpose/Relu:0", shape=(?, ?, ?, 25), dtype=float32)
Tensor("Conv2d_transpose_1/Relu:0", shape=(?, ?, ?, 10), dtype=float32)
Tensor("Conv2d_transpose_2/Relu:0", shape=(?, ?, ?, 50), dtype=float32)
Tensor("Conv2d_transpose_3/Relu:0", shape=(?, ?, ?, 10), dtype=float32)
Tensor("Conv2d_transpose_4/Relu:0", shape=(?, ?, ?, 50), dtype=float32)
Tensor("Conv2d_transpose_6/Relu:0", shape=(?, ?, ?, 10), dtype=float32)
filter_concat: Tensor("concat:0", shape=(?, ?, ?, 40), dtype=float32)
output: Tensor("Conv_4/Relu:0", shape=(?, ?, ?, 6), dtype=float32)


In [21]:
# Let's compute the output labeling
output = tf.identity(tf.argmax(logit, axis=-1), name='output')

# TODO: Define a weight per class here (try a function of the class frequencies)
# This is one of the most important steps to get the class accuracy higher
loss_weight = tf.constant([ 1., 100., 500., 100., 100., 1. ])
# Broadcast the weights spatially
weight = tf.gather_nd(loss_weight,label[:,:,:,None])

# Define the loss function
loss = tf.reduce_sum(weight * tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=label)) / tf.reduce_sum(weight)

# Let's weight the regularization loss down, otherwise it will hurt the model performance
# You can tune this weight if you wish
regularization_loss = tf.losses.get_regularization_loss()
total_loss = loss + 1e-6 * regularization_loss

# Adam will likely converge much faster than SGD for this assignment.
optimizer = tf.train.AdamOptimizer(0.001, 0.9, 0.999)

# use that optimizer on your loss function (control_dependencies makes sure any 
# batch_norm parameters are properly updated)
with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
    opt = optimizer.minimize(total_loss)
confusion = tf.confusion_matrix(labels=tf.reshape(label,[-1]), predictions=tf.reshape(output,[-1]), num_classes=num_classes)

# Let's define some summaries for tensorboard
colored_label = tf.gather_nd(TF_COLORS, label[:,:,:,None])
colored_output = tf.gather_nd(TF_COLORS, output[:,:,:,None])
tf.summary.image('confusion', tf.cast(confusion[None,:,:,None], tf.float32), max_outputs=1)
tf.summary.image('image', next_image, max_outputs=3)
tf.summary.image('label', colored_label, max_outputs=3)
tf.summary.image('output', colored_output, max_outputs=3)
tf.summary.scalar('loss', tf.placeholder(tf.float32, name='loss'))
tf.summary.scalar('accuracy', tf.placeholder(tf.float32, name='accuracy'))
tf.summary.scalar('class_accuracy', tf.placeholder(tf.float32, name='class_accuracy'))
tf.summary.scalar('jaccard', tf.placeholder(tf.float32, name='jaccard'))
tf.summary.scalar('val_accuracy', tf.placeholder(tf.float32, name='val_accuracy'))
tf.summary.scalar('val_class_accuracy', tf.placeholder(tf.float32, name='val_class_accuracy'))
tf.summary.scalar('val_jaccard', tf.placeholder(tf.float32, name='val_jaccard'))

merged_summary = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(LOG_DIR, tf.get_default_graph())

# Let's compute the model size
print( "Total number of variables used ", np.sum([v.get_shape().num_elements() for v in tf.trainable_variables()]) )

Total number of variables used  183146


## Part 2: Training

Training might take up to 20 min depending on your architecture (and if you have a GPU or not).

In [22]:
def accuracy(confusion):
    # Overall pixelwise accuracy
    # This metric heavily favors tiles and background (as they are most frequent)
    return np.sum(np.diag(confusion)) / np.sum(confusion)

def class_accuracy(confusion):
    # Class wise accuracy
    # This metric normalizes for class frequencies and favors small classes
    return np.mean(np.diag(confusion) / (np.sum(confusion, axis=1) + 1e-10))

def jaccard(confusion):
    # Jaccard index
    # A mix of the above, neither favors small or large classes much
    D = np.diag(confusion)
    return np.mean( D / (np.sum(confusion, axis=1) + np.sum(confusion, axis=0) - D + 1e-10))

# Start a session
sess = tf.Session()

# Set up training
sess.run(tf.global_variables_initializer())
creation=0
# Run the training for some iterations
for it in range(100):
    sess.run(switch_train_op)

    total_confusion = np.zeros((num_classes, num_classes))
    loss_vals = []
    # Run 10 training iterations and 1 validation iteration
    for i in range(10):
        confusion_val, loss_val, _ = sess.run([confusion, loss, opt])
        total_confusion += confusion_val
        loss_vals.append(loss_val)
    
    sess.run(switch_valid_op)
    confusion_val = sess.run([confusion])
    confusion_val=np.ravel(confusion_val).reshape(6,6)
    
    # Let's update tensorboard
    summary_writer.add_summary( sess.run(merged_summary, {'loss:0': np.mean(loss_vals), 'accuracy:0': accuracy(total_confusion), 'class_accuracy:0': class_accuracy(total_confusion), 'jaccard:0': jaccard(total_confusion), 'val_accuracy:0': accuracy(confusion_val), 'val_class_accuracy:0': class_accuracy(confusion_val), 'val_jaccard:0': jaccard(confusion_val)}), it )
    print('[%3d] Loss: %0.3f  \t  A.: %0.3f  CA.: %0.3f  J.: %0.3f  \t  Val A.: %0.3f  CA.: %0.3f  J.: %0.3f'%(it, np.mean(loss_vals), accuracy(total_confusion), class_accuracy(total_confusion), jaccard(total_confusion), accuracy(confusion_val), class_accuracy(confusion_val), jaccard(confusion_val)))    
    
    if creation==0:
        present_cal_acc= class_accuracy(confusion_val)
        creation+=1
    if present_cal_acc+0.005 < class_accuracy(confusion_val):
        present_cal_acc= class_accuracy(confusion_val)
        print("saving")
        util.save('assignment7_best_'+str(present_cal_acc)+'.tfg', session=sess)

[  0] Loss: 1.775  	  A.: 0.413  CA.: 0.190  J.: 0.085  	  Val A.: 0.561  CA.: 0.204  J.: 0.103
[  1] Loss: 1.731  	  A.: 0.579  CA.: 0.206  J.: 0.109  	  Val A.: 0.620  CA.: 0.190  J.: 0.117
[  2] Loss: 1.664  	  A.: 0.589  CA.: 0.249  J.: 0.113  	  Val A.: 0.633  CA.: 0.253  J.: 0.127
saving
[  3] Loss: 1.599  	  A.: 0.581  CA.: 0.309  J.: 0.117  	  Val A.: 0.650  CA.: 0.322  J.: 0.130
saving
[  4] Loss: 1.521  	  A.: 0.586  CA.: 0.354  J.: 0.121  	  Val A.: 0.638  CA.: 0.370  J.: 0.133
saving
[  5] Loss: 1.447  	  A.: 0.595  CA.: 0.384  J.: 0.124  	  Val A.: 0.607  CA.: 0.413  J.: 0.134
saving
[  6] Loss: 1.350  	  A.: 0.579  CA.: 0.448  J.: 0.125  	  Val A.: 0.582  CA.: 0.437  J.: 0.129
saving
[  7] Loss: 1.339  	  A.: 0.553  CA.: 0.466  J.: 0.122  	  Val A.: 0.631  CA.: 0.405  J.: 0.136
[  8] Loss: 1.307  	  A.: 0.561  CA.: 0.483  J.: 0.130  	  Val A.: 0.574  CA.: 0.424  J.: 0.133
[  9] Loss: 1.240  	  A.: 0.549  CA.: 0.514  J.: 0.132  	  Val A.: 0.651  CA.: 0.421  J.: 0.161
[ 10]

[ 84] Loss: 0.305  	  A.: 0.888  CA.: 0.906  J.: 0.427  	  Val A.: 0.874  CA.: 0.736  J.: 0.396
[ 85] Loss: 0.320  	  A.: 0.888  CA.: 0.902  J.: 0.410  	  Val A.: 0.874  CA.: 0.745  J.: 0.400
[ 86] Loss: 0.317  	  A.: 0.883  CA.: 0.903  J.: 0.425  	  Val A.: 0.879  CA.: 0.769  J.: 0.432
[ 87] Loss: 0.323  	  A.: 0.883  CA.: 0.905  J.: 0.420  	  Val A.: 0.906  CA.: 0.777  J.: 0.412
[ 88] Loss: 0.297  	  A.: 0.902  CA.: 0.910  J.: 0.441  	  Val A.: 0.902  CA.: 0.736  J.: 0.402
[ 89] Loss: 0.289  	  A.: 0.898  CA.: 0.906  J.: 0.428  	  Val A.: 0.908  CA.: 0.760  J.: 0.393
[ 90] Loss: 0.263  	  A.: 0.895  CA.: 0.917  J.: 0.423  	  Val A.: 0.883  CA.: 0.747  J.: 0.399
[ 91] Loss: 0.299  	  A.: 0.907  CA.: 0.908  J.: 0.440  	  Val A.: 0.913  CA.: 0.720  J.: 0.385
[ 92] Loss: 0.306  	  A.: 0.890  CA.: 0.908  J.: 0.409  	  Val A.: 0.903  CA.: 0.774  J.: 0.380
[ 93] Loss: 0.326  	  A.: 0.888  CA.: 0.902  J.: 0.415  	  Val A.: 0.896  CA.: 0.731  J.: 0.372
[ 94] Loss: 0.313  	  A.: 0.895  CA.: 0.

## Part 3: Evaluation
### Compute the validation accuracy

In [23]:
total_lbl, total_cor = np.zeros(6)+1e-10, np.zeros(6)
for it in tf.python_io.tf_record_iterator('valid.tfrecord'):
    example = tf.train.Example()
    example.ParseFromString(it)
    I = np.frombuffer(example.features.feature['image_raw'].bytes_list.value[0], dtype=np.uint8).reshape(256, 256, 3)
    L = np.frombuffer(example.features.feature['label_raw'].bytes_list.value[0], dtype=np.uint8).reshape(256, 256)
    
    P = sess.run('output:0', {'inputs:0':I[None]})
    total_lbl += np.bincount(L.flat, minlength=6)
    total_cor += np.bincount(L.flat, (P==L).flat, minlength=6)
print( 'Mean class accuracy', np.mean(total_cor / total_lbl) )

Mean class accuracy 0.757314108743


## Part 4: Save Model
Please note that we also want you to turn in your ipynb for this assignment.  Zip up the ipynb along with the tfg for your submission.

In [None]:
util.save('assignment7_757.tfg', session=sess)