In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


# Homework 8
Let's draw some pictures today.

## Part 0: Setup

In [1]:
import tensorflow as tf
import numpy as np
import util

# RGB palette used to render label maps: row k is the display color of class k
COLORS = np.array(
    [
        (0, 0, 0),        # class 0: black
        (255, 0, 0),      # class 1: red
        (0, 255, 0),      # class 2: green
        (255, 255, 0),    # class 3: yellow
        (0, 0, 255),      # class 4: blue
        (255, 255, 255),  # class 5: white
    ],
    dtype=np.uint8,
)
# Side length (in pixels) of the square patch cut out by the random crop in parser()
CROP_SIZE = 64

def parser(record):
    """Decode one serialized TFRecord example into an augmented (image, label) pair.

    The record is expected to carry four features: scalar int64 'height' and
    'width', plus the raw uint8 bytes of the RGB image ('image_raw') and of the
    label map ('label_raw').

    Args:
        record: a scalar string tensor holding one serialized example.

    Returns:
        A tuple (image, label) of uint8 tensors with shapes
        [CROP_SIZE, CROP_SIZE, 3] and [CROP_SIZE, CROP_SIZE]. Both come from the
        same random crop and the same random left/right flip.
    """
    # Layout of a single example inside the TF record
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label_raw': tf.FixedLenFeature([], tf.string),
    }
    example = tf.parse_single_example(record, features=feature_spec)

    # The raw byte strings are flat, so the stored size is needed to restore the 2D layout
    height = tf.cast(example['height'], tf.int32)
    width = tf.cast(example['width'], tf.int32)
    rgb = tf.reshape(tf.decode_raw(example['image_raw'], tf.uint8), [height, width, 3])
    classes = tf.reshape(tf.decode_raw(example['label_raw'], tf.uint8), [height, width])

    ## Data augmentation
    # Glue the label map onto the image as a 4th channel so that every random
    # operation below hits image and label identically
    stacked = tf.concat([rgb, tf.expand_dims(classes, -1)], axis=-1)
    stacked = tf.random_crop(stacked, [CROP_SIZE, CROP_SIZE, 4])
    stacked = tf.image.random_flip_left_right(stacked)

    # Split the channels again: the first three are the image, the last one is the label
    return stacked[:, :, :-1], stacked[:, :, -1]

def load_dataset(tfrecord):
    """Build the input pipeline for one TFRecord file.

    Args:
        tfrecord: path of the TFRecord file to read.

    Returns:
        A dataset that yields batches of 32 (image, label) pairs produced by
        parser(), shuffled through a 10000-element buffer and repeated
        indefinitely.
    """
    return (
        tf.contrib.data.TFRecordDataset(tfrecord)
        # Decode and augment every record
        .map(parser, num_threads=8, output_buffer_size=1024)
        # Shuffle, group into batches, then loop over the data forever
        .shuffle(buffer_size=10000)
        .batch(32)
        .repeat()
    )

# We still have 6 classes.
# num_classes is the depth handed to tf.one_hot when the label map is encoded in Part 1.
num_classes = 6

## Part 1: Define your convnet

In [2]:
# Pick a fresh log directory for this run (if you run low on disk space you can either
# disable this or delete old logs)
# run: `tensorboard --logdir log` to see all the nice summaries
from itertools import count
from os import path

# Probe log/model_0, log/model_1, ... and stop at the first path that does not exist yet.
# The search is deliberately unbounded: with a fixed cap of 1000 candidates the loop would
# fall through once all of them exist and silently reuse log/model_999, mixing the event
# files of two different runs in a single directory.
for n_model in count():
    LOG_DIR = 'log/model_%d' % n_model
    if not path.exists(LOG_DIR):
        break

# Let's clear the tensorflow graph, so that you don't have to restart the notebook every time you change the network
tf.reset_default_graph()

# Graph-side copy of the color palette; it is indexed with the label map to build the 'label' image summary
TF_COLORS = tf.constant(COLORS)

train_data = load_dataset('train.tfrecord')
valid_data = load_dataset('valid.tfrecord')

# Create an iterator for the datasets
# The iterator allows us to quickly switch between training and validation.
# It is built from a structure rather than from one dataset: the element types come from train_data,
# and the shapes are given as a rank-4 image batch with 3 channels and a rank-3 label batch,
# with every other dimension left unspecified (None).
iterator = tf.contrib.data.Iterator.from_structure(train_data.output_types, ((None,None,None,3), (None,None,None)))

# and fetch the next images from the dataset (every time next_image is evaluated a new image set of 32 images is returned)
next_image, next_label = iterator.get_next()

# Define operations that switch between train and valid
# (running one of them re-initializes the shared iterator on that dataset)
switch_train_op = iterator.make_initializer(train_data)
switch_valid_op = iterator.make_initializer(valid_data)

# Convert the input
image = tf.cast(next_image, tf.float32)
label = tf.cast(next_label, tf.int32)

# Define the input
# The explicit name makes the tensor addressable as 'label:0'; the evaluation cell feeds it by that name.
label = tf.identity(label, name='label')
# Whiten the one hot
# A fixed per-class offset is subtracted from the one-hot encoding.
# NOTE(review): the six offsets sum to ~1, so they are presumably the class frequencies of the training set - confirm.
one_hot_label = tf.one_hot(label, num_classes) - np.array([ 0.66839117, 0.00382957, 0.00092516, 0.00345217, 0.00339063, 0.3200113 ])[None,None,None,:]

# Whiten the image
# NOTE(review): 100 and 72 are presumably the approximate mean and standard deviation of the pixel values - confirm.
# The same two constants are used again for white_lr below and, inverted, when the output image is produced.
white_image = (image - 100.) / 72.

# Let's upsample an image using the label map as a guidance
# The low-resolution input is made from the full-resolution image by average pooling (pool size 5, stride 4).
image_lr = tf.layers.average_pooling2d(image, 5, 4, padding='SAME')
# Named so that it can be fed directly as 'image_lr:0', which is what the evaluation cell does.
image_lr = tf.identity(image_lr, name='image_lr')
white_lr = (image_lr - 100.) / 72.

# TODO: Define your convnet here ()
# Encoder/decoder with skip connections that predicts a correction on top of the
# resized low-resolution image.

# Resize the whitened low-resolution image to the spatial size of the label map
upsampled = tf.image.resize_images(white_lr, tf.shape(label)[1:3])

C0 = 25  # number of filters of the first level; level i uses int(C0*1.5**i) filters
D = 5    # number of stride-2 levels on the way down (and again on the way up)
# The network input is the whitened one-hot label map concatenated with the resized image
h = tf.concat([one_hot_label, upsampled], axis=-1)
hs = []
for i in range(D):
    # Remember the input of every level; the upsampling path concatenates it back in
    hs.append(h)
    h = tf.contrib.layers.conv2d(h, int(C0*1.5**i), (3,3), stride=2, scope='conv%d'%(i+1))
    # Re-inject the low-resolution image, resized to the current feature map size, at every level
    h = tf.concat([h, tf.image.resize_images(white_lr,  tf.shape(h)[1:3])], axis=-1)

# Walk the levels in reverse order: one stride-2 transposed convolution per level,
# followed by a concatenation with the features saved at the same level on the way down
for i in range(D)[::-1]:
    h = tf.contrib.layers.conv2d_transpose(h, int(C0*1.5**i), (3,3), stride=2, scope='upconv%d'%(i+1))
    h = tf.concat([h, hs[i]], axis=-1)
# Two 1x1 convolutions map the features to 3 output channels; the last one has no activation function
h = tf.contrib.layers.conv2d(h, C0, (1,1), scope='fc1')
h = tf.contrib.layers.conv2d(h, 3, (1,1), scope='cls', activation_fn=None)

# Residual connection: the convnet output is added to the resized low-resolution image
h = h + upsampled

# Let's compute the output image: undo the whitening, clip to [0, 255] and cast to uint8.
# The tensor is named so that it can be fetched as 'output:0', which is what the evaluation cell does.
output = tf.cast(tf.clip_by_value(72.*h + 100., 0, 255), tf.uint8, name='output')

# Define the loss function
# Mean absolute error between the whitened full-resolution image and the prediction h
loss = tf.reduce_mean(tf.abs(white_image - h))

# Let's weight the regularization loss down, otherwise it will hurt the model performance
# You can tune this weight if you wish
# NOTE(review): no layer in this notebook is given an explicit regularizer, so this term is presumably zero - confirm.
regularization_loss = tf.losses.get_regularization_loss()
total_loss = loss + 1e-6 * regularization_loss

# Adam will likely converge much faster than SGD for this assignment.
optimizer = tf.train.AdamOptimizer(0.001, 0.9, 0.999)
# optimizer = tf.train.MomentumOptimizer(0.001, 0.9)

# use that optimizer on your loss function (control_dependencies makes sure any
# batch_norm parameters are properly updated)
# `opt` is the op the training loop runs; it minimizes total_loss, while the reported value is `loss`.
with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
    opt = optimizer.minimize(total_loss)

# Let's define some summaries for tensorboard
# Look up the palette color of every label value to turn the label map into an RGB image
colored_label = tf.gather_nd(TF_COLORS, label[:,:,:,None])
tf.summary.image('image', next_image, max_outputs=3)
tf.summary.image('label', colored_label, max_outputs=3)
tf.summary.image('output', output, max_outputs=3)
tf.summary.image('image_lr', image_lr, max_outputs=3)
# The two scalar summaries read from placeholders: the training loop computes the values
# in Python and feeds them by name ('loss:0' and 'val_loss:0').
tf.summary.scalar('loss', tf.placeholder(tf.float32, name='loss'))
tf.summary.scalar('val_loss', tf.placeholder(tf.float32, name='val_loss'))

# One op that evaluates every summary defined above, and the writer that stores them under LOG_DIR
merged_summary = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(LOG_DIR, tf.get_default_graph())

# Let's compute the model size
# (sum of the element counts of all trainable variables)
print( "Total number of variables used ", np.sum([v.get_shape().num_elements() for v in tf.trainable_variables()]) )

Total number of variables used  602359


## Part 2: Training

Training might take up to 20 minutes, depending on your architecture and on whether you have a GPU.

In [4]:
# Open the session; the evaluation and save cells below reuse it
sess = tf.Session()

# Give every variable its initial value
sess.run(tf.global_variables_initializer())

# Every round consists of 10 optimizer steps on the training data followed by
# one loss evaluation on the validation data
for it in range(300):
    # Point the shared iterator at the training set and take the optimizer steps,
    # keeping the loss value of each step
    sess.run(switch_train_op)
    loss_vals = [sess.run([loss, opt])[0] for _ in range(10)]
    train_loss = np.mean(loss_vals)

    # Point the shared iterator at the validation set; only the loss is fetched here
    sess.run(switch_valid_op)
    loss_val = sess.run(loss)

    # Let's update tensorboard: the two scalar summaries are fed from the values computed above
    summary = sess.run(merged_summary, {'loss:0': train_loss, 'val_loss:0': loss_val})
    summary_writer.add_summary(summary, it)
    print('[%3d] Loss: %0.3f  \t  val loss A.: %0.3f'%(it, train_loss, loss_val))


[  0] Loss: 0.311  	  val loss A.: 0.260
[  1] Loss: 0.238  	  val loss A.: 0.227
[  2] Loss: 0.226  	  val loss A.: 0.188
[  3] Loss: 0.231  	  val loss A.: 0.166
[  4] Loss: 0.233  	  val loss A.: 0.200
[  5] Loss: 0.213  	  val loss A.: 0.158
[  6] Loss: 0.225  	  val loss A.: 0.164
[  7] Loss: 0.213  	  val loss A.: 0.212
[  8] Loss: 0.211  	  val loss A.: 0.180
[  9] Loss: 0.211  	  val loss A.: 0.178
[ 10] Loss: 0.213  	  val loss A.: 0.172
[ 11] Loss: 0.198  	  val loss A.: 0.166
[ 12] Loss: 0.194  	  val loss A.: 0.155
[ 13] Loss: 0.204  	  val loss A.: 0.163
[ 14] Loss: 0.199  	  val loss A.: 0.172
[ 15] Loss: 0.179  	  val loss A.: 0.187
[ 16] Loss: 0.194  	  val loss A.: 0.176
[ 17] Loss: 0.183  	  val loss A.: 0.170
[ 18] Loss: 0.188  	  val loss A.: 0.156
[ 19] Loss: 0.176  	  val loss A.: 0.174
[ 20] Loss: 0.188  	  val loss A.: 0.174
[ 21] Loss: 0.172  	  val loss A.: 0.198
[ 22] Loss: 0.182  	  val loss A.: 0.156
[ 23] Loss: 0.194  	  val loss A.: 0.121
[ 24] Loss: 0.17

## Part 3: Evaluation
### Compute the validation error (mean absolute difference)

In [5]:
# Evaluate on the un-cropped validation images: downsample each image, let the network
# reconstruct it from the low-resolution version and the label map, and report the mean
# absolute pixel difference (on the 0..255 scale) to the original image.

# Not read anywhere in this cell; kept so the notebook namespace stays unchanged
total_lbl, total_cor = np.zeros(6)+1e-10, np.zeros(6)
# Stand-alone downsampling op with the same pooling settings as the training graph (pool size 5, stride 4)
I0 = tf.placeholder(tf.float32, shape=(1, None, None, 3))
LR = tf.layers.average_pooling2d(I0, 5, 4, padding='SAME', name='image_lr')

losses = []
for it in tf.python_io.tf_record_iterator('valid.tfrecord'):
    example = tf.train.Example()
    example.ParseFromString(it)
    feature = example.features.feature
    # Take the image size from the record itself (the same 'height'/'width' fields that
    # parser() uses) instead of assuming that every image is 256x256.
    # NOTE(review): the network's skip connections presumably still require sizes that
    # survive D halvings and doublings unchanged (e.g. multiples of 32) - confirm.
    height = int(feature['height'].int64_list.value[0])
    width = int(feature['width'].int64_list.value[0])
    I = np.frombuffer(feature['image_raw'].bytes_list.value[0], dtype=np.uint8).reshape(height, width, 3)
    L = np.frombuffer(feature['label_raw'].bytes_list.value[0], dtype=np.uint8).reshape(height, width)

    # Downsample the image, then feed the result and the label map to the trained graph by tensor name
    lr_val = sess.run(LR, {I0: I[None]})
    r = sess.run('output:0', {'image_lr:0':lr_val, 'label:0': L[None]})[0]
    losses.append(np.mean(np.abs(r.astype(np.float32)-I)))
print( 'Mean absolute difference', np.mean(losses) )

('Mean absolute difference', 8.1669874)


## Part 4: Save Model
Please note that we also want you to turn in your ipynb for this assignment.  Zip up the ipynb along with the tfg for your submission.

In [6]:
# Write the model held by the training session to 'assignment8.tfg'.
# NOTE(review): util.save comes from the local util module, which is not part of this notebook;
# presumably it serializes the session's graph together with the trained weights - confirm.
util.save('assignment8.tfg', session=sess)