# Practice Image Segmentation with FCN

# Description of the KITTI road dataset
* In the ground-truth (GT) images, the background is red and the road is purple.


In [1]:
import tensorflow as tf
import helper_gxdai as helper
import tqdm

In [2]:
# load pre-trained VGG-16
def load_vgg(sess, vgg_path):
    """Load the pre-trained VGG-16 SavedModel and return the tensors the FCN needs.

    Args:
        sess: TensorFlow session the SavedModel is loaded into.
        vgg_path: Directory of the SavedModel; it is loaded with the 'vgg16' tag.

    Returns:
        Tuple (image_input, keep_prob, layer3, layer4, layer7) of tensors
        fetched by name from the default graph.
    """
    # Load the SavedModel tagged 'vgg16' into the session; the return value
    # of the loader is not needed here.
    tf.saved_model.loader.load(sess, ['vgg16'], vgg_path)

    # Tensors are looked up by name in the default graph of the current thread.
    fetch = tf.get_default_graph().get_tensor_by_name
    tensor_names = (
        'image_input:0',
        'keep_prob:0',
        'layer3_out:0',
        'layer4_out:0',
        'layer7_out:0',
    )
    image_input, keep_prob, layer3, layer4, layer7 = (fetch(name) for name in tensor_names)

    return image_input, keep_prob, layer3, layer4, layer7



Create the layers for an FCN, using the tensors from the VGG model.
* **add 1x1 convolution to encoder.**
* **add decoder.**
* **add skip connection and upsampling.**

In [3]:
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
    """Build the FCN decoder on top of three VGG encoder outputs.

    The layer-7 output goes through a 1x1 convolution and is then upsampled
    in three transposed-convolution steps (strides 2, 2 and 8). After each of
    the first two steps the result is added to the matching VGG output
    (layer 4, then layer 3) as a skip connection.

    Args:
        vgg_layer3_out: VGG layer-3 output tensor.
        vgg_layer4_out: VGG layer-4 output tensor.
        vgg_layer7_out: VGG layer-7 output tensor.
        num_classes: Number of classes to predict per pixel.

    Returns:
        Output tensor of the last transposed convolution ('fcn11'), which has
        num_classes filters.
    """
    def _depth(tensor):
        # Size of the last (channel) axis, needed so the element-wise add
        # with the skip tensor sees the same number of channels.
        return tensor.get_shape().as_list()[-1]

    # 1x1 convolution on the deepest encoder output.
    scored7 = tf.layers.conv2d(vgg_layer7_out, filters=num_classes,
                               kernel_size=1, name='fcn8')

    # Upsample 2x and match the channel count of VGG layer 4.
    up_to_layer4 = tf.layers.conv2d_transpose(
        scored7, filters=_depth(vgg_layer4_out),
        kernel_size=4, strides=(2,2), padding='SAME', name='fcn9')

    # First skip connection (element-wise add).
    fused4 = tf.add(vgg_layer4_out, up_to_layer4, name='fcn9_plus_vgg_layer4')

    # Upsample 2x again and match the channel count of VGG layer 3.
    up_to_layer3 = tf.layers.conv2d_transpose(
        fused4, filters=_depth(vgg_layer3_out),
        kernel_size=4, strides=(2,2), padding='SAME', name='fcn10')

    # Second skip connection.
    fused3 = tf.add(vgg_layer3_out, up_to_layer3, name='fcn10_plus_vgg_layer3')

    # Final 8x upsampling; filters=num_classes gives one score per class for
    # the pixel-wise prediction. This structure has no smoothing (3x3 conv)
    # step afterwards.
    pixel_scores = tf.layers.conv2d_transpose(
        fused3, filters=num_classes,
        kernel_size=16, strides=(8,8), padding='SAME', name='fcn11')

    return pixel_scores

# aka: Also Known As

In [4]:
def optimize(nn_last_layer, correct_label, learning_rate, num_classes):
    """Build the loss and the training op for the segmentation network.

    Args:
        nn_last_layer: Output tensor of the network (last axis assumed to be
            the class axis).
        correct_label: Ground-truth label tensor, reshaped the same way as the
            network output.
        learning_rate: Learning rate (value or tensor) passed to Adam.
        num_classes: Number of classes; size of the second axis after reshape.

    Returns:
        Tuple (logits, train_op, loss_op): the 2D logits, the Adam minimize
        op, and the mean cross-entropy loss.
    """
    # Flatten to 2D so that each row holds num_classes values.
    logits = tf.reshape(nn_last_layer, (-1, num_classes), name='fcn_logits')
    # The op name 'fcn_prediction' is kept unchanged for graph compatibility,
    # even though this tensor holds the labels.
    correct_label_reshaped = tf.reshape(correct_label, (-1, num_classes), name='fcn_prediction')

    # softmax_cross_entropy_with_logits is deprecated in favour of the _v2
    # variant, which by default also back-propagates into `labels`.
    # stop_gradient keeps the original behaviour (no gradient into labels).
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=tf.stop_gradient(correct_label_reshaped))

    # Mean (not total) of the per-row losses.
    loss_op = tf.reduce_mean(cross_entropy, name="fcn_loss")

    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_op, name='fcn_train_op')

    return logits, train_op, loss_op





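Define the training loop: the learning rate and dropout keep probability are set here, and the number of epochs and the batch size are passed in.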

In [None]:
def train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
             cross_entropy_loss, input_image,
             correct_label, keep_prob, learning_rate,
             keep_prob_value=0.5, learning_rate_value=0.001):
    """Run the training loop and print the accumulated loss of every epoch.

    Args:
        sess: Session used to run the training step.
        epochs: Number of passes over the training data.
        batch_size: Batch size forwarded to get_batches_fn.
        get_batches_fn: Callable; get_batches_fn(batch_size) yields
            (image_batch, label_batch) pairs.
        train_op: Op that performs one optimisation step.
        cross_entropy_loss: Loss tensor fetched together with train_op.
        input_image: Placeholder fed with the image batch.
        correct_label: Placeholder fed with the label batch.
        keep_prob: Placeholder fed with keep_prob_value.
        learning_rate: Placeholder fed with learning_rate_value.
        keep_prob_value: Dropout keep probability fed on every step
            (default 0.5, the previously hard-coded value).
        learning_rate_value: Learning rate fed on every step
            (default 0.001, the previously hard-coded value).

    Returns:
        None. Progress is reported with print().
    """
    for epoch in range(epochs):
        # Sum of the batch losses over the epoch (a total, not a mean).
        total_loss = 0

        # get_batches_fn is called once per epoch, so every epoch iterates
        # over a new generator.
        for x_batch, gt_batch in get_batches_fn(batch_size):
            loss, _ = sess.run([cross_entropy_loss, train_op], feed_dict={
                               input_image: x_batch, correct_label: gt_batch,
                               keep_prob: keep_prob_value,
                               learning_rate: learning_rate_value})

            total_loss += loss

        print("Epoch {} ...".format(epoch+1))
        print("Loss = {:.3f}".format(total_loss))

        print()

# Training the model

In [None]:
def run(**params_dict):
    """Build the FCN on top of VGG, train it, and save inference samples.

    Required keyword arguments (a missing one raises KeyError):
        data_dir, training_dir, image_shape, vgg_path, num_classes,
        correct_label, learning_rate, EPOCHS, BATCH_SIZE, runs_dir.
        correct_label and learning_rate are the placeholders that get fed
        during training.
    """
    required_keys = (
        'data_dir', 'training_dir', 'image_shape', 'vgg_path', 'num_classes',
        'correct_label', 'learning_rate', 'EPOCHS', 'BATCH_SIZE', 'runs_dir',
    )
    (data_dir, training_dir, image_shape, vgg_path, num_classes,
     correct_label, learning_rate, epochs, batch_size, runs_dir) = (
        params_dict[key] for key in required_keys)

    # Fetch the pretrained VGG model if needed.
    helper.maybe_download_pretrained_vgg(data_dir)

    # Batch generator factory for the training images.
    get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

    session_config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=session_config) as sess:
        # Encoder: tensors taken from the loaded VGG graph.
        image_input, keep_prob, layer3, layer4, layer7 = load_vgg(sess, vgg_path)

        # Decoder stacked on the encoder outputs.
        model_output = layers(layer3, layer4, layer7, num_classes)

        # Logits, training op and loss.
        logits, train_op, cross_entropy_loss = optimize(
            model_output, correct_label, learning_rate, num_classes)

        # Global variables first, then local ones.
        # NOTE(review): the global initializer also covers the variables
        # restored by load_vgg - confirm the pretrained weights survive it.
        for make_init_op in (tf.global_variables_initializer,
                             tf.local_variables_initializer):
            sess.run(make_init_op())

        print("Model build successfully, start training.")

        train_nn(sess, epochs, batch_size, get_batches_fn,
                 train_op, cross_entropy_loss, image_input,
                 correct_label, keep_prob, learning_rate)

        helper.save_inference_samples(runs_dir, data_dir, sess,
                                      image_shape, logits,
                                      keep_prob, image_input)

        print("DONE")
        

In [None]:
# training the specified model

def main():
    """Configure the experiment and launch training through run().

    Sets the hyper-parameters and data paths, creates the label and
    learning-rate placeholders, and passes everything to run() as keyword
    arguments.
    """
    import os

    # parameter settings
    num_classes = 2
    image_shape = (160, 576)
    EPOCHS = 100
    BATCH_SIZE = 16

    # Expose only GPU 0 to CUDA; set before the session is created in run().
    os.environ['CUDA_VISIBLE_DEVICES']="0"

    data_dir = './data'
    runs_dir = './runs'
    training_dir = './data/data_road/training'
    vgg_path = './data/vgg'

    # Placeholders fed during training. No keep_prob placeholder is created
    # here: run() uses the keep_prob tensor returned by load_vgg, so a
    # separate one would only be an unused node in the graph.
    correct_label = tf.placeholder(tf.float32, [None, image_shape[0], image_shape[1], num_classes])
    learning_rate = tf.placeholder(tf.float32)

    params_dict = {"data_dir": data_dir,
                   "training_dir": training_dir,
                   "image_shape": image_shape,
                   "vgg_path": vgg_path,
                   "num_classes": num_classes,
                   "correct_label": correct_label,
                   "learning_rate": learning_rate,
                   "EPOCHS": EPOCHS,
                   "BATCH_SIZE": BATCH_SIZE,
                   "runs_dir": runs_dir
                   }

    run(**params_dict)

In [None]:
# Entry point: start training when this cell/script runs as the main module.
if __name__ == '__main__':
    main()

INFO:tensorflow:Restoring parameters from ./data/vgg/variables/variables
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Model build successfully, start training.
Epoch 1 ...
Loss = 200.229

Epoch 2 ...
Loss = 4.830

Epoch 3 ...
Loss = 4.110

Epoch 4 ...
Loss = 3.954

Epoch 5 ...
Loss = 3.371

Epoch 6 ...
Loss = 3.107

Epoch 7 ...
Loss = 3.028

Epoch 8 ...
Loss = 3.003

Epoch 9 ...
Loss = 2.729

Epoch 10 ...
Loss = 2.973

Epoch 11 ...
Loss = 2.919

Epoch 12 ...
Loss = 2.633

Epoch 13 ...
Loss = 2.362

Epoch 14 ...
Loss = 2.409

Epoch 15 ...
Loss = 2.544

Epoch 16 ...
Loss = 2.437

Epoch 17 ...
Loss = 2.319

Epoch 18 ...
Loss = 2.111

Epoch 19 ...
Loss = 2.096

Epoch 20 ...
Loss = 1.952

Epoch 21 ...
Loss = 1.915

Epoch 22 ...
Loss = 1.961

Epoch 23 ...
Loss = 1.915

Epoch 24 ...
Loss = 1.929

Epoch 25 ...
Loss = 1.802

Epoch 26 ...
Loss = 1.642
