In [1]:
import os.path
import tensorflow as tf
import helper
import warnings
from distutils.version import LooseVersion
import project_tests as tests
from tqdm import tqdm

In [2]:

# Check TensorFlow Version                                                                                                                                              
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer.  You are using {}'.format(tf.__version__)
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU                                                                                                                                                       
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 1.12.0
Default GPU Device: /device:GPU:0


In [3]:
vgg_path = '/home/diogoaos/P2/CarND-Semantic-Segmentation/data/vgg'

In [4]:
def load_vgg(sess, vgg_path):
    """                                                                                                                                                                 
    Load Pretrained VGG Model into TensorFlow.                                                                                                                          
    :param sess: TensorFlow Session                                                                                                                                     
    :param vgg_path: Path to vgg folder, containing "variables/" and "saved_model.pb"                                                                                   
    :return: Tuple of Tensors from VGG model (image_input, keep_prob, layer3_out, layer4_out, layer7_out)                                                               
    """
    # TODO: Implement function                                                                                                                                          
    #   Use tf.saved_model.loader.load to load the model and weights                                                                                                    
    vgg_tag = 'vgg16'
    vgg_input_tensor_name = 'image_input:0'
    vgg_keep_prob_tensor_name = 'keep_prob:0'
    vgg_layer3_out_tensor_name = 'layer3_out:0'
    vgg_layer4_out_tensor_name = 'layer4_out:0'
    vgg_layer7_out_tensor_name = 'layer7_out:0'                                                                                                                         
    
    tf.saved_model.loader.load(sess, [vgg_tag], vgg_path)
    g = tf.get_default_graph()
    input_layer = g.get_tensor_by_name(vgg_input_tensor_name)
    keep_prob = g.get_tensor_by_name(vgg_keep_prob_tensor_name)
    l3 = g.get_tensor_by_name(vgg_layer3_out_tensor_name)
    l4 = g.get_tensor_by_name(vgg_layer4_out_tensor_name)
    l7 = g.get_tensor_by_name(vgg_layer7_out_tensor_name)
    
    return input_layer, keep_prob, l3, l4, l7
print('load vgg: ', end='')
tests.test_load_vgg(load_vgg, tf)

load vgg: Tests Passed


In [5]:
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
    """                                                                                                                                                                 
    Create the layers for a fully convolutional network.  Build skip-layers using the vgg layers.                                                                       
    :param vgg_layer3_out: TF Tensor for VGG Layer 3 output                                                                                                             
    :param vgg_layer4_out: TF Tensor for VGG Layer 4 output                                                                                                             
    :param vgg_layer7_out: TF Tensor for VGG Layer 7 output                                                                                                             
    :param num_classes: Number of classes to classify                                                                                                                   
    :return: The Tensor for the last layer of output                                                                                                                    
    """
    # at the end of VGG16, the image has been downsampled to 1/32th of original size
    # Part1 (upsample 2x, final=2x):
    #   conv 1x1 of layer7
    #   upsample 2x previous
    #   conv 1x1 layer4
    #   add them
    # Part2 (upsample 2x, final=4x):
    #   upsample 2x Part1
    #   conv 1x1 layer3
    #   add them
    # Part3 (upsample 8x, final=32x):
    #   upsample Part2 8x
    
    # TODO: Implement function
    # 1 by 1 convolution from VGG output
    # stride is what is upsmapling, padding must be same, size may change
    # use regulizer to everylayer according to Aron
    # the regulizer penalizes when the weights get too large
    l7_conv_1x1 = tf.layers.conv2d(vgg_layer7_out,
                                filters=num_classes,
                                kernel_size=1,
                                strides=(1,1),
                                padding='same',
                                kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    #fcn32 = tf.layers.conv2d_transpose(l7_conv_1x1,
    #                                    filters=num_classes,
    #                                    kernel_size=64,
    #                                    strides=32,
    #                                    padding='same',
    #                                    kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    l4_conv_1x1 = tf.layers.conv2d(vgg_layer4_out,
                                filters=num_classes,
                                kernel_size=1,
                                strides=(1,1),
                                padding='same',
                                kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    l3_conv_1x1 = tf.layers.conv2d(vgg_layer3_out,
                                filters=num_classes,
                                kernel_size=1,
                                strides=(1,1),
                                padding='same',
                                kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    # second layer is transposed convolution from 1 by 1 convolution
    # we want to upsample x2 and then skip layer 4
    fcn16 = tf.layers.conv2d_transpose(l7_conv_1x1,
                                        filters=num_classes,
                                        kernel_size=4,
                                        strides=2,
                                        padding='same',
                                        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    fcn16 = tf.add(fcn16, l4_conv_1x1)
    #fcn16 = tf.layers.conv2d_transpose(fcn16,
    #                                    filters=num_classes,
    #                                    kernel_size=32,
    #                                    strides=16,
    #                                    padding='same',
    #                                    kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    
    # next we want to again upsample x2 (total 4x),
    # skip upsampled layer 4 (upsample x2)
    # skip layer 3
    fcn8 = tf.layers.conv2d_transpose(fcn16,
                                        filters=num_classes,
                                        kernel_size=4,
                                        strides=2,
                                        padding='same',
                                        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    fcn8 = tf.add(fcn8, l3_conv_1x1)
    fcn8 = tf.layers.conv2d_transpose(fcn8,
                                        filters=num_classes,
                                        kernel_size=16,
                                        strides=8,
                                        padding='same',
                                        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    
    return fcn8
print('layers: ', end='')
tests.test_layers(layers)

layers: Tests Passed


In [6]:
def optimize(nn_last_layer, correct_label, learning_rate, num_classes):
    """                                                                                                                                                                 
    Build the TensorFLow loss and optimizer operations.                                                                                                                 
    :param nn_last_layer: TF Tensor of the last layer in the neural network                                                                                             
    :param correct_label: TF Placeholder for the correct label image                                                                                                    
    :param learning_rate: TF Placeholder for the learning rate                                                                                                          
    :param num_classes: Number of classes to classify                                                                                                                   
    :return: Tuple of (logits, train_op, cross_entropy_loss)                                                                                                            
    """
    # logits : 2D tensor; rows=pixels; columns=pixel classes
    logits = tf.reshape(nn_last_layer, (-1, num_classes))
    correct_label = tf.reshape(correct_label, (-1,num_classes))
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=correct_label)
    loss_operation = tf.reduce_mean(cross_entropy)
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
    train_op = optimizer.minimize(loss_operation)
    return logits, train_op, loss_operation
tests.test_optimize(optimize)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Tests Passed


In [7]:
EPOCHS = 30
LEARNING_RATE = 0.001
BATCH_SIZE = 1
DROPOUT_RATE = 0.5

In [8]:
def train_nn(sess, epochs, batch_size, get_batches_fn, train_op, cross_entropy_loss, input_image,
             correct_label, keep_prob, learning_rate):
    """                                                                                                                                                                 
    Train neural network and print out the loss during training.                                                                                                        
    :param sess: TF Session                                                                                                                                             
    :param epochs: Number of epochs                                                                                                                                     
    :param batch_size: Batch size                                                                                                                                       
    :param get_batches_fn: Function to get batches of training data.  Call using get_batches_fn(batch_size)                                                             
    :param train_op: TF Operation to train the neural network                                                                                                           
    :param cross_entropy_loss: TF Tensor for the amount of loss                                                                                                         
    :param input_image: TF Placeholder for input images                                                                                                                 
    :param correct_label: TF Placeholder for label images                                                                                                               
    :param keep_prob: TF Placeholder for dropout keep probability                                                                                                       
    :param learning_rate: TF Placeholder for learning rate                                                                                                              
    """
    pbar = tqdm(total=EPOCHS)
    all_size = []
    all_loss = []
    for epoch in range(epochs):
        epoch_batch_size = []
        epoch_loss = []
        for image, label in get_batches_fn(batch_size):
            # create feed dict: input image, correct label, keep prob, learning rate
            # loss = session.run
            feed_dict = {input_image: image,
                         correct_label: label,
                         keep_prob: DROPOUT_RATE,
                         learning_rate: LEARNING_RATE}
            _, loss = sess.run([train_op, cross_entropy_loss], feed_dict=feed_dict)
            epoch_batch_size.append(len(image))
            epoch_loss.append(loss)
        all_loss.append(epoch_batch_size)
        all_loss.append(epoch_loss)
        print('epoch {} | {} images | loss {}'.format(epoch, sum(epoch_batch_size), sum(epoch_loss) / sum(epoch_batch_size)))
        pbar.update(1)
    pass
tests.test_train_nn(train_nn)

  3%|▎         | 1/30 [00:00<00:00, 205.36it/s]


INFO:tensorflow:Restoring parameters from ./data/vgg/variables/variables
Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [9]:
def run():
    num_classes = 2
    image_shape = (160, 576)  # KITTI dataset uses 160x576 images                                                                                                       
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model                                                                                                                                     
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the Kitti dataset.                                                                             
    # You'll need a GPU with at least 10 teraFLOPS to train on.                                                                                                         
    #  https://www.cityscapes-dataset.com/
    
    with tf.Session() as sess:
        label_layer = tf.placeholder(tf.int32, (None, None, None, num_classes), name='gt_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')
        
        # Path to vgg model                                                                                                                                             
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches                                                                                                                                
        get_batches_fn = helper.gen_batch_function(os.path.join(data_dir, 'data_road/training'), image_shape)

        # OPTIONAL: Augment Images for better results                                                                                                                   
        #  https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network                                                        

        # TODO: Build NN using load_vgg, layers, and optimize function
        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(sess, vgg_path)
        layer_output = layers(layer3, layer4, layer7, num_classes)

        # TODO: Train NN using the train_nn function
        
        logits, train_op, cross_entropy_loss = optimize(layer_output, label_layer, learning_rate, num_classes )
        
        init_op = tf.initialize_all_variables()
        sess.run(init_op)
        
        train_nn(sess, EPOCHS, BATCH_SIZE, get_batches_fn, train_op, cross_entropy_loss,
                 input_image, label_layer, keep_prob, learning_rate)

        # TODO: Save inference data using helper.save_inference_samples                                                                                                 
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image)                                                         

        # OPTIONAL: Apply the trained model to a video

In [10]:
run()

100%|██████████| 30/30 [29:27<00:00, 58.89s/it]