### Import dependencies

In [2]:
import os.path
import tensorflow as tf
import helper
import warnings
from distutils.version import LooseVersion
import project_tests as tests

### Check dependency versions

In [3]:
# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer.  You are using {}'.format(tf.__version__)
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU.
# Query the device name once and reuse the result for both the test and the
# message, instead of calling tf.test.gpu_device_name() twice.
gpu_device_name = tf.test.gpu_device_name()
if not gpu_device_name:
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(gpu_device_name))

TensorFlow Version: 1.4.1


  import sys


### Encoder
Loading the pre-trained VGG model. 

In the grand scheme of things, this is the encoder portion of the FCN. Luckily we don't have to implement this ourselves since we have a pretrained model, VGG, to use. *#TransferLearning*

In [4]:
def load_vgg(sess, vgg_path):
    """
    Restore the pretrained VGG model into the session and fetch the tensors the FCN is built on.
    :param sess: TensorFlow Session
    :param vgg_path: Path to vgg folder, containing "variables/" and "saved_model.pb"
    :return: Tuple of Tensors from VGG model (image_input, keep_prob, layer3_out, layer4_out, layer7_out)
    """
    model_tag = 'vgg16'

    # Names of the tensors to fetch, listed in the order they are returned.
    tensor_names = (
        'image_input:0',
        'keep_prob:0',
        'layer3_out:0',
        'layer4_out:0',
        'layer7_out:0',
    )

    # Restore the saved model found in the vgg_path directory under the given tag.
    tf.saved_model.loader.load(sess, [model_tag], vgg_path)

    # The tensors are looked up by name in the default graph.
    default_graph = tf.get_default_graph()
    return tuple(default_graph.get_tensor_by_name(name) for name in tensor_names)
tests.test_load_vgg(load_vgg, tf)




Tests Passed


### 1x1 Convolution and decoder

This is the decoder, upsampling, portion of the FCN architecture. This will help us reconstruct the image back to its original size.

Before the decoder, though, we need to finish the encoding process by adding a 1x1 convolution layer, i.e. `conv_1x1`, on top of the last VGG layer, i.e. VGG layer 7. This is used instead of a fully connected layer so that spatial information is preserved.

In [5]:
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
    """
    Create the layers for a fully convolutional network.  Build skip-layers using the vgg layers.
    :param vgg_layer3_out: TF Tensor for VGG Layer 3 output
    :param vgg_layer4_out: TF Tensor for VGG Layer 4 output
    :param vgg_layer7_out: TF Tensor for VGG Layer 7 output
    :param num_classes: Number of classes to classify, in this case 2 since it's binary, see implementation details below
    :return: The Tensor for the last layer of output
    """
    # TODO: add a kernel regularizer to the convolution layers.

    def project_to_classes(tensor):
        # 1x1 convolution: maps the channel dimension to num_classes while
        # leaving the spatial dimensions untouched.
        return tf.layers.conv2d(tensor, num_classes, kernel_size=1, strides=(1, 1), padding='same',
                                kernel_initializer=tf.random_normal_initializer(stddev=0.01))

    def upsample(tensor, stride):
        # Transposed convolution that scales height and width by `stride`.
        # The kernel is twice the stride so that neighbouring kernel footprints
        # overlap; a kernel smaller than the stride would leave output pixels
        # that receive no contribution from any input pixel.
        return tf.layers.conv2d_transpose(tensor, num_classes, kernel_size=2 * stride,
                                          strides=(stride, stride), padding='same',
                                          kernel_initializer=tf.random_normal_initializer(stddev=0.01))

    # apply 1x1 convolution to the final VGG layer, i.e. VGG 7
    conv_1x1 = project_to_classes(vgg_layer7_out)

    # first transpose layer, i.e. de-convolution (x2)
    transposed_1 = upsample(conv_1x1, 2)

    # first skip layer: 1x1 conv of vgg_layer4_out added to transposed_1
    conv_1x1_4 = project_to_classes(vgg_layer4_out)
    transposed_1 = tf.add(transposed_1, conv_1x1_4)

    # second transpose layer, i.e. de-convolution (x2)
    transposed_2 = upsample(transposed_1, 2)

    # second skip layer: 1x1 conv of vgg_layer3_out, i.e. a layer even further
    # back than vgg_layer4_out, added to transposed_2
    conv_1x1_3 = project_to_classes(vgg_layer3_out)
    transposed_2 = tf.add(transposed_2, conv_1x1_3)

    # third and final upsample with a stride of 8, 8 to get the original input size
    transposed_3 = upsample(transposed_2, 8)

    return transposed_3
tests.test_layers(layers)




Tests Passed


### Optimizer

In [6]:
def optimize(nn_last_layer, correct_label, learning_rate, num_classes):
    """
    Build the TensorFLow loss and optimizer operations.
    :param nn_last_layer: TF Tensor of the last layer in the neural network
    :param correct_label: TF Placeholder for the correct label image
    :param learning_rate: TF Placeholder for the learning rate
    :param num_classes: Number of classes to classify
    :return: Tuple of (logits, train_op, cross_entropy_loss)
    """
    # nn_last_layer is 4D, so we reshape it to 2D with one column per class.
    logits = tf.reshape(nn_last_layer, (-1, num_classes))
    # Flatten the labels the same way so they line up with the logits row by row.
    labels = tf.reshape(correct_label, (-1, num_classes))

    # Mean softmax cross entropy over all rows.
    cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(cross_entropy_loss)

    return logits, train_op, cross_entropy_loss
tests.test_optimize(optimize)

--------------shape of logits
Tests Passed


### Training

In [7]:
def train_nn(sess, epochs, batch_size, get_batches_fn, train_op, cross_entropy_loss, input_image,
             correct_label, keep_prob, learning_rate):
    """
    Train neural network and print out the loss during training.
    :param sess: TF Session
    :param epochs: Number of epochs
    :param batch_size: Batch size
    :param get_batches_fn: Function to get batches of training data.  Call using get_batches_fn(batch_size)
    :param train_op: TF Operation to train the neural network
    :param cross_entropy_loss: TF Tensor for the amount of loss
    :param input_image: TF Placeholder for input images
    :param correct_label: TF Placeholder for label images
    :param keep_prob: TF Placeholder for dropout keep probability
    :param learning_rate: TF Placeholder for learning rate
    """
    sess.run(tf.global_variables_initializer())

    for epoch in range(epochs):
        print("Epoch {} ...".format(epoch + 1))
        for images, labels in get_batches_fn(batch_size):
            # Run one training step and fetch the loss in the same call so it
            # can be reported. No graph ops are created inside this loop:
            # building ops (e.g. tf.Print) per batch would grow the graph on
            # every iteration.
            _, loss = sess.run([train_op, cross_entropy_loss],
                               feed_dict={input_image: images,
                                          correct_label: labels,
                                          keep_prob: 0.5,          # dropout keep probability used for training
                                          learning_rate: 0.001})   # fixed learning rate fed to the optimizer
            print("Loss: {:.4f}".format(loss))


tests.test_train_nn(train_nn)

INFO:tensorflow:Restoring parameters from b'./data/vgg/variables/variables'


### Execution

In [8]:
def run():
    """
    Build the network on top of the pretrained VGG model, train it on the data
    under ./data/data_road/training and save inference samples under ./runs.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the Kitti dataset.
    # You'll need a GPU with at least 10 teraFLOPS to train on.
    #  https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(os.path.join(data_dir, 'data_road/training'), image_shape)

        # OPTIONAL: Augment Images for better results
        #  https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        # Build NN using load_vgg, layers, and optimize function
        correct_label = tf.placeholder(tf.int32, shape=(None, None, None, num_classes))
        learning_rate = tf.placeholder(tf.float32)
        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(sess, vgg_path)
        final_layer = layers(layer3_out, layer4_out, layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(final_layer, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        epochs = 10
        batch_size = 5 #can't be too high or you'll get a Resource out exception

        train_nn(sess, epochs, batch_size, get_batches_fn, train_op, cross_entropy_loss, input_image, correct_label, keep_prob, learning_rate)

        # Save inference data while the session (and with it the trained
        # weights) is still open; without this call nothing of the training
        # is used once the `with` block exits.
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape, logits, keep_prob, input_image)

        # OPTIONAL: Apply the trained model to a video


if __name__ == '__main__':
    run()


KeyboardInterrupt: 