# Self-Driving Car Nanodegree Final Capstone Project


## Traffic Light Detector: Using SSD Net
This notebook is used to train an SSD network for traffic light detection.
> **Note:** The trained model will then be used in a real car, where it communicates with other modules such as the waypoint updater.

---

## Step 1: Dataset Exploration

The dataset used is:
    1. Bosch Small Traffic Light Dataset
    2. Udacity Simulator Dataset
    
Visualize the Dataset. This is implemented by plotting traffic light images, plotting the count of each sign, etc.


- The training dataset images are in RGB format:
    - The width, height, and channel count of each image will be determined
- The training labels are in a *.yaml file containing the following information:
    - Traffic light color
    - Bounding box coordinates for each traffic light

Note: If the training image is resized, the bounding boxes are rescaled accordingly

In [1]:
# Load the needed libraries
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2 #for generating additional images (by jittering existing images)
import random #to randomize the training dataset
#
import math
import time
#
import sys #for saving/opening files
import os #for saving/opening files e.g. saving tensorflow variables
import yaml #to open yaml files
#
%matplotlib inline
#print(tf.__version__)
#
from ssd_tf.nets import ssd_vgg_512
from data.Capstone_Data.capstone_data_processing import resize_with_crop

In [2]:
"""
Sample script to receive traffic light labels and images
of the Bosch Small Traffic Lights Dataset.
Most of these two functions are borrowed from Bosch traffic light dataset github (https://github.com/bosch-ros-pkg)
"""
def get_all_labels(input_yaml, riib=False):
    """ Gets all labels within label file

    Every entry's relative 'path' is rewritten as an absolute path resolved
    against the directory that contains ``input_yaml``.
    Note that RGB images are 1280x720 and RIIB images are 1280x736.

    :param input_yaml: Path to yaml file
    :param riib: If True, change path to labeled pictures (.png -> .pgm,
                 rgb/ -> riib/) and add 8 to y_min/y_max of every box
    :return: images: Labels for traffic lights (a list of label dicts,
             not actual image data)
    """
    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so only
    # point this at trusted label files. The explicit Loader is required because
    # PyYAML >= 6 rejects yaml.load() without one.
    with open(input_yaml, 'rb') as yaml_file:
        images = yaml.load(yaml_file, Loader=yaml.Loader)

    label_dir = os.path.dirname(input_yaml)
    for image_dict in images:
        image_dict['path'] = os.path.abspath(os.path.join(label_dir, image_dict['path']))
        if riib:
            image_dict['path'] = image_dict['path'].replace('.png', '.pgm')
            image_dict['path'] = image_dict['path'].replace('rgb/train', 'riib/train')
            image_dict['path'] = image_dict['path'].replace('rgb/test', 'riib/test')
            # presumably compensates for the 16 extra rows of the RIIB images -- TODO confirm
            for box in image_dict['boxes']:
                box['y_max'] = box['y_max'] + 8
                box['y_min'] = box['y_min'] + 8
    return images #images is a yaml list. Not actual image files.


"""
This method displays the traffic light labels within
the given images.
If given an output folder, it draws them to file.
"""
def ir(some_value):
    """Round ``some_value`` to the nearest integer and return it as an ``int``."""
    nearest = round(some_value)
    return int(nearest)

    
def show_label_images_with_resize(images_list, output_folder=None, new_size=None):
    """
    Shows and draws pictures with labeled traffic lights after resizing to the new size and
    adjusts the bounding boxes accordingly too.
    Can save pictures.

    :param images_list: list of label dicts; each needs a 'path' and a list of 'boxes',
                        and each box needs 'x_min', 'y_min', 'x_max', 'y_max', 'label'
                        and 'occluded'
    :param output_folder: If None, do not save picture (it is displayed with matplotlib
                          instead). Else enter path to folder
    :param new_size: row,col(y,x) to resize to, or None to keep the original size
    :raises IOError: if an image cannot be read
    Note: cv2 uses sizes in col,row (x,y) vs numpy is in row,col(y,x)
    """

    if output_folder is not None:
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

    font = cv2.FONT_HERSHEY_SIMPLEX #font for displaying the label
    for i, image_dict in enumerate(images_list):
        image = cv2.imread(image_dict['path']) #note returned image is a numpy array
        # cv2.imread returns None on failure, so this must be checked before .shape is read
        if image is None:
            raise IOError('Could not open image path', image_dict['path'])

        orig_size = image.shape[0:-1]
        if new_size is not None:
            image = cv2.resize(image, (new_size[1],new_size[0])) #cv2 uses sizes in col,row (x,y) vs numpy is in row,col(y,x)
            resize_scale = (new_size[0]/orig_size[0], new_size[1]/orig_size[1]) #row,col = y,x
            final_size = new_size
        else:
            resize_scale = (1.0,1.0) #use default value
            final_size = orig_size
        #scale the label text relative to a 720x1280 image
        text_size = 0.7*math.sqrt(final_size[0]*final_size[1]/(720*1280))

        for box in image_dict['boxes']:
            top_left = (ir(box['x_min']*resize_scale[1]), ir(box['y_min']*resize_scale[0]))
            bottom_right = (ir(box['x_max']*resize_scale[1]), ir(box['y_max']*resize_scale[0]))
            cv2.rectangle(image, top_left, bottom_right, (0, 255, 0))
            #occluded boxes are captioned 'occluded' instead of their color label
            caption = box['label'] if box['occluded'] == False else 'occluded'
            cv2.putText(image, caption, top_left, font, text_size, (0,0,255), 1)

        if output_folder is not None:
            cv2.imwrite(os.path.join(output_folder, str(i).zfill(4) + '_'
                        + os.path.basename(image_dict['path'])), image)
        else:
            #cv2 images are BGR; matplotlib expects RGB
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            plt.show()

In [3]:
# #Generate Mini-Training/Testing Dataset from the Original Bosch Dataset
# This is old; don't use it. Use the function below with the same name.
# def Generate_Mini_Dataset_with_Resize(input_data_yaml, mini_data_yaml, mini_data_folder, mini_data_size=500, new_size=None):
#     """
#     Generates a smaller dataset from the original dataset
#     :param input_data_yaml: full dataset's yaml file
#     :param mini_data_yaml: mini dataset's yaml file
#     :mini_data_folder: folder to save the mini dataset
#     :mini_data_size: size of the mini dataset
#     :new_size=row,col(y,x) 
#     Note: cv2 uses sizes in col,row (x,y) vs numpy is in row,col(y,x)
#     :return:nothing (but saves the mini_data_set images and yaml file at the specified location)
#     """
#     if not os.path.exists(mini_data_folder):
#         os.makedirs(mini_data_folder)

#     input_data_list = yaml.load(open(input_data_yaml, 'rb').read())
#     random.shuffle(input_data_list) #to randomize everything
#     random.shuffle(input_data_list) #to randomize everything (randomize twice)
#     mini_data_list = input_data_list[0:mini_data_size]

#     for i, mini_image_dict in enumerate(mini_data_list):
#         orig_path = os.path.abspath(os.path.join(os.path.dirname(input_data_yaml), mini_image_dict['path']))
#         #orig_path = mini_image_dict['path'] #debug only
#         image = cv2.imread(orig_path) #cv2 returns image as a numpy array
#         if new_size != None:
#             #resize the image
#             orig_size = image.shape[0:-1]
#             #print(orig_size)
#             image = cv2.resize(image, (new_size[1],new_size[0])) #cv2 uses sizes in col,row (x,y) vs numpy is in row,col(y,x)
#             resize_scale = (new_size[0]/orig_size[0], new_size[1]/orig_size[1]) #row,col = y,x
#         #end resize
#         if image is None:
#             raise IOError('Could not open image path', mini_image_dict['path'])
            
#         #make necessary changes to the mini_data_list that will eventually be written into a new yaml file
#         #mini_image_dict['path'] = os.path.abspath(os.path.join(mini_data_folder, str(i).zfill(4) + '_' + os.path.basename(mini_image_dict['path'])))
#         mini_image_dict['path'] = os.path.join(mini_data_folder, str(i).zfill(4) + '_' + os.path.basename(mini_image_dict['path']))
#         #update the gt_box coordinates due to resizing
#         if(new_size != None):
#             for box in mini_image_dict['boxes']:
#                 box['x_min']*=resize_scale[1]
#                 box['x_max']*=resize_scale[1]
#                 box['y_min']*=resize_scale[0]
#                 box['y_max']*=resize_scale[0]
            
#         #save the image in the mini_data location
#         cv2.imwrite(mini_image_dict['path'], image)
    
#     #write the mini_data yaml file
#     with open(mini_data_yaml, 'w') as yaml_file:
#         yaml.dump(mini_data_list, yaml_file, default_flow_style=False)


#Generate Mini-Training/Testing Dataset from the Original Bosch Dataset.
#This is mostly taking positive examples (i.e. images with traffic lights)
def Generate_Mini_Dataset_with_Resize(input_data_yaml, mini_data_yaml, mini_data_folder, mini_data_size=500, new_size=None):
    """
    Generates a smaller dataset from the original dataset

    The entries of the input yaml are shuffled and taken in order. Entries with
    fewer than 2 boxes are skipped (the quota of such "negative" examples is
    currently 0). Accepted images are optionally resized, written to
    ``mini_data_folder`` and their (rescaled) labels collected into ``mini_data_yaml``.

    Note: don't use this to resize capstone_data. Capstone data is handled separately
    using 'capstone_data_processing.py'. Use this for other dataset including Bosch data
    and simulator data from Soumya.

    :param input_data_yaml: full dataset's yaml file
    :param mini_data_yaml: mini dataset's yaml file
    :param mini_data_folder: folder to save the mini dataset
    :param mini_data_size: size of the mini dataset
    :param new_size: row,col(y,x), or None to keep the original image size
    Note: cv2 uses sizes in col,row (x,y) vs numpy is in row,col(y,x)
    :return: nothing (but saves the mini_data_set images and yaml file at the specified location)
    :raises ValueError: if the input runs out before ``mini_data_size`` images are accepted
    :raises IOError: if an image cannot be read
    """
    if not os.path.exists(mini_data_folder):
        os.makedirs(mini_data_folder)

    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so only
    # use this on trusted label files. The explicit Loader is required because
    # PyYAML >= 6 rejects yaml.load() without one.
    with open(input_data_yaml, 'rb') as yaml_file:
        input_data_list = yaml.load(yaml_file, Loader=yaml.Loader)
    random.shuffle(input_data_list) #to randomize everything
    random.shuffle(input_data_list) #to randomize everything (randomize twice)

    input_dir = os.path.dirname(input_data_yaml)
    mini_data_list = []
    min_number_of_bbox = 2
    negative_example_counter = 0
    max_negative_examples = 0 #int(mini_data_size/10) #10% of the total

    i = 0
    while len(mini_data_list) < mini_data_size:
        if i >= len(input_data_list):
            raise ValueError('Not Enough Training Images with the Desired Number of BBoxes')
        mini_image_dict = input_data_list[i]
        if len(mini_image_dict['boxes']) < min_number_of_bbox:
            if negative_example_counter >= max_negative_examples:
                i += 1 #update iterator
                continue #quota used up: reject this image
            negative_example_counter += 1

        orig_path = os.path.abspath(os.path.join(input_dir, mini_image_dict['path']))
        image = cv2.imread(orig_path) #cv2 returns image as a numpy array
        # cv2.imread returns None on failure, so this must be checked before .shape is read
        if image is None:
            raise IOError('Could not open image path', mini_image_dict['path'])

        if new_size is not None:
            #resize the image
            orig_size = image.shape[0:-1]
            image = cv2.resize(image, (new_size[1],new_size[0])) #cv2 uses sizes in col,row (x,y) vs numpy is in row,col(y,x)
            resize_scale = (new_size[0]/orig_size[0], new_size[1]/orig_size[1]) #row,col = y,x
            #update the gt_box coordinates due to resizing
            for box in mini_image_dict['boxes']:
                box['x_min'] *= resize_scale[1]
                box['x_max'] *= resize_scale[1]
                box['y_min'] *= resize_scale[0]
                box['y_max'] *= resize_scale[0]

        #the entry written to the new yaml file points at the copied image;
        #the file name is prefixed with the entry's index in the shuffled input list
        mini_image_dict['path'] = os.path.join(mini_data_folder, str(i).zfill(4) + '_' + os.path.basename(mini_image_dict['path']))

        #save the image in the mini_data location
        cv2.imwrite(mini_image_dict['path'], image)
        mini_data_list.append(mini_image_dict)
        i += 1

    #write the mini_data yaml file
    with open(mini_data_yaml, 'w') as yaml_file:
        yaml.dump(mini_data_list, yaml_file, default_flow_style=False)

In [4]:
# Save the archived full Bosch training data in a folder named "bosch_train".
use_full_bosch_data = False
if use_full_bosch_data:
    # yaml file containing image path, bounding box, and traffic light color
    training_file_yaml = './data/bosch_train/train.yaml'
    # Build the training data list straight from the full label file
    training_data_list = get_all_labels(training_file_yaml)
    print("Using Full Bosch Training Dataset")

In [5]:
# Paths of the (mini) training dataset and the files derived from it
mini_training_data_images_folder = './data/dummy_delete' #NOT USED FOR CAPSTONE DATA. IT IS HANDLED SEPARATELY
mini_training_data_yaml = './data/Capstone_Data/512_512/train.yaml'
mini_training_data_ssd_yaml = './data/Capstone_Data/512_512/train_ssd.yaml'
mini_training_val_data_yaml = './data/Capstone_Data/512_512/mini_val.yaml'
placeholder_data_ssd_yaml = './data/Capstone_Data/512_512/placeholder_data_ssd_mini_val.yaml'
#training_file_yaml = './data/mini_train/mini_train.yaml'
training_file_yaml = './data/bosch_train/train.yaml' #yaml file containing image path, bounding box, and traffic light color

generate_mini_train_data = False
if generate_mini_train_data:
    #Generate Mini_Training Dataset
    Generate_Mini_Dataset_with_Resize(training_file_yaml, mini_training_data_yaml,
                                      mini_training_data_images_folder, mini_data_size=500, new_size=(512,512)) #size=row,col(y,x)
    print("Generated Mini Training Dataset with Resize")

use_min_training_dataset = True
if use_min_training_dataset:
    # NOTE(review): yaml.Loader can construct arbitrary Python objects; only load
    # trusted, project-generated files. The explicit Loader is required because
    # PyYAML >= 6 rejects yaml.load() without one.
    with open(mini_training_data_yaml, 'rb') as yaml_file:
        training_data_list = yaml.load(yaml_file, Loader=yaml.Loader)
    print("Using Mini Training Dataset", mini_training_data_yaml)

Using Mini Training Dataset ./data/Capstone_Data/512_512/train.yaml


In [6]:
#Generate Validation Data
def generate_val_data_yaml(train_data_yaml, val_data_yaml, num_validation_images=32):
    """
    Split a random validation set off a training label file.

    The entries of ``train_data_yaml`` are shuffled; the first
    ``num_validation_images`` are written to ``val_data_yaml`` and the rest are
    written back to ``train_data_yaml``.
    NOTE: ``train_data_yaml`` is overwritten in place, so calling this again
    removes further entries from the training file.

    :param train_data_yaml: path of the training yaml file (read, then overwritten)
    :param val_data_yaml: path of the validation yaml file to create
    :param num_validation_images: number of entries moved to the validation set
    :return: nothing
    """
    # NOTE(review): yaml.Loader can construct arbitrary Python objects; only load
    # trusted, project-generated files. The explicit Loader is required because
    # PyYAML >= 6 rejects yaml.load() without one.
    with open(train_data_yaml, 'rb') as yaml_file:
        input_data_list = yaml.load(yaml_file, Loader=yaml.Loader)
    random.shuffle(input_data_list) #to randomize everything
    random.shuffle(input_data_list) #randomize twice

    val_data_list = input_data_list[:num_validation_images]
    train_data_list = input_data_list[num_validation_images:]

    #write the val_data_yaml to a yaml file
    with open(val_data_yaml, 'w') as yaml_file:
        yaml.dump(val_data_list, yaml_file, default_flow_style=False)

    #write the remaining training entries back to the training yaml file
    with open(train_data_yaml, 'w') as yaml_file:
        yaml.dump(train_data_list, yaml_file, default_flow_style=False)

        
# Set to True to (re)split the training yaml into training and validation files.
generate_validation_data = False
if generate_validation_data:
    generate_val_data_yaml(mini_training_data_yaml,mini_training_val_data_yaml)
    print("Generated Validation Data")
    # Print the number of entries left in the training file, then in the validation file.
    # NOTE(review): yaml.Loader can construct arbitrary Python objects; only load
    # trusted, project-generated files. PyYAML >= 6 requires an explicit Loader.
    for split_yaml in (mini_training_data_yaml, mini_training_val_data_yaml):
        with open(split_yaml, 'rb') as yaml_file:
            print(len(yaml.load(yaml_file, Loader=yaml.Loader)))

In [7]:
view_bounding_box_in_training_images = False
if view_bounding_box_in_training_images:
    # Draw the bounding boxes on a small sample of the training images and
    # save the annotated copies in the output folder.
    num_images_to_view = 30
    output_folder = 'delete_training_data_with_bbox/'
    view_images_list = training_data_list[:num_images_to_view]
    show_label_images_with_resize(view_images_list, output_folder, new_size=None) #size=row,col(y,x)
    print("Generated Training Images With Bounding Boxes")

In [8]:
# #For debug only. Comment out in normal operation.
# tmp_path1 = '/home/amit/Desktop/AmitProjects/SDCND/Term3/System_Integration/Project/CarND-Capstone-master/Traffic_Light_Detection/data/mini_train/images/0050_80808.png'
# tmp_path2 = '/home/amit/Desktop/AmitProjects/SDCND/Term3/System_Integration/Project/CarND-Capstone-master/Traffic_Light_Detection/data/mini_train/images/0117_538012.png'
# tmp_img_list = []
# for tmp_img_dict in training_data_list:
#     if (tmp_img_dict['path'] == tmp_path1): tmp_img_list.append(tmp_img_dict)
#     elif (tmp_img_dict['path'] == tmp_path2): tmp_img_list.append(tmp_img_dict)

# print(len(tmp_img_list))
# output_folder = 'training_data_with_bbox/'
# show_label_images_with_resize(tmp_img_list, output_folder)

##
- Use a functor/generator to send training data to the network.
- Preprocess the input image (mean shift and scaling).
- Don't rescale the input image. However, we still need to figure out how to handle different input image sizes during testing, i.e. we need to know the size of the image sent by the camera.

In [9]:
# Global variables shared by the SSD helper functions below.
# Number of per-layer entries kept in the encoded ground-truth lists.
num_feat_layers = 7
# Class ids: 0=background, 1=red, 2=yellow, 3=green, 4=off, 5=unknown/occluded.
num_of_classes = 6

In [10]:
def map_int_to_label(int_label):
    """
    Translate an integer class id (1..5) into its display label.

    :param int_label: class id; 0 (background) has no label
    :return: 'Red', 'Yellow', 'Green', 'off' or 'Occluded'
    :raises ValueError: for any other value
    """
    #No label for background (i.e. 0)
    names_by_id = (
        (1, 'Red'),
        (2, 'Yellow'),
        (3, 'Green'),
        (4, 'off'),
        (5, 'Occluded'),
    )
    for class_id, label in names_by_id:
        if int_label == class_id:
            return label
    raise ValueError('Label is unknown: ' + str(int_label))

def map_labels_to_int(box):
    """
    Translate a ground-truth box into its integer class id.

    Ids: 0,1,2,3,4,5 = background, red, yellow, green, off, occluded.
    Background (0) is never returned here: it stands for regions without a
    bounding box, which is different from unknown/occluded.

    :param box: label dict with an 'occluded' flag and (if not occluded) a 'label'
    :return: 1..4 according to the color group of the label, 5 if occluded
    :raises ValueError: if a non-occluded box has an unrecognised label
    """
    #anything but an explicit occluded == False counts as occluded
    if not (box['occluded'] == False):
        return 5

    #note: GreenRight, RedRight, YellowLeft, YellowRight, YellowStraight are not in the bosch training data.
    #They are listed for completeness and future usage.
    color_groups = (
        (1, ('Red', 'RedLeft', 'RedStraight', 'RedRight')),
        (2, ('Yellow', 'YellowLeft', 'YellowStraight', 'YellowRight')),
        (3, ('Green', 'GreenLeft', 'GreenStraight', 'GreenRight')),
        (4, ('off',)),
    )
    for class_id, names in color_groups:
        if box['label'] in names:
            return class_id
    raise ValueError('Label is unknown: ' + str(box['label']))


def generate_and_encode_ssd_data(ssd_net, ssd_anchors, data_yaml, data_ssd_yaml, image_shape, new_size=None):
    """
    Encode the ground truth of a label file against the SSD anchors and save it.

    For every selected image the boxes are normalized by ``image_shape``, mapped to
    integer class ids and passed to ``ssd_net.bboxes_encode``. The per-layer results
    are stacked over all images, evaluated in a TF session and dumped to
    ``data_ssd_yaml`` together with the image paths.

    :param ssd_net: SSD network object providing ``bboxes_encode``
    :param ssd_anchors: anchors handed to ``ssd_net.bboxes_encode``
    :param data_yaml: yaml file with the label dicts ('path' and 'boxes')
    :param data_ssd_yaml: yaml file the encoded data is written to
    :param image_shape: (height, width) used to normalize the box coordinates
    :param new_size: number of (shuffled) images to encode; None means all of them
    :return: nothing (prints the shapes of the first layer's stacked tensors)
    """
    im_h, im_w = image_shape #used for normalizing the bboxes by image_shape (similar to anchor boxes)
    # NOTE(review): yaml.Loader can construct arbitrary Python objects; only load
    # trusted, project-generated files. The explicit Loader is required because
    # PyYAML >= 6 rejects yaml.load() without one.
    with open(data_yaml, 'rb') as yaml_file:
        data_list = yaml.load(yaml_file, Loader=yaml.Loader)

    image_file_name_list = []
    #one list per feature layer; each collects that layer's tensors over all images
    encode_gt_labels_list = [[] for _ in range(num_feat_layers)]
    encode_gt_bboxes_list = [[] for _ in range(num_feat_layers)]
    encode_gt_scores_list = [[] for _ in range(num_feat_layers)]

    random.shuffle(data_list) #randomize the training data
    random.shuffle(data_list) #randomize twice

    if new_size is None:
        new_size = len(data_list) #can be set to the desired size
    for image_dict in data_list[0:new_size]:
        image_file_name_list.append(image_dict['path']) #add the path name
        img_gt_bboxes = []
        img_gt_labels = []
        for box in image_dict['boxes']:
            img_gt_bbox = tf.constant([box['y_min']/im_h, box['x_min']/im_w,
                                       box['y_max']/im_h, box['x_max']/im_w], dtype=tf.float32)
            img_gt_bboxes.append(img_gt_bbox)
            img_gt_label = tf.constant([map_labels_to_int(box)], dtype=tf.int64)
            img_gt_labels.append(img_gt_label)
        img_gt_bboxes = tf.stack(img_gt_bboxes, axis=0) #stack in the first dimension
        img_gt_labels = tf.stack(img_gt_labels, axis=0) #stack in the first dimension
        encode_gt_labels, encode_gt_bboxes, encode_gt_scores = ssd_net.bboxes_encode(img_gt_labels, img_gt_bboxes, ssd_anchors)
        for lst_idx in range(num_feat_layers):
            encode_gt_labels_list[lst_idx].append(encode_gt_labels[lst_idx])
            encode_gt_bboxes_list[lst_idx].append(encode_gt_bboxes[lst_idx])
            encode_gt_scores_list[lst_idx].append(encode_gt_scores[lst_idx])

    #turn each layer's per-image tensors into one tensor with the image index first
    for lst_idx in range(num_feat_layers):
        encode_gt_labels_list[lst_idx] = tf.stack(encode_gt_labels_list[lst_idx], axis=0)
        encode_gt_bboxes_list[lst_idx] = tf.stack(encode_gt_bboxes_list[lst_idx], axis=0)
        encode_gt_scores_list[lst_idx] = tf.stack(encode_gt_scores_list[lst_idx], axis=0)

    print(encode_gt_labels_list[0].get_shape().as_list())
    print(encode_gt_bboxes_list[0].get_shape().as_list())
    print(encode_gt_scores_list[0].get_shape().as_list())

    #evaluate the graph so that plain arrays (not tensors) are saved
    with tf.Session() as sess:
        encode_gt_labels_list,encode_gt_bboxes_list,encode_gt_scores_list = \
        sess.run([encode_gt_labels_list,encode_gt_bboxes_list,encode_gt_scores_list])

    #data_ssd_list will be saved in yaml file
    #lists are converted to tuples as a tuple can be sent to tensorflow feed_dict
    data_ssd_list = {
        'image_file_name_list': tuple(image_file_name_list),
        'encode_gt_labels_list': tuple(encode_gt_labels_list),
        'encode_gt_bboxes_list': tuple(encode_gt_bboxes_list),
        'encode_gt_scores_list': tuple(encode_gt_scores_list),
    }

    #write the data_ssd_list to a yaml file
    with open(data_ssd_yaml, 'w') as yaml_file:
        yaml.dump(data_ssd_list, yaml_file, default_flow_style=False)


In [11]:
def gen_batch_function(data_list):
    """
    Generate function to create batches of data for SSD training
    :param data_list: dict loaded from the encoded ssd yaml file; it is read through the keys
                      'image_file_name_list', 'encode_gt_labels_list',
                      'encode_gt_bboxes_list' and 'encode_gt_scores_list'
    :return: a function that, given a batch size, yields batches of images and encoded gt info
    """
    def get_batches_fn(batch_size):
        """
        Create batches of training data
        :param batch_size: Batch Size
        :return: generator of (images, encoded labels, encoded bboxes, encoded scores);
                 the last batch is smaller if the dataset is not divisible by batch_size
        :raises IOError: if an image cannot be read
        """
        #Note: the loaded data_list is already randomized (when it was saved)
        train_data_list = list(range(len(data_list['image_file_name_list'])))
        random.shuffle(train_data_list) #randomize for every epoch
        random.shuffle(train_data_list) #randomize twice
        blur_probability = 0.2 #randomly blur 20% of the images

        for batch_i in range(0, len(train_data_list), batch_size):
            #list slicing stops at the end of the list, so the last batch is simply shorter
            sel_list = train_data_list[batch_i:batch_i + batch_size]

            #per feature layer: pick the rows of the selected images
            batch_encode_gt_labels = [data_list['encode_gt_labels_list'][lst_idx][sel_list,:,:,:]
                                      for lst_idx in range(num_feat_layers)]
            batch_encode_gt_bboxes = [data_list['encode_gt_bboxes_list'][lst_idx][sel_list,:,:,:,:]
                                      for lst_idx in range(num_feat_layers)]
            batch_encode_gt_scores = [data_list['encode_gt_scores_list'][lst_idx][sel_list,:,:,:]
                                      for lst_idx in range(num_feat_layers)]

            batch_images = []
            for img_idx in sel_list:
                image_path = data_list['image_file_name_list'][img_idx]
                image = cv2.imread(image_path)
                #cv2.imread returns None on failure; fail here with the offending path
                #instead of with an obscure error inside np.stack
                if image is None:
                    raise IOError('Could not open image path', image_path)
                #randomly blur image
                if random.random() < blur_probability:
                    image = cv2.GaussianBlur(image, (9,9), sigmaX=5, sigmaY=5)
                batch_images.append(image)
            yield np.stack(batch_images,axis=0), batch_encode_gt_labels, batch_encode_gt_bboxes, batch_encode_gt_scores

    return get_batches_fn

In [12]:
def generate_encoded_gt_tf_placeholders(data_ssd_yaml, gt_encoded_labels, gt_encoded_scores, gt_encoded_bboxes):
    """
    Return the gt_placeholders to be used during training.

    For every entry of the saved 'encode_gt_labels_list' one placeholder each is
    created for labels (int64), scores (float32) and bboxes (float32). Their shapes
    follow the saved arrays, with the first dimension left open (None).

    :param data_ssd_yaml: yaml file holding the encoded ssd data
    :param gt_encoded_labels: list the label placeholders are appended to (mutated)
    :param gt_encoded_scores: list the score placeholders are appended to (mutated)
    :param gt_encoded_bboxes: list the bbox placeholders are appended to (mutated)
    :return: (gt_encoded_labels, gt_encoded_scores, gt_encoded_bboxes) as tuples
    """
    #Note: loading this file is slow (about 8 sec. for 10 images).
    # NOTE(review): the file holds arrays (their .shape is read below), which the
    # safe loader cannot rebuild, hence yaml.Loader. It can construct arbitrary
    # Python objects, so only load trusted, project-generated files. The explicit
    # Loader is required because PyYAML >= 6 rejects yaml.load() without one.
    with open(data_ssd_yaml, 'rb') as yaml_file:
        data_list = yaml.load(yaml_file, Loader=yaml.Loader)

    for i, labels_arr in enumerate(data_list['encode_gt_labels_list']):
        #gt_encoded_classes
        temp_shp = [None] + list(labels_arr.shape[1:])
        gt_encoded_labels.append(tf.placeholder(dtype=tf.int64, shape=temp_shp))
        #gt_encoded_scores
        temp_shp = [None] + list(data_list['encode_gt_scores_list'][i].shape[1:])
        gt_encoded_scores.append(tf.placeholder(dtype=tf.float32, shape=temp_shp))
        #gt_encoded_bboxes
        temp_shp = [None] + list(data_list['encode_gt_bboxes_list'][i].shape[1:])
        gt_encoded_bboxes.append(tf.placeholder(dtype=tf.float32, shape=temp_shp))

    #convert lists to tuples as a tuple can be sent to tensorflow feed_dict
    return tuple(gt_encoded_labels), tuple(gt_encoded_scores), tuple(gt_encoded_bboxes)


def unnormalize_predicted_bboxes(predicted_bboxes, image_shape):
    """
    Scale normalized boxes (relative to a 1x1 image) up to ``image_shape``.

    :param predicted_bboxes: list with one 5D tensor (NxHxWx(num_anchorboxes)x4) per feature map;
                             the last dimension holds (ymin, xmin, ymax, xmax)
    :param image_shape: (height, width) of the target image
    :return: list of tensors with the same layout, in image coordinates
    """
    img_h, img_w = image_shape
    #multiplier for each coordinate, in (ymin, xmin, ymax, xmax) order
    coord_scales = (img_h, img_w, img_h, img_w)
    return [
        tf.stack([bbox_layer[:,:,:,:,coord] * scale
                  for coord, scale in enumerate(coord_scales)],
                 axis=-1) #stack at the last dimension
        for bbox_layer in predicted_bboxes
    ]
    

In [13]:
#This is for debug only
def debug_encode_decode_image(image_dict):
    """
    Debug helper: encode one image's ground-truth boxes against the SSD anchors,
    decode them again and run the result through the detection post-processing.

    :param image_dict: label dict of a single image; its 'boxes' are used
    :return: (rbboxes_tst, rscores_tst) as evaluated by the TF session
    """
    #start from a clean tf graph
    tf.reset_default_graph()
    #instantiate the SSDNet class with its basic parameters and generate the
    #default anchor boxes for its image shape (i.e. for the features dimension)
    ssd_net = ssd_vgg_512.SSDNet()
    ssd_shape = ssd_net.params.img_shape #HxW
    ssd_anchors = ssd_net.anchors(ssd_shape)

    #encode image gt bbox: normalize by the image shape and map labels to class ids
    im_h, im_w = ssd_shape
    bbox_tensors = []
    label_tensors = []
    for box in image_dict['boxes']:
        normalized_box = [box['y_min']/im_h, box['x_min']/im_w,
                          box['y_max']/im_h, box['x_max']/im_w]
        bbox_tensors.append(tf.constant(normalized_box, dtype=tf.float32))
        label_tensors.append(tf.constant([map_labels_to_int(box)], dtype=tf.int64))

    img_gt_bboxes = tf.stack(bbox_tensors, axis=0) #stack in the first dimension
    img_gt_labels = tf.stack(label_tensors, axis=0) #stack in the first dimension
    encode_gt_labels, encode_gt_bboxes, encode_gt_scores = ssd_net.bboxes_encode(img_gt_labels, img_gt_bboxes, ssd_anchors)

    #add a leading batch dimension of 1 to every feature layer's tensors
    encode_gt_labels_list = []
    encode_gt_bboxes_list = []
    encode_gt_scores_list = []
    decode_gt_labels_list = []
    for layer in range(num_feat_layers):
        encode_gt_labels_list.append(tf.expand_dims(encode_gt_labels[layer], axis=0))
        encode_gt_bboxes_list.append(tf.expand_dims(encode_gt_bboxes[layer], axis=0))
        encode_gt_scores_list.append(tf.expand_dims(encode_gt_scores[layer], axis=0))

        #one hot encode the label classes so they can stand in for class predictions
        batched_labels = tf.expand_dims(encode_gt_labels[layer], axis=0)
        decode_gt_labels_list.append(tf.one_hot(indices=batched_labels, depth=num_of_classes))

    #decode image gt bbox and scale it back to image coordinates
    decode_gt_bboxes_list = unnormalize_predicted_bboxes(
        ssd_net.bboxes_decode(encode_gt_bboxes_list, ssd_anchors),
        image_shape=ssd_shape)
    rscores_tst, rbboxes_tst = ssd_net.detected_bboxes(decode_gt_labels_list, decode_gt_bboxes_list,
                                                       select_threshold=0.01,
                                                       nms_threshold=0.45,
                                                       clipping_bbox=None,
                                                       top_k=400,
                                                       keep_top_k=10)
    with tf.Session() as sess:
        rscores_tst, rbboxes_tst = sess.run([rscores_tst, rbboxes_tst])

    return rbboxes_tst,rscores_tst

In [14]:
def numpy_jaccard(bbox1,bbox2):
    """
    Jaccard index (intersection over union) of two boxes.

    :param bbox1: box indexed as [0]=ymin, [1]=xmin, [2]=ymax, [3]=xmax
    :param bbox2: second box, same layout
    :return: intersection area divided by union area
    """
    def box_area(bbox):
        return (bbox[2]-bbox[0])*(bbox[3]-bbox[1])

    #overlap extent along each axis, clamped at 0 for boxes that do not overlap
    overlap_h = np.maximum(np.minimum(bbox1[2],bbox2[2]) - np.maximum(bbox1[0],bbox2[0]), 0)
    overlap_w = np.maximum(np.minimum(bbox1[3],bbox2[3]) - np.maximum(bbox1[1],bbox2[1]), 0)
    overlap_area = overlap_h*overlap_w

    #union = both areas minus the part counted twice
    union_area = box_area(bbox1) + box_area(bbox2)
    union_area -= overlap_area
    return overlap_area/union_area
    

def calculate_image_MAP(data_list,rbb,rsc,img_id):
    """Compute precision, recall and F1 score of the detections of one image.

    Detections scoring above a 0.15 confidence threshold are compared, class
    by class, with the ground-truth boxes of ``data_list[img_id]``. A
    detection is a true positive when its Jaccard overlap with at least one
    ground-truth box of the same class exceeds 0.5.

    :param data_list: sequence of image dicts; each provides a 'boxes' list
                      whose entries carry 'y_min', 'x_min', 'y_max', 'x_max'
                      and whatever map_labels_to_int() reads
    :param rbb: detected boxes, indexed rbb[class][image][detection] ->
                (y_min, x_min, y_max, x_max)
    :param rsc: detection scores, indexed rsc[class][image][detection]
    :param img_id: index of the image in data_list and on the image axis of
                   rbb/rsc
    :return: (precision, recall, F1 score) for this image
    """
    confidence_threshold = 0.15
    iou_threshold = 0.5
    class_ids = range(1, num_of_classes) #class 0 is background, so it is ignored

    #load gt_data, grouped by class
    gt_bbox = {class_id: [] for class_id in class_ids}
    for box in data_list[img_id]['boxes']:
        label = map_labels_to_int(box)
        bbox = np.array([box['y_min'],box['x_min'],box['y_max'],box['x_max']], np.float32)
        gt_bbox[label].append(bbox)

    #The boxes are final predictions (after NMS), so their ordering doesn't
    #matter for the precision/recall calculations
    true_pos = 0
    false_pos = 0
    num_gt_bboxes = 0 #total number of positives used for recall (tp+fn)
    for class_id in class_ids:
        class_scores = rsc[class_id][img_id]
        class_bboxes = rbb[class_id][img_id]
        for j in range(rsc[class_id].shape[1]):
            if not (class_scores[j] > confidence_threshold):
                continue
            raw_bbox = class_bboxes[j]
            pred_bbox = np.array([raw_bbox[0],raw_bbox[1],raw_bbox[2],raw_bbox[3]], dtype=np.float32)
            matches = sum(1 for gt in gt_bbox[class_id]
                          if numpy_jaccard(gt, pred_bbox) > iou_threshold)
            if matches > 0:
                #first matching gt box confirms the prediction; every further
                #gt box it also overlaps is counted as a false positive
                true_pos += 1
                false_pos += matches - 1
            else:
                false_pos += 1
            #NOTE(review): a gt box is never marked as used, so two predictions
            #overlapping the same gt box both count as true positives - confirm
            #whether duplicates should be penalised instead
        num_gt_bboxes += len(gt_bbox[class_id])

    if((true_pos==0) and (false_pos== 0)):
        avg_precision = 1.0 #no predictions at all: debatable, but gives the right F1 score
    else:
        avg_precision = float(true_pos)/(true_pos+false_pos)
    if(num_gt_bboxes == 0): #then true_pos will always be zero
        avg_recall = 1.0 #this is debatable, but gives the right F1 score
    else:
        avg_recall = float(true_pos)/num_gt_bboxes
    if(avg_precision==0 and avg_recall==0):
        F1_score = 0.0
    else:
        F1_score = 2*avg_precision*avg_recall/(avg_precision+avg_recall) #harmonic average of precision and recall

    return avg_precision, avg_recall, F1_score


def calculate_MAP(data_yaml=mini_training_val_data_yaml):
    """Evaluate the latest checkpoint on every image listed in a YAML file.

    All images are read, forwarded through the network in a single
    test_ssd() call and scored one by one with calculate_image_MAP(). The
    per-image precision and recall are averaged; the reported F1 score is the
    harmonic mean of those two averages. The result is printed and returned.

    :param data_yaml: path to a YAML file holding a list of image dicts
                      (each with a 'path' entry)
    :return: (mean precision, mean recall, F1 score)
    :raises ValueError: if the YAML file lists no images
    """
    #NOTE(review): yaml.load can construct arbitrary Python objects - only use it on trusted files
    with open(data_yaml, 'rb') as yaml_file:
        data_list = yaml.load(yaml_file.read())
    len_data_list = len(data_list)
    if len_data_list == 0:
        raise ValueError('No images listed in {0}'.format(data_yaml))

    test_images = []
    for ii in range(len_data_list):
        test_images.append(cv2.imread(data_list[ii]['path'])) #cv2 returns image as numpy array
    test_images = np.stack(test_images,axis=0)
    #Forward propagate all the images through ssd_net
    rbb,rsc = test_ssd(test_images)

    #Calculate the mean of AP/AR across all the validation data
    mean_avg_precision = 0
    mean_avg_recall = 0
    for img_id in range(len_data_list):
        avg_precision, avg_recall, _ = calculate_image_MAP(data_list,rbb,rsc,img_id)
        mean_avg_precision += avg_precision
        mean_avg_recall += avg_recall
    mean_avg_precision /= len_data_list
    mean_avg_recall /= len_data_list
    if(mean_avg_precision==0 and mean_avg_recall==0):
        mean_avg_F1_score = 0.0
    else:
        mean_avg_F1_score = 2*mean_avg_precision*mean_avg_recall/(mean_avg_precision+mean_avg_recall) #harmonic average of precision and recall
    print()
    print('MA Precision is {0}, MA Recall is {1}, and MA F1 Score is {2}'.format(mean_avg_precision, mean_avg_recall, mean_avg_F1_score))
    return mean_avg_precision, mean_avg_recall, mean_avg_F1_score


#calculate_MAP(data_yaml=mini_training_val_data_yaml)

In [16]:
#SETUP THE TF SSD MODEL/GRAPH
#This cell builds a single graph holding the training network, its loss and
#optimizer, and a test-time network that reuses the training variables.

#Hyper-parameters
learning_rate = 0.33e-3 #1e-3 #TODO: find the right value
training_batch_size = 8
num_epochs = 200
regularization_factor = 5e-4 #1e-10 #5e-5 #0.0005 (passed as weight_decay to the SSD arg scope below)

tf.reset_default_graph() #start from an empty default graph
# Instantiate SSDNet class with basic parameters for the SSDNet
# And Generate anchor boxes
ssd_net = ssd_vgg_512.SSDNet()
ssd_shape = ssd_net.params.img_shape #HxW
ssd_anchors = ssd_net.anchors(ssd_shape) #compute the default anchor boxes given the img_shape(i.e given the features dimension)

#One-off data preparation: flip the flag to True to (re)generate the encoded SSD yaml files
if_generate_and_encode_ssd_data = False
if(if_generate_and_encode_ssd_data == True):
    ###generate training ssd data
    generate_and_encode_ssd_data(ssd_net, ssd_anchors, mini_training_data_yaml, mini_training_data_ssd_yaml, image_shape=ssd_shape)
    ###generate data to be used for placeholder
    generate_and_encode_ssd_data(ssd_net, ssd_anchors, mini_training_val_data_yaml, placeholder_data_ssd_yaml, image_shape=ssd_shape)

#Input placeholder: [batch, H, W, 3] with a free batch dimension
input_shape = list(ssd_shape) + [3] #HxWxC
input_shape = [None] + input_shape
input_image = tf.placeholder(tf.float32, shape=input_shape)

#Pre-process the image to normalize between -1 and 1: x*2/255 - 1 (assuming pixel values in [0,255])
#The cast is a no-op here because the placeholder is already float32
input_image_pre_processed = tf.cast(input_image, dtype=tf.float32)
input_image_pre_processed = tf.multiply(input_image_pre_processed, tf.constant(2.0/255.0, dtype=tf.float32))
input_image_pre_processed = tf.add(input_image_pre_processed, tf.constant(-1.0, dtype=tf.float32))

#Define the argument scope
ssdnet_arg_scope = ssd_net.arg_scope(weight_decay=regularization_factor) #also sets xavier initialization

#For training
#generate the gt placeholders
#NOTE(review): the three names are used as feed_dict keys in train_ssd, so the helper
#presumably returns hashable containers of placeholders rather than these lists - confirm
gt_encoded_labels = []
gt_encoded_scores = []
gt_encoded_bboxes = []
gt_encoded_labels, gt_encoded_scores, gt_encoded_bboxes = generate_encoded_gt_tf_placeholders(placeholder_data_ssd_yaml, 
                                                                                              gt_encoded_labels, 
                                                                                              gt_encoded_scores, 
                                                                                              gt_encoded_bboxes)

#create the SSD network for training
#Set the argument scope
with tf.contrib.slim.arg_scope(ssdnet_arg_scope):
    predictions, localizations, logits, end_points = \
        ssd_net.net(input_image_pre_processed, is_training=True, reuse=False)
#print(predictions[0].get_shape())

#Setup the losses and training optimizer
#NOTE(review): the return value of losses() is ignored; presumably it registers its
#terms in the tf.losses collection that get_total_loss() reads - confirm in ssd_vgg_512
ssd_net.losses(logits, localizations, gt_encoded_labels, gt_encoded_bboxes, gt_encoded_scores)
tot_loss = tf.losses.get_total_loss(add_regularization_losses=True) #includes regularization loss
#despite its name, `optimizer` is the train op returned by minimize(); running it applies one update step
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(tot_loss)
tf.summary.scalar('Total_Loss',tot_loss)
###End training

#For testing
#create the SSD network for testing while reusing the variables from before (i.e. training)
select_threshold = 0.01 #0.01
select_top_k = 400
keep_top_k = 10 #200
nms_threshold = 0.45

#Set the argument scope
with tf.contrib.slim.arg_scope(ssdnet_arg_scope):
    predictions_tst, localizations_tst, logits_tst, end_points_tst = \
        ssd_net.net(input_image_pre_processed, is_training=False, reuse=True)
#decode the predicted offsets against the anchors, then rescale the boxes to the ssd_shape image size
localizations_tst = ssd_net.bboxes_decode(localizations_tst, ssd_anchors)
localizations_tst = unnormalize_predicted_bboxes(localizations_tst, image_shape=ssd_shape)
#per-class scores and boxes; both are indexed [class][image][detection] by the cells that consume them
rscores_tst, rbboxes_tst = ssd_net.detected_bboxes(predictions_tst, localizations_tst,
                                           select_threshold=select_threshold,
                                           nms_threshold=nms_threshold,
                                           clipping_bbox=None,
                                           top_k=select_top_k, 
                                           keep_top_k=keep_top_k)
###End testing

init = tf.global_variables_initializer()
saver = tf.train.Saver() #saves/restores the variables created above
merged = tf.summary.merge_all()

###########End Model/TF Graph Generation##################

def train_ssd(data_ssd_yaml, resume_training=False):
    """Train the SSD network on an encoded SSD dataset.

    Runs num_epochs epochs of batches of size training_batch_size, prints the
    loss of every batch, writes the last batch's summary of each epoch to
    ./tensorboard_data, and every 5 epochs saves a checkpoint and evaluates
    it with calculate_MAP(). A final checkpoint is saved at the end.

    :param data_ssd_yaml: path to the encoded SSD training data (YAML)
    :param resume_training: if True, restore the latest checkpoint from
                            ./SSD_Saved_Model before training
    :raises ValueError: if the batch generator yields no batch in an epoch
    """
    checkpoint_dir = './SSD_Saved_Model'
    #NOTE(review): yaml.load can construct arbitrary Python objects - only use it on trusted files
    with open(data_ssd_yaml, 'rb') as yaml_file:
        data_list = yaml.load(yaml_file.read()) #Loading this file takes 8sec. for 10 images
    batches_fn = gen_batch_function(data_list) #function that returns another function
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir) #make sure the checkpoints have somewhere to go
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter('./tensorboard_data', sess.graph)
        try:
            sess.run(init)
            if(resume_training):
                saver.restore(sess,tf.train.latest_checkpoint(checkpoint_dir))
            for epoch in range(num_epochs):
                l = None
                summary_out = None
                generate_batches = batches_fn(batch_size=training_batch_size) #generator function (an iterator for training images and ground truth)
                for batch_image, batch_encode_gt_labels, batch_encode_gt_bboxes, batch_encode_gt_scores in generate_batches:
                    _,l,summary_out = sess.run([optimizer,tot_loss, merged], feed_dict={input_image:batch_image,
                                                                         gt_encoded_labels:batch_encode_gt_labels,
                                                                         gt_encoded_bboxes:batch_encode_gt_bboxes,
                                                                         gt_encoded_scores:batch_encode_gt_scores})
                    print(l)
                if summary_out is None:
                    raise ValueError('No training batches were generated from ' + str(data_ssd_yaml))

                msg = "Final Batch Loss at Epoch {0} is: {1:.4f}"
                print(msg.format(epoch+1, l))
                train_writer.add_summary(summary_out, epoch)
                if((epoch+1)%5 == 0): #save variables every 5 epochs
                    saver_file_name = checkpoint_dir + '/my_ssd' + str(epoch+1)
                    saver.save(sess, saver_file_name)
                    calculate_MAP(mini_training_val_data_yaml)
            saver.save(sess, checkpoint_dir + '/my_ssd')
        finally:
            #flush and release the event file even when training is interrupted
            train_writer.close()


def test_ssd(test_image):
    """Run the test-time SSD graph on a batch of images.

    A fresh session is opened, the variables are initialised and then
    overwritten with the latest checkpoint found in ./SSD_Saved_Model.

    :param test_image: image batch fed to the input_image placeholder
    :return: (bboxes, scores) evaluated from rbboxes_tst and rscores_tst
    """
    fetches = [rbboxes_tst, rscores_tst]
    with tf.Session() as session:
        session.run(init) #initialize all global variables
        latest_checkpoint = tf.train.latest_checkpoint('./SSD_Saved_Model')
        saver.restore(session, latest_checkpoint)
        detected_bboxes, detected_scores = session.run(fetches, feed_dict={input_image: test_image})
    return detected_bboxes, detected_scores


In [17]:
#Train from scratch (no checkpoint restore) on the encoded mini training set
train_ssd(mini_training_data_ssd_yaml, resume_training=False)
#train_ssd(placeholder_data_ssd_yaml, resume_training=False)
#train_ssd(mini_simulator_data_300_300_ssd_yaml, resume_training=False)

#11/01/17 at 10pm: No dropout, after 35 epochs, on validation data get MAP of 1 and MAR of 0.875 (F1 of 0.9333) after 2hrs training
#11/02/17 at 10am: With dropout, after 35 epochs, on validation data get MAP of 1 and MAR of 0.860 (F1 of 0.924) after 2hrs training

54.276
60.3673
68.3948
66.0506
55.0817
64.0417
56.4897
65.2757
41.8507
62.6935
55.9747
43.5469
59.6959
46.1081
57.7886
29.398
56.4965
46.2828
61.6591
70.2265
65.5402
66.8804
40.2604
55.2934
46.0052
70.9986
57.7491
56.5987
40.9955
49.6191
28.1169
40.4792
50.6482
38.1752
39.8262
40.8143
39.3169
44.9692
39.6899
36.0949
55.5881
51.3469
49.1002
37.045
37.1087
46.8418
39.9497
52.7262
47.7765
32.5074
24.1184
42.7946
32.1687
38.3313
24.881
30.3024
38.1066
34.2248
25.2065
35.4927
39.2508
28.1846
37.9831
47.7836
34.4104
38.5783
40.6034
27.6841
23.883
39.5558
28.2096
25.4723
32.5396
28.6231
26.5439
38.4312
24.452
24.174
Final Batch Loss at Epoch 1 is: 24.1740
29.2367
20.8129
34.7482
21.7073
27.8272
36.8798
13.5678
26.4394
15.1465
22.6599
40.3025
43.3862
33.973
27.7244
25.0081
27.0475
34.4802
17.8505
27.1598
39.0895
19.5515
29.7615
27.8234
20.4296
16.1965
33.2276
16.2613
30.3534
31.9224
30.8881
37.6308
30.2618
20.3018
30.9467
27.8658
30.6021
24.5777
30.4673
30.4652
29.4679
27.7254
20.7213
29.3687


13.0028
5.88979
7.17641
9.64773
10.4608
11.3121
6.20466
5.41407
13.158
15.1318
5.72479
12.1528
5.13223
10.8461
10.0242
12.5646
7.92024
4.33276
2.97438
4.56349
10.6187
7.55233
6.09979
12.2054
6.45181
4.76703
8.30263
7.9227
6.20689
14.0284
7.53947
10.5475
8.61355
23.7982
5.32127
13.6395
4.41124
9.8448
10.4367
6.00791
9.63363
7.23504
7.73688
11.658
8.41304
5.42967
15.075
10.3984
8.11794
11.0008
8.26368
18.3053
3.60429
9.88004
11.2817
8.80969
11.0193
18.214
7.99705
12.3706
8.14357
11.3167
8.05439
8.2832
10.6427
9.27499
12.0658
6.27509
7.06706
8.18668
11.3459
9.24184
7.87487
13.2018
4.80413
3.9853
3.03971
13.7724
Final Batch Loss at Epoch 13 is: 13.7724
4.43809
14.3047
7.80972
10.751
7.99426
12.3449
6.29611
5.92464
4.5439
12.4837
7.29059
7.50225
5.20524
8.61813
5.25607
6.48774
7.70766
4.70833
12.2426
10.7233
9.40678
7.26363
6.31195
4.75163
11.1239
10.4434
8.64437
5.88223
5.42047
5.64496
10.3937
6.36695
3.98852
7.60789
8.72219
8.25046
11.2338
14.8002
5.16353
4.0825
6.87881
4.23446
6.9536
5.6

6.92666
2.18648
6.00625
5.2216
3.80747
4.01135
6.15409
5.54931
7.28217
5.45075
5.98812
7.48357
2.73364
6.69998
7.58278
3.69736
2.48201
4.85782
6.1754
2.71586
9.3435
3.14274
4.60675
5.24694
2.69555
2.30675
6.12679
11.3177
3.26999
6.74734
5.16379
3.55548
2.0961
3.5046
3.30357
5.27113
8.50465
5.55039
4.46053
5.64609
6.74235
3.63051
3.57903
2.69919
3.88661
4.37399
4.66172
2.7354
11.1589
9.68096
5.80614
3.57284
4.28315
8.85072
8.09766
5.47683
4.96302
6.31756
8.06355
2.75943
6.46866
2.26017
6.15772
3.51422
2.98843
11.486
7.81758
2.53651
8.73294
3.52139
3.38221
6.317
5.93555
11.6337
2.27415
1.87775
Final Batch Loss at Epoch 25 is: 1.8777
INFO:tensorflow:Restoring parameters from ./SSD_Saved_Model/my_ssd25

MA Precision is 1.0, MA Recall is 0.859375, and MA F1 Score is 0.9243697478991597
2.6043
6.70585
7.87541
3.08735
4.6466
3.2119
2.83754
3.70094
3.98208
8.07989
6.27009
6.93568
4.45544
2.45218
7.22933
4.58414
7.04577
10.4201
8.5812
5.92612
5.97634
6.38063
2.06339
4.56698
4.18087
5.29082
3.212

9.61277
5.01637
3.08625
5.47869
4.01667
5.86665
6.96451
Final Batch Loss at Epoch 36 is: 6.9645
2.35694
7.91731
3.8051
5.94432
4.75484
5.08449
4.35889
6.11786
4.27191
8.41696
4.11766
5.63988
2.01986
2.67832
4.5786
2.49155
3.21449
3.0857
1.80822
6.81556
2.11932
4.74234
5.16286
7.1422
5.41812
3.92726
4.83507
4.88142
5.08577
11.8281
1.85601
4.56013
7.9345
3.24555
2.6076
3.74088
4.58747
5.89988
3.32333
3.72942
3.40118
4.75881
2.75632
3.15825
7.64016
5.88808
2.84784
3.58415
4.76492
4.76084
2.94078
3.59954
4.00699
5.76768
3.52897
4.36439
3.27677
3.53187
7.46642
1.55552
4.19191
2.31241
1.64706
1.93904
2.74677
9.08357
5.06187
3.6775
1.90804
6.43494
6.40401
5.32958
5.48236
3.54322
5.99055
2.60314
6.50443
12.0955
Final Batch Loss at Epoch 37 is: 12.0955
3.36314
2.94217
2.98302
3.72645
5.70048
4.25194
3.04051
6.44307
2.74794
4.26558
3.27461
3.54476
1.82405
4.05446
3.58447
2.55999
1.91308
5.98808
4.9849
3.02666
6.47275
7.22987
1.47702
5.08712
2.41899
3.12603
3.3887
5.0943
6.13502
8.66818
6.47713
6

5.69377
2.09302
2.81027
9.75446
Final Batch Loss at Epoch 48 is: 9.7545
3.59163
3.68226
5.34923
4.19721
2.73089
4.15066
1.49392
1.90284
5.29174
3.68221
1.98885
4.83985
4.76117
5.2983
5.22674
5.14169
1.3321
2.99585
8.77509
3.81427
7.48993
3.70809
3.17456
2.05718
5.61101
4.61761
4.24758
3.81038
2.64368
3.84172
2.94331
3.45422
3.68432
4.57115
5.62248
2.20221
6.46512
1.85071
4.57489
2.8755
3.68661
3.88642
2.0034
1.57364
3.03329
2.64654
1.8381
5.28208
4.63361
2.7641
1.93374
2.99352
2.69805
2.7221
2.15464
3.99559
5.70166
5.57438
5.20715
3.68774
2.99944
2.90679
2.87542
2.96729
5.73623
3.41179
4.26316
5.63326
3.99891
2.29752
4.49714
6.04948
9.49226
3.20845
3.66908
3.01176
1.71907
3.73329
Final Batch Loss at Epoch 49 is: 3.7333
4.33765
3.86302
4.57405
6.77819
4.68798
3.03396
4.40708
3.47353
8.00343
4.17855
1.91812
2.89079
3.50383
3.10842
2.81043
3.31894
4.99507
4.67438
5.3597
3.36163
3.42969
6.40183
5.58469
3.54871
3.70844
3.88977
4.18954
2.47686
3.50346
1.5909
4.33811
3.80591
3.25652
1.60038
6

7.51437
Final Batch Loss at Epoch 60 is: 7.5144
INFO:tensorflow:Restoring parameters from ./SSD_Saved_Model/my_ssd60

MA Precision is 0.984375, MA Recall is 0.8645833333333333, and MA F1 Score is 0.9205985915492957
1.76815
6.01626
1.62154
6.55915
18.707
3.62458
6.83518
7.67965
7.60875
4.63439
6.37084
5.92633
4.80536
3.9717
6.00328
2.53072
3.85499
2.48663
7.28983
5.55973
4.56054
1.27583
3.36238
3.91172
5.25207
3.73392
11.7261
5.2593
4.95551
6.08086
1.69608
4.28579
2.17898
2.27061
3.67787
3.70472
6.8524
4.0938
3.65849
6.0937
2.28908
4.1344
2.49488
5.79739
3.49502
2.5072
4.5445
4.04059
5.95176
5.51371
3.45029
1.77006
8.34805
3.94193
2.65857
2.02809
4.16737
4.03591
4.85203
2.35689
3.81705
3.37449
1.58298
4.02421
4.19515
4.28324
5.01167
2.24622
4.19712
3.74332
2.47001
4.99878
4.40312
1.65255
1.95394
6.08522
3.36681
1.8318
Final Batch Loss at Epoch 61 is: 1.8318
3.17334
4.08031
2.61952
2.87627
2.08595
2.96535
3.14646
1.45072
1.02671
3.21781
3.64065
2.0024
2.06291
3.69799
2.53958
2.24371
2.47

KeyboardInterrupt: 

In [None]:
#data_list = yaml.load(open(mini_training_val_data_yaml, 'rb').read())
#Load the encoded SSD data list used by the test cells below; the file is
#closed as soon as it has been read
#NOTE(review): yaml.load can construct arbitrary Python objects - only use it on trusted files
with open(placeholder_data_ssd_yaml, 'rb') as yaml_file:
    data_list = yaml.load(yaml_file.read())
#data_list = yaml.load(open(mini_simulator_data_300_300_yaml, 'rb').read()) #Loading this file takes 8sec. for 10 images

In [None]:
#Run the trained network on (up to) the first 32 images of data_list, draw
#every detection scoring above 0.15 and save the annotated images
output_folder = './SSD_Test_Results/'
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

font = cv2.FONT_HERSHEY_SIMPLEX #font for displaying the label
image_file_names = data_list['image_file_name_list']
#alternative sources: data_list[im_id]['path'] for a regular (non-SSD) data list,
#or the files of './data/mini_data_simulator/test_data/'

#Testing the Network Predictions
num_test_images = min(32, len(image_file_names)) #never index past the end of the list
for im_id in range(num_test_images):
    image_path = image_file_names[im_id]
    test_image = cv2.imread(image_path) #cv2 returns image as numpy array (None when unreadable)
    if test_image is None:
        print('Could not read ' + str(image_path) + ', skipping')
        continue
    #test_image = cv2.resize(test_image, (512,512)) #cv2 uses sizes in col,row (x,y) vs numpy is in row,col(y,x)
    test_image_ssd = np.expand_dims(test_image, axis=0) #1XHxWxC
    rbb,rsc = test_ssd(test_image_ssd)

    print(rsc[3]) #debug: scores of class 3

    #Draw the detections; inference is finished, so the loaded image can be annotated in place
    for i in range(1,num_of_classes):
        for j in range(rsc[i].shape[1]): #number of kept detections of this class
            if(rsc[i][0][j] > 0.15):
                y_min = rbb[i][0][j][0]
                x_min = rbb[i][0][j][1]
                y_max = rbb[i][0][j][2]
                x_max = rbb[i][0][j][3]
                cv2.rectangle(test_image, (ir(x_min), ir(y_min)), (ir(x_max), ir(y_max)),(0, 255, 0))
                disp_text = map_int_to_label(i) + ' :' + str(rsc[i][0][j])
                cv2.putText(test_image, disp_text, (ir(x_min), ir(y_min)), font, 0.5, (0,0,255), 1)

    print('Prediction Output')
    plt.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB))
    plt.show()

    #save the image
    new_path = output_folder + 'image_pred_' + str(im_id) + '.png'
    print(new_path)
    cv2.imwrite(new_path, test_image)

In [None]:
#Test the Network on SSD Network
#Every .jpg of the test directory is resized to 512x512, run through the
#network inside one shared session, and saved at 600x800 with the
#detections scoring above 0.15 drawn on it
output_folder = './SSD_Test_Results/'
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

test_dir = './data/Capstone_Data/Test_Data/'
#sorted: os.listdir returns names in arbitrary order and the index is part of the output file name
test_filesnames = sorted(f for f in os.listdir(test_dir) if f.endswith('.jpg'))
font = cv2.FONT_HERSHEY_SIMPLEX #font for displaying the label
test_bboxes = [] #detections of the most recently processed image

with tf.Session() as sess:
    sess.run(init) #initialize all global variables
    saver.restore(sess,tf.train.latest_checkpoint('./SSD_Saved_Model'))

    for im_id, test_filename in enumerate(test_filesnames):
        test_path = test_dir + test_filename
        test_image = cv2.imread(test_path) #None when the file cannot be decoded
        if test_image is None:
            print('Could not read ' + test_path + ', skipping')
            continue
        test_image, _ = resize_with_crop(test_image, bboxes=[], new_size=[512,512], crop=False)
        test_image_4d = np.expand_dims(test_image, axis=0) #1XHxWxC

        #time the forward pass only
        t1 = time.time()
        rbb,rsc = sess.run([rbboxes_tst, rscores_tst], feed_dict={input_image:test_image_4d})
        t2 = time.time()
        print(t2-t1)

        #collect the confident detections of every non-background class
        test_bboxes = []
        for i in range(1,num_of_classes):
            for j in range(rsc[i].shape[1]): #number of kept detections of this class
                if(rsc[i][0][j] > 0.15):
                    bbox = {}
                    bbox['x_min'] = rbb[i][0][j][1]
                    bbox['y_min'] = rbb[i][0][j][0]
                    bbox['x_max'] = rbb[i][0][j][3]
                    bbox['y_max'] = rbb[i][0][j][2]
                    bbox['label'] = map_int_to_label(i)
                    bbox['score'] = rsc[i][0][j]
                    test_bboxes.append(bbox)

        #resize the image for display; the boxes are passed along and returned by resize_with_crop
        test_image, test_bboxes = resize_with_crop(test_image, test_bboxes, new_size=(600,800), crop=False)
        for bbox in test_bboxes:
            xmin = int(bbox['x_min'])
            xmax = int(bbox['x_max'])
            ymin = int(bbox['y_min'])
            ymax = int(bbox['y_max'])
            cv2.rectangle(test_image, (xmin,ymin), (xmax,ymax), (0,0,255), 2)
            disp_text = bbox['label'] + ': ' + str(bbox['score'])
            cv2.putText(test_image, disp_text, (xmin,ymin), font, 0.75, (0,0,255), 1)

        #print('Prediction Output')
        #plt.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB))
        #plt.show()

        #save the image
        new_path = output_folder + 'image_pred_' + str(im_id) + '.png'
        print(new_path)
        cv2.imwrite(new_path, test_image)

In [None]:
#Display the detections kept for the last image processed by the test loop
test_bboxes

In [None]:
###Using Mini Simulator Data
#Paths of the simulator mini dataset: original labels, 300x300 resized labels,
#their SSD-encoded version and the folder holding the resized images
mini_simulator_data_yaml = './data/mini_data_simulator/mini_data_simulator.yaml'
mini_simulator_data_300_300_yaml = './data/mini_data_simulator/mini_data_simulator_300_300.yaml'
mini_simulator_data_300_300_ssd_yaml = './data/mini_data_simulator/mini_data_simulator_300_300_ssd.yaml'
mini_simulator_data_300_300_folder = './data/mini_data_simulator/images_300_300'

#Load the resized label list; the file is closed as soon as it has been read
#NOTE(review): yaml.load can construct arbitrary Python objects - only use it on trusted files
with open(mini_simulator_data_300_300_yaml, 'rb') as yaml_file:
    data_list = yaml.load(yaml_file.read())

###Show images with bbox and also generate resized training data
#show_label_images_with_resize(data_list, output_folder='./data/mini_data_simulator/playground/', new_size=None)
###Generate_Mini_Dataset_with_Resize(mini_simulator_data_yaml, mini_simulator_data_300_300_yaml, mini_simulator_data_300_300_folder, mini_data_size=16, new_size=(300,300)) #size=row,col(y,x)

###Generate SSD Data
#ssd_net = ssd_vgg_300.SSDNet()
#ssd_shape = ssd_net.params.img_shape #HxW
#ssd_anchors = ssd_net.anchors(ssd_shape) #compute the default anchor boxes given the img_shape(i.e given the features dimension)
#generate_and_encode_ssd_data(ssd_net, ssd_anchors, mini_simulator_data_300_300_yaml,mini_simulator_data_300_300_ssd_yaml, image_shape=ssd_shape)

In [None]:
#FOR DEBUG ONLY
#Display (up to) the first 32 images referenced by the encoded SSD data list
#data_list = yaml.load(open(mini_training_data_ssd_yaml, 'rb').read()) #Loading this file takes 8sec. for 10 images
#NOTE(review): yaml.load can construct arbitrary Python objects - only use it on trusted files
with open(placeholder_data_ssd_yaml, 'rb') as yaml_file:
    data_list = yaml.load(yaml_file.read()) #Loading this file takes 8sec. for 10 images
image_file_names = data_list['image_file_name_list']
for im_id in range(min(32, len(image_file_names))): #never index past the end of the list
    image_path = image_file_names[im_id]
    test_image = cv2.imread(image_path) #cv2 returns image as numpy array
    plt.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB))
    plt.show()

In [None]:
#FOR DEBUG ONLY (encoded) SSD Data List
#To check the encode/decode functions
#For each image this cell takes the encoded ground truth stored in the SSD
#data list, decodes it back to bounding boxes and draws them on the image
#reset tf graph
tf.reset_default_graph()

font = cv2.FONT_HERSHEY_SIMPLEX #font for displaying the label
#NOTE(review): yaml.load can construct arbitrary Python objects - only use it on trusted files
with open(placeholder_data_ssd_yaml, 'rb') as yaml_file:
    data_list = yaml.load(yaml_file.read())

#cv2.imwrite does not create missing folders, so make sure the target exists
debug_output_folder = 'delete_encoded_decoded_bbox_testing/'
if not os.path.exists(debug_output_folder):
    os.makedirs(debug_output_folder)

max_boxes_per_class = 3 #only the first few decoded boxes of each class are drawn
num_debug_images = min(32, len(data_list['image_file_name_list'])) #never index past the end of the list
for im_id in range(num_debug_images):
    #start from an empty graph so the ops and constants of the previous images do not pile up
    tf.reset_default_graph()
    image_path = data_list['image_file_name_list'][im_id]

    test_image_labels_list = []
    test_image_bboxes_list = []
    test_image_scores_list = []
    decode_gt_labels_list = []
    for i in range(num_feat_layers):
        #slice with im_id:(im_id+1) to keep a leading batch axis of size 1
        test_image_labels_list.append(tf.convert_to_tensor(data_list['encode_gt_labels_list'][i][im_id:(im_id+1),:,:,:]))
        test_image_bboxes_list.append(tf.convert_to_tensor(data_list['encode_gt_bboxes_list'][i][im_id:(im_id+1),:,:,:,:]))
        test_image_scores_list.append(tf.convert_to_tensor(data_list['encode_gt_scores_list'][i][im_id:(im_id+1),:,:,:]))

        #one hot encode the labels classes so they can stand in for the network predictions
        temp = data_list['encode_gt_labels_list'][i][im_id:(im_id+1),:,:,:]
        temp = tf.one_hot(indices=temp, depth=num_of_classes) #6=num_classes
        decode_gt_labels_list.append(temp)

    # print(test_image_bboxes_list[0])
    # print(test_image_labels_list[0])
    # print(test_image_scores_list[0])
    # print(decode_gt_labels_list[0])

    # Instantiate SSDNet class with basic parameters for the SSDNet
    # And Generate anchor boxes
    ssd_net = ssd_vgg_512.SSDNet()
    ssd_shape = ssd_net.params.img_shape #HxW
    ssd_anchors = ssd_net.anchors(ssd_shape) #compute the default anchor boxes given the img_shape(i.e given the features dimension)

    #decode image gt bbox
    decode_gt_bboxes_list = ssd_net.bboxes_decode(test_image_bboxes_list, ssd_anchors)
    decode_gt_bboxes_list = unnormalize_predicted_bboxes(decode_gt_bboxes_list, image_shape=ssd_shape)
    rscores_tst, rbboxes_tst = ssd_net.detected_bboxes(decode_gt_labels_list, decode_gt_bboxes_list,
                                               select_threshold=0.01,
                                               nms_threshold=0.45,
                                               clipping_bbox=None,
                                               top_k=400,
                                               keep_top_k=20)
    #print(rscores_tst[1])
    #evaluate the tensors; from here on both names hold the fetched values
    with tf.Session() as sess:
        rscores_tst, rbboxes_tst = sess.run([rscores_tst, rbboxes_tst])

    #rscores_tst.keys()
    test_image = cv2.imread(image_path) #cv2 returns image as numpy array
    for i in range(1,num_of_classes):
        for j in range(min(max_boxes_per_class, rscores_tst[i].shape[1])):
            if(rscores_tst[i][0][j] > 0.25):
                y_min = rbboxes_tst[i][0][j][0]
                x_min = rbboxes_tst[i][0][j][1]
                y_max = rbboxes_tst[i][0][j][2]
                x_max = rbboxes_tst[i][0][j][3]
                cv2.rectangle(test_image, (ir(x_min), ir(y_min)), (ir(x_max), ir(y_max)),(0, 255, 0))
                disp_text = map_int_to_label(i) + ' :' + str(rscores_tst[i][0][j])
                cv2.putText(test_image, disp_text, (ir(x_min), ir(y_min)), font, 0.3, (0,0,255), 1)

    print('\nEncoded/Decoded BBox from SSD Training Data')
    plt.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB))
    plt.show()
    cv2.imwrite(debug_output_folder + 'image_' + str(im_id) + '_ssd_data_encode_decode.png', test_image)

In [None]:
#Inspect the raw detections left in rbb/rsc by the most recent test run
#(both are indexed by class id first)
rbb.keys() #not displayed: a notebook cell only echoes its final expression
print(rbb[3]) #boxes of class 3
print()

#print(rbb[3][0,1,:])
rsc[1] #not displayed either
rsc[1].shape #echoed: shape of the class-1 score array

In [None]:
#FOR DEBUG ONLY (Regular non-SSD datalist)
#To compare the encode/decode function
#This module loads an original image and displays the bounding boxes on it
#Then it encodes and decodes the bboxes and displays it
font = cv2.FONT_HERSHEY_SIMPLEX #font for displaying the label
#NOTE(review): yaml.load can construct arbitrary Python objects - only use it on trusted files
with open(mini_training_val_data_yaml, 'rb') as yaml_file:
    data_list = yaml.load(yaml_file.read())

#cv2.imwrite does not create missing folders, so make sure both targets exist
original_output_folder = 'encoded_decoded_bbox_testing/'
decoded_output_folder = 'delete_encoded_decoded_bbox_testing/'
for debug_folder in (original_output_folder, decoded_output_folder):
    if not os.path.exists(debug_folder):
        os.makedirs(debug_folder)

max_boxes_per_class = 3 #only the first few decoded boxes of each class are drawn
for im_id in range(min(32, len(data_list))): #never index past the end of the list
    image_dict = data_list[im_id]
    image_path = image_dict['path']
    test_image = cv2.imread(image_path) #cv2 returns image as numpy array
    test_image_bboxes = image_dict['boxes']
    for box in test_image_bboxes:
        x_min = box['x_min']
        y_min = box['y_min']
        x_max = box['x_max']
        y_max = box['y_max']
        cv2.rectangle(test_image, (ir(x_min), ir(y_min)), (ir(x_max), ir(y_max)),(0, 255, 0))
        cv2.putText(test_image, box['label'], (ir(box['x_min']), ir(box['y_min'])), font, 0.3, (0,0,255), 1)

    print('Original BBox')
    plt.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB))
    plt.show()
    #one file per image: a fixed file name would be overwritten on every iteration
    cv2.imwrite(original_output_folder + 'image_' + str(im_id) + '_original.png', test_image)

    #encode and decode the image
    rbboxes_tst,rscores_tst = debug_encode_decode_image(image_dict)

    #rscores_tst.keys()
    test_image = cv2.imread(image_path) #reload a clean copy to draw the decoded boxes on
    for i in range(1,num_of_classes):
        for j in range(min(max_boxes_per_class, rscores_tst[i].shape[1])):
            if(rscores_tst[i][0][j] > 0.25):
                y_min = rbboxes_tst[i][0][j][0]
                x_min = rbboxes_tst[i][0][j][1]
                y_max = rbboxes_tst[i][0][j][2]
                x_max = rbboxes_tst[i][0][j][3]
                cv2.rectangle(test_image, (ir(x_min), ir(y_min)), (ir(x_max), ir(y_max)),(0, 255, 0))
                disp_text = map_int_to_label(i) + ' :' + str(rscores_tst[i][0][j])
                cv2.putText(test_image, disp_text, (ir(x_min), ir(y_min)), font, 0.3, (0,0,255), 1)

    print('Encoded/Decoded BBox')
    plt.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB))
    plt.show()
    cv2.imwrite(decoded_output_folder + 'image_' + str(im_id) + '_ssd_data_encode_decode.png', test_image)

In [None]:
#Scores of class 5 from the last encode/decode debug run
rscores_tst[5]

In [None]:
#Shape of the class-1 score array from the last encode/decode debug run
rscores_tst[1].shape

In [None]:
#Shape of the class-1 score array from the last network test run
rsc[1].shape

In [None]:
#Scores of class 5 from the last network test run
rsc[5]

In [None]:
#First class-5 box of the first image, in (y_min, x_min, y_max, x_max) order
rbb[5][0,0,:]

In [None]:
#For efficiency: Do as much of the processing in Tensorflow as possible
#No need to use anything more accurate than tf.float32


#Discard the current default graph (and every op/variable defined in it);
#tensors created before this call can no longer be used
tf.reset_default_graph() #Remove all existing variables in the graph

#Preprocess the Ground Truth Data


In [None]:
%reset -f #to reset memory