# Deep Learning for Autonomous Vehicles
**Module 2: Sub-project 1: Traffic Light Prediction**
* Student: Dan Sullivan, Spr 2019

## Define Libraries and Functions

In [3]:
import numpy as np  
import matplotlib.pyplot as plt  
import pandas as pd
import shutil
import os
import requests
import base64

## Define Paths for Annotation CSVs and YAML Files
These paths locate the annotation YAML files that are read and the CSV files that are generated from them.

In [4]:
# Root folder holding the dataset directories and receiving the generated CSVs
DATA_DIR = 'C:/Users/Dan/Documents/traffic_light_data'

# Training split: annotation YAML (input) and the CSVs derived from it (outputs)
train_yamlpath = '{}/dataset_train_riib/train.yaml'.format(DATA_DIR)
train_riib_csv = '{}/train_riib.csv'.format(DATA_DIR)
train_rgb_csv = '{}/train_rgb.csv'.format(DATA_DIR)
train_rgb_colab_csv = '{}/train_rgb_colab.csv'.format(DATA_DIR)

# Testing split: annotation YAML (input) and the CSVs derived from it (outputs)
test_yamlpath = '{}/dataset_test_riib/test.yaml'.format(DATA_DIR)
test_riib_csv = '{}/test_riib.csv'.format(DATA_DIR)
test_rgb_csv = '{}/test_rgb.csv'.format(DATA_DIR)
test_rgb_colab_csv = '{}/test_rgb_colab.csv'.format(DATA_DIR)

## Define Functions for Processing the YAML Files Into CSVs

In [20]:
# Code based on Bosch Sample script to receive traffic light labels and images

import sys
import yaml

# Image dimensions in pixels. 736 rows is the RIIB frame height; RGB frames are
# 1280x720 (see the size note inside get_all_labels).
WIDTH = 1280
HEIGHT = 736

def get_all_images(input_yaml):
    """ Gets the file names of all images listed in a label file

    Every entry's path is resolved relative to the yaml file, '.png' is
    replaced by '.pgm', and only the base name is kept.
    Args:
        input_yaml->str: Path to yaml file
    Returns: List of image file names, one per yaml entry, in file order
    Raises:
        AssertionError: If input_yaml does not exist
        ValueError: If the yaml content is not a non-empty list whose first
            entry is a dict with a 'path' key
    """
    assert os.path.isfile(input_yaml), "Input yaml {} does not exist".format(input_yaml)
    with open(input_yaml, 'rb') as iy_handle:
        # safe_load: yaml.load() without an explicit Loader is rejected by
        # PyYAML >= 6 and can construct arbitrary objects on older releases.
        images = yaml.safe_load(iy_handle)

    if (not images or not isinstance(images, list)
            or not isinstance(images[0], dict) or 'path' not in images[0]):
        raise ValueError('Something seems wrong with this label-file: {}'.format(input_yaml))

    yaml_dir = os.path.dirname(input_yaml)
    imgOut = []
    for image in images:
        full_path = os.path.abspath(os.path.join(yaml_dir, image['path']))
        imgOut.append(os.path.basename(full_path.replace('.png', '.pgm')))

    return imgOut

# Labels are annotated in RGB pixel space, and RGB frames are 720 rows tall.
_RGB_HEIGHT = 720
# RIIB annotations are shifted down by this many rows (extra image-information lines).
_RIIB_ROW_OFFSET = 8


def get_all_labels(input_yaml, riib, clip, colab):
    """ Gets all labels within label file

    Note that RGB images are 1280x720 and RIIB images are 1280x736.
    Args:
        input_yaml->str: Path to yaml file
        riib->bool: If True, change path to the raw (.pgm) pictures and shift
            the boxes down by 8 rows
        clip->bool: If True, clips boxes so they do not go out of image bounds
        colab->bool: If True, rewrite the local path prefix to the Google
            Drive location used from Colab
    Returns: One row per annotated box, as
        [img_path, fname, label, occluded, x_min, x_max, y_min, y_max]
    Raises:
        AssertionError: If input_yaml does not exist
        ValueError: If the yaml content is not a non-empty list whose first
            entry is a dict with a 'path' key
    """
    assert os.path.isfile(input_yaml), "Input yaml {} does not exist".format(input_yaml)
    with open(input_yaml, 'rb') as iy_handle:
        # safe_load: yaml.load() without an explicit Loader is rejected by
        # PyYAML >= 6 and can construct arbitrary objects on older releases.
        images = yaml.safe_load(iy_handle)

    if (not images or not isinstance(images, list)
            or not isinstance(images[0], dict) or 'path' not in images[0]):
        raise ValueError('Something seems wrong with this label-file: {}'.format(input_yaml))

    # the test images have a web address that needs to be corrected
    test_str = '/net/pal-soc1.us.bosch.com/ifs/data/Shared_Exports/deep_learning_data/traffic_lights/university_run1/'
    local_riib_test = '/Users/Dan/Documents/traffic_light_data/dataset_test_riib/riib/test/'
    local_rgb_test = '/Users/Dan/Documents/traffic_light_data/dataset_test_rgb/rgb/test/'
    colab_rgb_test = '/content/gdrive/My Drive/AI/dataset_test_rgb/rgb/test/'

    yaml_dir = os.path.dirname(input_yaml)
    dataOut = []
    for image in images:
        path = os.path.abspath(os.path.join(yaml_dir, image['path']))
        path = path.replace('\\', '/')

        # The raw images are stored in a different folder (and as .pgm),
        # so the path needs modifications.
        if riib:
            path = path.replace('.png', '.pgm')
            path = path.replace('rgb/train', 'riib/train')
            path = path.replace('rgb/test', 'riib/test')
            path = path.replace(test_str, local_riib_test)
        else:
            path = path.replace('train_riib', 'train_rgb')
            path = path.replace(test_str, local_rgb_test)

        if colab:
            path = path.replace('C:/Users/Dan/Documents/traffic_light_data/', '/content/gdrive/My Drive/AI/')
            path = path.replace(test_str, colab_rgb_test)

        fname = os.path.basename(path)

        for box in image.get('boxes') or []:
            x_min, x_max = box['x_min'], box['x_max']
            y_min, y_max = box['y_min'], box['y_max']

            # There is (at least) one annotation where xmin > xmax
            if x_min > x_max:
                x_min, x_max = x_max, x_min
            if y_min > y_max:
                y_min, y_max = y_max, y_min

            # There is (at least) one annotation where xmax > 1279.
            # Clip in label (RGB) space: clipping y against the 736-row RIIB
            # height let RGB boxes run past row 719 and, after the shift
            # below, RIIB boxes past row 735.
            if clip:
                x_min = max(min(x_min, WIDTH - 1), 0)
                x_max = max(min(x_max, WIDTH - 1), 0)
                y_min = max(min(y_min, _RGB_HEIGHT - 1), 0)
                y_max = max(min(y_max, _RGB_HEIGHT - 1), 0)

            # The raw images have additional lines with image information
            # so the annotations need to be shifted.
            if riib:
                y_min = y_min + _RIIB_ROW_OFFSET
                y_max = y_max + _RIIB_ROW_OFFSET

            dataOut.append([path, fname, box['label'], box['occluded'],
                            x_min, x_max, y_min, y_max])

    return dataOut

## See If All Image Names Are Unique
If they are (i.e. no file name has an instance count greater than 1), then the file name can be used as the identifier for each annotated box.

Results:
The maximum occurrence is 1 for both training and testing, so the base file name is used as the identifier.

In [4]:
# Evaluate Training Images
# train_yamlpath / test_yamlpath are the label-file paths from the path-definition
# cell; the *_riib_yamlpath names used here before are not defined in this notebook.
train_imgList = get_all_images(train_yamlpath)
df_trainImg = pd.DataFrame(train_imgList, columns = ['fname'])
print('\nValue Counts of Training Images:')
# value_counts() sorts by count, highest first, so the first row is the maximum
vs_train = df_trainImg['fname'].value_counts()
print(vs_train)

# Evaluate Testing Images
test_imgList = get_all_images(test_yamlpath)
df_testImg = pd.DataFrame(test_imgList, columns = ['fname'])
print('\nValue Counts of Testing Images:')
vs_test = df_testImg['fname'].value_counts()
print(vs_test)


Value Counts of Training Images:
128072.pgm    1
9142.pgm      1
521660.pgm    1
479916.pgm    1
122980.pgm    1
217722.pgm    1
458684.pgm    1
475882.pgm    1
592042.pgm    1
526882.pgm    1
477480.pgm    1
131602.pgm    1
507178.pgm    1
507734.pgm    1
527370.pgm    1
122008.pgm    1
124996.pgm    1
210440.pgm    1
138292.pgm    1
712628.pgm    1
659178.pgm    1
559916.pgm    1
223212.pgm    1
573646.pgm    1
89540.pgm     1
96106.pgm     1
533136.pgm    1
518198.pgm    1
611648.pgm    1
117026.pgm    1
             ..
12662.pgm     1
678682.pgm    1
514934.pgm    1
624696.pgm    1
138362.pgm    1
115636.pgm    1
527160.pgm    1
28596.pgm     1
138222.pgm    1
524994.pgm    1
539494.pgm    1
682432.pgm    1
629294.pgm    1
521938.pgm    1
36414.pgm     1
42246.pgm     1
562000.pgm    1
93536.pgm     1
676528.pgm    1
585512.pgm    1
29906.pgm     1
525828.pgm    1
637746.pgm    1
207642.pgm    1
142252.pgm    1
218248.pgm    1
464920.pgm    1
623296.pgm    1
211094.pgm    1
124508

## Convert YAMLs into riib CSVs
Each row will be one annotated box

In [19]:
col_names = ['img_path','fname','label','occluded','x_min','x_max','y_min','y_max']
# Box coordinate columns that are stored as whole pixels
box_cols = ['x_min', 'x_max', 'y_min', 'y_max']

# Process Training YAML
train_data = get_all_labels(train_yamlpath, riib=True, clip=True, colab=False)
df_train = pd.DataFrame(train_data, columns = col_names)
# Round the x/y min, max values to integers (pixels).
# Every coordinate is rounded to 0 decimals: x_max used to be rounded to 1 decimal,
# so the integer cast truncated it instead of rounding it like the other columns.
df_train = df_train.round({col: 0 for col in box_cols}).astype({col: int for col in box_cols})
print('\nTraining Dataframe Sample:\n',df_train[0:5])
df_train.to_csv(train_riib_csv, index=False, header=False)

# Process Testing YAML
test_data = get_all_labels(test_yamlpath, riib=True, clip=True, colab=False)
df_test = pd.DataFrame(test_data, columns = col_names)
# Round the x/y min, max values to integers (pixels)
df_test = df_test.round({col: 0 for col in box_cols}).astype({col: int for col in box_cols})
print('\nTesting Dataframe Sample:\n',df_test[0:5])
df_test.to_csv(test_riib_csv, index=False, header=False)



Training Dataframe Sample:
                                             img_path       fname    label  \
0  C:/Users/Dan/Documents/traffic_light_data/data...  207386.pgm   Yellow   
1  C:/Users/Dan/Documents/traffic_light_data/data...  207386.pgm   Yellow   
2  C:/Users/Dan/Documents/traffic_light_data/data...  207386.pgm   Yellow   
3  C:/Users/Dan/Documents/traffic_light_data/data...  207390.pgm  RedLeft   
4  C:/Users/Dan/Documents/traffic_light_data/data...  207390.pgm      Red   

   occluded  x_min  x_max  y_min  y_max  
0      True    611    615    360    367  
1     False    634    638    350    359  
2     False    650    655    358    369  
3      True    612    615    363    368  
4     False    632    636    354    363  

Testing Dataframe Sample:
                                             img_path      fname  label  \
0  C:/Users/Dan/Documents/traffic_light_data/data...  24068.pgm  Green   
1  C:/Users/Dan/Documents/traffic_light_data/data...  24070.pgm  Green   
2  C:/

## Convert YAMLs into rgb CSVs
Each row will be one annotated box

In [21]:
col_names = ['img_path','fname','label','occluded','x_min','x_max','y_min','y_max']
# Box coordinate columns that are stored as whole pixels
box_cols = ['x_min', 'x_max', 'y_min', 'y_max']

# Process Training YAML
train_data = get_all_labels(train_yamlpath, riib=False, clip=True, colab=False)
df_train = pd.DataFrame(train_data, columns = col_names)
# Round the x/y min, max values to integers (pixels).
# Every coordinate is rounded to 0 decimals: x_max used to be rounded to 1 decimal,
# so the integer cast truncated it instead of rounding it like the other columns.
df_train = df_train.round({col: 0 for col in box_cols}).astype({col: int for col in box_cols})
print('\nTraining Dataframe Sample:\n',df_train[0:5])
df_train.to_csv(train_rgb_csv, index=False, header=False)

# Process Testing YAML
test_data = get_all_labels(test_yamlpath, riib=False, clip=True, colab=False)
df_test = pd.DataFrame(test_data, columns = col_names)
# Round the x/y min, max values to integers (pixels)
df_test = df_test.round({col: 0 for col in box_cols}).astype({col: int for col in box_cols})
print('\nTesting Dataframe Sample:\n',df_test[0:5])
df_test.to_csv(test_rgb_csv, index=False, header=False)



Training Dataframe Sample:
                                             img_path       fname    label  \
0  C:/Users/Dan/Documents/traffic_light_data/data...  207386.png   Yellow   
1  C:/Users/Dan/Documents/traffic_light_data/data...  207386.png   Yellow   
2  C:/Users/Dan/Documents/traffic_light_data/data...  207386.png   Yellow   
3  C:/Users/Dan/Documents/traffic_light_data/data...  207390.png  RedLeft   
4  C:/Users/Dan/Documents/traffic_light_data/data...  207390.png      Red   

   occluded  x_min  x_max  y_min  y_max  
0      True    611    615    352    359  
1     False    634    638    342    351  
2     False    650    655    350    361  
3      True    612    615    355    360  
4     False    632    636    346    355  

Testing Dataframe Sample:
                                             img_path      fname  label  \
0  C:/Users/Dan/Documents/traffic_light_data/data...  24068.png  Green   
1  C:/Users/Dan/Documents/traffic_light_data/data...  24070.png  Green   
2  C:/

## Convert YAMLs into rgb CSVs for Google Colab
Each row will be one annotated box

In [22]:
col_names = ['img_path','fname','label','occluded','x_min','x_max','y_min','y_max']
# Box coordinate columns that are stored as whole pixels
box_cols = ['x_min', 'x_max', 'y_min', 'y_max']

# Process Training YAML
train_data = get_all_labels(train_yamlpath, riib=False, clip=True, colab=True)
df_train = pd.DataFrame(train_data, columns = col_names)
# Round the x/y min, max values to integers (pixels).
# Every coordinate is rounded to 0 decimals: x_max used to be rounded to 1 decimal,
# so the integer cast truncated it instead of rounding it like the other columns.
df_train = df_train.round({col: 0 for col in box_cols}).astype({col: int for col in box_cols})
print('\nTraining Dataframe Sample:\n',df_train[0:5])
df_train.to_csv(train_rgb_colab_csv, index=False, header=False)

# Process Testing YAML
test_data = get_all_labels(test_yamlpath, riib=False, clip=True, colab=True)
df_test = pd.DataFrame(test_data, columns = col_names)
# Round the x/y min, max values to integers (pixels)
df_test = df_test.round({col: 0 for col in box_cols}).astype({col: int for col in box_cols})
print('\nTesting Dataframe Sample:\n',df_test[0:5])
df_test.to_csv(test_rgb_colab_csv, index=False, header=False)



Training Dataframe Sample:
                                             img_path       fname    label  \
0  /content/gdrive/My Drive/AI/dataset_train_rgb/...  207386.png   Yellow   
1  /content/gdrive/My Drive/AI/dataset_train_rgb/...  207386.png   Yellow   
2  /content/gdrive/My Drive/AI/dataset_train_rgb/...  207386.png   Yellow   
3  /content/gdrive/My Drive/AI/dataset_train_rgb/...  207390.png  RedLeft   
4  /content/gdrive/My Drive/AI/dataset_train_rgb/...  207390.png      Red   

   occluded  x_min  x_max  y_min  y_max  
0      True    611    615    352    359  
1     False    634    638    342    351  
2     False    650    655    350    361  
3      True    612    615    355    360  
4     False    632    636    346    355  

Testing Dataframe Sample:
                                             img_path      fname  label  \
0  /content/gdrive/My Drive/AI/dataset_test_rgb/r...  24068.png  Green   
1  /content/gdrive/My Drive/AI/dataset_test_rgb/r...  24070.png  Green   
2  /co