In [None]:
%matplotlib inline

import os, os.path, sys, logging
from functools import reduce
import re, random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import scipy.ndimage as ndimage
import cv2
import yaml
import time

## Get Dataset

In [None]:
# Get the training/test set information
# (adapted from https://github.com/bosch-ros-pkg/bstld/blob/master/read_label_file.py)
def get_all_labels(input_yaml, riib=False):
    """Load the per-image annotations listed in a label YAML file.

    Note that RGB images are 1280x720 and RIIB images are 1280x736.

    :param input_yaml: Path to yaml file. An empty path (``""`` or ``None``)
        means "no dataset" and yields an empty list.
    :param riib: If True, change path to labeled pictures (``.pgm`` files under
        ``riib/``) and shift each box's ``y_min``/``y_max`` by 8 pixels.
    :return: images: List of annotation dicts; every ``'path'`` is made
        absolute, resolved relative to the directory of the YAML file.
    """
    if not input_yaml:
        return []

    # safe_load only builds plain Python containers, so a label file cannot
    # trigger arbitrary object construction. The context manager closes the
    # handle, and `or []` covers an empty YAML document (which parses to None).
    with open(input_yaml, 'rb') as label_file:
        images = yaml.safe_load(label_file) or []

    base_dir = os.path.dirname(input_yaml)
    for image in images:
        image['path'] = os.path.abspath(os.path.join(base_dir, image['path']))
        if riib:
            image['path'] = (image['path']
                             .replace('.png', '.pgm')
                             .replace('rgb/train', 'riib/train')
                             .replace('rgb/test', 'riib/test'))
            for box in image['boxes']:
                box['y_max'] = box['y_max'] + 8
                box['y_min'] = box['y_min'] + 8
    return images

In [None]:
def split_path_labels(dataset, paths, labels):
    """Append one image path and one class label per dataset entry, in place.

    An entry without boxes is labelled 'NoLight'; otherwise the label of its
    first box is used. Nothing is returned: `paths` and `labels` are extended.
    """
    for entry in dataset:
        paths.append(entry['path'])

        # Only the first box decides the label of the whole image
        boxes = entry['boxes']
        labels.append(boxes[0]['label'] if len(boxes) else 'NoLight')

In [None]:
# Accumulate image paths and their class labels across all datasets
paths = []
labels = []

## Paths (an empty path makes get_all_labels return an empty dataset)
simulator_yaml = ""
rosbag_yaml = ""
bosch_yaml = ""

## SIMULATOR DATASET
simulator_dataset = get_all_labels(simulator_yaml)
split_path_labels(simulator_dataset, paths, labels)

## ROSBAG DATASET
rosbag_dataset = get_all_labels(rosbag_yaml)
split_path_labels(rosbag_dataset, paths, labels)

## BOSCH DATASET
## (from https://hci.iwr.uni-heidelberg.de/node/6132)
bosch_dataset = get_all_labels(bosch_yaml)

# First letter of a Bosch label -> simplified class (e.g. RedLeft -> Red)
bosch_label_prefixes = {"R": "Red", "G": "Green", "Y": "Yellow"}

# Add data from Bosch dataset
for entry in bosch_dataset:
    boxes = entry['boxes']
    if len(boxes) == 0:
        labels.append('NoLight')
        paths.append(entry['path'])
        continue

    # Keep the image only if every box maps to the same simplified class;
    # any other label, or two boxes that disagree, discards the image.
    agreed_label = None
    for box in boxes:
        simplified = bosch_label_prefixes.get(box['label'][0])
        if simplified is None or agreed_label not in (None, simplified):
            agreed_label = None
            break
        agreed_label = simplified

    if agreed_label is not None:
        labels.append(agreed_label)
        paths.append(entry['path'])

## Create Train, Validation, and Test Sets

In [None]:
# Train, validation, and test sets
# Split ratio is roughly 7:2:1 -- 30% is held out of training, and a third of
# that hold-out becomes the test set.
# A fixed seed keeps the split identical across kernel restarts, so images a
# restored checkpoint was trained on cannot reappear in a later session's
# validation or test set.
SPLIT_SEED = 42
x_train, x_not_train, y_train, y_not_train = train_test_split(
    paths, labels, test_size=0.3, random_state=SPLIT_SEED)
x_validation, x_test, y_validation, y_test = train_test_split(
    x_not_train, y_not_train, test_size=0.33, random_state=SPLIT_SEED)

print("Train size: {}, Validation size: {}, Test size: {}".format(
    len(x_train), len(x_validation), len(x_test)))
print("All labels in training set:", set(y_train))
print("All labels in validation set:", set(y_validation))
print("All labels in test set:", set(y_test))

In [None]:
# Label counts in training set - Needed for oversampling
# label_indices maps each label to the positions in y_train that carry it;
# label_counts is derived from it so the two can never disagree.
label_indices = {}
for i, label in enumerate(y_train):
    label_indices.setdefault(label, []).append(i)
label_counts = {label: len(indices) for label, indices in label_indices.items()}
print(label_counts)
print()
# .get(): a training split without any Yellow sample prints an empty list
# instead of raising KeyError.
print("Indices with Yellow label:", label_indices.get('Yellow', []))

## Oversample Training Set

In [None]:
# Size of the most frequent class -- every class is padded up to this count
max_count = max([0] + list(label_counts.values()))

# Oversample: repeat each class's (shuffled) indices until it has max_count entries
oversampled_train_indices = []
for label, indices in label_indices.items():
    random.shuffle(indices)

    # Whole repetitions of the class, then a partial slice to reach max_count exactly
    full_repeats, remainder = divmod(max_count, len(indices))
    balanced = indices * full_repeats + indices[:remainder]

    oversampled_train_indices.extend(balanced)
    print("Label {} now has {} indices".format(label, len(balanced)))

# Swap in the commented line to train on the original, unbalanced training set
target_indices = oversampled_train_indices
#target_indices = list(range(len(x_train)))

## Create Generator

In [None]:
def loadImage(imgPath, size=(300, 300)):
    """Read an image from disk and prepare it as network input.

    :param imgPath: Path of the image file to read.
    :param size: (width, height) passed to cv2.resize; defaults to 300x300.
    :return: Float array in RGB channel order, scaled as ``pixel / 128 - 1.0``
        (8-bit pixel values land in [-1.0, 1.0)).
    :raises IOError: If OpenCV cannot read the file (cv2.imread signals this by
        returning None rather than raising).
    """
    img = cv2.imread(imgPath)
    if img is None:
        raise IOError("Could not read image: {}".format(imgPath))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, size)  # already an np.ndarray, no extra conversion needed
    return img / 128 - 1.0  # normalize the image

# One Hot Encoder Mapping
# Each class becomes a 4-element vector with a single 1 at the class's
# position in the list below.
label_ohe_map = {
    name: [1 if column == position else 0 for column in range(4)]
    for position, name in enumerate(['NoLight', 'Red', 'Yellow', 'Green'])
}

## Create Generator
NUM_AUGMENTATIONS = 5
def createBatchGenerator(indices, x, y, train=False):
    """Build a generator factory yielding (images, one-hot labels) batches.

    :param indices: Positions into `x`/`y` to draw samples from; repeated
        entries are allowed. Any sequence works -- it is copied per generator
        and the caller's object is never reordered.
    :param x: Sequence of image paths (read with loadImage).
    :param y: Sequence of label names (keys of label_ohe_map), aligned with `x`.
    :param train: If True, every generator makes NUM_AUGMENTATIONS passes over
        the data, one augmentation variant per pass; otherwise it makes a
        single pass of unmodified images.
    :return: Function ``batchGenerator(batch_size)`` returning a generator of
        ``(np.array(images), np.array(labels))`` tuples.
    """
    def addNoise(image):
        # Augment data (add normally-distributed noise)
        # (adapted from Udacity SDC-ND Traffic Sign Classification Project (Term 1))
        return image + np.random.normal(loc=0, scale=0.4, size=image.shape)

    # One transform per pass, in pass order.
    # Flip: adapted from Udacity SDC-ND Behavioral Cloning Project.
    # Rotations: adapted from Udacity SDC-ND Traffic Sign Classification Project
    # (Term 1), https://stackoverflow.com/a/19167666
    transforms = [
        lambda image: image,                                      # regular images
        lambda image: cv2.flip(image, 1),                         # flip along vertical axis
        addNoise,                                                 # additive Gaussian noise
        lambda image: ndimage.rotate(image, 15, reshape=False),   # rotate: +15 degrees
        lambda image: ndimage.rotate(image, -15, reshape=False),  # rotate: -15 degrees
    ]

    def batchGenerator(batch_size):
        # Private copy: shuffling must not reorder the list the caller handed in
        order = list(indices)
        num_passes = NUM_AUGMENTATIONS if train else 1

        for pass_i in range(num_passes):
            # Wrap around so a larger NUM_AUGMENTATIONS can never produce a
            # batch that has labels but no images.
            transform = transforms[pass_i % len(transforms)]
            random.shuffle(order)

            for batch_start in range(0, len(order), batch_size):
                batch_indices = order[batch_start:batch_start + batch_size]
                images = [transform(loadImage(x[index])) for index in batch_indices]
                labels = [label_ohe_map[y[index]] for index in batch_indices]

                yield np.array(images), np.array(labels)

    return batchGenerator

In [None]:
# Training draws from target_indices; validation and test visit every sample once
validation_indices = list(range(len(x_validation)))
test_indices = list(range(len(x_test)))

trainingBatchGenerator = createBatchGenerator(target_indices, x_train, y_train, train=True)
validationBatchGenerator = createBatchGenerator(validation_indices, x_validation, y_validation)
testBatchGenerator = createBatchGenerator(test_indices, x_test, y_test)

## Create Network

In [None]:
# Layers
def addConv(input, filters, kernel, stride, padding, keepprob):
    conv = tf.layers.conv2d(input, filters, kernel, stride, padding=padding, \
                            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01), \
                            activation=tf.nn.relu)
    batch_norm = tf.layers.batch_normalization(conv)
    dropout = tf.layers.dropout(batch_norm, keepprob)
    
    return dropout

# Networks
def createEnsembleCNNUnit(input):
    conv1 = addConv(input, 32, 3, 2, 'valid', keepprob)
    max1 = tf.layers.max_pooling2d(conv1, 2, 2, padding='valid')

    conv2 = addConv(max1, 64, 3, 2, 'valid', keepprob)
    max2 = tf.layers.max_pooling2d(conv2, 2, 2, padding='valid')

    conv3 = addConv(max2, 128, 3, 2, 'valid', keepprob)
    max3 = tf.layers.max_pooling2d(conv3, 2, 2, padding='valid')

    flat = tf.contrib.layers.flatten(max3)
    fc1 = tf.layers.dense(flat, 128, activation=tf.nn.relu)
    fc2 = tf.layers.dense(fc1, 64, activation=tf.nn.relu)
    logits = tf.layers.dense(fc2, 4, activation=tf.nn.relu)
    
    return logits

def createEnsembleCnn(input):
    unit_1 = createEnsembleCNNUnit(input)
    unit_2 = createEnsembleCNNUnit(input)
    unit_3 = createEnsembleCNNUnit(input)
    logits = tf.add(tf.add(unit_1, unit_2), unit_3, name="logits")

    return logits

def createSimpleCnn(input):
    conv1 = tf.layers.conv2d(input, 8, 5, 1, padding='valid', activation=tf.nn.relu)
    max1 = tf.layers.max_pooling2d(conv1, 2, 2, padding='valid')

    conv2 = tf.layers.conv2d(max1, 16, 5, 1, padding='valid', activation=tf.nn.relu)
    max2 = tf.layers.max_pooling2d(conv2, 2, 2, padding='valid')

    conv3 = tf.layers.conv2d(max2, 24, 5, 1, padding='valid', activation=tf.nn.relu)
    max3 = tf.layers.max_pooling2d(conv3, 2, 2, padding='valid')
    
    conv4 = tf.layers.conv2d(max3, 32, 5, 1, padding='valid', activation=tf.nn.relu)
    max4 = tf.layers.max_pooling2d(conv4, 2, 2, padding='valid')
    
    conv5 = tf.layers.conv2d(max4, 40, 5, 1, padding='valid', activation=tf.nn.relu)
    max5 = tf.layers.max_pooling2d(conv5, 2, 2, padding='valid')

    flat = tf.contrib.layers.flatten(max5)
    fc1 = tf.layers.dense(flat, 1024, activation=tf.nn.relu)
    fc1_dropout = tf.layers.dropout(fc1, keepprob)
    
    fc2 = tf.layers.dense(fc1_dropout, 256, activation=tf.nn.relu)
    fc2_dropout = tf.layers.dropout(fc2, keepprob)
    
    fc3 = tf.layers.dense(fc2_dropout, 64, activation=tf.nn.relu)
    fc3_dropout = tf.layers.dropout(fc3, keepprob)

    logits = tf.layers.dense(fc3_dropout, 4)
    
    return logits

def createSimpleCnnSmall(input):
    conv1 = tf.layers.conv2d(input, 6, 5, 1, padding='valid', activation=tf.nn.relu)
    max1 = tf.layers.max_pooling2d(conv1, 2, 2, padding='valid')

    conv2 = tf.layers.conv2d(max1, 8, 5, 1, padding='valid', activation=tf.nn.relu)
    max2 = tf.layers.max_pooling2d(conv2, 2, 2, padding='valid')

    conv3 = tf.layers.conv2d(max2, 10, 5, 1, padding='valid', activation=tf.nn.relu)
    max3 = tf.layers.max_pooling2d(conv3, 2, 2, padding='valid')
    
    conv4 = tf.layers.conv2d(max3, 12, 5, 1, padding='valid', activation=tf.nn.relu)
    max4 = tf.layers.max_pooling2d(conv4, 2, 2, padding='valid')
    
    conv5 = tf.layers.conv2d(max4, 14, 5, 1, padding='valid', activation=tf.nn.relu)
    max5 = tf.layers.max_pooling2d(conv5, 2, 2, padding='valid')

    flat = tf.contrib.layers.flatten(max5)
    fc1 = tf.layers.dense(flat, 48, activation=tf.nn.relu)
    fc1_dropout = tf.layers.dropout(fc1, keepprob)
    
    fc2 = tf.layers.dense(fc1_dropout, 16, activation=tf.nn.relu)
    fc2_dropout = tf.layers.dropout(fc2, keepprob)

    logits = tf.layers.dense(fc2_dropout, 4)
    
    return logits

def createSimpleCnn3(input):
    conv1 = tf.layers.conv2d(input, 6, 3, 1, padding='valid', activation=tf.nn.relu)
    max1 = tf.layers.max_pooling2d(conv1, 2, 2, padding='valid')

    conv2 = tf.layers.conv2d(max1, 8, 3, 1, padding='valid', activation=tf.nn.relu)
    max2 = tf.layers.max_pooling2d(conv2, 2, 2, padding='valid')

    conv3 = tf.layers.conv2d(max2, 10, 3, 1, padding='valid', activation=tf.nn.relu)
    max3 = tf.layers.max_pooling2d(conv3, 2, 2, padding='valid')
    
    conv4 = tf.layers.conv2d(max3, 12, 3, 1, padding='valid', activation=tf.nn.relu)
    max4 = tf.layers.max_pooling2d(conv4, 2, 2, padding='valid')
    
    conv5 = tf.layers.conv2d(max4, 14, 3, 1, padding='valid', activation=tf.nn.relu)
    batch_norm5 = tf.layers.batch_normalization(conv5)
    max5 = tf.layers.max_pooling2d(batch_norm5, 2, 2, padding='valid')

    conv6 = tf.layers.conv2d(max5, 16, 3, 1, padding='valid', activation=tf.nn.relu)
    batch_norm6 = tf.layers.batch_normalization(conv6)
    max6 = tf.layers.max_pooling2d(batch_norm6, 2, 2, padding='valid')
        
    flat = tf.contrib.layers.flatten(max6)
    fc1 = tf.layers.dense(flat, 64, activation=tf.nn.relu)
    fc1_dropout = tf.layers.dropout(fc1, keepprob)
    
    fc2 = tf.layers.dense(fc1_dropout, 16, activation=tf.nn.relu)
    fc2_dropout = tf.layers.dropout(fc2, keepprob)

    logits = tf.layers.dense(fc2_dropout, 4)
    
    return logits

In [None]:
# Clear old graphs
tf.reset_default_graph()

# Input Placeholders
images = tf.placeholder(tf.float32, (None, 300, 300, 3), name='input_images')
labels = tf.placeholder(tf.float32, (None, 4), name='labels')
keepprob = tf.placeholder(tf.float32, name='keep_probability')
learningrate = tf.placeholder(tf.float32, name='learning_rate')

# Init network
#logits = createEnsembleCnn(images)
#logits = createSimpleCnn(images)
#logits = createSimpleCnnSmall(images)
logits = createSimpleCnn3(images)
output = tf.nn.softmax(logits, name="output")

# Create Optimizer
softmax_losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
loss = tf.reduce_mean(softmax_losses)
optimizer = tf.train.AdamOptimizer(learningrate).minimize(loss)
correct_prediction = tf.equal(tf.argmax(labels, 1), tf.argmax(output, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

## Train Network

In [None]:
# Hyperparameters
epochs, batch_size = 50, 32  # training epochs / samples per batch
lr, kp = 5e-4, 0.6           # learning rate / keep probability fed while training

In [None]:
# Train Network
sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())

# One Saver for the whole run: building it inside the epoch loop adds another
# set of save/restore ops to the graph every epoch. max_to_keep=None keeps
# every epoch's checkpoint on disk (the default would retain only the last 5).
os.makedirs('./checkpoints', exist_ok=True)
saver = tf.train.Saver(max_to_keep=None)

print("Training...")
for e in range(epochs):
    t_start = time.time()

    # Train Network
    # Loss and accuracy are batch means, so weight them by the batch size and
    # divide by the number of samples seen to get true per-sample averages.
    total_loss = 0.0
    total_accuracy = 0.0
    train_seen = 0
    for train_images, train_labels in trainingBatchGenerator(batch_size):
        _, loss_val, acc = sess.run([optimizer, loss, accuracy],
                                    {images: train_images, labels: train_labels,
                                     keepprob: kp, learningrate: lr})
        batch_n = train_images.shape[0]
        total_loss += loss_val * batch_n
        total_accuracy += acc * batch_n
        train_seen += batch_n

    # Determine Validation Accuracy
    validation_loss = 0.0
    validation_acc = 0.0
    validation_seen = 0
    for valid_images, valid_labels in validationBatchGenerator(batch_size):
        loss_val, acc = sess.run([loss, accuracy], {images: valid_images, labels: valid_labels, keepprob: 1.0})
        batch_n = valid_images.shape[0]
        validation_loss += loss_val * batch_n
        validation_acc += acc * batch_n
        validation_seen += batch_n
    t_end = time.time()

    # max(..., 1) avoids a ZeroDivisionError when a set is empty
    train_seen = max(train_seen, 1)
    validation_seen = max(validation_seen, 1)
    print("Epoch {}/{}, Time {:.2f} s, TrainAccuracy: {:.2f} %, ValAccuracy: {:.2f} %, TrainLoss: {:.2f}, ValLoss: {:.2f}"
          .format(e+1, epochs, t_end - t_start, total_accuracy * 100 / train_seen,
                  validation_acc * 100 / validation_seen, total_loss / train_seen,
                  validation_loss / validation_seen))

    # Save Network
    saver.save(sess, './checkpoints/tl-classifier-%i.ckpt' % e)

## Test Network

In [None]:
# Reload the weights stored in checkpoint index 1
saver = tf.train.Saver()
saver.restore(sess, './checkpoints/tl-classifier-1.ckpt')

# Print network (one node name per line)
graph_nodes = tf.get_default_graph().as_graph_def().node
for node in graph_nodes:
    print("%s" % node.name)

In [None]:
batch_size = 32

# Test Accuracy
# Loss and accuracy come back as batch means; weight them by the batch size so
# the reported numbers are per-sample averages that do not depend on how many
# batches the test set happens to split into.
total_loss = 0.0
total_acc = 0.0
num_tested = 0
for test_images, test_labels in testBatchGenerator(batch_size):
    loss_val, acc = sess.run([loss, accuracy], {images: test_images, labels: test_labels, keepprob: 1.0})
    batch_n = test_images.shape[0]
    total_loss += loss_val * batch_n
    total_acc += acc * batch_n
    num_tested += batch_n

# max(..., 1) avoids a ZeroDivisionError on an empty test set
num_tested = max(num_tested, 1)
print("Test Loss: {:.5f}, Accuracy: {:.2f}%".format(total_loss / num_tested, total_acc * 100 / num_tested))

## Freeze/Optimize Graph

In [None]:
# Adapted from https://stackoverflow.com/a/45493657

# Make sure the output directory exists before anything is written into it
os.makedirs('model', exist_ok=True)

# Save GraphDef (written in text format, hence input_binary=False below)
tf.train.write_graph(sess.graph_def, '.', 'model/tl-classifier.pb')
# Save checkpoint
saver.save(sess=sess, save_path="model/tl-classifier")

# Freeze graph: bake the checkpoint's variable values into the GraphDef as
# constants, keeping only what the "output" node needs.
from tensorflow.python.tools.freeze_graph import freeze_graph
freeze_graph(input_graph="model/tl-classifier.pb",
             input_saver="",
             input_binary=False,
             input_checkpoint="model/tl-classifier",
             output_node_names="output",
             restore_op_name="save/restore_all",
             filename_tensor_name="save/Const:0",
             output_graph="model/tl-classifier-frozen.pb",
             clear_devices=True,
             initializer_nodes="",
             variable_names_whitelist="")

In [None]:
# Optimize for inference
# (adapted from tensorflow/python/tools/optimize_for_inference.py, https://stackoverflow.com/a/45493657)
from tensorflow.python.framework import dtypes
from tensorflow.core.framework import graph_pb2
from tensorflow.python.platform import gfile
from tensorflow.python.tools import optimize_for_inference_lib

# Read frozen model
input_graph_def = graph_pb2.GraphDef()
with gfile.Open("model/tl-classifier-frozen.pb", "rb") as f:
    input_graph_def.ParseFromString(f.read())

# Optimize graph for inference
output_graph_def = optimize_for_inference_lib.optimize_for_inference(
    input_graph_def, ["input_images"], ["output"], dtypes.float32.as_datatype_enum)

# Export optimized graph to file
# SerializeToString() returns bytes, so open the file in binary mode -- the
# same way it is read back ("rb").
with gfile.GFile("model/tl-classifier-frozen-opt.pb", "wb") as f:
    f.write(output_graph_def.SerializeToString())

# Print optimized graph
for op in output_graph_def.node:
    print("%s" % op.name)

## Visually Confirm Working Network

In [None]:
# Class name for each output index (same order as the one-hot encoding)
labels_reversed = ['NoLight', 'Red', 'Yellow', 'Green']

# Set image size
plt.rcParams['figure.figsize'] = (20.0, 10.0)

# Load frozen graph (adapted from https://stackoverflow.com/a/45493657)
with tf.gfile.GFile('model/tl-classifier-frozen-opt.pb', 'rb') as f:
    graph_def_optimized = tf.GraphDef()
    graph_def_optimized.ParseFromString(f.read())

# Create session (adapted from https://stackoverflow.com/a/45493657)
# Dedicated names (inference_sess, output_tensor) keep the notebook-level
# `sess` and `output` of the training graph intact, so earlier cells can still
# be re-run after this one. No variable initializer is needed: the frozen
# graph contains only constants.
G = tf.Graph()
with tf.Session(graph=G) as inference_sess:
    image_tensor, output_tensor = tf.import_graph_def(
        graph_def_optimized, return_elements=['input_images:0', 'output:0'])

    # Show misclassified images
    print("Number of tests: %i" % len(x_test))
    for i, (img_path, label) in enumerate(zip(x_test, y_test)):
        # Load Image
        test_img = loadImage(img_path)

        # Run Classification
        prob = inference_sess.run(output_tensor, feed_dict={image_tensor: [test_img]})[0]
        idx = np.argmax(prob)

        # Check
        if labels_reversed[idx] != label:
            # Print Information
            print("Mismatch (%i): %s != %s %s" % (i, labels_reversed[idx], label, np.array_str(prob)))

            # Print image (re-read from disk: test_img is resized and normalized)
            plt.imshow(cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB))
            plt.show()