# Convert Bosch Labels to keras_ssd7 repo format
Just create the conversion once rather than doing some other glue logic. This should make the keras_ssd7 model *just work*.

In [1]:
import keras
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
import math
from sklearn import preprocessing
import yaml
import sys

root = '/run/user/1000/gvfs/smb-share:server=j3-ms-7721,share=datasets/TrafficLights_Bosch/train'

test_yaml = root + '/train.yaml'
with open(test_yaml, 'r') as stream:
    try:
        dta = yaml.load(stream)
    except yaml.YAMLError as exc:
        print(exc)

print('Loaded the yaml data')



Using TensorFlow backend.


Loaded the yaml data


In [2]:
from random import shuffle
#compiled_list format is 'image_name,xmin,xmax,ymin,ymax,class_id\n'
compiled_list=[]
label_dict = {}
all_labels = []

def getBoundingBoxes(entry):
    boxDict = entry['boxes']
    rect = []
    for b in boxDict:
        x_min = b['x_min']
        x_max = b['x_max']
        y_min = b['y_min']
        y_max = b['y_max']
        rect.append([(math.floor(x_min),math.ceil(x_max)),(math.ceil(y_min),math.floor(y_max))])
    return rect
        
def getLabels(entry):
    boxDict = entry['boxes']
    labels = []
    for b in boxDict:
        labels.append(b['label'])
    return labels


counter = 1
debug = False
for entry in dta:
    counter += 1
    
    # Pull out the box and the path of the image
    boxDict = entry['boxes']
    path = entry['path']
    
    # If there are no boxes, skip the image
    if len(boxDict) == 0:
        continue

    rect = getBoundingBoxes(entry)
    labels = getLabels(entry)
    
    for bx, lbl in zip(rect,labels):
        # Pull out the indices in the correct order
        indices = ''
        for b in bx:
            indices += str(b[0]) + ',' + str(b[1]) + ','
        # Add the labels to a list
        all_labels.append(lbl)
        # Add the path and indices to the list, not the labels yet
        compiled_list.append('"'+ path+'"' + ',' + indices)
    
# Go and add the integer id for each label now
label_id = set(all_labels)
print(label_id)
label_dt = {}
for lid, idx in zip(label_id,range(1, len(label_id)+1)):
    label_dt[lid] = idx
print('Label to ID mapping', label_dt)

for idx,l in zip(range(len(compiled_list)),all_labels):
    compiled_list[idx] += str(label_dt[l]) + '\n'

# Shuffle the neatly ordered dataset (prior to splitting between test and train)
shuffle(compiled_list)
    
print('Gathered ',len(compiled_list), 'boxes')
print("Finised with this cell")   
with open(root+'/train_labels.csv', 'w') as f:
    for idx in range(len(compiled_list)-3000):        
        f.write(compiled_list[idx])
        
with open(root+'/val_labels.csv', 'w') as f:
    for idx in range(len(compiled_list)-3000, len(compiled_list)):        
        f.write(compiled_list[idx])

{'GreenLeft', 'Red', 'RedLeft', 'Yellow', 'GreenRight', 'RedStraight', 'Green', 'GreenStraightLeft', 'RedRight', 'GreenStraightRight', 'GreenStraight', 'RedStraightLeft', 'off'}
Label to ID mapping {'GreenLeft': 1, 'Red': 2, 'RedLeft': 3, 'Yellow': 4, 'GreenRight': 5, 'RedStraight': 6, 'Green': 7, 'GreenStraightLeft': 8, 'RedRight': 9, 'GreenStraightRight': 10, 'GreenStraight': 11, 'RedStraightLeft': 12, 'off': 13}
Gathered  10756 boxes
Finised with this cell


In [8]:
import itertools
import operator
from random import shuffle
from numpy import array
from numpy import argmax
from keras.utils import to_categorical


from sklearn.preprocessing import LabelBinarizer



#compiled_list format is 'image_name,xmin,xmax,ymin,ymax,class_id\n'
compiled_list=[]
label_dict = {}
all_labels = []

def getBoundingBoxes(entry, width, height):
    boxDict = entry['boxes']
    rect = []
    for b in boxDict:
        x_min = b['x_min']
        x_max = b['x_max']
        y_min = b['y_min']
        y_max = b['y_max']
        rect.append([x_min/width,y_min/height,x_max/width,y_max/height])
    return rect
        
def getLabels(entry):
    boxDict = entry['boxes']
    labels = []
    for b in boxDict:
        labels.append(b['label'])
    return labels


counter = 1
debug = False
allData = {}
boxes = []
for entry in dta:
    counter += 1
    
    # Pull out the box and the path of the image
    boxDict = entry['boxes']
    path = entry['path']
    
    # If there are no boxes, skip the image
    if len(boxDict) == 0:
        continue
    new_labels= []
    new_boxes = []
    rect = getBoundingBoxes(entry, 1280, 720)
    labels = getLabels(entry)

    
    for box, lbl in zip(rect,labels):
        # Add the labels to a list
        all_labels.append(lbl)
        new_labels.append(lbl)
        new_boxes.append(box)

    allData[path]=(new_boxes)

    
print(allData['./rgb/train/2017-02-03-11-44-56_los_altos_mountain_view_traffic_lights_bag/207386.png'])
# Go and add the integer id for each label now
label_id = set(all_labels)
label_dt = {}
for lid, idx in zip(label_id,range(1, len(label_id)+1)):
    label_dt[lid] = idx
print('Label to ID mapping', label_dt)

encoder = LabelBinarizer()
transfomed_label = encoder.fit_transform(all_labels)

combined_data_set = {}
index = 0
for key in allData.keys():
    tmp_lst = []
    for box in allData[key]:
        entry = list(itertools.chain.from_iterable([box,transfomed_label[index]]))
        tmp_lst.append(entry)
        index += 1
    combined_data_set[key] = np.asarray(tmp_lst)

print(len(combined_data_set))

with open(root+'/train_labels.pkl', 'wb') as handle:
    pickle.dump(combined_data_set, handle, protocol=pickle.HIGHEST_PROTOCOL)


[[0.47705078125, 0.48819444444444443, 0.4810546875, 0.49809027777777776], [0.49521484375, 0.4753472222222222, 0.49853515625, 0.4875], [0.507421875, 0.48663194444444446, 0.51171875, 0.5010416666666667]]
Label to ID mapping {'RedLeft': 1, 'RedStraightLeft': 2, 'Yellow': 3, 'GreenRight': 4, 'RedRight': 5, 'GreenLeft': 6, 'GreenStraightLeft': 7, 'GreenStraight': 8, 'RedStraight': 9, 'Green': 10, 'off': 11, 'Red': 12, 'GreenStraightRight': 13}
3153
