# Bosch Dataset - Data Formatting

In [1]:
import tensorflow as tf
import yaml
import os
from object_detection.utils import dataset_util

## Categories in dataset

In [2]:
# Lookup from annotation label text to the integer class id written into the
# TFRecord. Ids are 1-based and assigned in the order the names appear below.
LABEL_DICT = {
    label_name: class_id
    for class_id, label_name in enumerate(
        (
            "Green",               # 1
            "Red",                 # 2
            "GreenLeft",           # 3
            "GreenRight",          # 4
            "RedLeft",             # 5
            "RedRight",            # 6
            "Yellow",              # 7
            "off",                 # 8
            "RedStraight",         # 9
            "GreenStraight",       # 10
            "GreenStraightLeft",   # 11
            "GreenStraightRight",  # 12
            "RedStraightLeft",     # 13
            "RedStraightRight",    # 14
        ),
        start=1,
    )
}

## Output File as TFRecord

In [3]:
# Destination of the generated TFRecord file. The path is relative, so the
# file is created in the notebook's current working directory.
OUTPUT_PATH = 'bosch_train.record'

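## Formatting Data
The conversion below follows the TensorFlow Object Detection API guide on [using your own dataset](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), which describes the TFRecord features each example must contain.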

In [4]:
def create_tf_example(example, height=720, width=1280, skip_occluded=False):
    """Build a `tf.train.Example` for one annotated image.

    Args:
        example: dict with a 'path' entry (the image file, read as raw bytes)
            and a 'boxes' entry (iterable of dicts holding 'x_min', 'x_max',
            'y_min', 'y_max' and a 'label' that must be a key of LABEL_DICT).
            Coordinates are presumably in pixels - they are divided by
            `width` / `height` to normalize them.
        height: image height used to normalize y coordinates and stored as
            'image/height'. Defaults to 720.
        width: image width used to normalize x coordinates and stored as
            'image/width'. Defaults to 1280.
        skip_occluded: when True, boxes whose 'occluded' entry is truthy are
            left out. Defaults to False, which keeps every box.

    Returns:
        A `tf.train.Example` with the encoded image, its dimensions, and one
        entry per box in each of the 'image/object/...' features.

    Raises:
        KeyError: if `example` or a box lacks a required key, or a box label
            is not present in LABEL_DICT.
    """
    # NOTE: the dimensions come from the arguments, not from the image file;
    # pass explicit values for images that are not height x width.
    image_path = example['path']  # Filename of the image.
    filename = image_path.encode()

    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()

    # NOTE(review): the format is fixed to PNG - assumes every input image is
    # a PNG file; confirm before reusing with other datasets.
    image_format = 'png'.encode()

    xmins = []  # Normalized left x coordinate of each box
    xmaxs = []  # Normalized right x coordinate of each box
    ymins = []  # Normalized top y coordinate of each box
    ymaxs = []  # Normalized bottom y coordinate of each box
    classes_text = []  # Class name of each box, as bytes
    classes = []  # Integer class id of each box

    for box in example['boxes']:
        if skip_occluded and box['occluded']:
            continue

        # Convert to float before dividing so the result never depends on
        # integer-division semantics.
        xmins.append(float(box['x_min']) / width)
        xmaxs.append(float(box['x_max']) / width)
        ymins.append(float(box['y_min']) / height)
        ymaxs.append(float(box['y_max']) / height)

        label = box['label']
        classes_text.append(label.encode())
        classes.append(int(LABEL_DICT[label]))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

    return tf_example


In [5]:
# BOSCH
# NOTE(review): machine-specific absolute path - point this at your local copy
# of the annotation file before running.
INPUT_YAML = "/home/camilog/Downloads/Bosch/train.yaml"

# safe_load only builds plain Python objects and, unlike a bare yaml.load(),
# does not need an explicit Loader argument on current PyYAML releases.
# The with-block makes sure the file handle is closed.
with open(INPUT_YAML, 'rb') as yaml_file:
    examples = yaml.safe_load(yaml_file)

#examples = examples[:10]  # for testing
len_examples = len(examples)
print("Loaded ", len(examples), "examples")

# Resolve every image path against the directory that holds the YAML file.
yaml_dir = os.path.dirname(INPUT_YAML)
for example in examples:
    example['path'] = os.path.abspath(os.path.join(yaml_dir, example['path']))

# Open the writer only once the annotations are loaded, and let the context
# manager close it even if an example fails to convert.
with tf.python_io.TFRecordWriter(OUTPUT_PATH) as writer:
    for counter, example in enumerate(examples):
        tf_example = create_tf_example(example)
        writer.write(tf_example.SerializeToString())

        # Report progress every 100 examples.
        if counter % 100 == 0:
            print("Percent done", (counter/len_examples)*100)

print("Done!")

Loaded  5093 examples
Percent done 0.0
Percent done 0.19634792852935404
Percent done 0.3926958570587081
Percent done 0.589043785588062
Percent done 0.7853917141174162
Percent done 0.98173964264677
Percent done 1.178087571176124
Percent done 1.3744354997054782
Percent done 1.5707834282348323
Percent done 1.7671313567641862
Percent done 1.96347928529354
Percent done 2.159827213822894
Percent done 2.356175142352248
Percent done 2.5525230708816022
Percent done 2.7488709994109564
Percent done 2.94521892794031
Percent done 3.1415668564696646
Percent done 3.337914784999018
Percent done 3.5342627135283724
Percent done 3.7306106420577265
Percent done 3.92695857058708
Percent done 4.123306499116435
Percent done 4.319654427645788
Percent done 4.516002356175142
Percent done 4.712350284704496
Percent done 4.90869821323385
Percent done 5.1050461417632045
Percent done 5.301394070292558
Percent done 5.497741998821913
Percent done 5.694089927351267
Percent done 5.89043785588062
Percent done 6.086785784