In [11]:
import io
import os
import tqdm
import random
import hashlib
import PIL.Image
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from object_detection.utils import dataset_util

In [12]:
data_dir = '/mnt/data/nexar'
image_dir = os.path.join(data_dir, 'images')
annotations = pd.read_csv(os.path.join(data_dir, 'train.csv'))
boxes = pd.read_csv(os.path.join(data_dir, 'train_boxes.csv'))

In [13]:
annotations[:5]

Unnamed: 0,image_filename,lighting,city
0,frame_20f328fa-2459-46d0-97a5-5ae2d6103cb0_000...,Twilight,NYC
1,frame_927bde20-f97f-48c2-af30-f9127b6b32ce_000...,Day,NYC
2,frame_67012509-f3bd-4175-a9d2-565a7b6bb3c7_000...,Day,NYC
3,frame_bd043377-6fb8-407a-95e5-7deb1fbab13a_000...,Day,NYC
4,frame_4da1583b-58d0-4893-8149-54541191031d_000...,Day,NYC


In [14]:
boxes[:5]

Unnamed: 0,image_filename,x0,y0,x1,y1,label,confidence
0,frame_817c47b8-22c4-438a-8dc6-0e3f67f299ee_000...,601.6,270.355731,726.755556,421.185771,van,1.0
1,frame_817c47b8-22c4-438a-8dc6-0e3f67f299ee_000...,497.777778,308.774704,534.755556,338.656126,car,1.0
2,frame_817c47b8-22c4-438a-8dc6-0e3f67f299ee_000...,449.422222,310.197628,509.155556,358.577075,car,1.0
3,frame_a9110bf2-5252-4ec6-83c6-33b65d0fc04d_000...,711.111111,304.505929,786.488889,368.537549,car,1.0
4,frame_a9110bf2-5252-4ec6-83c6-33b65d0fc04d_000...,584.533333,307.351779,647.111111,358.577075,car,1.0


In [15]:
label_map_dict = {}
for i, l in enumerate(set(boxes['label'])):
    label_map_dict[l] = i + 1
print(label_map_dict)

{'pickup_truck': 1, 'car': 2, 'van': 3, 'bus': 4, 'truck': 5}


In [16]:
examples_list = list(annotations['image_filename'])

# Test images are not included in the downloaded data set, so we shall perform
# our own split.
random.seed(42)
random.shuffle(examples_list)
num_examples = len(examples_list)
num_train = int(0.7 * num_examples)
train_examples = examples_list[:num_train]
val_examples = examples_list[num_train:]
print('%d training and %d validation examples.' % (len(train_examples), len(val_examples)))

35000 training and 15000 validation examples.


In [17]:
def create_tf_example(df, label_map_dict, image_dir, image_filename):
    img_path = os.path.join(image_dir, image_filename)
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()
    width = image.width
    height = image.height

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    for idx, obj in df.iterrows():
        xmin.append(obj['x0'] / width)
        ymin.append(obj['y0'] / height)
        xmax.append(obj['x1'] / width)
        ymax.append(obj['y1'] / height)
        class_name = obj['label']
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(image_filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(image_filename.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return example    

def create_tf_record(output_filename,
                     label_map_dict,
                     dataframe,
                     image_dir,
                     examples):
    """Creates a TFRecord file from examples.
    """
    writer = tf.python_io.TFRecordWriter(output_filename)
    for idx, example in tqdm(enumerate(examples)):
        df = dataframe[dataframe['image_filename'] == example]
        tf_example = create_tf_example(df, label_map_dict, image_dir, example)
        writer.write(tf_example.SerializeToString())
    writer.close()

In [18]:
train_output_path = os.path.join(data_dir, 'nexar_train.record')
val_output_path = os.path.join(data_dir, 'nexar_val.record')
create_tf_record(train_output_path, label_map_dict, boxes,
               image_dir, train_examples)
create_tf_record(val_output_path, label_map_dict, boxes,
               image_dir, val_examples)

35000it [14:56, 39.04it/s]
15000it [07:03, 35.20it/s]
