In [1]:
import pathlib
import os
import json
import glob

import numpy as np
import tensorflow as tf

tf.enable_eager_execution()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def load_tub_data_to_records(data_dir):
    # Get a list of directories starting with word tub
    tub_dirs = glob.glob(os.path.join(data_dir, 'tub*'))
    # Sort the directories
    tub_dirs.sort()
    tub_dirs = [tub_dir for tub_dir in tub_dirs]
    print(tub_dirs)
    # Go through the directories 
    records = []
    for tub_dir in tub_dirs:
        json_files = glob.glob(os.path.join(tub_dir, 'record_*.json'))
        if len(json_files) == 0:
            tub_dir = os.path.join(tub_dir, 'tub')
            json_files = glob.glob(os.path.join(tub_dir, 'record_*.json'))
        n = len(json_files)
        i = 0
        cnt = 0
        while cnt < n:
            json_file = os.path.join(tub_dir, 'record_%d.json' % i)
            try:
                data = json.load(open(json_file, 'r'))
                data['img_path'] = os.path.join(os.path.basename(tub_dir), data['cam/image_array'])
                records.append(data)
                cnt += 1
            except:
                pass
            i += 1

    return records

In [8]:
def decode_img(img):
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)
    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
    img = tf.image.convert_image_dtype(img, tf.float32)
    # resize the image to the desired size.
    return tf.image.resize(img, [120, 180])

def process_path(file_path):
    label = get_label(file_path)
    # load the raw data from the file as a string
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    
    return img, label

def load_image(file_path):
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    
    return img

In [13]:
# The following functions can be used to convert a value to a type compatible
# with tf.Example.

def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy() # BytesList won't unpack a string from an EagerTensor.

    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
  """Returns a float_list from a float / double."""
  return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
  """Returns an int64_list from a bool / enum / int / uint."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

In [36]:
def serialize_example(image, angle, throttle):
    """
    Creates a tf.Example message ready to be written to a file.
    """
    # Create a dictionary mapping the feature name to the tf.Example-compatible
    # data type.
    feature = {
      'image': _bytes_feature(image),
      'angle': _float_feature(angle),
      'throttle': _float_feature(throttle),
    }

    # Create a Features message using tf.train.Example.
    example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
    return example_proto.SerializeToString()

In [25]:
train_records = load_tub_data_to_records('data/train/')

['data/train/tub9', 'data/train/tub_2019-05-08_1240', 'data/train/tub_teemu_08052019']


In [27]:
record = train_records[0]
record

{'cam/image_array': '0_cam-image_array_.jpg',
 'timestamp': '2019-05-08 09:21:58.490116',
 'user/throttle': 3.051850947599719e-05,
 'user/angle': 0.0,
 'user/mode': 'user',
 'img_path': 'tub9/0_cam-image_array_.jpg'}

In [37]:
!rm data/train.tfrecord

rm: cannot remove 'data/train.tfrecord': No such file or directory


In [40]:
# Write the `tf.Example` observations to the file.
with tf.io.TFRecordWriter('data/train.tfrecord') as writer:
    for i, record in enumerate(train_records[:1000]):
        # parse fields
        #img = load_image(os.path.join('data/train/', record['img_path']))
        image_string = open(os.path.join('data/train/', record['img_path']), 'rb').read()
        angle = record['user/angle']
        throttle = record['user/throttle']
        example = serialize_example(image_string, angle, throttle)
        writer.write(example)
        if i % 1000 == 0:
            print(i, len(train_records), 100*i/len(train_records))

0 35998 0.0
