In [47]:
import os
import tensorflow as tf
tf.enable_eager_execution()

# Parameters

In [52]:
# Sample counts (per their names); the training cell divides them by
# BATCH_SIZE to obtain steps_per_epoch and validation_steps.
NUM_TRAIN_SAMPLES = 35998
NUM_VAL_SAMPLES = 5368

# Verbosity flag handed to the ModelCheckpoint and EarlyStopping callbacks.
VERBOSE = True
# Early stopping: smallest change in val_loss that counts as an improvement.
MIN_DELTA = 0.005
# Early stopping: epochs without improvement tolerated before stopping.
PATIENCE = 5
# Divisor used to derive the step counts given to model.fit.
BATCH_SIZE = 256
# Number of epochs requested from model.fit.
EPOCHS = 100

# Base name (without the '.h5' extension) of the saved model file.
MODEL_NAME = 'simple_model'

## Reading the dataset

In [3]:
#raw_image_dataset = tf.data.TFRecordDataset('data/train.tfrecord')
#
# Create a dictionary describing the features.
#image_feature_description = {
#    'image': tf.io.FixedLenFeature([], tf.string),
#    'angle': tf.io.FixedLenFeature([], tf.float32),
#    'throttle': tf.io.FixedLenFeature([], tf.float32)
#}
#
#def _parse_image_function(example_proto):
#  # Parse the input tf.Example proto using the dictionary above.
#  return tf.io.parse_single_example(example_proto, image_feature_description)
#
#parsed_image_dataset = raw_image_dataset.map(_parse_image_function)

In [4]:
def decode_jpeg(image_buffer, scope=None):
    """Turn an encoded JPEG string into a 3-D float32 RGB image Tensor.

    Args:
        image_buffer: scalar string Tensor holding the encoded JPEG.
        scope: optional name for the surrounding name scope; when omitted
            the scope is called 'decode_jpeg'.

    Returns:
        A float32 Tensor with 3 colour channels. Height and width come from
        the JPEG itself, so they are not known statically. Pixel values are
        rescaled from the integer range into the 0-1 range by
        ``tf.image.convert_image_dtype``.
    """
    with tf.name_scope(name=scope, default_name='decode_jpeg',
                       values=[image_buffer]):
        # channels=3 requests RGB output from the decoder.
        rgb_uint8 = tf.image.decode_jpeg(image_buffer, channels=3)

        # Converting the dtype through convert_image_dtype (rather than a
        # plain cast) is what rescales the integer pixels to floats in 0-1.
        return tf.image.convert_image_dtype(rgb_uint8, dtype=tf.float32)

def _parse_fn(example_serialized, is_training=False):
    """Parse one serialized ``tf.Example`` into ``(image, (angle, throttle))``.

    Args:
        example_serialized: scalar string Tensor with a serialized example
            carrying the features 'image' (encoded JPEG string), 'angle'
            and 'throttle' (float32 scalars).
        is_training: accepted for interface compatibility; not used by the
            current implementation.

    Returns:
        A tuple ``(image, (angle, throttle))``. ``image`` is the decoded
        float32 picture reshaped to ``(1, 240, 360, 3)``; ``angle`` and
        ``throttle`` are the float32 scalars read from the record.
    """
    schema = {
        'image': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'angle': tf.FixedLenFeature([], dtype=tf.float32, default_value=0.0),
        'throttle': tf.FixedLenFeature([], dtype=tf.float32, default_value=0.0),
    }
    record = tf.parse_single_example(example_serialized, schema)

    # The reshape pins the static shape and prepends an axis of size 1, so
    # each dataset element already looks like a one-image batch. It only
    # succeeds when the decoded JPEG holds exactly 240 * 360 * 3 values.
    pixels = tf.reshape(decode_jpeg(record['image']), (1, 240, 360, 3))
    targets = (record['angle'], record['throttle'])
    return (pixels, targets)

In [5]:
def get_dataset(tfrecords_dir, subset, batch_size, num_parallel_calls=None):
    """Build a shuffled, repeated and batched dataset from TFRecord shards.

    Args:
        tfrecords_dir: directory that contains the shard files.
        subset: shard-name prefix; every file matching ``'<subset>-*'`` is
            read. The value ``'train'`` additionally makes the parser run
            with ``is_training=True``.
        batch_size: number of parsed examples per batch; also used as the
            prefetch buffer size.
        num_parallel_calls: number of examples parsed in parallel. When
            omitted, the machine's CPU count is used (at least 1).

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``_parse_fn`` output. The
        shard list is repeated, so the dataset never runs out.
    """
    # Local import: the notebook has no import cell that provides `partial`,
    # so the original body raised NameError as soon as it was called.
    from functools import partial

    # The original read `config.NUM_DATA_WORKERS`, but no `config` object is
    # defined in this notebook; fall back to the number of CPUs instead.
    if num_parallel_calls is None:
        num_parallel_calls = os.cpu_count() or 1

    files = tf.matching_files(os.path.join(tfrecords_dir, '%s-*' % subset))
    shards = tf.data.Dataset.from_tensor_slices(files)
    # Shuffle buffer equal to the shard count gives a full shuffle of shards.
    shards = shards.shuffle(tf.cast(tf.shape(files)[0], tf.int64))
    shards = shards.repeat()
    # Read records from up to four shard files in an interleaved fashion.
    dataset = shards.interleave(tf.data.TFRecordDataset, cycle_length=4)
    dataset = dataset.shuffle(buffer_size=8192)

    parser = partial(_parse_fn, is_training=(subset == 'train'))
    # NOTE(review): _parse_fn already reshapes each image to
    # (1, 240, 360, 3), so batches produced here carry an extra axis:
    # (batch, 1, 240, 360, 3). Confirm this matches the consuming model.
    dataset = dataset.apply(
        tf.data.experimental.map_and_batch(
            map_func=parser,
            batch_size=batch_size,
            num_parallel_calls=num_parallel_calls))
    dataset = dataset.prefetch(batch_size)
    return dataset

In [6]:
# Training data: read serialized examples from a single TFRecord file and
# decode each one with _parse_fn. No shuffle/batch/repeat is applied here.
raw_image_dataset = tf.data.TFRecordDataset('data/train.tfrecord')
parsed_image_dataset = raw_image_dataset.map(_parse_fn)

In [7]:
# Validation data: read serialized examples from the validation TFRecord file
# and decode them with the same parser as the training data. The parsed set
# must be derived from raw_validation_set (not from the training records).
raw_validation_set = tf.data.TFRecordDataset('data/val.tfrecord')
parsed_validation_set = raw_validation_set.map(_parse_fn)

# Training the model

In [8]:
from tensorflow.python.keras.layers import Input
from tensorflow.python.keras.models import Model, load_model
from tensorflow.python.keras.layers import Convolution2D, Convolution3D
from tensorflow.python.keras.layers import MaxPooling2D, MaxPooling3D
from tensorflow.python.keras.activations import relu
from tensorflow.python.keras.layers import Dropout, Flatten, Dense
from tensorflow.python.keras.layers import Cropping2D, Cropping3D
from tensorflow.python.keras.callbacks import ModelCheckpoint, EarlyStopping

In [9]:
# Relative weights of the two output losses; create_2d_model passes them to
# model.compile as loss_weights for 'angle_out' and 'throttle_out'.
weight_loss_angle = 0.9
weight_loss_throttle = 0.1

In [42]:
def create_2d_model(img_dims, crop_margin_from_top=80):
    """Build, print and compile the two-output CNN for angle and throttle.

    The Keras session is cleared first. The network then crops rows from the
    top of the input image, applies five Conv2D(24, relu) + MaxPooling2D(2x2)
    stages (a 5x5 kernel first, 3x3 kernels afterwards), flattens, passes
    through Dense(10, linear) and Dropout(0.5), and ends in two single-unit
    linear outputs named 'angle_out' and 'throttle_out'. ``model.summary()``
    is printed before the model is compiled.

    Args:
        img_dims: shape of one input image, e.g. ``[240, 360, 3]``.
        crop_margin_from_top: number of pixel rows cropped from the top of
            the image before the convolutions.

    Returns:
        The compiled Keras ``Model``. It is trained with Adam and mean
        squared error on both outputs, weighted by the module-level
        ``weight_loss_angle`` and ``weight_loss_throttle``.
    """
    tf.keras.backend.clear_session()

    img_in = Input(shape=img_dims, name='img_in')

    # Cropping spec is ((top, bottom), (left, right)): only the top is cut.
    features = Cropping2D(((crop_margin_from_top, 0), (0, 0)))(img_in)

    # Convolutional feature extractor: one conv + pool stage per kernel size.
    for kernel_size in ((5, 5), (3, 3), (3, 3), (3, 3), (3, 3)):
        features = Convolution2D(filters=24, kernel_size=kernel_size,
                                 activation='relu')(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)
    #x = Convolution2D(filters=32, kernel_size=(5, 5), strides=(2, 2), activation='relu')(x)
    #x = Convolution2D(filters=64, kernel_size=(5, 5), strides=(2, 2), activation='relu')(x)
    #x = Convolution2D(filters=64, kernel_size=(3, 3), strides=(2, 2), activation='relu')(x)
    #x = Convolution2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation='relu')(x)

    # Decision layers shared by both outputs.
    hidden = Flatten(name='flattened')(features)
    #x = Dense(units=100, activation='linear')(x)
    #x = Dropout(rate=.5)(x)
    hidden = Dense(units=10, activation='linear')(hidden)
    hidden = Dropout(rate=.5)(hidden)

    # Both heads are single linear units, i.e. continuous regression outputs.
    angle_out = Dense(units=1, activation='linear', name='angle_out')(hidden)
    throttle_out = Dense(units=1, activation='linear',
                         name='throttle_out')(hidden)

    model = Model(inputs=[img_in], outputs=[angle_out, throttle_out])
    model.summary()

    # Per-output losses and their weights are keyed by the output layer names.
    losses = {'angle_out': 'mean_squared_error',
              'throttle_out': 'mean_squared_error'}
    weights = {'angle_out': weight_loss_angle,
               'throttle_out': weight_loss_throttle}
    model.compile(optimizer='adam',
                  loss=losses,
                  loss_weights=weights,
                  metrics=['mse', 'mae', 'mape'])
    return model

In [43]:
# Build the compiled model for 240x360 RGB frames; the call also prints the
# layer summary shown below.
model = create_2d_model(img_dims=[240, 360, 3])

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
img_in (InputLayer)             (None, 240, 360, 3)  0                                            
__________________________________________________________________________________________________
cropping2d (Cropping2D)         (None, 160, 360, 3)  0           img_in[0][0]                     
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 156, 356, 24) 1824        cropping2d[0][0]                 
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 78, 178, 24)  0           conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_1 (

In [51]:
# TODO: the output directory should depend on whether this runs locally or
# on Valohai.
#saved_model_path = os.path.join(datadir, 'models', model_file_name)
# VH_OUTPUTS_DIR is read from the environment; without it the model file is
# written relative to './'.
outputs_dir = os.environ.get('VH_OUTPUTS_DIR', './')
output_file = os.path.join(outputs_dir, '%s.h5' % MODEL_NAME)

print('model will be stored to: %s' % output_file)

# Checkpoint callback: with save_best_only the file is rewritten only on
# epochs where val_loss reaches a new minimum.
save_best = ModelCheckpoint(
    filepath=output_file,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=VERBOSE,
)

# Early-stopping callback: end training once val_loss has failed to improve
# by at least MIN_DELTA for PATIENCE epochs.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    min_delta=MIN_DELTA,
    patience=PATIENCE,
    verbose=VERBOSE,
)

model will be stored to: ./simple_model.h5


In [44]:
# Train on the training records and validate on the separate validation set
# (the original validated on the training data). The checkpoint and
# early-stopping callbacks are passed in so they actually take effect.
# batch_size is deliberately not given: Keras does not accept it for
# tf.data.Dataset inputs, because the dataset itself defines the batches.
# NOTE(review): the datasets are not batched with BATCH_SIZE upstream
# (_parse_fn emits one image per element), so the step counts below cover
# fewer samples than one full pass over the data - confirm the intended
# batching.
model.fit(parsed_image_dataset,
          validation_data=parsed_validation_set,
          steps_per_epoch=NUM_TRAIN_SAMPLES // BATCH_SIZE,
          validation_steps=NUM_VAL_SAMPLES // BATCH_SIZE,
          epochs=EPOCHS,
          callbacks=[save_best, early_stop])

 3180/35998 [=>............................] - ETA: 3:08:30 - loss: 0.2270 - angle_out_loss: 0.2508 - throttle_out_loss: 0.0130 - angle_out_mean_squared_error: 0.2508 - angle_out_mean_absolute_error: 0.3994 - angle_out_mean_absolute_percentage_error: 95308808.0000 - throttle_out_mean_squared_error: 0.0130 - throttle_out_mean_absolute_error: 0.0751 - throttle_out_mean_absolute_percentage_error: 36675.6797

KeyboardInterrupt: 