# Behavioral cloning

In [None]:
%%writefile model.py

"""This module can be used to build and train a convolutional neural network used to predict
an appropriate steering angle for a self driving car based on images from a center mounted
dash cam.

Example:
    
    1) Build a model based on the VGG16 CNN
    2) Train it for 5 epochs
    3) Use it to make a prediction
    
    recorded_data = ... # array of training data paths
    
    batch_size = 128
    trainGen, trainSize, valGen, valSize = getGenerators(recorded_data, batch_size)
    trainGenerator, validationGenerator = ...
    
    driver = Driver((224, 224, 3))
    driver.build(ModelType.VGG16)
    driver.trainGen(trainGen, trainSize, 5, valGen, valSize)
    
    driver.predict

"""

__author__  =  "Ben Thomas"

import os.path
import numpy as np
from enum import Enum
from keras.models import Sequential
from keras.layers import Input, Dense, Activation, Flatten, Dropout
from keras.layers import Convolution2D, MaxPooling2D
from keras.applications.vgg16 import VGG16
from keras.models import Model, load_model
from keras.optimizers import Adam

class ModelType(Enum):
    """An enum type for specifying a model architecture.

    Members:
        CONV1: Selects the basic three-block convolutional network.
        VGG16: Selects the network built on top of the pre-trained VGG16 layers.
    """

    # Plain integer values. A trailing comma after the literal would silently
    # turn the member value into a one-element tuple.
    CONV1 = 1
    VGG16 = 2

class Driver:
    """Wraps a Keras model that predicts a steering angle from a camera image.

    The class can build one of the supported architectures (or load a saved model),
    train it with the Adam optimizer and a mean-squared-error loss, run predictions
    and save the model to disk.

    Attributes:
        input_shape: The image shape handed to the constructor.
        model: The Keras model, or None until build() or load() has been called.
    """

    def __init__(self, input_shape):
        """Constructs an instance of the Driver class.

        Args:
            input_shape (int, int, int): The shape of the images that will be used with the model.
        """

        self.input_shape = input_shape
        # Defined up front so that __str__ can report "not built" instead of
        # failing with an AttributeError before build()/load() is called.
        self.model = None

    def build(self, model_type):
        """Builds a convolutional neural network and stores it in self.model.

        Args:
            model_type (ModelType): The type of model to build.

        Raises:
            ValueError: If model_type is not one of the supported ModelType members.
        """

        if model_type == ModelType.CONV1:
            self.model = self.basicModel()
        elif model_type == ModelType.VGG16:
            self.model = self.vgg16()
        else:
            # Fail loudly instead of leaving the model unset.
            raise ValueError("Unsupported model type: {!r}".format(model_type))

    def load(self, model_path):
        """Loads the model at the provided path and stores it in self.model.

        Args:
            model_path: A path to a valid Keras model (.h5)
        """

        self.model = load_model(model_path)

    def __str__(self):
        """Provides a summary of the model.

        The Keras summary is printed as a side effect of this call; the returned
        string itself is empty. If no model has been built or loaded, the string
        "Model is not built." is returned and nothing is printed.
        """

        if self.model is None:
            return "Model is not built."

        self.model.summary()
        return ""

    def basicModel(self):
        """A 'basic' convolutional neural network. This network contains 3 convolutional blocks
        and 2 fully connected layers followed by a dropout layer before the output neuron.

        Returns:
            keras.models.Model: The constructed model. The model returned from this
            function has not yet been compiled.
        """

        # this model is pretty basic with a single input, single output and no branching.
        # Because of this, the keras Sequential model works fine
        model = Sequential()

        # convolution 1, max-pooling, relu for the nonlinearity
        model.add(Convolution2D(50, 15, 15, border_mode='valid', input_shape=self.input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Activation('relu'))

        # convolution 2, max-pooling, relu for the nonlinearity
        model.add(Convolution2D(30, 10, 10, border_mode='valid'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Activation('relu'))

        # convolution 3, max-pooling, relu for the nonlinearity
        model.add(Convolution2D(10, 7, 7, border_mode='valid'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Activation('relu'))

        # flatten the filters before transitioning to the fully connected section
        model.add(Flatten())

        # fully connected 1 with 200 neurons, relu for the nonlinearity
        model.add(Dense(200))
        model.add(Activation('relu'))

        # fully connected 2 with 75 neurons, relu for the nonlinearity
        model.add(Dense(75))
        model.add(Activation('relu'))

        # dropout to help prevent over-fitting to the training data
        model.add(Dropout(0.25))

        # as currently stated, this is a regression problem. Because of this, the output layer is a
        # single neuron with a continuous value and so softmax activations make no sense. Linear
        # makes the most sense in this case.
        model.add(Dense(1))
        model.add(Activation('linear'))

        return model

    def vgg16(self):
        """A more complex convolutional neural network. This network is based on the VGG16 CNN provided
        by Keras (https://keras.io/applications/). The pre-trained VGG16 model is followed up with a custom
        top section to perform the regression task. The pre-trained VGG16 layers are frozen so training this
        model will only affect the top section.

        Returns:
            keras.models.Model: The constructed model. The model returned from this
            function has not yet been compiled.
        """

        # The model architecture here is too complicated to use the Sequential model provided by Keras
        # so the functional API is used instead (https://keras.io/getting-started/functional-api-guide/).
        # This makes it easy to add a regression section on top of the pre-trained VGG16 model.
        input_layer = Input(shape=self.input_shape)

        # Create the VGG16 model. We don't include the top (fully connected layers) because that would
        # lock us in to using images with a shape of 224x224x3 as was done in the original VGG16 training.
        # We will provide our own fully connected section. (https://keras.io/applications/#vgg16)
        vgg16_trained = VGG16(weights='imagenet', include_top=False, input_tensor=input_layer)

        # we want to freeze all of the layers of VGG16 to take advantage of the pre-trained weights.
        # Then we can train our custom top section to perform the regression task.
        for layer in vgg16_trained.layers:
            layer.trainable = False

        # build the model using Keras functional API. The output of the VGG16 convolutional sections is
        # flattened before transitioning to the fully connected layers. Relu activations introduce the
        # nonlinearity and dropout helps prevent this very large model from over-fitting our relatively
        # small dataset.
        # NOTE(review): the first relu is wrapped by a dropout layer on both sides; the layer order is
        # kept exactly as it was so the architecture is unchanged - confirm this is intended.
        model = vgg16_trained.output
        model = Flatten()(model)
        model = Dense(1500)(model)
        model = Dropout(0.5)(model)
        model = Activation('relu')(model)
        model = Dropout(0.5)(model)
        model = Dense(750)(model)
        model = Activation('relu')(model)
        model = Dense(1)(model)
        model = Activation('linear')(model)
        return Model(input=input_layer, output=model)

    def train(self, X, y, batch_size=128, nb_epoch=5, lr=0.001):
        """Train the model with provided samples and labels. The training is performed using the Adam
        optimizer and the mean-squared-error loss function.

        Args:
            X: The training samples
            y: The training labels
            batch_size (optional): the size of training batches. Defaults to 128
            nb_epoch (optional): the number of epochs to train for. Defaults to 5
            lr (optional): The learning rate used in the Adam optimizer. Defaults to 0.001

        Returns:
            The object returned by the Keras fit call.
        """

        optimizer = Adam(lr=lr)
        self.model.compile(optimizer, loss='mse')
        return self.model.fit(X, y, batch_size, nb_epoch)

    def trainGen(self, train_generator, sample_per_epoch, nb_epoch, validation_generator, validation_size, lr=0.001):
        """Train the model with provided data generators. The training is performed using the Adam
        optimizer and the mean-squared-error loss function.

        Args:
            train_generator: A generator that yields a batch of training images and labels
            sample_per_epoch: The number of samples/labels in each epoch
            nb_epoch: The number of epochs to train for.
            validation_generator: A generator that yields a batch of validation images and labels
            validation_size: The number of samples used in the validation step.
            lr (optional): The learning rate used in the Adam optimizer. Defaults to 0.001

        Returns:
            The object returned by the Keras fit_generator call.
        """

        optimizer = Adam(lr=lr)
        self.model.compile(optimizer, loss='mse')
        return self.model.fit_generator(train_generator,
                                        sample_per_epoch,
                                        nb_epoch,
                                        validation_data=validation_generator,
                                        nb_val_samples=validation_size)

    def predict(self, image):
        """Uses the already trained model to predict the appropriate steering angle for a given input image.

        Args:
            image: The image that the prediction is based on. It is passed unchanged to the model's
            predict call with a batch size of 1. The dimensions of the image must match the
            dimensions that the network was trained on.
            NOTE(review): presumably the array must carry a leading batch dimension of 1 so that
            the result converts to a single float - confirm against the caller.

        Returns (float) The predicted steering angle.
        """
        return float(self.model.predict(image, batch_size=1))

    def save(self, file):
        """Saves the trained model to disk.

        Args:
            file (string): The name of the saved file. An extension of '.h5' will be appended to the
            file name if it is not already present.
        """

        # os.path.splitext keeps the leading dot in the extension, so the comparison
        # has to be against '.h5'; comparing against 'h5' would always append.
        if os.path.splitext(file)[1] != '.h5':
            file = file + '.h5'

        self.model.save(file)

In [2]:
%%writefile process.py

import cv2
import numpy as np

def read_image(image_path):
    """Reads an image from disk into a numpy array.

    Args:
        image_path (string): The path of the image to be read.

    Returns: (numpy.Array): A numpy array containing the image data, exactly as
    produced by cv2.imread (OpenCV loads colour images in BGR channel order).

    Raises:
        IOError: If the image could not be read.
    """

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising, which would only surface later as an obscure error.
        raise IOError("Unable to read image: {}".format(image_path))
    return image

def pre_process(image):
    """Preprocess an image for use in the CNN.

    The top of the image is cropped away so that the bottom 60% of the rows remain,
    the result is resized to 80x80, converted to a single grayscale channel and
    rescaled by dividing by 255 and subtracting 0.5.

    Args:
        image (numpy.array): The image to process. It is indexed with three axes,
        rows first.

    Returns: (numpy.Array): The processed image as float32 with shape (80, 80, 1).
    """

    # discard the top of the image. The crop start is computed explicitly so the
    # slice runs to the very last row (a '-height:-1' slice would also drop the bottom row).
    rows = image.shape[0]
    height = int(0.6 * rows)
    image = image[rows - height:, :, :]

    # resize to 80x80
    image = cv2.resize(image, (80, 80))

    # convert to gray scale floats with an explicit single channel axis
    # NOTE(review): the conversion assumes RGB channel order, while cv2.imread yields BGR.
    # Callers appear to pass both orders, so the conversion is left unchanged - confirm
    # which order the training and driving code are expected to provide.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    gray = gray.reshape(gray.shape[0], gray.shape[1], 1).astype(np.float32)

    # scale the image
    return gray / 255.0 - 0.5

Overwriting process.py


In [3]:
%%writefile data.py

from process import read_image, pre_process
from keras.models import load_model
import numpy as np
import os.path
import cv2

class TrainingData:
    """A class to represent and manage the training data for the CNN.

    Args:
        training_sets: A list of paths to training sets. Each path must point to a directory
        that contains a driving_log.csv file as well as an IMG directory of training images.
        batch_size (optional): The batch size that will be yielded by the generators. Defaults to 128
        validation_split (optional): The portion of data to split out for validation. Defaults to 0.2

    Attributes:
        training_generator: A generator that yields a collection of training images and a collection
        of training labels.
        validation_generator: A generator that yields a collection of validation images and a
        collection of validation labels.
        training_size: The number of log lines assigned to the training split.
        validation_size: The number of log lines assigned to the validation split.
        sample_maps: The raw lines of every driving_log.csv that was loaded.
        training_sets: The list of training set paths passed to the constructor.
        batch_size: The batch size passed to the constructor.
        validation_split: The validation portion passed to the constructor.
    """

    def __init__(self, training_sets, batch_size=128, validation_split=0.2):
        self.training_sets = training_sets
        self.batch_size = batch_size
        self.validation_split = validation_split
        self.training_generator = None
        self.validation_generator = None
        self.training_size = None
        self.validation_size = None
        self.load_data()

    def load_data(self):
        """Loads the provided training sets by creating generators for the training and validation sets.

        Raises:
            ValueError: If a training set directory lacks the IMG directory or the driving_log.csv file.
        """

        self.sample_maps = []

        # merge all of the sample maps
        for directory in self.training_sets:
            mapFile = os.path.join(directory, 'driving_log.csv')
            images = os.path.join(directory, 'IMG')

            # validate the input
            if not os.path.isdir(images) or not os.path.isfile(mapFile):
                raise ValueError("parameter must be a valid directory containing images and csv map.")

            # read the contents of the map file
            with open(mapFile) as f:
                self.sample_maps.extend(f.readlines())

        # shuffle the line indices once, then carve off the leading portion for validation
        all_samples = np.arange(0, len(self.sample_maps))
        np.random.shuffle(all_samples)
        validation_split_index = int(len(all_samples) * self.validation_split)
        validation_samples = all_samples[:validation_split_index]
        train_samples = all_samples[validation_split_index:]

        self.training_generator = self.generator(train_samples)
        self.validation_generator = self.generator(validation_samples)
        # NOTE(review): the sizes count every log line, including lines the generator
        # later skips because of the speed filter.
        self.validation_size = len(validation_samples)
        self.training_size = len(train_samples)

    def generator(self, samples):
        """A generator used to feed data into the model fit routine.

        The samples are reshuffled (in place) at the start of every pass. Only samples
        recorded at a speed above 1.0 are used; the centre camera image is read and
        pre-processed and paired with the recorded steering angle. A batch is yielded
        whenever batch_size images have been collected, and any remaining partial batch
        is yielded at the end of each pass.

        Args:
            samples: An array of indices into sample_maps defining the images and labels to use.

        Yields: A tuple containing a collection of images and a collection of associated labels.

        Raises:
            ValueError: If a complete pass over the samples produces no batch at all, which
            would otherwise make the generator spin forever.
        """

        last_position = len(samples) - 1
        while True:
            batch_X = []
            batch_y = []
            produced_batch = False

            # shuffle up the images
            np.random.shuffle(samples)

            # enumerate gives a plain integer position; np.ndenumerate would give an index
            # tuple that never compares equal to an integer.
            for position, sample_index in enumerate(samples):
                fields = self.sample_maps[sample_index].split(',')
                center, _left, _right, steering_angle, _throttle, _brake, speed = fields

                if float(speed) > 1.0:
                    batch_X.append(pre_process(read_image(center)))
                    batch_y.append(float(steering_angle))

                # flush on a full batch, or on the last sample of the pass even when that
                # sample itself was filtered out; never yield an empty batch
                batch_is_full = len(batch_X) == self.batch_size
                if batch_X and (batch_is_full or position == last_position):
                    yield (np.array(batch_X), np.array(batch_y))
                    produced_batch = True
                    batch_X = []
                    batch_y = []

            if not produced_batch:
                raise ValueError("no samples with a speed above 1.0 are available.")

Overwriting data.py


In [None]:
%%writefile train.py

from model import Driver, ModelType
from data import TrainingData

"""The training of the CNN is performed in two steps. The first step runs with a limited training
set for 5 epochs with a learning rate of 0.001. This has been found to establish a solid foundation
that allows the car to make it all the way around the track, albeit in a rather sloppy way. The second
step trains with a lower learning rate and much more data for an additional 5 epochs. This fills in 
the gaps in the CNN and smooths out the driving considerably.
"""

# Create the network: 80x80 single-channel input with the basic convolutional architecture.
driver = Driver((80, 80, 1))
driver.build(ModelType.CONV1)

# Recordings used for the initial training stage.
initial_data = [
    './data/trk1_normal_1',
    './data/trk1_normal_2',
    './data/trk1_normal_3',
    './data/trk1_corner_infill',
    './data/udacity_data',
]

# Recordings used for the fine-tuning stage.
fine_tune_data = [
    './data/trk1_normal_1',
    './data/trk1_normal_2',
    './data/trk1_normal_3',
    './data/trk1_normal_4',
    './data/trk1_swerve_fill',
    './data/trk1_corner_infill',
    './data/trk1_corner_infill_2',
    './data/trk1_bridge_infill',
    './data/trk1_corners',
    './data/trk2_normal_1',
    './data2/trk1_corrections_1',
    './data2/trk1_corrections_2',
    './data2/trk1_small_swerve',
    './data2/trk1_small_swerve_2',
    './data2/trk1_small_swerve_3',
    './data2/trk1_normal_1',
    './data/udacity_data',
]

# Run both stages back to back on the same driver:
#   1) initial training with a learning rate of 0.001
#   2) fine tuning with a learning rate of 0.0001
# Each stage trains for 5 epochs on freshly loaded data.
for stage_sets, stage_lr in ((initial_data, 0.001), (fine_tune_data, 0.0001)):
    data = TrainingData(stage_sets)
    driver.trainGen(data.training_generator,
                    data.training_size,
                    5,
                    data.validation_generator,
                    data.validation_size,
                    lr=stage_lr)

# Persist the trained model.
driver.save('model')

In [None]:
from model import Driver
from keras.utils.visualize_util import plot

# Inspect the saved model: reload it from disk, print its layer summary
# and render the architecture to an image file.

saved_model_path = './model.h5'
diagram_path = 'basic_model.png'

driver = Driver((80, 80, 1))
driver.load(saved_model_path)
print(driver)

plot(driver.model, to_file=diagram_path)

In [8]:
from model import Driver, ModelType

# Build the VGG16-based network for 80x80 three-channel input and print its layer summary.

vgg_input_shape = (80, 80, 3)
driver = Driver(vgg_input_shape)
driver.build(ModelType.VGG16)
print(driver)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_3 (InputLayer)             (None, 80, 80, 3)     0                                            
____________________________________________________________________________________________________
block1_conv1 (Convolution2D)     (None, 80, 80, 64)    1792        input_3[0][0]                    
____________________________________________________________________________________________________
block1_conv2 (Convolution2D)     (None, 80, 80, 64)    36928       block1_conv1[0][0]               
____________________________________________________________________________________________________
block1_pool (MaxPooling2D)       (None, 40, 40, 64)    0           block1_conv2[0][0]               
___________________________________________________________________________________________

In [None]:
# create some images for the report

from matplotlib import pyplot as plt
import numpy as np
from process import read_image, pre_process
import matplotlib.gridspec as gridspec
import cv2

# Recording that supplies the sample shown in the figure.
data_dir = './data/trk1_normal_1'
mapFile = data_dir + '/driving_log.csv'
images = data_dir + '/IMG'

# Every line of the map file describes one recorded sample.
with open(mapFile) as f:
    data_points = f.readlines()

# One row with two panels: the raw frame and its preprocessed counterpart.
fig = plt.figure(figsize=(10, 3))
fig.suptitle('The preprocessing pipline', size=15)
gs1 = gridspec.GridSpec(1, 2)

# Fixed seed so that the same sample is picked on every run.
np.random.seed(1050)
index = int(np.random.random()*len(data_points))
fields = data_points[index].split(',')
center, left, right, steering_angle, throttle, brake, speed = fields

# Convert the frame from BGR to RGB before displaying and preprocessing it.
img = cv2.cvtColor(read_image(center), cv2.COLOR_BGR2RGB)

panels = (
    (img, 'Original image', {}),
    (np.squeeze(pre_process(img)), 'Preprocessed image', {'cmap': 'gray'}),
)
for slot, (picture, caption, imshow_options) in enumerate(panels):
    axes = plt.subplot(gs1[slot])
    # hide the tick labels, only the picture and its caption matter
    axes.set_xticklabels([])
    axes.set_yticklabels([])
    plt.imshow(picture, **imshow_options)
    plt.xlabel(caption)

plt.show()
fig.savefig('./preprocess.png', bbox_inches='tight')
            