# Behavioral Cloning Project with Darknet53

## Introduction

### Import packages

In [1]:
#render matplotlib figures inline in the notebook output
%matplotlib inline
#(re)load the autoreload extension; mode 2 reloads all modules before each cell is executed,
#so edits to imported .py files are picked up without restarting the kernel
%reload_ext autoreload
%autoreload 2

In [2]:
import os
import cv2
import copy
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from skimage.transform import rotate
from skimage.filters import gaussian
from keras.models import Model, load_model
from keras.layers import Input, Conv2D, Activation, BatchNormalization, Dense, \
LeakyReLU, GlobalAveragePooling2D, Dropout, Flatten, Lambda, add
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.initializers import he_normal
from keras.regularizers import l2
from typing import Collection

Using TensorFlow backend.


In [3]:
#seed NumPy's global RNG as before
np.random.seed(42)
#the augmentations in DataLoader (shift, blur, brightness, augmentation probability) draw from
#the stdlib `random` module, which np.random.seed does not affect - seed it too for reproducibility
random.seed(42)

## Data

### Data preparation

In this section, I will prepare a `DataLoader` class that loads and augments the data and splits it into training and validation sets.

In [4]:
class DataLoader():
    """Load the driving log and build augmented training/validation sets.

    Attributes:
        path: directory that holds the CSV logs and the images they reference.
        df: contents of ``driving_log.csv``.
        new_df: contents of ``extended_log.csv`` (one row per camera image).
        h, w, ch: image height, width and channel count taken from ``sz``.
    """

    #steering offset added for the left camera and subtracted for the right camera
    STEERING_CORRECTION = 0.2
    #an extra randomly transformed sample is produced when random.random() exceeds this value
    AUGMENT_THRESHOLD = 0.4
    #rows removed from the top (sky) and bottom (car hood) by crop_sky_car
    #NOTE(review): crop_sky_car was called but never defined in this class; these margins are a
    #reconstruction and must be confirmed (and mirrored by whatever preprocesses frames at inference)
    CROP_TOP = 60
    CROP_BOTTOM = 25

    def __init__(self, path, sz):
        """Read the logs found under ``path``.

        Args:
            path: directory containing ``driving_log.csv`` and the image files.
            sz: ``(height, width, channels)`` of the images fed to the model.
        """
        self.path = path
        self.h, self.w, self.ch = sz
        self.df = self.load_data("driving_log.csv")
        #extended_log.csv is produced by match_X_y; build it on first use so that a fresh
        #checkout (or Restart & Run All) does not fail before match_X_y can even be called
        if not os.path.exists(os.path.join(self.path, "extended_log.csv")):
            self.match_X_y()
        self.new_df = self.load_data("extended_log.csv")

    def load_data(self, file_name):
        """Return the CSV ``file_name`` (relative to ``self.path``) as a DataFrame."""
        path_to_csv = os.path.join(self.path, file_name)
        df = pd.read_csv(path_to_csv)
        return df

    def load_image(self, file_name):
        """Read the image ``file_name`` (relative to ``self.path``) with OpenCV.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file (cv2.imread returns
                None instead of raising, which would otherwise fail much later).
        """
        path_to_image = os.path.join(self.path, file_name)
        image = cv2.imread(path_to_image)
        if image is None:
            raise FileNotFoundError("Could not read image: {}".format(path_to_image))
        return image

    def match_X_y(self):
        """Write ``extended_log.csv`` with one (image, steering_angle) row per camera.

        Rows are ordered: all centre images, then all left images, then all right
        images. Left images get ``+STEERING_CORRECTION`` and right images
        ``-STEERING_CORRECTION`` applied to the recorded steering value.
        """
        cameras = (('center', 0.0),
                   ('left', self.STEERING_CORRECTION),
                   ('right', -self.STEERING_CORRECTION))
        #build each camera's rows column-wise instead of growing a DataFrame cell by cell
        frames = [pd.DataFrame({'image': self.df[col].values,
                                'steering_angle': self.df['steering'].values + offset})
                  for col, offset in cameras]
        new_df = pd.concat(frames, ignore_index=True)[['image', 'steering_angle']]
        new_df.to_csv(os.path.join(self.path, "extended_log.csv"))

    def crop_sky_car(self, X_i):
        """Drop the top/bottom rows of ``X_i`` and resize back to ``(self.h, self.w)``.

        NOTE(review): reconstructed helper - see the CROP_TOP/CROP_BOTTOM note above.
        The result is resized to the size given to the constructor so every sample
        has the same shape.
        """
        height = X_i.shape[0]
        if self.CROP_TOP + self.CROP_BOTTOM >= height:
            raise ValueError("Image with {} rows is too small to crop".format(height))
        #flipped images are negative-stride views; hand OpenCV a contiguous buffer
        cropped = np.ascontiguousarray(X_i[self.CROP_TOP:height - self.CROP_BOTTOM])
        #cv2.resize takes the target size as (width, height)
        return cv2.resize(cropped, (self.w, self.h))

    def flip_horizontal(self, X_i):
        """Return ``X_i`` mirrored left-to-right (a view, not a copy)."""
        image = X_i[:, ::-1]
        return image

    #reference from https://subscription.packtpub.com/book/application_development/9781785283932/1/ch01lvl1sec11/image-translation
    def shift(self, X_i, y_i):
        """Translate ``X_i`` by a random offset and nudge the steering angle.

        Returns:
            ``(image, steering_angle)`` where the angle is ``y_i`` plus
            ``1e-04`` per pixel of horizontal shift.
        """
        w = X_i.shape[1]
        h = X_i.shape[0]
        #shift range between -0.10 and 0.10 of the image size
        shift_w = w * (random.random()*0.2-0.1)
        shift_h = h * (random.random()*0.2-0.1)
        #adjust steering angle slightly
        s_angle = y_i + shift_w*1e-04
        #M = 2x3 transformation matrix
        M = np.float32([[1, 0, shift_w],[0, 1, shift_h]])
        #https://docs.opencv.org/2.4/modules/imgproc/doc/geometric_transformations.html?highlight=getrotationmatrix2d
        image = cv2.warpAffine(X_i, M, (w, h))
        return image, s_angle

    def blur(self, X_i):
        """Gaussian-blur ``X_i`` with a random integer sigma in ``[0, 3]``.

        The result is returned as uint8 in the 0-255 range. Without
        ``preserve_range`` skimage rescales uint8 input to floats in [0, 1],
        which would not match the other (0-255) samples fed to the model.
        """
        sigma = random.randrange(4)
        blurred = gaussian(X_i, sigma, multichannel=True, mode='reflect', preserve_range=True)
        return np.clip(np.rint(blurred), 0, 255).astype(np.uint8)

    #reference from https://stackoverflow.com/questions/32609098/how-to-fast-change-image-brightness-with-python-opencv
    def adjust_brightness(self, X_i):
        """Scale the brightness of ``X_i`` by a random factor in ``[0.5, 1.5)``.

        The same RGB<->HSV conversion codes are used in both directions, so the
        channel order of the input is restored on the way back.
        """
        #change to hsv
        img_hsv = cv2.cvtColor(X_i, cv2.COLOR_RGB2HSV)
        #brightness, third channel - value range is [0,255], darker or brighter by up to 50%
        factor = 1.0 + (random.random()-0.5)
        #scale in float and clip: adding a (possibly negative) delta in uint8 wraps around
        value = img_hsv[:,:,2].astype(np.float32) * factor
        img_hsv[:,:,2] = np.clip(value, 0, 255).astype(np.uint8)
        return cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB)

    def augment_data(self, X, y):
        """Load every image in ``X`` and return augmented images and angles.

        For each (path, angle) pair the output contains the cropped original,
        its horizontal flip with the negated angle, and - with probability
        ``1 - AUGMENT_THRESHOLD`` - one extra sample that is randomly shifted,
        blurred or brightness-adjusted.

        All images are held in memory at once. Empty input yields two empty lists.
        """
        X_augmented = []
        y_augmented = []
        for file_name, angle in zip(X, y):
            X_i = self.load_image(file_name.strip())

            #original image
            X_augmented.append(self.crop_sky_car(X_i))
            y_augmented.append(angle)
            #flipped image with adjusted y steering angle
            X_augmented.append(self.crop_sky_car(self.flip_horizontal(X_i)))
            y_augmented.append(-angle)
            #other augmentations with probability
            if random.random() > self.AUGMENT_THRESHOLD:
                #none of the transforms below modify X_i in place
                X_extra, y_extra = X_i, angle
                rand_transform = random.randrange(3)
                if rand_transform==0:
                    X_extra, y_extra = self.shift(X_extra, y_extra)
                elif rand_transform==1:
                    X_extra = self.blur(X_extra)
                else:
                    X_extra = self.adjust_brightness(X_extra)
                X_augmented.append(self.crop_sky_car(X_extra))
                y_augmented.append(y_extra)
        assert len(X_augmented)==len(y_augmented)
        return X_augmented, y_augmented

    def split_data(self):
        """Augment the extended log and split it 80/20 into train and validation sets.

        Returns:
            ``X_train, X_valid, y_train, y_valid`` as produced by train_test_split.
        """
        #augment the dataset to increase and transform images
        X_augmented, y_augmented = self.augment_data(self.new_df['image'], self.new_df['steering_angle'])

        #split train and valid set
        X_train, X_valid, y_train, y_valid = train_test_split(X_augmented, y_augmented, test_size=0.2,
                                                              random_state=42, shuffle=True)
        return X_train, X_valid, y_train, y_valid

I need to define the path to data, both images and csv files.

In [5]:
#directory (relative to the notebook) that holds the csv logs and the images; trained models are saved here too
data_path = "data_ext/data"

Then, DataLoader is instantiated with the size of images as a parameter.

In [6]:
#size is (height, width, channels)
data = DataLoader(data_path, (160, 320, 3))

In [11]:
#create a new csv to utilise three different images
# data.match_X_y()

In this notebook, the `split_data` step also performs the data augmentation before splitting.

Four data augmentation methods are used in this project: horizontal flip, random shift, Gaussian blur and brightness adjustment.

In [7]:
#loads and augments every image, then splits 80/20 - all images are held in memory
X_train, X_valid, y_train, y_valid = data.split_data()

In [8]:
#sanity check: images and labels must pair up in both splits
len(X_train), len(X_valid), len(y_train), len(y_valid)

(80848, 20212, 80848, 20212)

In [9]:
def generate_batch(X, y, bs):
    """Endlessly yield ``(X_batch, y_batch)`` NumPy arrays of at most ``bs`` samples.

    The data is reshuffled at the start of every pass; the last batch of a
    pass may be smaller than ``bs``.
    """
    total = len(X)
    #generators used for training must never run dry, hence the endless loop
    while True:
        X, y = shuffle(X, y)

        for start in range(0, total, bs):
            stop = start + bs
            X_batch = np.array(list(X[start:stop]))
            y_batch = np.array(list(y[start:stop]))
            yield X_batch, y_batch

## Model Architecture

As mentioned earlier, I decided to use Darknet53, the backbone of YoloV3, for a steering angle prediction. 

Darknet53 performs well by using ResNet-style residual layers while requiring relatively few billions of operations and sustaining a high rate of billions of floating-point operations per second (see the comparison table below from the [YOLOv3 paper](https://pjreddie.com/media/files/papers/YOLOv3.pdf)). This is one of the reasons why YOLOv3 is able to produce real-time predictions on embedded devices.

I believe that an efficient operation without sacrificing performance is crucial to computer vision components supporting self driving car engineering.

![Comparison_between_Darknet_and_Resnet](examples/darknet53.PNG)

In [12]:
def dbl(input_data, soutput:int, skernel:int=3, stride:int=1, weight_decay=5e-04):
    """Darknet building block: Conv2D -> BatchNormalization -> LeakyReLU.

    Args:
        input_data: input tensor.
        soutput: number of convolution filters.
        skernel: side length of the square kernel.
        stride: convolution stride.
        weight_decay: L2 regularisation factor for the convolution kernel.
    """
    conv = Conv2D(soutput, (skernel, skernel), padding='same', strides=stride,
                  kernel_regularizer=l2(weight_decay))
    convolved = conv(input_data)
    norm = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True,
                              beta_initializer='zeros', gamma_initializer='ones',
                              moving_mean_initializer='zeros',
                              moving_variance_initializer='ones')
    normalised = norm(convolved)
    return LeakyReLU(alpha=0.3)(normalised)

In [13]:
def res_unit(input_data, soutput:int):
    """Residual unit: a 1x1 dbl with ``soutput//2`` filters, a 3x3 dbl with
    ``soutput`` filters, and a skip connection adding the input back."""
    squeezed = dbl(input_data, soutput//2, 1)
    expanded = dbl(squeezed, soutput, 3)
    shortcut = [input_data, expanded]
    return add(shortcut)

In [14]:
def res_unit_block(input_data, soutput:int, num_block:int):
    """Chain residual units with ``soutput`` filters.

    At least one unit is always applied, so ``num_block`` values below 1
    behave like 1.
    """
    x = input_data
    for _ in range(max(num_block, 1)):
        x = res_unit(x, soutput)
    return x

I replaced the last fully connected layer, which has 1000 outputs (one per ImageNet class), and the softmax layer with a single-output fully connected layer, so that the whole model produces one number: the steering angle. This architecture therefore solves a regression problem instead of a classification problem.

In [15]:
def darknet53(sinput:Collection[int], num_blocks:Collection[int], output_sz:int=32):
    """Build a Darknet53-style regression model with a single linear output.

    Args:
        sinput: input image shape, e.g. ``(height, width, channels)``.
        num_blocks: number of residual units in each of the five stages.
        output_sz: filter count of the first convolution; every stage
            multiplies it by 2, 4, 8, 16 and 32 respectively.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inputs = Input(shape=sinput)
    #scale pixel values from [0, 255] to [-0.5, 0.5] inside the model
    x = Lambda(lambda pixels: pixels/255.0-0.5, input_shape=sinput)(inputs)

    #dbl in 3 channels, out output_sz channels
    x = dbl(x, output_sz, 3, 1)

    #(channel multiplier, stride of the leading dbl) per stage; the first stage keeps
    #the resolution, the following four halve it
    stages = ((2, 1), (4, 2), (8, 2), (16, 2), (32, 2))
    for idx, (multiplier, stride) in enumerate(stages):
        channels = output_sz*multiplier
        x = dbl(x, channels, 3, stride)
        x = res_unit_block(x, channels, num_blocks[idx])

    #collapse the spatial dimensions and regress one value
    x = GlobalAveragePooling2D()(x)
    x = Dense(units=1)(x)

    return Model(inputs=inputs, outputs=x)

The model will take the input size of 160 Height, 320 Width and 3 Channel (BGR from CV2). There are five blocks that have different number of layers: 1, 2, 8, 8, 4, respectively. I will create a model with these configurations.

In [16]:
#input shape is (height, width, channels); the list gives the residual units per stage
model = darknet53((160, 320, 3), [1, 2, 8, 8, 4])









It is important to investigate the model architecture to ensure the correct configuration. In my case, I found Lambda layer was missing, by looking at the architecture below. 

In [15]:
#print the layer-by-layer summary to verify the architecture
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 160, 320, 3)  0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 160, 320, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 160, 320, 32) 896         lambda_1[0][0]                   
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 160, 320, 32) 128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
leaky_re_l

## Training

In the pipeline, I incorporated two callbacks - saving the best weights and early stopping callbacks.

In [10]:
def training_pipeline(data, model, learning_rate, filepath, bs, epoch):
    """Compile ``model`` and train it with checkpointing and early stopping.

    NOTE: the training and validation data are read from the notebook-level
    variables ``X_train``, ``y_train``, ``X_valid`` and ``y_valid``; the
    ``data`` argument is accepted but not used.

    Args:
        data: unused (kept for backward compatibility).
        model: Keras model to compile and fit.
        learning_rate: learning rate passed to the Adam optimizer.
        filepath: where the model with the lowest ``val_loss`` is saved.
        bs: batch size.
        epoch: maximum number of epochs.

    Returns:
        Whatever ``model.fit_generator`` returns (the training history).

    Raises:
        ValueError: if ``bs`` is not positive.
    """
    if bs <= 0:
        raise ValueError("bs must be a positive integer, got {}".format(bs))

    save_best = ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=True,
                    save_weights_only=False, mode='min', period=1)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.005, patience=2, verbose=0,
                                   mode='min', baseline=None, restore_best_weights=False)

    model.compile(loss='mean_squared_error', optimizer=Adam(lr=learning_rate))
    #ceiling division: exactly one pass over the data per epoch. `len // bs + 1` ran one
    #extra batch whenever the sample count was divisible by the batch size
    step_train = -(-len(X_train) // bs)
    step_valid = -(-len(X_valid) // bs)

    #https://keras.io/models/sequential/
    return model.fit_generator(generate_batch(X_train, y_train, bs), steps_per_epoch=step_train, epochs=epoch, verbose=1, callbacks=[save_best, early_stopping],
                               validation_data=generate_batch(X_valid, y_valid, bs), validation_steps=step_valid,
                               class_weight=None, max_queue_size=10, workers=0, use_multiprocessing=False, shuffle=True, initial_epoch=0)

I need to define a few hyperparameters here: the learning rate, the number of epochs and the batch size.

After a number of experiments, I found that a learning rate of 0.01 is a reasonable starting point and that a batch size of 8 is the maximum my local environment can handle. In addition, 2 epochs are sufficient with a learning rate of 0.01.

In [11]:
#stage 1: initial learning rate and number of epochs
lr = 0.01
epoch = 1

In [11]:
#batch size used for every training stage below
bs = 8

In [19]:
#stage 1: train and save the model with the lowest val_loss to model_01.h5
training_pipeline(data, model, lr, os.path.join(data_path, 'model_01.h5'), bs, epoch)


Epoch 1/1


In [12]:
#continue from the checkpoint saved by stage 1
model = load_model(os.path.join(data_path, 'model_01.h5'))










In [13]:
#stage 2: fine-tune with a 100x smaller learning rate
lr = 0.0001
epoch = 1

In [14]:
#stage 2: train and save the model with the lowest val_loss to model_02.h5
training_pipeline(data, model, lr, os.path.join(data_path, 'model_02.h5'), bs, epoch)

Epoch 1/1


In [22]:
#continue from the checkpoint saved by stage 2
model = load_model(os.path.join(data_path, 'model_02.h5'))

In [21]:
#stage 3: fine-tune with a further 10x smaller learning rate
lr = 0.00001
epoch = 1

In [22]:
#stage 3: train and save the model with the lowest val_loss to model_03.h5
training_pipeline(data, model, lr, os.path.join(data_path, 'model_03.h5'), bs, epoch)

Epoch 1/1


## Result