In [40]:
import pandas as pd
from pandas import DataFrame, Series
import numpy as np
import random
from keras.models import load_model, Sequential, Model
from keras.layers import Cropping2D
import cv2
import os
import socket
import scipy
from sklearn import preprocessing

In [41]:
is_AWS = False if 'Macbook' in socket.gethostname() else True

In [42]:
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
import cv2, numpy as np

def Model(weights_path=None, dropout=0.5, dropout_level=1, orig = True, discrete=False):
    if orig:
        model = Sequential()
                
        # Take 2x2 stride on the input to reduce dimensionality
        model.add(Lambda(lambda x: x[::2, ::2, :], input_shape=(160, 320, 3), output_shape=(80, 160, 3))) #(3, 80, 160)
        
        # Zero pad the width
        model.add(ZeroPadding2D(padding=(0, 20))) #(3, 80, 200)
        
        # Crop the height
        model.add(Cropping2D(cropping=((80-66, 0), (0, 0)))) #(3, 66, 200)
        
        # Normalize
        model.add(Lambda(lambda x: (x / 255.0) - 0.5, output_shape=(66, 200, 3)))
        
        # Original network
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Convolution2D(24, 5, 5, border_mode='valid', subsample=(2,2), activation='relu')) #(24, 31, 98)
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Convolution2D(36, 5, 5, border_mode='valid', subsample=(2,2), activation='relu')) #(36, 14, 47)
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Convolution2D(48, 5, 5, border_mode='valid', subsample=(2,2), activation='relu')) #(48, 5, 22)
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Convolution2D(64, 3, 3, border_mode='valid', subsample=(1,1), activation='relu')) #(64, 3, 20)
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Convolution2D(64, 3, 3, border_mode='valid', subsample=(1,1), activation='relu')) #(64, 1, 18)
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Flatten())
        model.add(Dense(100, activation='relu'))
        model.add(Dropout(dropout)) if dropout_level >= 1 else None
        model.add(Dense(50, activation='relu'))
        model.add(Dropout(dropout)) if dropout_level >= 1 else None

        if not discrete:
            model.add(Dense(10, activation='relu'))
            model.add(Dropout(dropout)) if dropout_level >= 1 else None
            model.add(Dense(1, activation='hard_sigmoid'))
        else:
            model.add(Dense(10, activation='softmax'))
    else:
        model = Sequential()
        model.add(Lambda(lambda x: (x / 255.0) - 0.5, input_shape=(160, 320, 3), output_shape=(160, 320, 3)))
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Convolution2D(24, 5, 5, border_mode='valid', subsample=(3,3), activation='relu', init='normal')) #(24, 52, 106)
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Convolution2D(36, 5, 5, border_mode='valid', subsample=(2,2), activation='relu', init='normal')) #(36, 24, 51)
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Convolution2D(48, 5, 5, border_mode='valid', subsample=(2,2), activation='relu', init='normal')) #(48, 10, 24)
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Convolution2D(64, 3, 3, border_mode='valid', subsample=(1,1), activation='relu', init='normal')) #(64, 8, 22)
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Convolution2D(64, 3, 3, border_mode='valid', subsample=(1,1), activation='relu', init='normal')) #(64, 6, 20)
        model.add(Dropout(dropout)) if dropout_level >= 2 else None
#         model.add(Convolution2D(64, 3, 3, border_mode='valid', subsample=(1,1), activation='relu', init='normal')) #(64, 4, 18)
#         model.add(Dropout(dropout)) if dropout_level >= 2 else None
        model.add(Flatten())
        model.add(Dense(100, activation='relu', init='normal'))
        model.add(Dropout(dropout)) if dropout_level >= 1 else None
        model.add(Dense(50, activation='relu', init='normal'))
        model.add(Dropout(dropout)) if dropout_level >= 1 else None
        if not discrete:
            model.add(Dense(20, activation='relu', init='normal'))
            model.add(Dropout(dropout)) if dropout_level >= 1 else None
            model.add(Dense(1, activation='tanh', init='normal'))
        else:
            model.add(Dense(10, activation='softmax', init='normal'))

    if weights_path:
        model.load_weights(weights_path, by_name=True)

    return model

In [43]:
if is_AWS:
    track1_dir = '/home/carnd/Dropbox/udacity-data/track1'
else:
    track1_dir = '/Users/macbook/Development/personal/udacity-car/CarND-Behavioral-Cloning-P3/track1'

try:
    folders_to_exclude = pd.read_csv('to_exclude.csv', header=None, names=['Index', 'Name'])['Name'].tolist()
    model = load_model('model.h5')
except:
    folders_to_exclude = []
    model = None
    
folders_to_exclude += ['.DS_Store']

track1_data_dirs = [x for x in os.listdir(track1_dir) if x not in folders_to_exclude]
print(track1_data_dirs)

track1_data_dirs = [track1_dir + '/' + x for x in track1_data_dirs]

driving_log_df = None

for data_dir in track1_data_dirs:
    df = pd.read_csv(data_dir + "/driving_log.csv", header=None, names=["center","left","right","steering","throttle","brake","speed"])

    cols = ['center', 'left', 'right']
    for col in cols:
        df[col] = df[col].str.strip()
        df[col] = df[col].str.split("/").apply(lambda x: x[-1])
    df[['center', 'left', 'right']] = data_dir + "/IMG/" + df[['center', 'left', 'right']]
    
    if driving_log_df is None:
        driving_log_df = df
    else:
        driving_log_df = pd.concat([driving_log_df, df])

print("Length: ", len(driving_log_df))


driving_log_df.head()

['data_download']
Length:  8036


Unnamed: 0,center,left,right,steering,throttle,brake,speed
0,/Users/macbook/Development/personal/udacity-ca...,/Users/macbook/Development/personal/udacity-ca...,/Users/macbook/Development/personal/udacity-ca...,0.0,0.0,0.0,22.14829
1,/Users/macbook/Development/personal/udacity-ca...,/Users/macbook/Development/personal/udacity-ca...,/Users/macbook/Development/personal/udacity-ca...,0.0,0.0,0.0,21.87963
2,/Users/macbook/Development/personal/udacity-ca...,/Users/macbook/Development/personal/udacity-ca...,/Users/macbook/Development/personal/udacity-ca...,0.0,0.0,0.0,1.453011
3,/Users/macbook/Development/personal/udacity-ca...,/Users/macbook/Development/personal/udacity-ca...,/Users/macbook/Development/personal/udacity-ca...,0.0,0.0,0.0,1.438419
4,/Users/macbook/Development/personal/udacity-ca...,/Users/macbook/Development/personal/udacity-ca...,/Users/macbook/Development/personal/udacity-ca...,0.0,0.0,0.0,1.418236


In [44]:
def get_next_image_generator(df, position = 'center', offset = 0.2):
    for idx, image_path in enumerate(df[position]):
        img = cv2.imread(image_path)
        yield img


In [45]:
tempgen = get_next_image_generator(driving_log_df)
sample = next(tempgen)
print("Dimension of image: H x W X D = ", sample.shape)
print("# of images: ", len(driving_log_df))

print("Steering range: Min=", np.min(driving_log_df['steering']), " , Max=", np.max(driving_log_df['steering']))
print("Throttle range: Min=", np.min(driving_log_df['throttle']), " , Max=", np.max(driving_log_df['throttle']))
print("Brake range: Min=", np.min(driving_log_df['brake']), " , Max=", np.max(driving_log_df['brake']))
print("Speed range: Min=", np.min(driving_log_df['speed']), " , Max=", np.max(driving_log_df['speed']))

print("image Min: ", np.min(sample))
print("image Max: ", np.max(sample))
#sample

Dimension of image: H x W X D =  (160, 320, 3)
# of images:  8036
Steering range: Min= -0.9426954  , Max= 1.0
Throttle range: Min= 0.0  , Max= 0.9855326
Brake range: Min= 0.0  , Max= 1.0
Speed range: Min= 0.5024896  , Max= 30.70936
image Min:  0
image Max:  255


In [46]:
def offset_steering(df, offset):
    df[df['target'] == 'left']['steering'] = df[df['target'] == 'left']['steering'] + offset
    df[df['target'] == 'right']['steering'] = df[df['target'] == 'right']['steering'] - offset
    return df

def filter_by_steering(df, min_angle):
    return df[np.abs(df['steering']) >= min_angle]

def append_mirrored_data(df):
    dfMirror = df.copy(deep=True)
    dfMirror['mirror'] = True
    dfMirror['steering'] *= -1
    df = pd.concat([df, dfMirror])
    return df

def set_position_targets(df, position):
    if position == 'all':
        dfLeft = df.copy(deep=True)
        dfLeft['target'] = 'left'
        dfCenter = df.copy(deep=True)
        dfCenter['target'] = 'center'
        dfRight = df.copy(deep=True)
        dfRight['target'] = 'right'
        df = pd.concat([dfLeft, dfCenter, dfRight])
    else:
        df['target'] = position 
    return df
    
def degrees_to_proportion(val):
    return val / 25.
    
class MyMinMaxScaler():
    def __init__(self, new_range=(0,1)):
        self.new_min = new_range[0]
        self.new_max = new_range[1]
        
    def fit(self, inputs):
        self.min = np.min(inputs)
        self.max = np.max(inputs)
        
    def transform(self, inputs):
        return (inputs - self.min) / (self.max - self.min) * (self.new_max - self.new_min) + self.new_min
    
def get_next_feature(df, batch_size = 10, mode = 'train', position = 'center', offset = 5., val_portion = 0.2, include_mirror=True, steering_multiplier=1.0, min_angle=1.5):
    total_len = len(df)
    val_len = int(val_portion * total_len)
    train_len = total_len - val_len
    
    offset = degrees_to_proportion(offset)
    min_angle = degrees_to_proportion(min_angle)

    if mode == "train":
        df = df[:train_len]
    else: #Validation set
        df = df[train_len:]
        position = 'center' #Only use center data
    
    min_max_scaler = MyMinMaxScaler((-1,1))
    #min_max_scaler = preprocessing.MinMaxScaler((0,1))
    min_max_scaler.fit(df['steering'])
    df['steering'] = min_max_scaler.transform(df['steering'])
    offset, min_angle = min_max_scaler.transform(np.array([offset, min_angle]))
    
    df = set_position_targets(df, position)
    df = offset_steering(df, offset)
    df = filter_by_steering(df, min_angle)
            
    df['mirror'] = False
    if include_mirror:
        df = append_mirrored_data(df)
    
    
    #Shuffle
    df = df.sample(frac=1).reset_index(drop=True)
    df = df.sample(frac=1).reset_index(drop=True)
    
    row = df.iloc[0]
    sample_image = cv2.imread(row[row['target']])

    image_size = sample_image.shape

    inputs = np.zeros([batch_size, *image_size]) #length of prediction output
    targets = np.zeros([batch_size])
    
    count = 0
    
    while(True):
        for idx in range(len(df)):
            row = df.iloc[idx]
            image_path = row[row['target']]
            img = cv2.imread(image_path)
            if row['mirror']:
                img = img[:,::-1,:]
                
            img = img[np.newaxis, :, :, :]
    
            inputs[count] = img
            targets[count] = row['steering']
            
            targets[count] *= steering_multiplier
            
            count += 1
            if count == batch_size:
                yield inputs, targets
                inputs = np.zeros([batch_size, *image_size])
                targets = np.zeros([batch_size])
                count = 0
                
def get_next_feature_discrete(df, batch_size = 10, mode = 'train', position = 'center', offset = 0.1, val_portion = 0.2, include_mirror=True, steering_multiplier=1.0):
    gen = get_next_feature(df, batch_size, mode, position, offset, val_portion, include_mirror, steering_multiplier)
    while True:
        X, y = next(gen)
        
        # y is continuous between -1 and 1. Bucket into 10 categories.
        bin_boundaries = scipy.linspace(-1, 1, 11)
        bin_boundaries[-1] +=1 # To include absolutes
        bin_boundaries[0] -=1 # To include absolutes
        
        y_discrete = np.digitize(y, bin_boundaries)
        
        

In [47]:
# Define the model

#model = Model(dropout=0.7, dropout_level=1, orig = False, discrete=False)
if model is None:
    model = Model(dropout=0.7, dropout_level=1, orig = False, discrete=False)


In [48]:
model.compile(optimizer='Nadam', loss='mean_squared_error')

# train model
EPOCHS = 5
BATCH_SIZE = 300
OFFSET = 4. # 0.08-0.25 from forums. 4 degrees = 0.16
VAL_PORTION = 0.1
INCLUDE_MIRROR = True
STEERING_MULTIPLIER = 1.0
MIN_ANGLE=0.15 #1.0

# Train on all the data
# position = 'all'
position = 'all'

train_generator_all = get_next_feature(driving_log_df, 10, 'train', position, OFFSET, VAL_PORTION, INCLUDE_MIRROR, STEERING_MULTIPLIER, MIN_ANGLE)
validation_generator_all = get_next_feature(driving_log_df, 10, 'val', position, OFFSET, VAL_PORTION, INCLUDE_MIRROR, STEERING_MULTIPLIER, MIN_ANGLE)
model.fit_generator(train_generator_all, BATCH_SIZE, EPOCHS, verbose=2, validation_data=validation_generator_all, nb_val_samples=BATCH_SIZE/3)

# driving_log_df



Epoch 1/5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas

20s - loss: 0.0192 - val_loss: 0.2495
Epoch 2/5
14s - loss: 0.0190 - val_loss: 0.2190
Epoch 3/5
13s - loss: 0.0185 - val_loss: 0.2254
Epoch 4/5
14s - loss: 0.0143 - val_loss: 0.2521
Epoch 5/5
13s - loss: 0.3581 - val_loss: 1.2888


<keras.callbacks.History at 0x1211cd160>

In [49]:
# # Force learn problem areas
# if is_AWS:
#     track1_dir = '/home/carnd/Dropbox/udacity-data/track1'
# else:
#     track1_dir = '/Users/macbook/Development/personal/udacity-car/CarND-Behavioral-Cloning-P3/track1'

# folders_to_include = ['trial4-startfix']
# track1_data_dirs = [track1_dir + '/' + x for x in folders_to_include]

# driving_log_df = None

# for data_dir in track1_data_dirs:
#     df = pd.read_csv(data_dir + "/driving_log.csv", header=None, names=["center","left","right","steering","throttle","brake","speed"])

#     cols = ['center', 'left', 'right']
#     for col in cols:
#         df[col] = df[col].str.strip()
#         df[col] = df[col].str.split("/").apply(lambda x: x[-1])
#     df[['center', 'left', 'right']] = data_dir + "/IMG/" + df[['center', 'left', 'right']]
    
#     if driving_log_df is None:
#         driving_log_df = df
#     else:
#         driving_log_df = pd.concat([driving_log_df, df])

# # # shuffle
# # np.random.seed(42)
# # driving_log_df = driving_log_df.reindex(np.random.permutation(driving_log_df.index), copy=False)

# train_generator_all = get_next_feature(driving_log_df, 10, 'train', position, OFFSET, VAL_PORTION, INCLUDE_MIRROR)
# validation_generator_all = get_next_feature(driving_log_df, 10, 'val', position, OFFSET, VAL_PORTION, INCLUDE_MIRROR)
# model.fit_generator(train_generator_all, BATCH_SIZE, EPOCHS, verbose=2, validation_data=validation_generator_all, nb_val_samples=BATCH_SIZE/3)


In [50]:
model.save('modeltest.h5')

In [51]:
#model.save('model.h5')

In [52]:
#Series([x.split("/")[-1] for x in track1_data_dirs], name="done_to_exclude").to_csv("to_exclude.csv")