# Statefarm Distracted Driver Classification (using Keras)
## Satchel Grant

The goal of this notebook is to classify the State Farm distracted-driver images using Keras instead of raw TensorFlow. I also implement a generator that feeds the data in batches to reduce memory consumption.

### Initial Imports

In [13]:
import os
import sys
import time
from multiprocessing.pool import ThreadPool

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc as sci
import tensorflow as tf
from PIL import Image
from sklearn.utils import shuffle

%matplotlib inline

def show_img(img):
    """Draw `img` with `plt.imshow` and display the resulting figure."""
    plt.imshow(img)
    plt.show()

### Read in Data

First I read in the file paths of each of the image files and create a parallel array to store the labels and then I shuffle the order of the data.


In [2]:
# Machine-specific absolute path to the volume that holds the dataset.
external_drive_path = '/Volumes/WhiteElephant/'
# Remember where the notebook was started before changing directory.
home_path = os.getcwd()
# From here on, every relative path in the notebook resolves against the external drive.
os.chdir(external_drive_path)

In [3]:
# Training image directory, relative to the current working directory.
path = './statefarm_drivers/imgs/train'

def read_paths(path, no_labels=False):
    """Collect the .jpg files below `path` together with their labels.

    Each sub-directory is treated as one class. Sub-directories are visited
    in sorted order, so the class-name -> integer mapping (and therefore the
    meaning of every one-hot column) is the same on every run and platform.

    Args:
        path: Root directory to walk.
        no_labels: When True, no class mapping is applied and the file name
            of each image is recorded in place of a label.

    Returns:
        `(file_paths, labels, n_labels)` when `no_labels` is False, where
        `labels[i]` is the integer class of `file_paths[i]` and `n_labels` is
        the number of class directories found (0 if there are none).
        `(file_paths, file_names)` when `no_labels` is True.

    Raises:
        KeyError: If labels are requested and an image lives in a directory
            that is not one of the discovered class directories.
    """
    file_paths = []
    labels = []
    labels_to_nums = dict()
    label_types = []
    for dir_name, subdir_list, file_list in os.walk(path):
        # os.walk yields entries in arbitrary order; sorting in place fixes
        # both the traversal order and the label numbering.
        subdir_list.sort()
        if len(subdir_list) > 0:
            label_types = list(subdir_list)
            for i, subdir in enumerate(subdir_list):
                labels_to_nums[subdir] = i
        for img_file in sorted(file_list):
            if not img_file.lower().endswith('.jpg'):
                continue
            file_paths.append(os.path.join(dir_name, img_file))
            if no_labels:
                labels.append(img_file)
            else:
                # Use the full directory name so class folders are not
                # limited to two-character names.
                labels.append(labels_to_nums[os.path.basename(dir_name)])
    if no_labels:
        return file_paths, labels
    n_labels = len(label_types)
    return file_paths, labels, n_labels
    

file_paths, labels,n_labels = read_paths(path)
# Shuffle paths and labels together so each path keeps its own label.
# NOTE(review): no random_state is passed, so the order (and the train/validation
# split derived from it later) differs between runs.
file_paths, labels = shuffle(file_paths, labels)
print("Number of data samples: " + str(len(file_paths)))

Number of data samples: 22424


In [4]:
def one_hot_encode(labels, n_classes):
    """Convert integer class ids into one-hot rows.

    Args:
        labels: Iterable of integer class ids, each a valid index into a row
            of length `n_classes`.
        n_classes: Number of columns in every one-hot row.

    Returns:
        float32 array of shape `(len(labels), n_classes)` with a single 1 per
        row. An empty `labels` yields an array of shape `(0, n_classes)`.

    Raises:
        ValueError: If `labels` is not one-dimensional.
        IndexError: If a label is out of range for `n_classes`.
    """
    indices = np.asarray(list(labels), dtype=np.intp)
    if indices.ndim != 1:
        raise ValueError("labels must be a flat sequence of integer class ids")
    one_hots = np.zeros((indices.shape[0], n_classes), dtype=np.float32)
    # Set column `indices[r]` of row `r` in one vectorised assignment.
    one_hots[np.arange(indices.shape[0]), indices] = 1
    return one_hots

# NOTE: rebinds `labels` from integer class ids to one-hot rows, so this cell
# must run exactly once after the labels are read.
labels = one_hot_encode(labels,n_labels)

The following cell contains data augmentation functions that increase the amount of usable data. Each augmented image is produced by rotating the original and then translating it by a random offset; the pixels left uncovered are filled with random noise. The code is probably overly verbose, but it works and doesn't do anything too tricky.

In [79]:
import random

def rotate(img, angle, ones):
    """Rotate `img` and replace the pixels outside the rotated mask with noise.

    Args:
        img: Image array handed to `sci.imrotate`.
        angle: Rotation angle handed to `sci.imrotate`.
        ones: 3-D mask with the same shape as the rotated image; entries equal
            to 1 mark pixels that keep their rotated value.

    Returns:
        float32 array of the rotated image, with every position where `ones`
        is not 1 overwritten by a uniform random value in [0, 255).
    """
    rotated = sci.imrotate(img, angle).astype(np.float32)
    noise = np.random.random(rotated.shape).astype(np.float32) * 255
    outside = ones[:, :, :] != 1
    rotated[outside] = noise[outside]
    return rotated

def _shift_slices(amount):
    """Return `(destination, source)` slices for shifting one axis by `amount`."""
    if amount > 0:
        return slice(amount, None), slice(None, -amount)
    if amount < 0:
        return slice(None, amount), slice(-amount, None)
    return slice(None), slice(None)


def translate(img, row_amt, col_amt):
    """Shift `img` by whole pixels and fill the uncovered border with noise.

    Args:
        img: Array whose first two axes are rows and columns.
        row_amt: Rows to shift by; positive moves the content down.
        col_amt: Columns to shift by; positive moves the content right.

    Returns:
        A new array with the shape and dtype of `img`. Pixels that the shifted
        image does not cover hold uniform random values in [0, 255). When both
        shifts are 0 an unmodified copy of `img` is returned.
    """
    if row_amt == 0 and col_amt == 0:
        return img.copy()
    color_range = 255
    # Scale before casting: casting the [0, 1) samples first would truncate
    # them all to 0 for integer dtypes and leave a black border.
    translation = (np.random.random(img.shape) * color_range).astype(img.dtype)
    dst_rows, src_rows = _shift_slices(row_amt)
    dst_cols, src_cols = _shift_slices(col_amt)
    translation[dst_rows, dst_cols] = img[src_rows, src_cols]
    return translation


def add_augmentations(paths, rot_angles=(10,-10), row_shift=5, col_shift=3):
    """Write one rotated-and-shifted copy of every image for each rotation angle.

    Args:
        paths: Sequence of image file paths. Nothing happens when it is empty.
        rot_angles: Rotation angles; one augmented file is produced per angle.
        row_shift: Maximum absolute random row offset applied after rotating.
        col_shift: Maximum absolute random column offset applied after rotating.

    The rotation masks are computed once from the first image and reused for
    all others — assumes every image has the same shape as the first one;
    confirm for other datasets.
    """
    if len(paths) == 0:
        return
    img = mpimg.imread(paths[0])
    # Rotating an all-ones image marks which output pixels are covered by the
    # rotated input for each angle.
    ones = [sci.imrotate(np.ones_like(img), angle) for angle in rot_angles]
    for path in paths:
        img = mpimg.imread(path)
        for angle, mask in zip(rot_angles, ones):
            add_augmentation(img, path, angle, row_shift, col_shift, mask)

def add_augmentation(img,path,angle,row_shift,col_shift,ones):
    """Rotate and randomly shift `img`, then save it beside the original file.

    The output file name is the original name prefixed with `augmented_1_`
    for non-negative angles and `augmented_2_` for negative ones; everything
    up to the last '/' of `path` is kept unchanged.

    Args:
        img: Source image array.
        path: Path of the source image; determines where the copy is written.
        angle: Rotation angle passed to `rotate`.
        row_shift: Bound of the random row offset (inclusive, both signs).
        col_shift: Bound of the random column offset (inclusive, both signs).
        ones: Rotation mask passed to `rotate`.
    """
    rotated = rotate(img, angle, ones)
    row_offset = random.randint(-row_shift, row_shift)
    col_offset = random.randint(-col_shift, col_shift)
    augmented = translate(rotated, row_offset, col_offset).astype(np.uint8)
    variant = 2 if angle < 0 else 1
    directory, separator, file_name = path.rpartition('/')
    new_path = directory + separator + 'augmented_' + str(variant) + "_" + file_name
    Image.fromarray(augmented).save(new_path)

        

In [None]:
# Writes rotated + shifted copies of every training image next to the originals.
# NOTE(review): `file_paths` was collected before this call, so the new files are
# only picked up if the path-reading cell is run again — confirm the intended order.
add_augmentations(file_paths, row_shift=20, col_shift=20)




Next I create a generator to read in the images in batches. This reduces the amount of memory required to deal with the entire dataset.

In [10]:
# Hold out the last 25% of the samples for validation.
split_index = int(.75*len(file_paths))
X_train_paths, y_train = file_paths[:split_index], labels[:split_index]
X_valid_paths, y_valid = file_paths[split_index:], labels[split_index:]
batch_size = 128
# Ceiling division for both splits: the generator also yields the final partial
# batch, so one epoch must consume exactly one full pass. Flooring the validation
# steps skipped that batch and produced 0 steps for sets smaller than a batch.
train_steps_per_epoch = -(-len(X_train_paths) // batch_size)
valid_steps_per_epoch = -(-len(X_valid_paths) // batch_size)
resize_dims = (120,120)


def convert_images(paths, resize_dims, img_depth=3):
    """Load every image in `paths`, resize it, and stack the results.

    Args:
        paths: Iterable of image file paths.
        resize_dims: Target size passed to `sci.imresize`.
        img_depth: Accepted for interface compatibility; not used.

    Returns:
        float32 array holding the resized images in the order of `paths`.
    """
    resized = [sci.imresize(mpimg.imread(image_path), resize_dims) for image_path in paths]
    return np.array(resized, dtype=np.float32)

def image_generator(file_paths, labels, batch_size, resize_dims=(120,120),testing=False,img_depth=3):
    """Endlessly yield batches of resized images, optionally with their labels.

    The paths are traversed in order, `batch_size` at a time (the last batch
    of a pass may be smaller), and the traversal restarts after every pass.

    Args:
        file_paths: Sequence of image file paths.
        labels: Sequence aligned with `file_paths`; ignored when `testing`.
        batch_size: Number of images per batch; must be at least 1.
        resize_dims: Target size forwarded to `convert_images`.
        testing: When True, yield only the image batch.
        img_depth: Forwarded to `convert_images`.

    Yields:
        `images` when `testing` is True, otherwise `(images, batch_labels)`.

    Raises:
        ValueError: On the first `next()` call if `file_paths` is empty or
            `batch_size` is smaller than 1. Without this check the loop below
            would spin forever without yielding anything.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1, got " + str(batch_size))
    if len(file_paths) == 0:
        raise ValueError("file_paths is empty; there is nothing to generate batches from")
    while True:
        for start in range(0, len(file_paths), batch_size):
            stop = start + batch_size
            images = convert_images(file_paths[start:stop], resize_dims, img_depth=img_depth)
            if testing:
                yield images
            else:
                yield images, labels[start:stop]


# Both generators loop forever; the number of batches drawn per epoch is set by
# the *_steps_per_epoch values passed to fit_generator.
train_generator = image_generator(X_train_paths, y_train, batch_size)
valid_generator = image_generator(X_valid_paths, y_valid, batch_size)

### Keras Imports

In [7]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Dense, Input, concatenate, \
        Flatten, Dropout, Lambda
from keras.layers.normalization import BatchNormalization


Using TensorFlow backend.


### Model Architecture

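The model consists of 4 convolutional stacks followed by 3 dense layers (two hidden layers and a softmax output layer). Each stack runs 1x1, 3x3, and 5x5 convolutions in parallel, concatenates their outputs, and applies batch normalization and max pooling. I had good success with this model when predicting, in real time, the steering angle a car needs to drive around a track from an image of that track. It is also a lightweight model, which makes it quick and easy to train.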


In [8]:
stacks = []
# Kernel sizes of the parallel convolutions that make up each stack.
conv_shapes = [(1,1),(3,3),(5,5)]
# Filters per parallel convolution, one entry per stack.
conv_depths = [8,10,10,10]
pooling_filter = (2,2)
pooling_stride = (2,2)
# Widths of the two hidden dense layers, followed by the size of the softmax output.
dense_shapes = [150,50,n_labels]

inputs = Input(shape=(resize_dims[0],resize_dims[1],3))
# NOTE(review): `zen_layer` is never consumed — the first stack below is wired to
# `inputs`, so this batch-normalised input is not part of the model graph.
# Confirm whether the first stack was meant to read `zen_layer` instead.
zen_layer = BatchNormalization()(inputs)

# First stack: parallel convolutions over the input, concatenated along channels.
for shape in conv_shapes:
    stacks.append(Conv2D(conv_depths[0], shape, padding='same', activation='elu')(inputs))
layer = concatenate(stacks,axis=-1)
layer = BatchNormalization()(layer)
layer = MaxPooling2D(pooling_filter,strides=pooling_stride,padding='same')(layer)
layer = Dropout(0.05)(layer)

# Remaining stacks: same structure, each fed by the previous stack's pooled output
# (no dropout after these).
for i in range(1,len(conv_depths)):
    stacks = []
    for shape in conv_shapes:
        stacks.append(Conv2D(conv_depths[i],shape,padding='same',activation='elu')(layer))
    layer = concatenate(stacks,axis=-1)
    layer = BatchNormalization()(layer)
    layer = MaxPooling2D(pooling_filter,strides=pooling_stride, padding='same')(layer)

layer = Flatten()(layer)
layer = Dropout(0.5)(layer)

# Hidden dense layers: every entry of dense_shapes except the last.
for i in range(len(dense_shapes)-1):
    layer = Dense(dense_shapes[i], activation='elu')(layer)
    layer = BatchNormalization()(layer)

# Output layer: one softmax unit per class.
outputs = Dense(dense_shapes[-1], activation='softmax')(layer)

### Training and Validation
The next cell trains the model using the Adam optimizer and the categorical cross-entropy loss. Adam is an efficient choice because it maintains a separate adaptive learning rate for each parameter in the network and it uses momentum. Both of these techniques improve the efficiency of the training process.

I use the categorical_crossentropy loss function because it is a standard loss function for multi-class classification problems with one-hot encoded labels.

In [54]:
model = Model(inputs=inputs,outputs=outputs)
# Resume from previously saved weights when they exist. On a first run there is
# no checkpoint yet, so training starts from the freshly initialised weights
# instead of failing on the missing file.
if os.path.exists('model.h5'):
    model.load_weights('model.h5')
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit_generator(train_generator, train_steps_per_epoch, epochs=2,
                    validation_data=valid_generator,validation_steps=valid_steps_per_epoch)
# Overwrites the checkpoint so the next run of this cell continues from here.
model.save('model.h5')


Epoch 1/2
Epoch 2/2


### Testing

In [None]:
# The test images are unlabeled: with no_labels=True the second list holds the
# image file names, which are used as row ids in the submission file.
path = './statefarm_drivers/imgs/test'
test_paths, test_labels = read_paths(path, no_labels=True)
print('{} testing images'.format(len(test_paths)))

In [None]:
# Drop the names bound to the training paths and one-hot labels before test data is loaded.
# NOTE(review): X_train_paths / y_train / X_valid_paths / y_valid were sliced from
# these objects and are still alive, so this may release little memory — confirm.
del file_paths
del labels

In [None]:
# A single worker thread loads and resizes the next portion of test images while
# the model predicts on the current one.
pool = ThreadPool(processes=1)
test_divisions = 4
# +1 so that `test_divisions` portions always cover every test image.
portion = len(test_paths)//test_divisions+1
# Queue the first portion now; the prediction loop queues the following ones.
async_result = pool.apply_async(convert_images,(test_paths[0*portion:portion*(0+1)],resize_dims))
# Number of portions queued so far. This used to be `count+=1`, which raised a
# NameError on a fresh kernel because `count` was never initialised.
count = 1
predictions = []
# NOTE: this rebinds the batch_size used by the training cells above.
batch_size = 100

In [None]:
# Predict portion by portion: wait for the portion that is currently loading,
# queue the next one in the background, then run the model on the loaded images.
for i in range(1, test_divisions + 1):
    base_time = time.time()
    print("Begin set")
    X_set = async_result.get()
    if i < test_divisions:
        next_paths = test_paths[i * portion:portion * (i + 1)]
        async_result = pool.apply_async(convert_images, (next_paths, resize_dims))
    set_predictions = model.predict(X_set, batch_size=batch_size, verbose=0)
    predictions.append(set_predictions)
    elapsed_minutes = (time.time() - base_time) / 60
    print("\nExecution Time: " + str(elapsed_minutes) + 'min')

In [None]:
# NOTE(review): this cell looks like a leftover from a generator-based approach.
# `test_generator` is not defined in the visible notebook, and `counter` is only
# assigned in the submission cell further down, so this fails on a fresh kernel.
# It also appends to the same `predictions` list that the threaded loop fills,
# which would add rows beyond the available test ids — confirm before running.
counter+=1
base_time = time.time()
batch_size = 50

print('Begin set ' + str(counter))
X_set = next(test_generator)
print('Running time: ' + str((time.time()-base_time)/60)+ 'min')

predictions.append(model.predict(X_set,batch_size=batch_size,verbose=0))

print("\nExecution Time: " + str((time.time()-base_time)/60)+'min')

In [None]:
# Total number of predicted rows across all portions.
total = sum(len(prediction) for prediction in predictions)
print(total)

In [None]:
# NOTE: binds a second name to the same list object; appending to `predictions`
# afterwards also changes `backup`.
backup = predictions

In [None]:
# Check up front that there is exactly one prediction row per test image, so a
# mismatch fails before any output is written instead of leaving a truncated or
# partially written submission file.
n_prediction_rows = sum(len(logit_group) for logit_group in predictions)
if n_prediction_rows != len(test_labels):
    raise ValueError('Expected ' + str(len(test_labels)) + ' prediction rows, got '
                     + str(n_prediction_rows))

counter = 0
with open('./statefarm_drivers/submission.csv', 'w') as f:
    f.write('img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9\n')
    for logit_group in predictions:
        for logit in logit_group:
            # A running counter maps rows to ids because the groups differ in size.
            id_ = test_labels[counter]
            counter += 1
            f.write(id_ + ',' + ','.join(str(element) for element in logit) + '\n')

This submission scored a log loss of 1.8 on the private leaderboard.