# Create Validation Set
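First, we move all images of 2 drivers out of the training folder so that we can use them for validation. Holding out whole drivers, rather than random images, means the validation set only contains people the model has never seen during training.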

In [1]:
# Root of the dataset, plus the training and validation image folders beneath it.
path = 'data/statefarm'
trainpath = '%s/train' % path
validpath = '%s/valid' % path

In [13]:
import os
# Make sure valid/c0 ... valid/c9 exist before any images are moved into them.
for class_index in range(10):
    class_dir = '%s/c%d' % (validpath, class_index)
    if os.path.exists(class_dir):
        continue
    os.makedirs(class_dir)

In [29]:
import csv
# Index the image list by driver: driver id -> list of 'classname/image' paths
# (relative to the train/valid folders).
drivers = dict()
with open(path + '/driver_imgs_list.csv') as driverlist:
    reader = csv.reader(driverlist)
    for driver, classname, image in reader:
        # Skip the column-title row so it is not recorded as a driver and later
        # picked as a "validation driver".
        # NOTE(review): assumes the Kaggle header is exactly subject,classname,img;
        # if the file has no such row this check is a no-op.
        if (driver, classname, image) == ('subject', 'classname', 'img'):
            continue
        # setdefault replaces the deprecated dict.has_key() lookup.
        drivers.setdefault(driver, list()).append(os.path.join(classname, image))

['p022', 'p049']


In [35]:
# Number of drivers whose images are held out for validation.
validdrivers = 2
# list() keeps the slice valid where dict.values() returns a view instead of a list.
# NOTE: which drivers are selected depends on the iteration order of `drivers`.
driverimages = list(drivers.values())[0:validdrivers]
for images in driverimages:
    for image in images:
        source = os.path.join(trainpath, image)
        # Only move files still present in train/, so re-running the cell is harmless.
        if os.path.exists(source):
            os.rename(source, os.path.join(validpath, image))

In [10]:
# Folder that receives the small sample copy of the dataset.
samplepath = '%s/sample' % path
import os, shutil

def copy_samples(subpath, sample_subpath, num_classes=10, samples_per_class=10):
    """Copy a small, reproducible subset of every class folder into a sample set.

    For each class folder ``c0`` .. ``c<num_classes - 1>`` under ``subpath``, the
    first ``samples_per_class`` files (by sorted file name) are copied into the
    folder of the same name under ``sample_subpath``. Missing destination
    folders are created.

    Args:
        subpath: directory holding the source class folders.
        sample_subpath: directory that receives the sampled class folders.
        num_classes: number of class folders to process (default 10).
        samples_per_class: maximum number of files copied per class (default 10).

    Raises:
        OSError: if a source class folder does not exist (from os.listdir).
    """
    if not os.path.exists(sample_subpath):
        os.makedirs(sample_subpath)
    for i in range(num_classes):
        class_dir = 'c%d' % i
        source_dir = os.path.join(subpath, class_dir)
        target_dir = os.path.join(sample_subpath, class_dir)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        # os.listdir order is arbitrary; sorting makes the sample identical on every run.
        for image_name in sorted(os.listdir(source_dir))[:samples_per_class]:
            shutil.copyfile(os.path.join(source_dir, image_name),
                            os.path.join(target_dir, image_name))
            
# Build the sample copies of the training and validation sets, in that order.
sample_train_path = samplepath + '/train'
sample_valid_path = samplepath + '/valid'
for full_dir, sample_dir in ((trainpath, sample_train_path), (validpath, sample_valid_path)):
    copy_samples(full_dir, sample_dir)

# Build the VGG16 Model

In [2]:
# Base URL that weight-file names are appended to when downloading.
# NOTE(review): plain HTTP, so the download has no transport-level integrity
# protection. Consider HTTPS if the host supports it (not verified here).
download_path = 'http://files.fast.ai/models/'

In [14]:
from keras import backend as K
from keras.models import Sequential
from keras.utils.data_utils import get_file
from keras.layers.core import Lambda, Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D
import numpy as np

# One mean per input channel, shaped (3, 1, 1) so it broadcasts over the two trailing axes.
vgg_mean = np.array([[[123.68]], [[116.779]], [[103.939]]], dtype=np.float32)

def vgg_preprocess(x):
    """Zero-centre a batch of channels-first images and reverse its channel order.

    The per-channel mean is subtracted first, then axis 1 (the channel axis of
    a (batch, channel, height, width) input) is reversed. The reversal is there
    because most pretrained data comes from OpenCV, which uses BGR rather than RGB.
    """
    return (x - vgg_mean)[:, ::-1]

def vgg_convblock(model, layers, filters):
    """Append one VGG convolutional block to ``model``.

    Adds ``layers`` 3x3 ReLU convolutions with 'same' border mode, each with
    ``filters`` filters, followed by a single 2x2 max-pooling layer with stride 2.

    Args:
        model: object with an ``add(layer)`` method; it is modified in place.
        layers: number of convolution layers in the block.
        filters: number of filters in every convolution layer.
    """
    # range instead of the Python-2-only xrange, matching the other loops in this notebook.
    for _ in range(layers):
        model.add(Convolution2D(filters, 3, 3, border_mode='same', activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# Assemble the VGG16 network layer by layer, then load the pretrained weights into it.
model = Sequential()
model.add(Lambda(vgg_preprocess, input_shape=(3, 224, 224), output_shape=(3,224,224)))

# Five convolutional stages, given as (conv layers in the stage, filters per layer).
for stage_layers, stage_filters in [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]:
    vgg_convblock(model, stage_layers, stage_filters)

# Classifier head: two 4096-unit dense layers with dropout, then the 1000-way softmax.
model.add(Flatten())
for fc_index in range(2):
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
model.add(Dense(1000, activation='softmax'))

# get_file returns the path of the weights file (file name, origin URL, cache sub-folder).
weights_file = get_file('vgg16.h5', download_path+'vgg16.h5', cache_subdir='models')
model.load_weights(weights_file)
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_4 (Lambda)                (None, 3, 224, 224)   0           lambda_input_4[0][0]             
____________________________________________________________________________________________________
convolution2d_40 (Convolution2D) (None, 64, 224, 224)  1792        lambda_4[0][0]                   
____________________________________________________________________________________________________
convolution2d_41 (Convolution2D) (None, 64, 224, 224)  36928       convolution2d_40[0][0]           
____________________________________________________________________________________________________
maxpooling2d_16 (MaxPooling2D)   (None, 64, 112, 112)  0           convolution2d_41[0][0]           
___________________________________________________________________________________________

# Finetune the model

In [15]:
from keras.layers.normalization import BatchNormalization

# Freeze every layer except the last five, so only the tail of the network keeps training.
# This must run before the pop() below, because the slice counts from the original end.
trainable_tail = 5
for frozen_layer in model.layers[:-trainable_tail]:
    frozen_layer.trainable = False

# Replace the final layer with batch normalisation followed by a 10-way softmax.
# NOTE: re-running this cell pops and appends again, so run it once per freshly built model.
model.pop()
model.add(BatchNormalization())
model.add(Dense(10, activation='softmax'))
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_4 (Lambda)                (None, 3, 224, 224)   0           lambda_input_4[0][0]             
____________________________________________________________________________________________________
convolution2d_40 (Convolution2D) (None, 64, 224, 224)  1792        lambda_4[0][0]                   
____________________________________________________________________________________________________
convolution2d_41 (Convolution2D) (None, 64, 224, 224)  36928       convolution2d_40[0][0]           
____________________________________________________________________________________________________
maxpooling2d_16 (MaxPooling2D)   (None, 64, 112, 112)  0           convolution2d_41[0][0]           
___________________________________________________________________________________________

In [16]:
from keras.optimizers import Adam
# Adam with learning rate 0.001; categorical cross-entropy loss, accuracy reported as a metric.
adam = Adam(lr=0.001)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

In [25]:
from keras.preprocessing.image import ImageDataGenerator
def batch_gen(path, class_mode='categorical', shuffle=None):
    """Create a directory-backed iterator yielding batches of 8 images at 224x224.

    Args:
        path: directory passed to ``flow_from_directory``.
        class_mode: label format forwarded to ``flow_from_directory``; pass
            None to get images without labels (e.g. for prediction).
        shuffle: whether the iterator shuffles the images. When left as None,
            labelled data is shuffled and unlabelled data (``class_mode=None``)
            is not, so that predictions come back in a stable order and can be
            matched to their files.

    Returns:
        The iterator returned by ``ImageDataGenerator().flow_from_directory``.
    """
    if shuffle is None:
        # flow_from_directory shuffles by default, which scrambles the
        # prediction order for unlabelled data.
        shuffle = class_mode is not None
    return ImageDataGenerator().flow_from_directory(
        path, target_size=(224, 224), batch_size=8, class_mode=class_mode, shuffle=shuffle)

In [17]:
# Batch iterators over the training and validation folders (created in that order).
train_gen, valid_gen = batch_gen(trainpath), batch_gen(validpath)

# Five epochs; each epoch covers every training image and validates on every validation image.
model.fit_generator(
    train_gen,
    samples_per_epoch=train_gen.nb_sample,
    nb_epoch=5,
    validation_data=valid_gen,
    nb_val_samples=valid_gen.nb_sample,
)

Found 20180 images belonging to 10 classes.
Found 2244 images belonging to 10 classes.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x99839dd8>

In [22]:
import os
# Store the fine-tuned weights under <path>/models/, creating the folder on first use.
models_path = '%s/models/' % path
finetuned_weights = models_path + 'vgg16_bn.h5'
if not os.path.exists(models_path):
    os.makedirs(models_path)
model.save_weights(finetuned_weights)

# Generate submission

In [65]:
# Unlabelled iterator over the test images (class_mode=None yields images only).
testpath = '%s/test' % path
test_gen = batch_gen(path=testpath, class_mode=None)

Found 79726 images belonging to 1 classes.


In [66]:
# Predict once for every image the test iterator found.
test_sample_count = test_gen.nb_sample
predictions = model.predict_generator(test_gen, test_sample_count)

In [67]:
# Write one row per test image: the file name followed by the ten class probabilities.
#
# File names come from the iterator that produced the predictions
# (test_gen.filenames) rather than from a separate os.walk, whose listing order
# is OS-dependent and need not match. The pairing is only valid when test_gen
# iterates in directory order, i.e. without shuffling.
# The file is opened once; opening it inside a directory loop truncated it on
# every iteration.
# 'wb' is the Python 2 csv convention; on Python 3 use open(..., 'w', newline='').
with open(path+'/submission.csv', 'wb') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['img','c0','c1','c2','c3','c4','c5','c6','c7','c8','c9'])
    for filename, file_predictions in zip(test_gen.filenames, predictions):
        # filenames are relative to testpath (e.g. 'unknown/img_1.jpg'); keep the base name only.
        writer.writerow([os.path.basename(filename)] + file_predictions.tolist())