# Distracted Driver Detection

An attempt to fine-tune a ResNet model for the Kaggle State Farm Distracted Driver Detection competition (https://www.kaggle.com/c/state-farm-distracted-driver-detection).

In [17]:
import sys
sys.path.insert(0, './../..')
from importlib import reload
import pandas as pd
import utils; reload(utils)
from utils import *

# Notebook-wide configuration. Every cell below reads these names.

# Select the 'th' image dimension ordering in the backend.
# NOTE(review): `K` comes from `from utils import *` (presumably the Keras backend) — confirm.
K.set_image_dim_ordering('th')
# Alternative data root, kept for quick switching (presumably a small sample set — confirm):
# base_dir = '../sample/'
# Root directory; the train/, valid/, test/ and models/ paths are built from it.
base_dir = '../'
# Batch size passed to the image generators and to the fit/predict helpers.
batch_size = 64
# Number of output classes (the generators report 10 classes for train/ and valid/).
output_size = 10
# Dropout value forwarded as `dropout=` to resnet_fc_model() in the finetune functions.
dropout = 0.85

In [7]:
def flow_subdir(generator, subdir, shuffle=False):
    """Create a directory iterator over `base_dir + subdir + '/'`.

    All iterators in this notebook share the same target size (224x224),
    class mode ('categorical') and `batch_size`; only the generator, the
    sub-directory and the shuffle flag vary, so those are the parameters.

    Args:
        generator: an `image.ImageDataGenerator` instance.
        subdir: directory name under `base_dir` (e.g. 'train').
        shuffle: whether the iterator yields images in random order.

    Returns:
        The iterator returned by `generator.flow_from_directory`.
    """
    return generator.flow_from_directory(base_dir + subdir + '/',
                                         target_size=(224,224),
                                         class_mode='categorical',
                                         shuffle=shuffle,
                                         batch_size=batch_size)


# Plain generator (no augmentation) and an augmenting one for the final training stage.
gen = image.ImageDataGenerator()
gen_with_aug = image.ImageDataGenerator(rotation_range=15, 
                                        height_shift_range=0.05, 
                                        shear_range=0.1, 
                                        channel_shift_range=20, 
                                        width_shift_range=0.1)

# Unshuffled iterators keep a stable order, so `.classes` / `.filenames`
# line up with anything predicted from them.
batches = flow_subdir(gen, 'train')
batches_aug = flow_subdir(gen_with_aug, 'train', shuffle=True)
val_batches = flow_subdir(gen, 'valid')
test_batches = flow_subdir(gen, 'test')

# Fragmented test batches for pseudo labeling
# test_batches_arr = []
# for i in range(4):
#     b = gen.flow_from_directory(base_dir + 'test' + str(i+1) + '/',
#                                      target_size=(224,224),
#                                      class_mode='categorical',
#                                      shuffle=False,
#                                      batch_size=batch_size)
#     test_batches_arr.append(b)

# One-hot labels in the same (unshuffled) order as the iterators above.
trn_labels = to_categorical(batches.classes)
val_labels = to_categorical(val_batches.classes)

Found 17850 images belonging to 10 classes.
Found 17850 images belonging to 10 classes.
Found 4574 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


## Convolutional pre-computing

In [None]:
# Model used to pre-compute features (the convolutional layers).
# NOTE(review): the `False` argument presumably drops the classifier head — confirm in Resnet50.
cm = Resnet50((224,224), False).model

# Training split: rewind the iterator, predict over it, cache the result on disk.
batches.reset()
trn_steps = steps(batches, batch_size)
trn_features = cm.predict_generator(batches, trn_steps, verbose=1)
trn_features_path = base_dir + 'models/resnet_train_convlayer_features.bc'
save_array(trn_features_path, trn_features)

# Validation split: same procedure.
val_batches.reset()
val_steps = steps(val_batches, batch_size)
val_features = cm.predict_generator(val_batches, val_steps, verbose=1)
val_features_path = base_dir + 'models/resnet_valid_convlayer_features.bc'
save_array(val_features_path, val_features)

In [None]:
# Pre-compute conv features for the four test fragments (test1/ .. test4/).
# The iterators are created here rather than read from `test_batches_arr`,
# which is only defined in commented-out code and would raise NameError on a
# fresh kernel.
for i in range(4):
    fragment = 'test' + str(i + 1)
    # Unshuffled, so the saved features keep the directory's file order.
    b = gen.flow_from_directory(base_dir + fragment + '/',
                                target_size=(224,224),
                                class_mode='categorical',
                                shuffle=False,
                                batch_size=batch_size)
    b.reset()
    features = cm.predict_generator(b, steps(b, batch_size), verbose=1)
    save_array(base_dir + 'models/resnet_' + fragment + '_convlayer_features.bc', features)

In [8]:
# Reload the cached conv features written by the pre-computing cell, so the
# expensive prediction pass can be skipped on later runs.
trn_features, val_features = (
    load_array(base_dir + 'models/resnet_train_convlayer_features.bc'),
    load_array(base_dir + 'models/resnet_valid_convlayer_features.bc'),
)

## Finetuning

In [9]:
def finetune_dense_layers(model, index):
    """Fit a dense model on the cached conv features and copy it into `model`.

    A helper model is obtained from `resnet_fc_model`, trained on
    `trn_features`/`trn_labels` (validated on `val_features`/`val_labels`)
    through a sequence of RMSprop stages with decreasing learning rate. The
    weights of its layers 1..6 are then written into the last six layers of
    `model`, and `model`'s weights are saved to
    `models/resnet_last<index>.h5` under `base_dir`.

    Args:
        model: full network whose last six layers receive the trained weights.
        index: suffix used in the saved weights file name.
    """
    dm = resnet_fc_model(model, output_size, dropout=dropout)
    for layer in dm.layers:
        layer.trainable = True

    # (learning rate, third positional argument of fit_with_features) per stage;
    # the second value is presumably the epoch count — confirm in utils.
    schedule = ((1e-3, 16), (1e-4, 14), (1e-5, 12), (1e-6, 10), (1e-7, 8))
    for lr, n_epochs in schedule:
        fit_with_features(dm, RMSprop(lr), n_epochs, trn_features, trn_labels,
                          val_features, val_labels, batch_size=batch_size)

    # dm.layers[1..6] -> model.layers[-6..-1]
    for dst_pos, src_pos in zip(range(-6, 0), range(1, 7)):
        model.layers[dst_pos].set_weights(dm.layers[src_pos].get_weights())
    model.save_weights(base_dir + 'models/resnet_last' + str(index) + '.h5')
    
def finetune_w_valid_pseudo_labels(model, index):
    """Run pseudo-label finetuning using the validation features.

    Delegates to `finetune_w_pseudo_labels` with `val_features` as the
    features to pseudo-label; weights are saved under the
    "resnet_pseudo_label" prefix followed by `index`.
    """
    finetune_w_pseudo_labels(model, index,
                             features=val_features,
                             log_id="resnet_pseudo_label")
    
def finetune_w_test_pseudo_labels(model, index):
    """Run pseudo-label finetuning once per cached test fragment.

    Loads the conv features of test fragments 1..4 from `base_dir + 'models/'`
    one at a time and passes each to `finetune_w_pseudo_labels`. Every pass
    saves to the same file name ("resnet_pseudo_label_w_test" + index), so the
    file on disk reflects the last fragment processed.
    """
    for fragment in range(1, 5):
        feature_path = base_dir + 'models/resnet_test' + str(fragment) + '_convlayer_features.bc'
        fragment_features = load_array(feature_path)
        finetune_w_pseudo_labels(model, index, fragment_features, "resnet_pseudo_label_w_test")
        
def finetune_w_pseudo_labels(model, index, features, log_id):
    """Finetune on training data combined with pseudo-labelled `features`.

    The helper model from `resnet_fc_model` first predicts labels for
    `features`; those predictions are appended to the training labels (and
    `features` to the training features), and the helper model is fitted on
    the combined set over three RMSprop stages. Its layers 1..6 are then
    copied into the last six layers of `model`, which is saved to
    `models/<log_id><index>.h5` under `base_dir`.

    Args:
        model: full network that receives the trained weights.
        index: suffix used in the saved weights file name.
        features: cached conv features to pseudo-label.
        log_id: file-name prefix for the saved weights.
    """
    dm = resnet_fc_model(model, output_size, dropout=dropout)
    for layer in dm.layers:
        layer.trainable = True

    # The model's own predictions serve as labels for the extra features.
    pseudo_labels = dm.predict(features, batch_size=batch_size, verbose=1)
    comb_labels = np.concatenate([trn_labels, pseudo_labels])
    comb_features = np.concatenate([trn_features, features])

    # (learning rate, third positional argument of fit_with_features) per stage.
    for lr, n_epochs in ((1e-5, 10), (1e-6, 8), (1e-7, 6)):
        fit_with_features(dm, RMSprop(lr), n_epochs, comb_features, comb_labels,
                          val_features, val_labels, batch_size=batch_size)

    # dm.layers[1..6] -> model.layers[-6..-1]
    for dst_pos, src_pos in zip(range(-6, 0), range(1, 7)):
        model.layers[dst_pos].set_weights(dm.layers[src_pos].get_weights())
    model.save_weights(base_dir + 'models/' + log_id + str(index) + '.h5')
    
def finetune_dense_layers_with_aug(model, index):
    """Train the last six layers of `model` on augmented image batches.

    All layers except the final six are marked non-trainable, then the model
    is fitted on `batches_aug` (validated on `val_batches`) over five RMSprop
    stages with decreasing learning rate. Weights are saved to
    `models/resnet_dense_pseudo_aug<index>.h5` under `base_dir`.

    Args:
        model: full network to train in place.
        index: suffix used in the saved weights file name.
    """
    first_trainable = len(model.layers) - 6
    for pos, layer in enumerate(model.layers):
        layer.trainable = pos >= first_trainable

    # (learning rate, third positional argument of fit_with_batches) per stage.
    for lr, n_epochs in ((1e-3, 5), (1e-4, 4), (1e-5, 3), (1e-6, 2), (1e-7, 1)):
        fit_with_batches(model, RMSprop(lr), n_epochs, batches_aug, val_batches,
                         batch_size=batch_size)

    model.save_weights(base_dir + 'models/resnet_dense_pseudo_aug' + str(index) + '.h5')

## Training

In [None]:
# Full training pipeline, in stage order. Previously the model construction
# and the first three stages were commented out, so this cell only worked when
# `model` was left over in the kernel from an earlier run and failed with
# NameError after Restart & Run All.
# Each stage saves its weights under base_dir + 'models/'.
# Note: this re-runs every stage and is expensive.
model = resnet(output_size, dropout=dropout)
finetune_dense_layers(model, 1)
finetune_w_valid_pseudo_labels(model, 1)
finetune_w_test_pseudo_labels(model, 1)
finetune_dense_layers_with_aug(model, 1)
log_action("Finished training model")

Epoch 1/5

In [None]:
# Rebuild the network and restore previously saved weights, so the submission
# cell can run without retraining.
# NOTE(review): this loads '...aug0.h5', but the training cell calls
# finetune_dense_layers_with_aug(model, 1), which writes '...aug1.h5' —
# confirm which checkpoint is intended.
# NOTE(review): resnet() is called here without dropout=dropout, unlike the
# resnet(output_size, dropout=dropout) call in the training cell — confirm the
# default is acceptable for inference.
model = resnet(output_size)
model.load_weights(base_dir + 'models/resnet_dense_pseudo_aug0.h5')

## Submit to Kaggle

In [None]:
def do_clip(arr, mx, n_classes=10):
    """Clip predicted probabilities into [(1 - mx) / (n_classes - 1), mx].

    The lower bound is the remaining mass `1 - mx` split evenly across the
    other `n_classes - 1` classes. Clipping keeps predictions away from
    0 and 1.

    Args:
        arr: array of predicted probabilities.
        mx: upper bound for any single probability.
        n_classes: number of classes; the default of 10 reproduces the
            previously hard-coded divisor of 9.

    Returns:
        A new array with every value clipped to the bounds above.

    Raises:
        ValueError: if `n_classes` is smaller than 2 (the lower bound would
            require dividing by zero or a negative count).
    """
    if n_classes < 2:
        raise ValueError("n_classes must be at least 2, got %r" % (n_classes,))
    return np.clip(arr, (1 - mx) / (n_classes - 1), mx)

import os
from IPython.display import FileLink

# Rewind the iterator so predictions start at the first file and line up with
# `test_batches.filenames` (the iterator is unshuffled).
test_batches.reset()
preds = model.predict_generator(test_batches, steps(test_batches, batch_size), verbose=1)
clipped_preds = do_clip(preds, 0.93)

# Column names: class labels ordered by their class index.
classes = sorted(batches.class_indices, key=batches.class_indices.get)
submission = pd.DataFrame(clipped_preds, columns=classes)
# Strip the directory part of each path by name, instead of assuming an
# 8-character prefix, so any sub-directory name under test/ works.
submission.insert(0, 'img', [os.path.basename(a) for a in test_batches.filenames])
file_path = base_dir + 'submission.gz'
submission.to_csv(file_path, index=False, compression='gzip')

# Display a download link for the generated file.
FileLink(file_path)

In [22]:
# Re-display the download link for the submission file.
# Depends on `file_path` being set by the submission cell above; this
# duplicates the last two lines of that cell.
from IPython.display import FileLink
FileLink(file_path)

## Not tried

- Other architectures
- Ensembling