# Distracted Driver Predictions Using Fast AI
Use the Fast.AI library to quickly create an image recognition model with performance in the top 25% of the private leaderboard.

## 1 - Import Libraries

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [4]:
# import fast ai vision library
from fastai.vision import *

## 2 - Create Validation Set

In [None]:
# Build the validation set by holding out every picture of a few drivers,
# so validation images never show a driver that is also in the training set.
path = '/kaggle/input/state-farm-distracted-driver-detection/'
img_list = pd.read_csv(path + 'driver_imgs_list.csv')

# the last four subject ids (in sorted order) become the validation drivers
sorted_subjects = img_list['subject'].sort_values()
valid_subjects = sorted_subjects.unique()[-4:]

# per-image flag: True when the pictured driver is one of the validation subjects
valid_mask = img_list['subject'].isin(valid_subjects)
img_list['is_valid'] = valid_mask

# report which drivers were held out and how many images that covers
print("valid subjects: ", valid_subjects)
print(img_list.loc[valid_mask, 'subject'].count())

# image location relative to the train folder: "<classname>/<img>"
img_list['img_path'] = img_list['classname'] + '/' + img_list['img']

# plain list with the file names of all validation images
valid_names = img_list.loc[valid_mask, 'img'].to_list()

## 3 - Create Data Bunch

In [None]:
# Apply the standard image transformations except flipping the pictures:
# the categories we are trying to predict can be specific to left hand / right hand.
tfms = get_transforms(do_flip=False)

# The split function below is evaluated once per image, so look the file name
# up in a set (hash lookup) instead of scanning the whole valid_names list each time.
valid_name_set = set(valid_names)

# Create the data bunch:
#   - items come from the 'img_path' column, resolved relative to the train folder
#   - an image goes to the validation set when its file name is a validation name
#   - labels are read from the 'classname' column (named explicitly rather than
#     by position, so adding/reordering dataframe columns cannot change the labels)
#   - the unlabeled test folder is attached for later predictions
data = (ImageList.from_df(df=img_list, path=path + 'train/', cols='img_path')
        #.filter_by_rand(.) #sample data for faster training.  be sure to remove sample before finalizing model
        .split_by_valid_func(lambda o: os.path.basename(o) in valid_name_set)
        .label_from_df(cols='classname')
        .transform(tfms=tfms)
        .add_test_folder(path + 'test/')
        .databunch(bs=32))

# output description of data
data

In [None]:
# Visual sanity check: display a grid of sample images with their labels.

data.show_batch(rows=3)

## 4 - Define CNN Model w/ Transfer Learning

In [None]:
# resnet34 was chosen because of memory errors; resnet50 or VGG16 would have
# been worth trying otherwise.
# NOTE(review): model_dir='/' presumably moves saved weights out of the dataset
# directory (the learner's default location) — confirm it is writable as intended.

learn = cnn_learner(
    data,
    models.resnet34,
    metrics=accuracy,
    model_dir='/',
)

In [None]:
# find optimal learning rate

#learn.lr_find()
#learn.recorder.plot()

In [None]:
# For this model we do not train the resnet34 layers.
# Instead we keep the pretrained weights and train only the standard model head defined by Fast AI.
# You can review the layers of the model by calling "learn.layer_groups".
# NOTE(review): cnn_learner may already freeze the pretrained body on creation,
# in which case this call is just an explicit safeguard — confirm.

learn.freeze()

# unfreezing the layers may yield better results, especially if the learning rates are differentiated for the layer groups.

In [None]:
# Train the model with the one-cycle policy.
# This takes several minutes on the Kaggle GPU; the cell can be re-run
# until the validation error stops improving.

n_epochs = 2    # epochs per run of this cell
peak_lr = 1e-3  # maximum learning rate reached during the cycle
learn.fit_one_cycle(n_epochs, max_lr=peak_lr)

## 5 - Review Errors

In [None]:
# interp = ClassificationInterpretation.from_learner(learn)

In [None]:
# interp.plot_top_losses(6)

## 6 - Create Predictions for Test Set
To get the best results you would want to retrain the model using all of the training data.

In [None]:
# preds = learn.get_preds(DatasetType.Test)

In [None]:
# labels = pd.DataFrame(data.test_ds.x.items, columns=['img'])
# labels.img = labels.img.astype(str)
# labels = labels.img.str.rsplit('/', 1, expand=True)
# labels.drop(0, axis=1, inplace=True)
# labels.rename(columns={1: 'img'}, inplace=True)

In [None]:
# columns = data.classes

# submission = pd.DataFrame(preds[0].numpy(), columns=columns, index=labels.img)
# submission.reset_index(inplace=True)
# submission.to_csv('submission.csv', index=False)