# State Farm Distracted Driver Detection

In [18]:
# %pip install ultralytics
# %pip install split-folders
# %pip install -U ipywidgets


Note: you may need to restart the kernel to use updated packages.
Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl.metadata (6.2 kB)
Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1
Note: you may need to restart the kernel to use updated packages.


Note: you may need to restart the kernel to use updated packages.


In [2]:
import csv
import os
from glob import glob
from shutil import copyfile
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, BatchNormalization, Dropout
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.vgg16 import VGG16
from ultralytics import YOLO

## Loading the data

In [3]:
# Group the image file names listed in the index CSV by their class label:
# data[label] -> list of file names, in the order they appear in the file.
data = {}

# newline='' is what the csv module documentation asks for when opening a file
# for csv.reader, so embedded line breaks and platform newlines parse correctly.
with open('../datasets/state-farm-distracted-driver-detection/driver_imgs_list.csv', newline='') as file:
    read_file = csv.reader(file)
    next(read_file, None)  # skip the header row without loading the whole file

    for row in read_file:
        if not row:
            # csv.reader yields an empty list for a blank line; nothing to record
            continue
        # Column 1 is the grouping key (class label), column 2 the image file name
        data.setdefault(row[1], []).append(row[2])

In [4]:
# Show the label -> file-name mapping built above (long output: one entry per image).
data

{'c0': ['img_44733.jpg',
  'img_72999.jpg',
  'img_25094.jpg',
  'img_69092.jpg',
  'img_92629.jpg',
  'img_3370.jpg',
  'img_67639.jpg',
  'img_58560.jpg',
  'img_35779.jpg',
  'img_10012.jpg',
  'img_70354.jpg',
  'img_54617.jpg',
  'img_26684.jpg',
  'img_82409.jpg',
  'img_5585.jpg',
  'img_19029.jpg',
  'img_40930.jpg',
  'img_31547.jpg',
  'img_87373.jpg',
  'img_10627.jpg',
  'img_66355.jpg',
  'img_97714.jpg',
  'img_37344.jpg',
  'img_54848.jpg',
  'img_59964.jpg',
  'img_70675.jpg',
  'img_43317.jpg',
  'img_13585.jpg',
  'img_63064.jpg',
  'img_91058.jpg',
  'img_79799.jpg',
  'img_18198.jpg',
  'img_55108.jpg',
  'img_87789.jpg',
  'img_11582.jpg',
  'img_24152.jpg',
  'img_28404.jpg',
  'img_8868.jpg',
  'img_14949.jpg',
  'img_41276.jpg',
  'img_70611.jpg',
  'img_42895.jpg',
  'img_87995.jpg',
  'img_40990.jpg',
  'img_68866.jpg',
  'img_71265.jpg',
  'img_90102.jpg',
  'img_30954.jpg',
  'img_3570.jpg',
  'img_59000.jpg',
  'img_48323.jpg',
  'img_13073.jpg',
  'img_806

In [5]:
# Class labels, in the order each one was first inserted into `data`.
classes_list = list(data)
classes_list

['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']

In [6]:
# Folder holding the original image sets; its 'train/' and 'test/' children
# are the sources read by the counting and copying cells below.
dataset_folder = '../datasets/state-farm-distracted-driver-detection/imgs/'

train_dir, test_dir = (
    os.path.join(dataset_folder, split) for split in ('train/', 'test/')
)

In [7]:
# Training files are matched one level down (train/<sub-folder>/<file>);
# test files are matched directly inside test/.
print('Number of images in the training dataset : ', len(glob(train_dir + '*/*')))
print('Number of images in the testing dataset : ', len(glob(test_dir + '*')))

Number of images in the training dataset :  22424
Number of images in the testing dataset :  79726


In [None]:
# The labelled images in train/ need to be split into a training set and a validation set.

### Helper functions for creating the training-set and validation-set directories

In [8]:
def remove_directory(path):
    """Delete the directory at ``path`` together with everything inside it.

    Delegates to ``shutil.rmtree`` instead of walking the tree by hand. The
    manual walk called ``os.rmdir`` on every name reported as a directory,
    which fails when one of them is a symbolic link to a directory;
    ``rmtree`` unlinks such entries without following them.

    Args:
        path: Directory to remove.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        OSError: If ``path`` is itself a symbolic link or an entry cannot be
            removed.
    """
    # Local import: the notebook's import cell only brings in `copyfile` from shutil.
    import shutil

    shutil.rmtree(path)

In [9]:
def create_directories(paths, subfolders):
    """Recreate each root in ``paths`` with one child directory per subfolder.

    WARNING: destructive. Any root that already exists is removed, contents
    included, before it is created again.

    Args:
        paths: Iterable of root directory paths to (re)create.
        subfolders: Iterable of directory names to create inside every root.
            May be empty, in which case only the empty root is created.
    """
    for path in paths:
        if os.path.exists(path):
            remove_directory(path)

        # Create the root explicitly so it exists even when `subfolders` is empty.
        os.makedirs(path, exist_ok=True)

        for folder in subfolders:
            # exist_ok tolerates repeated names and names that resolve to the root
            os.makedirs(os.path.join(path, folder), exist_ok=True)

In [10]:
# Destination roots for the split: paths[0] receives the training copies,
# paths[1] the validation copies (see the copy loop below).
paths = ['../datasets/state-farm-distracted-driver-detection/train',
         '../datasets/state-farm-distracted-driver-detection/val']

In [11]:
# One sub-folder per class label inside each destination root.
subfolders = classes_list

In [12]:
# Destructive: existing train/ and val/ roots are deleted and recreated empty.
create_directories(paths, subfolders)

In [13]:
# Fraction of each class's images that goes to the training split;
# the remainder becomes the validation split.
split_size = 0.8

for clas, images in data.items():
    # Split in list order: the first `train_size` names train, the rest validate.
    train_size = int(split_size * len(images))
    train_images, val_images = images[:train_size], images[train_size:]

    # Both subsets are read from the same source class folder; only the
    # destination root differs (paths[0] = train, paths[1] = val).
    for dest_root, subset in ((paths[0], train_images), (paths[1], val_images)):
        for image in subset:
            source = os.path.join(train_dir, clas, image)
            dest = os.path.join(dest_root, clas, image)
            copyfile(source, dest)

## Creating the image data generators

In [14]:
# NOTE: `train_dir` is rebound here. It pointed at the original imgs/train/
# folder earlier and now points at the freshly created training split.
train_dir = '../datasets/state-farm-distracted-driver-detection/train'
val_dir = '../datasets/state-farm-distracted-driver-detection/val'

# Options shared by both directory iterators: 256x256 inputs, one-hot labels,
# shuffled batches of 32.
flow_options = dict(
    target_size=(256, 256),
    class_mode='categorical',
    shuffle=True,
    batch_size=32,
)

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    shear_range=0.05,
    fill_mode="nearest",
)
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

# Validation images are only rescaled; no augmentation is applied.
val_datagen = ImageDataGenerator(rescale=1.0/255)
val_generator = val_datagen.flow_from_directory(val_dir, **flow_options)

Found 17934 images belonging to 10 classes.
Found 4490 images belonging to 10 classes.


In [15]:
# Early-stopping callback shared by every fit() call below: it watches 'val_acc'
# (the models are compiled with metrics=['acc']) with patience 4 and min_delta 0.001.
# restore_best_weights is not set, so the Keras default applies.
es = EarlyStopping(monitor = 'val_acc', patience = 4, min_delta = 0.001)

## First Model -> Dense Model

In [None]:
# Fully connected baseline: flatten the 256x256x3 image, pass it through seven
# Dense(relu) + BatchNormalization pairs of doubling width (16 ... 1024), then
# classify into 10 classes with softmax.
model1 = tf.keras.models.Sequential(
    [Flatten(input_shape=(256, 256, 3))]
    + [
        layer
        for units in (16, 32, 64, 128, 256, 512, 1024)
        for layer in (Dense(units, activation='relu'), BatchNormalization())
    ]
    + [Dense(10, activation='softmax')]
)

In [None]:
# Adam with its default settings; the 'acc' metric name is what makes
# 'val_acc' available to the early-stopping callback.
model1.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['acc'],
)

In [None]:
# Print the layer-by-layer summary of the dense baseline.
model1.summary()

In [None]:
# Train for at most 20 epochs, validating on val_generator each epoch;
# the `es` callback may end training earlier.
model1.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    callbacks=[es],
    verbose=1,
)

## Second Model -> CNN Model

In [None]:
# Convolutional model: four conv stages of increasing width followed by a
# Dense/BatchNormalization head and a 10-way softmax output.
model2 = tf.keras.models.Sequential([
    # Stage 1: two 32-filter convolutions
    Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    # Stage 2: two 64-filter convolutions
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    # Stage 3: one 128-filter convolution
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    # Stage 4: one 128-filter convolution, no pooling before flattening
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    Flatten(),
    # Classifier head: Dense(relu) + BatchNormalization pairs
    *[
        layer
        for units in (512, 256, 256, 128)
        for layer in (Dense(units, activation='relu'), BatchNormalization())
    ],
    Dense(10, activation='softmax'),
])

In [None]:
# Same training configuration as the dense baseline, then print the architecture.
model2.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['acc'],
)
model2.summary()

In [None]:
# Train for at most 20 epochs, validating on val_generator each epoch;
# the `es` callback may end training earlier.
model2.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    callbacks=[es],
    verbose=1,
)

## Third Model -> VGG16

In [None]:
# Load VGG16 with ImageNet weights; include_top = False leaves out the original
# fully connected classifier so a custom head can be attached.
# NOTE(review): the generators only rescale by 1/255; confirm whether VGG16's own
# preprocess_input should be used with these weights instead.
pretrained_model = VGG16(weights = 'imagenet', include_top = False, input_shape = (256, 256, 3))
pretrained_model.summary()

In [None]:
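# Optional: freeze every VGG16 layer except the last five before fine-tuning.
# This is currently disabled, so no layer of the pretrained base is frozen here.
# for layer in pretrained_model.layers[:-5]:
#     layer.trainable = False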
    

In [None]:
# NOTE(review): `last_layer` / `last_output` are not referenced by the model
# built below or anywhere else in the visible notebook; candidates for removal.
last_layer = pretrained_model.get_layer('block5_pool')
last_output = last_layer.output

# Stack a custom classifier on top of the VGG16 convolutional base.
model3 = tf.keras.models.Sequential()
model3.add(pretrained_model)
model3.add(Flatten())
model3.add(Dense(2048, activation='relu'))
model3.add(Dense(1024, activation='relu'))
model3.add(Dropout(0.2))
model3.add(Dense(256, activation='relu'))
model3.add(Dropout(0.2))
model3.add(Dense(10, activation='softmax'))

In [None]:
# NOTE(review): the layer-freezing cell above is commented out, so the pretrained
# base is trained too; confirm that a learning rate of 0.005 is intended for that.
model3.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.005),
    metrics=['acc'],
)
model3.summary()

In [None]:
# Each epoch draws 250 training batches and 50 validation batches (batch size 32
# in both generators) instead of a full pass over either split.
model3.fit(
    train_generator,
    validation_data=val_generator,
    steps_per_epoch=250,
    validation_steps=50,
    epochs=20,
    callbacks=[es],
    verbose=1,
)

## Fourth Model -> ResNet50

In [None]:
# Rebinds `pretrained_model` (previously the VGG16 base) to ResNet50 with ImageNet
# weights and without its original classifier head.
# NOTE(review): the generators only rescale by 1/255; confirm whether ResNet50's own
# preprocess_input should be used with these weights instead.
pretrained_model = ResNet50(weights = 'imagenet', include_top = False, input_shape = (256, 256, 3))

In [None]:
# Freeze every layer of the ResNet50 base except its last three.
for layer in pretrained_model.layers[:-3]:
    layer.trainable = False

In [None]:
# ResNet50 base followed by three Dense(relu) + BatchNormalization pairs of
# halving width (512, 256, 128) and a 10-way softmax output.
model4 = tf.keras.models.Sequential(
    [pretrained_model, Flatten()]
    + [
        layer
        for units in (512, 256, 128)
        for layer in (Dense(units, activation='relu'), BatchNormalization())
    ]
    + [Dense(10, activation='softmax')]
)

In [None]:
# Learning rate 0.0001, fifty times lower than the 0.005 used for the VGG16 model.
model4.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['acc'],
)
model4.summary()

In [None]:
# Each epoch draws 250 training batches and 50 validation batches (batch size 32
# in both generators) instead of a full pass over either split.
model4.fit(
    train_generator,
    validation_data=val_generator,
    steps_per_epoch=250,
    validation_steps=50,
    epochs=20,
    callbacks=[es],
    verbose=1,
)

## Fifth Model -> YOLOv9

In [17]:
# Load the 'yolov9c.pt' checkpoint through ultralytics; the recorded output shows
# it being downloaded because it was not present locally.
# NOTE(review): presumably this is an object-detection checkpoint; confirm it is
# the intended starting point for a 10-class image-classification task.
model5 = YOLO('yolov9c.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov9c.pt to 'yolov9c.pt'...


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 49.4M/49.4M [00:10<00:00, 4.86MB/s]
