## Distracted Driver Recognition - Deep learning

This is the code for our Machine Learning Report



* [Part 1: Importing the libraries](#section-one)
* [Part 2: Preprocessing](#section-two)
* [Part 3: EDA](#section-three)
    - [Part 3.1: Statistics](#threeone)
    - [Part 3.2: Visualization](#threetwo)
* [Part 4: ANN](#section-four)
    - [Part 4.1: Creating the Model](#fourone)
    - [Part 4.2: Training the Model](#train-model)
    - [Part 4.3: Testing the Model](#test-model)
    - [Part 4.4: Experiments](#fourfour)
* [Part 5: RNN](#section-five)
    - [Part 5.1: Prepare data for RNN](#fiveone)
    - [Part 5.2: Creating the Model](#fivetwo)
    - [Part 5.3: Training the Model](#fivethree)
    - [Part 5.4: Testing the Model](#fivefour)

<a id="section-one"></a>
# Part 1: Importing the libraries

In [71]:
#Files
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
import glob

#DATA
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import one_hot
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.image as mpimg

#CNN
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers import Convolution2D,MaxPooling2D,Flatten,Dense
from tensorflow.keras.optimizers import Adam
from keras.losses import CategoricalCrossentropy

#VIS
from keras.utils.vis_utils import plot_model

<a id="section-two"></a>
# Part 2: Preprocessing

In [72]:
def _prepareData(path, img_size=(128, 128), test_size=0.2, num_classes=10, random_state=None):
    '''
    Load the labelled images under ``path`` and split them into train/test sets.

    Every sub-directory of ``path`` is treated as one class; its name with the
    letter 'c' removed must parse as the integer class label (e.g. ``c3`` -> 3).
    Only ``*.jpg`` files directly inside a class directory are read.

    params:
        path(string): directory that contains one sub-directory per class
        img_size(tuple): (width, height) every image is resized to
        test_size(float): fraction of the samples held out for the test split
        num_classes(int): width of the one-hot label vectors
        random_state: forwarded to train_test_split; None keeps the split random
    return: X_Train, X_Test (arrays of resized images) and Y_Train, Y_Test
            (one-hot encoded labels)
    raises: ValueError if a class directory name is not of the form c<int>,
            or if no readable image is found
    '''
    labelsList = []
    listOfimg = []

    # Keep directories only; a stray file next to the class folders is ignored.
    class_dirs = sorted(
        (d for d in glob.glob(os.path.join(path, '*')) if os.path.isdir(d)),
        key=lambda d: os.path.basename(os.path.normpath(d)),
    )
    for directory in class_dirs:
        image_paths = glob.glob(os.path.join(directory, '*.jpg'))
        if not image_paths:
            continue
        # basename() instead of split("/") so the label parse works with any separator
        label = int(os.path.basename(os.path.normpath(directory)).replace('c', ''))
        for img_path in image_paths:
            imgcv = cv2.imread(img_path)
            if imgcv is None:
                # cv2.imread returns None for unreadable/corrupt files instead of raising
                continue
            listOfimg.append(cv2.resize(imgcv, img_size))
            labelsList.append(label)

    if not listOfimg:
        raise ValueError("No readable .jpg images found under {!r}".format(path))

    X_Train, X_Test, Y_Train, Y_Test = train_test_split(
        listOfimg, labelsList, test_size=test_size, random_state=random_state
    )
    Y_Train = tf.keras.utils.to_categorical(Y_Train, num_classes=num_classes)
    Y_Test = tf.keras.utils.to_categorical(Y_Test, num_classes=num_classes)

    return np.array(X_Train), np.array(X_Test), Y_Train, Y_Test

In [73]:
# Dataset locations on Kaggle.
pathTrain_Images = "/kaggle/input/state-farm-distracted-driver-detection/imgs/train/"
# Path of the unlabelled images; it is only defined here, not read by this cell.
pathPropagate_Images =  "/kaggle/input/state-farm-distracted-driver-detection/imgs/test/"

# Load the labelled images and split them into train and test sets.
X_Train, X_Test, Y_Train, Y_Test = _prepareData(pathTrain_Images)

print("Size X_Train: {}, Size Y_Train: {}".format(len(X_Train),len(Y_Train)))
print("Size X_Test: {}, Size Y_Test: {}".format(len(X_Test),len(Y_Test)))

<a id="section-three"></a>
# Part 3: EDA

<a id="threeone"></a>
## Part 3.1: Statistics

In [74]:
# Index file of the dataset; the columns used in this notebook are
# 'subject', 'classname' and 'img'.
data_file = pd.read_csv("/kaggle/input/state-farm-distracted-driver-detection/driver_imgs_list.csv")
data_classes = data_file.loc[:,['classname','img']].groupby(by='classname').count().reset_index()

# Labels and counts are both read from the grouped frame so that every bar is
# paired with its own class. pd.unique() returns classes in order of first
# appearance, which need not match the sorted order produced by groupby().
data_x = list(data_classes['classname'])
data_y = list(data_classes['img'])

# Plot parameters (one bar is drawn per class)
plt.rcParams.update({'font.size': 22})
plt.figure(figsize=(30,10))
plt.bar(data_x, data_y, color=['cornflowerblue', 'lightblue', 'steelblue'])
plt.ylabel('Count classes')
plt.title('Classes')
plt.xticks(rotation=45)

<a id="threetwo"></a>
## Part 3.2: Visualization

In [75]:
import plotly.express as px

# Histogram of the number of images per class label, one colour per class.
px.histogram(
    data_file,
    x="classname",
    color="classname",
    title="Number of images by categories ",
)

In [76]:
# Count how many images each driver (the 'subject' column) contributes.
# reset_index() already yields a DataFrame with the subject and its count.
driversID = data_file['subject'].value_counts().reset_index()
driversID.columns = ['driver_id', 'Counts']
px.histogram(
    driversID,
    x="driver_id",
    y="Counts",
    color="driver_id",
    title="Number of images by subjects ",
)

In [77]:
# Human-readable description of every class directory.
categories = {'c0': 'Safe driving',
              'c1': 'Texting - right',
              'c2': 'Talking on the phone - right',
              'c3': 'Texting - left',
              'c4': 'Talking on the phone - left',
              'c5': 'Operating the radio',
              'c6': 'Drinking',
              'c7': 'Reaching behind',
              'c8': 'Hair and makeup',
              'c9': 'Talking to passenger'}


# Show one sample image per class on a 5x2 grid.
plt.figure(figsize = (12, 20))
image_count = 1
BASE_URL = '../input/state-farm-distracted-driver-detection/imgs/train/'

# os.listdir() returns entries in arbitrary order; sorting makes the grid
# (and the sample picked for each class) the same on every run.
class_dirs = sorted(
    entry for entry in os.listdir(BASE_URL)
    if not entry.startswith('.') and os.path.isdir(os.path.join(BASE_URL, entry))
)
for directory in class_dirs:
    files = sorted(os.listdir(os.path.join(BASE_URL, directory)))
    if not files:
        # Nothing to show for an empty class directory.
        continue
    plt.subplot(5, 2, image_count)
    image_count += 1
    image = mpimg.imread(os.path.join(BASE_URL, directory, files[0]))
    plt.imshow(image)
    # Fall back to the directory name if it has no entry in `categories`.
    plt.title(categories.get(directory, directory))

<a id="section-four"></a>
# Part 4: ANN

<a id="fourone"></a>
## Part 4.1: Creating the Model

In [78]:
# Fully connected classifier: the 128x128x3 image is flattened, passed through
# three ReLU dense layers (dropout after the first two) and a 10-way softmax.
model = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=(128, 128, 3)),
    keras.layers.Flatten(),
    keras.layers.Dense(units=1024, activation='relu', name='fc_0'),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(units=1024, activation='relu', name='fc_1'),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(units=512, activation='relu', name='fc_2'),
    keras.layers.Dense(units=10, activation='softmax', name='fc_3'),
])
# Note: the model is written to disk here, before it is compiled or trained.
model.save('/tmp/model')
print("done")

In [79]:
# NOTE(review): the layers were created before this seed is set, so it
# presumably only affects later random ops (dropout, shuffling) -- confirm.
tf.random.set_seed(1)
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    # The last layer applies softmax, so the loss receives probabilities.
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)
# summary() prints the table itself and returns None; wrapping it in print()
# added a stray "None" line to the output.
model.summary()

<a id="train-model"></a>
## Part 4.2: Training the Model

In [80]:
# Train the baseline model; 20% of the training data is held out for validation.
# This call must run: otherwise the evaluation that follows measures an
# untrained network and the plotting cell fails because `history` is undefined.
history = model.fit(x = X_Train, y=Y_Train,epochs = 10, batch_size = 500, verbose = 1,validation_split=0.2)

<a id="test-model"></a>
## Part 4.3: Testing the Model

In [81]:
# Loss and accuracy of the current model on the held-out test split.
test_loss, test_acc = model.evaluate(x=X_Test, y=Y_Test, verbose=1)

In [82]:
# One figure per metric: training curve and validation curve over the epochs.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])

    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    # The second curve is the 'val_*' series recorded during fit(), i.e. the
    # validation split, not the test set, so it is labelled accordingly.
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

<a id="fourfour"></a>
## Part 4.4: Experiments

In [83]:
tf.random.set_seed(1)
# NOTE(review): compile() is called on the existing `model` object, so this
# presumably continues from its current weights rather than from a fresh
# initialisation -- confirm that this is the intended experiment.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# Train with 20% of the training data held out for validation, then score the test split.
history = model.fit(x = X_Train, y=Y_Train,epochs = 10, batch_size = 500, verbose = 1,validation_split=0.2)
test_loss, test_acc = model.evaluate(X_Test, Y_Test, verbose = 1)



In [84]:
# Score the experiment model on the held-out test split.
test_loss, test_acc = model.evaluate(x=X_Test, y=Y_Test, verbose=1)

In [85]:
# One figure per metric: training curve and validation curve over the epochs.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])

    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    # The second curve is the 'val_*' series recorded during fit(), i.e. the
    # validation split, not the test set, so it is labelled accordingly.
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

<a id="section-five"></a>
# Part 5: RNN

<a id="fiveone"></a>
## Part 5.1: Prepare data for RNN


In [86]:
# Index of the sample that is inspected below, clamped so that a smaller
# dataset does not raise IndexError.
sample_idx = min(202, len(X_Train) - 1)

print(len(X_Train))
print(X_Train[sample_idx].shape)
print(X_Train.shape)
save = X_Train
z_train = X_Train

# The images were loaded with cv2.imread, which stores channels as B, G, R.
# tf.image.rgb_to_grayscale weights the channels assuming R, G, B order, so the
# channel axis is reversed first. squeeze(axis=-1) drops only the trailing
# single-channel axis, never a batch axis that happens to have size 1.
z_new = np.squeeze(tf.image.rgb_to_grayscale(tf.reverse(z_train, axis=[-1])), axis=-1)
print(z_new.shape)
z_test = np.squeeze(tf.image.rgb_to_grayscale(tf.reverse(X_Test, axis=[-1])), axis=-1)
im = X_Train[sample_idx]

# matplotlib expects RGB, so convert the BGR sample before showing it.
RGB_im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
plt.imshow(RGB_im)
plt.show()
# Y_Train is one-hot encoded, so this prints the label vector of the sample.
print("Class: {}".format(Y_Train[sample_idx]))

<a id="fivetwo"></a>
## Part 5.2: Creating the Model

In [87]:
# Three stacked SimpleRNN layers followed by a 10-way softmax classifier.
# The per-sample shape of z_new is passed as the RNN input shape; the first
# two layers return full sequences so the next RNN layer can consume them.
model = keras.models.Sequential([
    keras.layers.SimpleRNN(128, input_shape=(z_new.shape[1:]), activation='relu', return_sequences=True),
    keras.layers.SimpleRNN(128, activation='relu', return_sequences=True),
    keras.layers.SimpleRNN(128, activation='relu'),
    keras.layers.Dense(units=10, activation='softmax', name='fc_3'),
])

# Note: written to the same path as the dense model, before any training.
model.save('/tmp/model')

In [88]:
tf.random.set_seed(1)
model.compile(
    # `learning_rate` replaces the deprecated `lr` alias and matches the
    # spelling used for the dense model earlier in the notebook.
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3, decay=1e-5),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

<a id="fivethree"></a>
## Part 5.3: Training the Model

In [None]:
# Train the RNN on the grayscale images; 20% of them are held out for validation.
history = model.fit(
    x=z_new,
    y=Y_Train,
    epochs=30,
    batch_size=1000,
    verbose=1,
    validation_split=0.2,
)

<a id="fivefour"></a>
## Part 5.4: Testing the Model

In [None]:
# Loss and accuracy of the RNN on the grayscale test split.
test_loss, test_acc = model.evaluate(x=z_test, y=Y_Test, verbose=1)

In [None]:
# One figure per metric: training curve and validation curve over the epochs.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])

    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    # The second curve is the 'val_*' series recorded during fit(), i.e. the
    # validation split, not the test set, so it is labelled accordingly.
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()