In [2]:
import keras
from keras.layers import Dense
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.applications.resnet50 import preprocess_input, decode_predictions

import numpy as np
import cv2
from PIL import Image

import tensorflow as tf

In [7]:
dataset_dir = "dataset_B_Eye_Images"
batch_size=32
train_dir,validation_dir=(dataset_dir + "/train", dataset_dir + "/validation")

In [5]:
# ImageDataGenerator from Keras
datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
        #data_format="channels_last",
        preprocessing_function=preprocess_input,
)

## Loading the dataset
The data set is a combination multiple datasets found online. They are listed below:

1. http://mrl.cs.vsb.cz/eyedataset
2. R. Ghoddoosian, M. Galib and V. Athitsos, "A Realistic Dataset and Baseline
Temporal Model for Early Drowsiness Detection," _in Proceedings of the IEEE
Conference on Computer Vision and Pattern Recognition Workshops_ , 2019.
   * This dataset was a set of videos that had recordings of people. We wrote a script ```video-eye-generator.py``` that extracted the eyes from the videos and manually split them into closed and open
3. F.Song, X.Tan, X.Liu and S.Chen, Eyes Closeness Detection from Still Images with Multi-scale Histograms of Principal Oriented Gradients, Pattern Recognition, 2014.

In [4]:
# generator for train data
train_generator = datagen.flow_from_directory(
        train_dir,  #target directory
        target_size=(224, 224),  #all images will be resized to (224,224)
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True
)

Found 34931 images belonging to 2 classes.


In [5]:
# generator for validation data
validation_generator = datagen.flow_from_directory(
        validation_dir, #target directory
        target_size=(224, 224),  #all images will be resized to (224,224)
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True
)

Found 970 images belonging to 2 classes.


In [6]:
# Resnet model pretrained on "imagenet" dataset
resnet50=keras.applications.resnet50.ResNet50(include_top=True,
                                     weights="imagenet", 
                                     input_tensor=None, 
                                     input_shape=None,
                                     pooling=None, 
                                     )

The last layer of the model will be the softmax layer with two outputs to categorize our images

In [7]:
out = Dense(2, activation="softmax")(resnet50.layers[-2].output) # new output layer
classifier_model = Model(resnet50.input, out, name="eye_classifier") 
opt = keras.optimizers.Adam(learning_rate=0.01)
classifier_model.compile(loss="categorical_crossentropy", optimizer=opt,metrics=['accuracy'])

In [8]:
classifier_model.summary()

[0][0]           
__________________________________________________________________________________________________
conv4_block1_1_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block1_1_conv[0][0]        
__________________________________________________________________________________________________
conv4_block1_1_relu (Activation (None, 14, 14, 256)  0           conv4_block1_1_bn[0][0]          
__________________________________________________________________________________________________
conv4_block1_2_conv (Conv2D)    (None, 14, 14, 256)  590080      conv4_block1_1_relu[0][0]        
__________________________________________________________________________________________________
conv4_block1_2_bn (BatchNormali (None, 14, 14, 256)  1024        conv4_block1_2_conv[0][0]        
__________________________________________________________________________________________________
conv4_block1_2_relu (Activation (None, 14, 14, 256)  0           conv4_block1_2_bn[0][0]   

In [9]:
classifier_model.fit_generator(train_generator,
                          steps_per_epoch=len(train_generator),
                          epochs=25,
                          validation_data=validation_generator,
                          validation_steps=len(validation_generator),
)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x2ba527b0100>

In [10]:
# Save the model
classifier_model.save("models/eye_classifier_20201219.h5")

## Evaluation

In [3]:
classifier_model = keras.models.load_model("models/eye_classifier_20201219.h5")

In [8]:
test_dir = "dataset_B_Eye_Images/test"
# generator for train data
test_generator = datagen.flow_from_directory(
        test_dir,  #target directory
        target_size=(224, 224),  #all images will be resized to (224,224)
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True
)

Found 11746 images belonging to 2 classes.


In [11]:
score = classifier_model.evaluate_generator(generator=test_generator,
                                     steps=test_generator.samples//batch_size, 
                                     max_queue_size=10,
                                     workers=1,
                                     use_multiprocessing=False,
                                     verbose=0)
print("loss: %.3f - acc: %.3f" % (score[0], score[1]))

Instructions for updating:
Please use Model.evaluate, which supports generators.
loss: 0.179 - acc: 0.946


Testing this model on some random images

In [12]:
classes={1:"open",0:"close"}

In [14]:
img = cv2.imread("dataset/eyes_closed/02-0.mov_frame199.jpg")
preprocessed_img = cv2.resize(preprocess_input(img.astype(np.float32)),(224,224))
new_preprocessed_img = np.expand_dims(preprocessed_img, axis=0)

In [16]:
preds=classifier_model.predict(new_preprocessed_img)

In [17]:
labels=[classes[p] for p in np.argmax(preds, axis=1)] #close/open label for each frame
print(preds)
print(labels)

[[9.9959856e-01 4.0148813e-04]]
['close']


In [18]:
img = cv2.imread("dataset/eyes_closed/02-5.MOV_frame41.jpg")
preprocessed_img = cv2.resize(preprocess_input(img.astype(np.float32)),(224,224))
new_preprocessed_img = np.expand_dims(preprocessed_img, axis=0)

In [19]:
preds=classifier_model.predict(new_preprocessed_img)

In [20]:
labels=[classes[p] for p in np.argmax(preds, axis=1)] #close/open label for each frame
print(preds)
print(labels)

[[0.9979036  0.00209646]]
['close']


In [21]:
img = cv2.imread("saved_frames/leye/01-0.mov_frame157.jpg")
preprocessed_img = cv2.resize(preprocess_input(img.astype(np.float32)),(224,224))
new_preprocessed_img = np.expand_dims(preprocessed_img, axis=0)

In [22]:
preds=classifier_model.predict(new_preprocessed_img)

In [23]:
labels=[classes[p] for p in np.argmax(preds, axis=1)] #close/open label for each frame
print(preds)
print(labels)

[[1.4268703e-16 1.0000000e+00]]
['open']


In [24]:
img = cv2.imread("saved_frames_webcam/leye/01-0.mov_frame94.jpg")
preprocessed_img = cv2.resize(preprocess_input(img.astype(np.float32)),(224,224))
new_preprocessed_img = np.expand_dims(preprocessed_img, axis=0)

In [25]:
preds=classifier_model.predict(new_preprocessed_img)

In [26]:
labels=[classes[p] for p in np.argmax(preds, axis=1)] #close/open label for each frame
print(preds)
print(labels)

[[0.70463145 0.29536855]]
['close']


## Drowsiness Detection
Run the ```drowsiness_detection.py``` script, which will make use of the model that was trained here, to read the webcam input, frame by frame, detects each eye separately, and checks if the eye is open or close.

If both eyes are closed at any given time, a point is added to a score, and a point is removed from the score otherwise. So, if the eyes are closed for a long period of time consequtively, the score value increases, and when it increases to a value above a set threshold, an alarm will sound, along with a graphical way of notifying the user that he is drowsy and will wake them up.

## Conclusion

After running the above script, we find that there are some flaws. Particularly with eye extraction. For the purposes of detecting the eye, we used the inbuilt Haar Cascade function. In the future, to improve the feature extraction better, we can probably implement a CNN.