# DrowsyDrive-Alert

## **Driving Drowsiness Detection**

*Master's in Automation and Robotics - ETSII (UPM)* \
**Subject:** Computer Vision\
**Course:** 2023-24\
**Student Name (ID):**
- Ivonne Quishpe (23146)
- Gustavo Maldonado (23102)
- Jorge Guijarro (23075)
- Micaela Cabrera (23023)
- Josep Mª Barberá (17048)

**Date:** December, 6

This Notebook implements a drowsiness detection system using MediaPipe. From the live camera or already recorded videos, the most relevant facial features regarding eyes and mouth are extracted from each frame. Once these coordinates are obtained, the Eye Aspect Ratio (EAR) and Mouth Aspect Ratio (MAR) are calculated to determine whether the eyes or mouth are open or closed. Thus, once the capture speed (frames per second) is known, it is possible to determine whether the person is drowsy or not. 

#### Import Libraries

In [9]:
import cv2
import mediapipe as mp
from mediapipe.framework.formats import landmark_pb2
from mediapipe import solutions
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import numpy as np
import glob

#### Parameters

We define the face landmarks to be used in the EAR and MAR calculations. All the landmark numbers are shown [here](https://storage.googleapis.com/mediapipe-assets/documentation/mediapipe_face_landmark_fullsize.png).


In [10]:
# Number of face-mesh landmarks used to describe each eye.
eye_landmarks_num = 6   # 4 or 6

# Landmark indices per supported configuration: (left eye, right eye).
_EYE_LANDMARK_SETS = {
    4: ([362, 386, 263, 374],
        [33, 158, 133, 153]),
    6: ([362, 384, 386, 263, 374, 381],
        [33, 160, 158, 133, 153, 144]),
}

# Any unsupported value falls back to the 6-landmark layout (with a notice).
if eye_landmarks_num not in _EYE_LANDMARK_SETS:
    print("Number of landmarks per eye should be 4 or 6. Setting it to 6 by default...")
left_landmarks, right_landmarks = _EYE_LANDMARK_SETS.get(
    eye_landmarks_num, _EYE_LANDMARK_SETS[6])

# Landmark indices used for the mouth (eight points).
mouth_landmarks = [78, 81, 13, 311, 308, 402, 14, 178]

#### Save the landmarks coordinates for eyes and mouth

In [11]:
def get_face_points(face_landmarks):
    """Extract the eye and mouth landmark coordinates of one face.

    Args:
        face_landmarks: Indexable collection of landmarks for a single face;
            each entry must expose ``x``, ``y`` and ``z`` attributes.

    Returns:
        A tuple ``(right_eye_points, left_eye_points, mouth_points)``. Each
        element is a list of ``[x, y, z]`` lists, ordered like the module-level
        ``right_landmarks``, ``left_landmarks`` and ``mouth_landmarks`` index
        lists respectively.
    """
    def coordinates(indices):
        # One [x, y, z] triple per requested landmark index, in order.
        return [
            [face_landmarks[idx].x, face_landmarks[idx].y, face_landmarks[idx].z]
            for idx in indices
        ]

    right_eye_points = coordinates(right_landmarks)
    left_eye_points = coordinates(left_landmarks)
    mouth_points = coordinates(mouth_landmarks)
    return right_eye_points, left_eye_points, mouth_points

#### Compute the Eye Aspect Ratio (EAR)

In [12]:

def _eye_aspect_ratio(points):
    """Return the Eye Aspect Ratio for the landmark points of a single eye.

    With exactly four points the ratio is ``|p2-p4| / |p1-p3|``; otherwise the
    six-point formula ``(|p2-p6| + |p3-p5|) / (2 * |p1-p4|)`` is applied to the
    first six points.
    """
    if len(points) == 4:
        vertical = np.linalg.norm(points[1] - points[3])
        horizontal = np.linalg.norm(points[0] - points[2])
        return vertical / horizontal

    p2_p6 = np.linalg.norm(points[1] - points[5])
    p3_p5 = np.linalg.norm(points[2] - points[4])
    p1_p4 = np.linalg.norm(points[0] - points[3])
    return (p2_p6 + p3_p5) / (2 * p1_p4)


def get_EARs(l_p, r_p):
    """Compute the Eye Aspect Ratio (EAR) of the left and the right eye.

    The formula (4-point or 6-point) is selected from the number of points
    actually passed for each eye, so the result stays correct even if the
    global ``eye_landmarks_num`` setting and the supplied points disagree.

    Args:
        l_p: NumPy array with one row of coordinates per left-eye landmark.
        r_p: NumPy array with one row of coordinates per right-eye landmark.

    Returns:
        A tuple ``(left_EAR, right_EAR)``.
    """
    return _eye_aspect_ratio(l_p), _eye_aspect_ratio(r_p)

#### Compute the Mouth Aspect Ratio (MAR)

In [13]:
def get_MAR(m_p):
    """Compute the Mouth Aspect Ratio (MAR) from eight mouth points.

    Args:
        m_p: NumPy array with one row of coordinates per mouth landmark
            (rows 0..7 correspond to points p1..p8).

    Returns:
        ``(|p2-p8| + |p3-p7| + |p4-p6|) / (2 * |p1-p5|)``.
    """
    # Three distances between opposing points, then the p1-p5 distance.
    openings = [
        np.linalg.norm(m_p[upper] - m_p[lower])
        for upper, lower in ((1, 7), (2, 6), (3, 5))
    ]
    mouth_width = np.linalg.norm(m_p[0] - m_p[4])
    return (openings[0] + openings[1] + openings[2]) / (2 * mouth_width)

#### Update the state of the person based on the past and current EAR and MAR

In [14]:
def check_drowsiness(left_EAR, right_EAR, MAR, data, *,
                     ear_threshold=0.15, mar_threshold=0.3,
                     closed_frames=12, yawn_frames=70, alert_decay=10):
    """Update the drowsiness state with the measurements of one frame.

    Args:
        left_EAR: Eye Aspect Ratio of the left eye for the current frame.
        right_EAR: Eye Aspect Ratio of the right eye for the current frame.
        MAR: Mouth Aspect Ratio for the current frame.
        data: Previous state as an 8-item sequence
            ``[left_score, right_score, mouth_score, eyes_tag, yawn_tag,
            alert, frames_sleep, frames_awake]``. The incoming ``eyes_tag``
            and ``yawn_tag`` are ignored; they are recomputed on every call.
        ear_threshold: An eye counts as closed when its EAR is below this.
        mar_threshold: The mouth counts as open when MAR is above this.
        closed_frames: Both eye scores must exceed this for ``"closed"``.
        yawn_frames: The mouth score must exceed this to flag a yawn.
        alert_decay: Amount subtracted from ``alert`` on a non-drowsy frame.

    Returns:
        The new state as a list with the same layout as ``data``.
        ``eyes_tag`` is ``"closed"`` or ``"open"`` and ``yawn_tag`` is a bool.

    Note:
        The frame-count thresholds depend on the capture frame rate. The
        original notes say a normal blink lasts about 9 frames (300 ms at
        30 fps) and that a mouth open for more than three seconds is a yawn;
        70 frames only equals three seconds at roughly 23 fps, so tune
        ``closed_frames`` / ``yawn_frames`` for the actual frame rate.
    """
    left_score, right_score, mouth_score, _, _, alert, frames_sleep, frames_awake = data

    # Each score rises by one while its condition holds and falls by one
    # otherwise; scores are never allowed to go negative.
    left_score = max(0, left_score + (1 if left_EAR < ear_threshold else -1))
    right_score = max(0, right_score + (1 if right_EAR < ear_threshold else -1))
    mouth_score = max(0, mouth_score + (1 if MAR > mar_threshold else -1))

    # Eyes are reported closed only when BOTH eyes stayed shut longer than a
    # normal blink.
    if left_score > closed_frames and right_score > closed_frames:
        eyes_tag = "closed"
    else:
        eyes_tag = "open"

    # A mouth that stays open long enough is treated as a yawn.
    yawn_tag = mouth_score > yawn_frames

    if eyes_tag == "closed" or yawn_tag:
        alert += 1
        frames_sleep += 1
    else:
        # The alert level drops much faster than it builds up.
        alert = max(0, alert - alert_decay)
        frames_awake += 1

    return [left_score, right_score, mouth_score, eyes_tag, yawn_tag,
            alert, frames_sleep, frames_awake]

#### Live Video Detection

In [16]:
## Define your capture device number here:

# camera = 2 # this is for selecting my IR Camera
camera = 0 # this is for selecting my normal Camera

cap = cv2.VideoCapture(camera)
# frame_rate = cap.get(cv2.CAP_PROP_FPS)
# print(frame_rate)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) + 0.5)
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) + 0.5)
size = (width, height)
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
# The annotated stream is recorded to disk at a nominal 20 fps.
out = cv2.VideoWriter('your_video.avi', fourcc, 20.0, size)
# Landmarks that get drawn on every frame.
index_list = mouth_landmarks + left_landmarks + right_landmarks

BaseOptions = mp.tasks.BaseOptions
FaceLandmarker = mp.tasks.vision.FaceLandmarker
FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

options = FaceLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='face_landmarker_v2_with_blendshapes.task'),
    running_mode=VisionRunningMode.IMAGE)

i = 0
prev_left_EAR, prev_right_EAR, prev_MAR = 0, 0, 0
data = [0, 0, 0, 0, 0, 0, 0, 0]
alert = 0
alert_time = 0
# Last known landmark coordinates; None until a face has been seen once.
right_eye_points = left_eye_points = mouth_points = None

try:
    with FaceLandmarker.create_from_options(options) as landmarker:
        while True:
            i += 1
            ret, frame = cap.read()
            if not ret:
                break
            height, width, _ = frame.shape
            # OpenCV delivers BGR frames, while the image is declared as SRGB:
            # convert so the channel order matches the declared format.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
            results = landmarker.detect(mp_image)

            detected_faces = results.face_landmarks or []
            for face_landmarks in detected_faces:
                for index in index_list:
                    # Landmark coordinates are scaled by the frame size to get pixels.
                    x = int(face_landmarks[index].x * width)
                    y = int(face_landmarks[index].y * height)
                    cv2.circle(frame, (x, y), 2, (0, 255, 0), 1)

            if detected_faces:
                right_eye_points, left_eye_points, mouth_points = get_face_points(detected_faces[0])
            # When no face is found the previous coordinates are reused; until a
            # first face has been seen there is nothing to evaluate yet.

            if mouth_points is not None:
                left_EAR, right_EAR = get_EARs(np.array(left_eye_points), np.array(right_eye_points))
                MAR = get_MAR(np.array(mouth_points))

                data = check_drowsiness(left_EAR, right_EAR, MAR, data)
                _, _, _, eyes_tag, yawn_tag, alert, frames_sleep, frames_awake = data

                # The displayed numbers are refreshed only every 7th frame so
                # that they stay readable.
                if i > 7 and i % 7 == 0:
                    prev_left_EAR, prev_right_EAR, prev_MAR = left_EAR, right_EAR, MAR

                # Yellow while normal, red when closed eyes / a yawn is flagged.
                eyes_color = (0, 255, 255) if eyes_tag == "open" else (0, 0, 255)
                mouth_color = (0, 0, 255) if yawn_tag else (0, 255, 255)
                cv2.putText(frame, f"Left EAR: {prev_left_EAR:.2f}", (30, 30), cv2.FONT_HERSHEY_DUPLEX, 0.7, eyes_color, 1)
                cv2.putText(frame, f"Right EAR: {prev_right_EAR:.2f}", (30, 60), cv2.FONT_HERSHEY_DUPLEX, 0.7, eyes_color, 1)
                cv2.putText(frame, f"MAR: {prev_MAR:.2f}", (30, 90), cv2.FONT_HERSHEY_DUPLEX, 0.7, mouth_color, 1)

            if alert > 0:
                alert_time += 1
                # Red border around the whole frame while the alert is active.
                cv2.rectangle(frame, (0, 0), (frame.shape[1] - 1, frame.shape[0] - 1), (0, 0, 255), 20)
            cv2.imshow("Frame", frame)
            out.write(frame)
            k = cv2.waitKey(1) & 0xFF
            if k == 27:  # ESC stops the capture
                break
finally:
    # Always free the camera, finalize the recording and close the window,
    # even when the loop ends because of an error.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

I0000 00:00:1704282615.071553   11475 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1704282615.075422   11617 gl_context.cc:344] GL version: 3.2 (OpenGL ES 3.2 Mesa 21.2.6), renderer: Mesa Intel(R) UHD Graphics 620 (KBL GT2)
W0000 00:00:1704282615.076589   11475 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.


#### Video processing

In [114]:
def face_mesh(path):
    """Run the drowsiness detection over a recorded video.

    Every frame is passed to the MediaPipe face landmarker; the EAR and MAR of
    the first detected face feed ``check_drowsiness``. No frame is displayed
    or saved by this function.

    Args:
        path: Source handed to ``cv2.VideoCapture`` (e.g. a video file path).

    Returns:
        The number of frames for which the alert level was greater than zero.
        If no frame can be read the result is 0.
    """
    cap = cv2.VideoCapture(path)

    BaseOptions = mp.tasks.BaseOptions
    FaceLandmarker = mp.tasks.vision.FaceLandmarker
    FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
    VisionRunningMode = mp.tasks.vision.RunningMode
    options = FaceLandmarkerOptions(
        base_options=BaseOptions(model_asset_path='face_landmarker_v2_with_blendshapes.task'),
        running_mode=VisionRunningMode.IMAGE)

    data = [0, 0, 0, 0, 0, 0, 0, 0]
    alert_time = 0

    # Fallback coordinates used until a face is detected for the first time.
    # NOTE(review): these appear to describe a face with open eyes and a closed
    # mouth (six points per eye) - confirm they suit the configured landmarks.
    right_eye_points = [[0.5324623584747314, 0.5747805833816528, 0.004818260669708252],
                        [0.5445109009742737, 0.5637452006340027, -0.0011025648564100266],
                        [0.5702856183052063, 0.5580060482025146, -0.0021580627653747797],
                        [0.5986989736557007, 0.5627603530883789, 0.023030918091535568],
                        [0.5674318075180054, 0.5806047916412354, 0.004358564969152212],
                        [0.5431403517723083, 0.5771798491477966, 0.00291746249422431]]

    left_eye_points = [[0.37691813707351685, 0.5854962468147278, 0.002876535290852189],
                       [0.3939114511013031, 0.5759596824645996, -0.014068366028368473],
                       [0.4192018210887909, 0.5710769891738892, -0.016808023676276207],
                       [0.4473620355129242, 0.5828512907028198, -0.004112580791115761],
                       [0.42484530806541443, 0.592927098274231, -0.010263157077133656],
                       [0.3993476927280426, 0.597295880317688, -0.0077733625657856464]]

    mouth_points = [[0.46936848759651184, 0.8040176630020142, 0.005104257259517908],
                    [0.49690064787864685, 0.7995496392250061, -0.012885571457445621],
                    [0.5246973037719727, 0.7988075017929077, -0.0167536623775959],
                    [0.5486900210380554, 0.7920270562171936, -0.006666502915322781],
                    [0.572444498538971, 0.7885196208953857, 0.017038118094205856],
                    [0.5485737919807434, 0.7921735644340515, -0.0028128675185143948],
                    [0.5245987176895142, 0.7992886304855347, -0.012132205069065094],
                    [0.4968000650405884, 0.8000173568725586, -0.009343835525214672]]

    try:
        with FaceLandmarker.create_from_options(options) as landmarker:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV delivers BGR frames, while the image is declared as
                # SRGB: convert so the channel order matches the declared format.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
                results = landmarker.detect(mp_image)

                # Only the first detected face is evaluated; when none is found
                # the coordinates from the previous frame are reused.
                if results.face_landmarks:
                    right_eye_points, left_eye_points, mouth_points = get_face_points(results.face_landmarks[0])

                left_EAR, right_EAR = get_EARs(np.array(left_eye_points), np.array(right_eye_points))
                MAR = get_MAR(np.array(mouth_points))

                data = check_drowsiness(left_EAR, right_EAR, MAR, data)
                alert = data[5]

                if alert > 0:
                    alert_time += 1
    finally:
        # Release the video source even if processing fails midway.
        cap.release()
        cv2.destroyAllWindows()

    return alert_time

#### Compute the Drowsiness for a list of videos

In [None]:
# Run the detector on every .mp4 found in the folder and collect, per video,
# the value returned by face_mesh (its count of alert frames).
files = glob.glob("/home/josep/Desktop/*.mp4")
results = []

for my_file in files:
    result = face_mesh(my_file)
    results += [result]
    print(f"Processing file: {my_file} with results= {result}\n")