### Mediapipe & OpenCV project : Drowsiness Detection

In [16]:
import cv2
import time
import numpy as np
import mediapipe as mp

In [17]:
# mediapipe를 이용해 양쪽 눈에 대한 landmark(index) 포인트를 가져옴

mp_facemesh = mp.solutions.face_mesh
mp_drawing  = mp.solutions.drawing_utils
denormalize_coordinates = mp_drawing._normalized_to_pixel_coordinates

In [18]:
# FaceMash 설정

def get_facemesh(
                max_num_faces=1,                # 감지할 얼굴 수
                refine_landmarks=False,         # 눈 외의 landmark는 세분화시키지 않음
                min_detection_confidence=0.5,   # 얼굴 인식에 성공한 것으로 간주되는 최소 신뢰도
                min_tracking_confidence= 0.5    # 성공적으로 추적한 것으로 간주되는 최소 신뢰도
):
    face_mesh = mp_facemesh.FaceMesh(
        max_num_faces=max_num_faces,
        refine_landmarks=refine_landmarks,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence
    )

    return face_mesh
# 감지된 landmark points 목록
# face_mesh.multi_face_landmarks

In [19]:
def distance(point_1, point_2):
    # L2 norm 계산 (두 벡터 사이의 거리 계산)
    dist = sum([(i - j) ** 2 for i, j in zip(point_1, point_2)]) ** 0.5
    return dist

In [20]:
# EAR 공식 적용

def get_ear(landmarks, refer_idxs, frame_width, frame_height):
    # landmarks : 검출된 lanmarks list
    # refer_idxs : 검출을 위해 지정한 landmarks list [index]

    try:
        # 수평 거리 계산
        coords_points = []
        for i in refer_idxs:
            lm = landmarks[i]
            coord = denormalize_coordinates(lm.x, lm.y, frame_width, frame_height)
            coords_points.append(coord)
 
        # EAR 공식에 맞춰 P2-P6, P3-P5, P1-P4를 연산함
        P2_P6 = distance(coords_points[1], coords_points[5])
        P3_P5 = distance(coords_points[2], coords_points[4])
        P1_P4 = distance(coords_points[0], coords_points[3])
 
        ear = (P2_P6 + P3_P5) / (2.0 * P1_P4)
 
    except:
        ear = 0.0
        coords_points = None
 
    return ear, coords_points

In [21]:
def calculate_avg_ear(landmarks, left_eye_idxs, right_eye_idxs, image_w, image_h):
    
    # 왼쪽 눈의 EAR 값과 landmarks의 좌표 값을 반환함
    left_ear, left_lm_coordinates = get_ear(
                                      landmarks, 
                                      left_eye_idxs, 
                                      image_w, 
                                      image_h
                                    )
    
    # 오른쪽 눈의 EAR 값과 landmarks의 좌표 값을 반환함
    right_ear, right_lm_coordinates = get_ear(
                                      landmarks, 
                                      right_eye_idxs, 
                                      image_w, 
                                      image_h
                                    )
    # 최종 EAR 값을 얻기 위해 왼쪽 오른쪽의 EAR 값 평균을 계산함
    Avg_EAR = (left_ear + right_ear) / 2.0
 
    return Avg_EAR, (left_lm_coordinates, right_lm_coordinates)

In [22]:
def plot_eye_landmarks(frame, left_lm_coordinates, 
                       right_lm_coordinates, color
                       ):
    for lm_coordinates in [left_lm_coordinates, right_lm_coordinates]:
        if lm_coordinates:
            for coord in lm_coordinates:
                cv2.circle(frame, coord, 2, color, -1)
 
    frame = cv2.flip(frame, 1)
    return frame

In [23]:
def plot_text(image, text, origin, 
              color, font=cv2.FONT_HERSHEY_SIMPLEX, 
              fntScale=0.8, thickness=2
              ):
    image = cv2.putText(image, text, origin, font, fntScale, color, thickness)
    return image

In [24]:
class Video:
    def __init__(self):
        self.red = (0,0,255)
        self.green = (0,255,0)
        self.white = (255,255,255)

        self.eye_idxs = {
            "left": [362, 385, 387, 263, 373, 380],
            "right": [33, 160, 158, 133, 153, 144],
        }

        self.facemesh_model = get_facemesh()

        self.state_tracker = {
            "start_time": time.perf_counter(),
            "DROWSY_TIME": 0.0,  # Holds time passed with EAR < EAR_THRESH
            "COLOR": self.green,
            "play_alarm": False,
        }
        
        self.EAR_txt_pos = (10, 30)

In [25]:
def process(self, frame: np.array, thresholds : dict):

        frame.flags.writeable = False
        frame_h, frame_w, _ = frame.shape
        DROWSY_TIME_txt_pos = (10, int(frame_h // 2 * 1.7))
        ALM_txt_pos = (10, int(frame_h // 2 * 1.85))
 
        results = self.facemesh_model.process(frame)
 
        if results.multi_face_landmarks:
            landmarks = results.multi_face_landmarks[0].landmark
            EAR, coordinates = calculate_avg_ear(landmarks,
                                                 self.eye_idxs["left"], 
                                                 self.eye_idxs["right"], 
                                                 frame_w, 
                                                 frame_h
                                                 )
            frame = plot_eye_landmarks(frame, 
                                       coordinates[0], 
                                       coordinates[1],
                                       self.state_tracker["COLOR"]
                                       )
 
            if EAR < thresholds["EAR_THRESH"]:
 
                end_time = time.perf_counter()
 
                self.state_tracker["DROWSY_TIME"] += end_time - self.state_tracker["start_time"]
                self.state_tracker["start_time"] = end_time
                self.state_tracker["COLOR"] = self.red
 
                if self.state_tracker["DROWSY_TIME"] >= thresholds["WAIT_TIME"]:
                    self.state_tracker["play_alarm"] = True
                    plot_text(frame, "WAKE UP! WAKE UP", 
                              ALM_txt_pos, self.state_tracker["COLOR"])
 
            else:
                self.state_tracker["start_time"] = time.perf_counter()
                self.state_tracker["DROWSY_TIME"] = 0.0
                self.state_tracker["COLOR"] = self.green
                self.state_tracker["play_alarm"] = False
 
            EAR_txt = f"EAR: {round(EAR, 2)}"
            DROWSY_TIME_txt = f"DROWSY: {round(self.state_tracker['DROWSY_TIME'], 3)} Secs"
            plot_text(frame, EAR_txt, 
                      self.EAR_txt_pos, self.state_tracker["COLOR"])
            plot_text(frame, DROWSY_TIME_txt, 
                      DROWSY_TIME_txt_pos, self.state_tracker["COLOR"])
 
        else:
            self.state_tracker["start_time"] = time.perf_counter()
            self.state_tracker["DROWSY_TIME"] = 0.0
            self.state_tracker["COLOR"] = self.green
            self.state_tracker["play_alarm"] = False
 
            frame = cv2.flip(frame, 1)
 
        return frame, self.state_tracker["play_alarm"]

In [27]:
webcam = cv2.VideoCapture(0)
webcam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
webcam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
fps=webcam.get(4)

thresholds = {
    "EAR_THRESH": 0.4,
    "WAIT_TIME": 5.0,
}

while webcam.isOpened():
    status, frame = webcam.read()

    if not status:
      print("Ignoring empty camera frame.")
      continue

    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    video_handler = Video()
    frame, play_alarm = video_handler.process(frame, thresholds)


webcam.release()
cv2.destroyAllWindows()

AttributeError: 'Video' object has no attribute 'process'