In [7]:
import dlib
import cv2
import numpy as np
from imutils import face_utils
from EAR import eye_aspect_ratio
from MAR import mouth_aspect_ratio





In [None]:

# --- Decision thresholds ----------------------------------------------------
EYE_AR_THRESH = 0.25     # an eye aspect ratio below this counts as "Drowsy"
MOUTH_AR_THRESH = 0.79   # a mouth aspect ratio above this counts as "Drowsy"
HEADPOSE_THRESH = 25     # not referenced by any of the visible cells

# --- dlib models ------------------------------------------------------------
# Frontal face detector plus the 68-point landmark predictor; the model path is
# relative to the notebook's working directory.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(
    './dlib_shape_predictor/shape_predictor_68_face_landmarks.dat'
)

# --- Landmark slices --------------------------------------------------------
# Start/end indices used to slice the landmark array per facial feature.
lStart, lEnd = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
rStart, rEnd = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
# NOTE(review): the mouth range is hard-coded rather than looked up in
# FACIAL_LANDMARKS_IDXS; confirm it matches the indexing that
# mouth_aspect_ratio expects.
mStart, mEnd = 49, 68


In [9]:

def predict_drowsiness(frame):
    """Classify a single image as "Drowsy" or "Non Drowsy".

    The decision is rule based: the first detected face is landmarked, and the
    image is labelled "Drowsy" when the mean eye aspect ratio falls below
    EYE_AR_THRESH or the mouth aspect ratio exceeds MOUTH_AR_THRESH.

    Args:
        frame: BGR image, e.g. as returned by ``cv2.imread``.

    Returns:
        "Drowsy" or "Non Drowsy". When no face is detected the result is
        "Non Drowsy".

    Raises:
        ValueError: if ``frame`` is None or empty (``cv2.imread`` returns None
            for unreadable files, which would otherwise fail inside OpenCV
            with a much less helpful error).
    """
    if frame is None or getattr(frame, "size", 0) == 0:
        raise ValueError(
            "predict_drowsiness received an empty frame; "
            "check that the image was read successfully"
        )

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = detector(gray, 0)

    # No face means there is nothing to measure; fall back to the negative class.
    if len(faces) == 0:
        return "Non Drowsy"

    # Only the first detection is evaluated.
    landmarks = face_utils.shape_to_np(predictor(gray, faces[0]))

    # Eye aspect ratio, averaged over both eyes.
    left_ear = eye_aspect_ratio(landmarks[lStart:lEnd])
    right_ear = eye_aspect_ratio(landmarks[rStart:rEnd])
    ear = (left_ear + right_ear) / 2.0

    # Mouth aspect ratio.
    mar = mouth_aspect_ratio(landmarks[mStart:mEnd])

    if ear < EYE_AR_THRESH or mar > MOUTH_AR_THRESH:
        return "Drowsy"
    return "Non Drowsy"

In [None]:
import os
import cv2
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score
from tqdm import tqdm

# Evaluate predict_drowsiness on the test split: one sub-folder per class,
# each containing .png images.
test_dir = "../data/splitted_Data/test"
classes = ["Non Drowsy", "Drowsy"]
label_map = {"Non Drowsy": 0, "Drowsy": 1}

# Labels are stored as integers from the start so that re-running later cells
# never sees a half-converted mix of strings and ints.
y_true = []
y_pred = []
unreadable = []  # paths cv2.imread could not decode

for cls in classes:
    folder = os.path.join(test_dir, cls)
    label = cls

    # sorted() makes the processing order reproducible across runs/platforms.
    for img_name in tqdm(sorted(os.listdir(folder)), desc=f"Processing {cls}"):
        if not img_name.endswith(".png"):
            continue

        img_path = os.path.join(folder, img_name)
        frame = cv2.imread(img_path)
        if frame is None:
            # cv2.imread signals failure by returning None instead of raising.
            unreadable.append(img_path)
            continue

        pred = predict_drowsiness(frame)
        if pred not in label_map:
            # Fail here, with context, rather than with a bare KeyError later.
            raise ValueError(
                f"predict_drowsiness returned unexpected label {pred!r} "
                f"for {img_path}; expected one of {list(label_map)}"
            )

        y_true.append(label_map[label])
        y_pred.append(label_map[pred])

if unreadable:
    print(f"[WARN] Skipped {len(unreadable)} unreadable image(s)")

print("\n‚úî Accuracy:", accuracy_score(y_true, y_pred))
print("‚úî F1 Score:", f1_score(y_true, y_pred, average="binary"))

# labels=[0, 1] pins the class order so the report rows and matrix shape stay
# stable even if one class never appears in y_true / y_pred.
print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=[0, 1],
                            target_names=["Non Drowsy", "Drowsy"]))

cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
print("\nConfusion Matrix:\n", cm)


Processing Non Drowsy: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 973/973 [00:05<00:00, 191.54it/s]
Processing Drowsy: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1118/1118 [00:05<00:00, 205.42it/s]


‚úî Accuracy: 0.49115255858440937
‚úî F1 Score: 0.11333333333333333

Classification Report:
              precision    recall  f1-score   support

  Non Drowsy       0.48      0.99      0.64       973
      Drowsy       0.83      0.06      0.11      1118

    accuracy                           0.49      2091
   macro avg       0.65      0.52      0.38      2091
weighted avg       0.67      0.49      0.36      2091


Confusion Matrix:
 [[ 959   14]
 [1050   68]]





Processing Non Drowsy: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 973/973 [00:05<00:00, 192.49it/s]
Processing Drowsy: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1118/1118 [00:05<00:00, 195.55it/s]


KeyError: 'None Drowsy'

In [42]:
from scipy.spatial import distance as dist
from imutils.video import VideoStream
from imutils import face_utils
import argparse
import imutils
import time
import dlib
import math
import cv2
import numpy as np
from EAR import eye_aspect_ratio
from MAR import mouth_aspect_ratio
def process_video_withDetails(input_video_path, output_video_path="output.mp4"):
    """Annotate a video with landmarks, EAR/MAR values and a drowsiness status.

    Every frame is resized to a width of 1024 px, faces are detected and
    landmarked, and the annotated frame is shown in a window and appended to
    the output video. Press ``q`` in the preview window to stop early.

    Args:
        input_video_path: path of the video to read.
        output_video_path: path the annotated video is written to.

    Returns:
        None. Progress and failures are reported via ``print``.
    """
    print("[INFO] loading facial landmark predictor...")
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(
        './dlib_shape_predictor/shape_predictor_68_face_landmarks.dat'
    )

    cap = cv2.VideoCapture(input_video_path)

    if not cap.isOpened():
        print("‚ùå Failed to open input video!")
        return

    # video properties (of the *source*; frames are resized before writing)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f"[INFO] Video: {width}x{height}, {fps:.2f} FPS")

    # Some containers report 0/NaN FPS, which a VideoWriter cannot use.
    if not fps > 0:
        fps = 30.0

    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    # Created lazily from the first resized frame: the writer's frame size must
    # match the frames actually written, not the source resolution.
    writer = None
    frames_written = 0

    # EAR / MAR related
    EYE_AR_THRESH = 0.25
    MOUTH_AR_THRESH = 0.79
    EYE_AR_CONSEC_FRAMES = 3
    # Consecutive frames with EAR below threshold.
    # NOTE(review): shared by all faces in a frame; with several faces the
    # count mixes their eye states.
    COUNTER = 0

    (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
    (mStart, mEnd) = (49, 68)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = imutils.resize(frame, width=1024)
            frame_h, frame_w = frame.shape[:2]
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            if writer is None:
                writer = cv2.VideoWriter(output_video_path, fourcc, fps,
                                         (frame_w, frame_h))
                if not writer.isOpened():
                    print(f"[ERROR] Failed to open output video for writing: "
                          f"{output_video_path}")
                    return

            rects = detector(gray, 0)

            if len(rects) > 0:
                cv2.putText(frame, f"{len(rects)} face(s) found", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)

            for rect in rects:
                (bX, bY, bW, bH) = face_utils.rect_to_bb(rect)
                cv2.rectangle(frame, (bX, bY), (bX + bW, bY + bH), (0, 255, 0), 1)

                shape = predictor(gray, rect)
                shape = face_utils.shape_to_np(shape)

                # EYE
                leftEye = shape[lStart:lEnd]
                rightEye = shape[rStart:rEnd]
                ear = (eye_aspect_ratio(leftEye) + eye_aspect_ratio(rightEye)) / 2.0
                cv2.drawContours(frame, [cv2.convexHull(leftEye)], -1, (0, 255, 0), 1)
                cv2.drawContours(frame, [cv2.convexHull(rightEye)], -1, (0, 255, 0), 1)

                if ear < EYE_AR_THRESH:
                    COUNTER += 1
                else:
                    COUNTER = 0

                # Eyes only count as closed after enough consecutive low-EAR
                # frames, so ordinary blinks are ignored.
                eyes_closed = COUNTER >= EYE_AR_CONSEC_FRAMES
                if eyes_closed:
                    cv2.putText(frame, "Eyes Closed!", (300, 100),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)

                # MOUTH
                mouth = shape[mStart:mEnd]
                mar = mouth_aspect_ratio(mouth)
                cv2.drawContours(frame, [cv2.convexHull(mouth)], -1, (0, 255, 0), 1)
                cv2.putText(frame, f"EAR: {ear:.2f}", (650, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
                cv2.putText(frame, f"MAR: {mar:.2f}", (400, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)

                yawning = mar > MOUTH_AR_THRESH
                if yawning:
                    cv2.putText(frame, "Yawning!", (800, 100),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)

                # Overall status is recomputed for every face and frame, so it
                # is always defined and never carries over from an earlier frame.
                STATUS = "Drowsy" if (eyes_closed or yawning) else "Non Drowsy"
                if STATUS == "Drowsy":
                    color = (0, 0, 255)
                    text = "DROWSY!"
                else:
                    color = (0, 255, 0)
                    text = "Non Drowsy"

                # Landmark Visualization
                for (x, y) in shape:
                    cv2.circle(frame, (int(x), int(y)), 1, (0, 0, 255), -1)

                # Anchor the label to the bottom of the *resized* frame.
                cv2.putText(frame, text, (50, frame_h - 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 3)

            writer.write(frame)
            frames_written += 1
            cv2.imshow("Output", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release handles and close the window, even on errors.
        cap.release()
        if writer is not None:
            writer.release()

        # Prevent hanging window on macOS
        for _ in range(10):
            cv2.waitKey(1)

        cv2.destroyAllWindows()

    if frames_written:
        print(f"üé¨ Done! Output saved => {output_video_path}")
    else:
        print("[WARN] No frames were read from the input; no output was written.")

In [43]:
process_video_withDetails("../self-uploaded/drowsy.mp4", "result.mp4")

[INFO] loading facial landmark predictor...
[INFO] Video: 544x960, 30.00 FPS
üé¨ Done! Output saved => result.mp4
