In [6]:
from scipy.spatial import distance

In [7]:
pip install imutils

Note: you may need to restart the kernel to use updated packages.


In [8]:
pip install opencv-python

Note: you may need to restart the kernel to use updated packages.


In [9]:
from imutils import face_utils

In [10]:
from pygame import mixer

In [11]:
pip install dlib-19.24.99-cp312-cp312-win_amd64.whl

Processing c:\users\heman\majorproject\dlib-19.24.99-cp312-cp312-win_amd64.whl
dlib is already installed with the same version as the provided wheel. Use --force-reinstall to force an installation of the wheel.
Note: you may need to restart the kernel to use updated packages.


In [12]:
import dlib

In [13]:
import cv2

In [7]:
# Import required libraries
from scipy.spatial import distance  # Used for calculating distances between points
from imutils import face_utils  # Helps handle facial landmarks and shapes easily.
from pygame import mixer  # For playing alert sound
import imutils  # Utility functions for resizing images
import dlib  # For face detection and landmarks
import cv2  # OpenCV for image/video processing

# Set up pygame's audio mixer and load the alert sound that the detection loop plays.
# "music.mp3" is a relative path, so the file has to be reachable from the kernel's working directory.
mixer.init()  # Initialise the mixer before any sound is loaded.
mixer.music.load("music.mp3")  # Load the alert track; playback is started later with mixer.music.play().

def eye_aspect_ratio(eye):
    """Compute the Eye Aspect Ratio (EAR) of a single eye.

    EAR = (|eye[1] - eye[5]| + |eye[2] - eye[4]|) / (2 * |eye[0] - eye[3]|)

    Args:
        eye: Indexable collection of at least six (x, y) landmark points.
            Points 0 and 3 are treated as the horizontal corners; the pairs
            (1, 5) and (2, 4) are treated as the vertical lid pairs.

    Returns:
        The sum of the two vertical distances divided by twice the
        horizontal distance.
    """
    # Horizontal extent: corner to corner.
    eye_width = distance.euclidean(eye[0], eye[3])
    # Two vertical openings, measured between the paired lid points.
    lid_gap_sum = distance.euclidean(eye[1], eye[5]) + distance.euclidean(eye[2], eye[4])
    return lid_gap_sum / (2.0 * eye_width)

def mouth_aspect_ratio(mouth):
    """Compute the Mouth Aspect Ratio (MAR).

    MAR = (|mouth[2] - mouth[10]| + |mouth[4] - mouth[8]|) / (2 * |mouth[0] - mouth[6]|)

    Args:
        mouth: Indexable collection of (x, y) landmark points with at least
            eleven entries. Points 0 and 6 are treated as the horizontal
            corners; the pairs (2, 10) and (4, 8) are treated as the
            vertical lip pairs.

    Returns:
        The sum of the two vertical distances divided by twice the
        horizontal distance.
    """
    # Horizontal extent: corner to corner.
    mouth_width = distance.euclidean(mouth[0], mouth[6])
    # Two vertical openings, measured between the paired lip points.
    lip_gap_sum = distance.euclidean(mouth[2], mouth[10]) + distance.euclidean(mouth[4], mouth[8])
    return lip_gap_sum / (2.0 * mouth_width)

# --- Detection thresholds ---
EYE_THRESH = 0.25   # an averaged EAR below this is treated as "eyes closed"
MOUTH_THRESH = 0.5  # a MAR above this is treated as "yawning"
FRAME_CHECK = 20    # consecutive drowsy frames needed before the alert fires

# --- Models ---
# Frontal face detector plus the 68-point landmark predictor; the .dat model
# file is loaded from a relative path.
detect = dlib.get_frontal_face_detector()
predict = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# --- Landmark slices ---
# (start, end) index pairs used to slice the eye and mouth points out of the
# 68-point landmark array.
lStart, lEnd = face_utils.FACIAL_LANDMARKS_68_IDXS["left_eye"]
rStart, rEnd = face_utils.FACIAL_LANDMARKS_68_IDXS["right_eye"]
mStart, mEnd = face_utils.FACIAL_LANDMARKS_68_IDXS["mouth"]

# --- Capture state ---
cap = cv2.VideoCapture(0)  # device index 0
flag = 0                   # length of the current run of consecutive drowsy frames

# --- Session counters (reported after the loop ends) ---
total_frames = drowsy_frames = alert_triggered_frames = 0

# Main capture loop: process webcam frames until the user presses '1' or the
# camera stops delivering frames. The try/finally guarantees that the webcam,
# the preview window and the alert sound are released even if processing a
# frame raises.
try:
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            # cap.read() reports failure through `ret` (camera missing, busy or
            # disconnected). Resizing a missing frame would crash, so stop.
            print("Could not read a frame from the webcam; stopping.")
            break

        frame = imutils.resize(frame, width=450)  # smaller frame -> faster detection
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        subjects = detect(gray, 0)

        # NOTE(review): `flag` and the counters are shared by every detected
        # face, so with several faces in view one alert face resets the run
        # started by a drowsy one. Single-subject use is assumed -- confirm.
        for subject in subjects:
            shape = predict(gray, subject)
            shape = face_utils.shape_to_np(shape)  # landmarks as a NumPy array
            total_frames += 1  # incremented once per detected face

            # Slice out the eye and mouth landmarks.
            leftEye = shape[lStart:lEnd]
            rightEye = shape[rStart:rEnd]
            mouth = shape[mStart:mEnd]

            # EAR is averaged over both eyes; MAR comes from the mouth points.
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)
            ear = (leftEAR + rightEAR) / 2.0
            mar = mouth_aspect_ratio(mouth)

            # Outline eyes and mouth on the preview frame.
            leftEyeHull = cv2.convexHull(leftEye)
            rightEyeHull = cv2.convexHull(rightEye)
            mouthHull = cv2.convexHull(mouth)
            cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1)
            cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1)
            cv2.drawContours(frame, [mouthHull], -1, (0, 255, 0), 1)

            # A frame counts as drowsy when the eyes look closed OR the mouth
            # looks wide open.
            if ear < EYE_THRESH or mar > MOUTH_THRESH:
                drowsy_frames += 1
                flag += 1
                print(f"Flag: {flag}, EAR: {ear:.2f}, MAR: {mar:.2f}")

                # Alert only after FRAME_CHECK consecutive drowsy frames.
                if flag >= FRAME_CHECK:
                    alert_triggered_frames += 1
                    # The resized height depends on the camera's aspect ratio,
                    # so anchor the lower banner to the real frame height
                    # instead of a fixed y (325 is off-frame for 16:9 input).
                    bottom_y = frame.shape[0] - 12
                    cv2.putText(frame, "*********************ALERT!*********************", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                    cv2.putText(frame, "*********************ALERT!*********************", (10, bottom_y),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

                    # Do not restart the sound while it is already playing.
                    if not mixer.music.get_busy():
                        mixer.music.play()
            else:
                flag = 0  # the run of drowsy frames is broken

                # Silence the alert once the subject looks awake again.
                if mixer.music.get_busy():
                    mixer.music.stop()

        # Show the annotated frame.
        cv2.imshow("Frame", frame)

        # Quit when the user presses '1'.
        key = cv2.waitKey(1) & 0xFF
        if key == ord("1"):
            break
finally:
    # Always free the webcam and close the window, and make sure the alert
    # sound does not keep playing after the loop has ended.
    cap.release()
    cv2.destroyAllWindows()
    if mixer.music.get_busy():
        mixer.music.stop()

# Session summary. The counters only advance inside the per-face loop, so a
# zero total means no face was ever processed.
if total_frames <= 0:
    print("No frames with face detected.")
else:
    # Derive all figures first, then print the report in one go.
    non_drowsy_frames = total_frames - drowsy_frames
    detection_rate = drowsy_frames / total_frames * 100
    alert_rate = alert_triggered_frames / total_frames * 100
    # NOTE(review): this figure is built purely from the detector's own
    # counters; no ground-truth labels enter the formula, so read it as a
    # heuristic rather than a measured accuracy.
    accuracy = (non_drowsy_frames + alert_triggered_frames) / total_frames * 100

    print(f"\n--- Performance Metrics ---")
    print(f"Total Frames with Face: {total_frames}")
    print(f"Drowsy Frames Detected: {drowsy_frames}")
    print(f"Alerts Triggered: {alert_triggered_frames}")
    print(f"Detection Rate: {detection_rate:.2f}%")
    print(f"Alert Trigger Rate: {alert_rate:.2f}%")
    print(f"Approximate Accuracy: {accuracy:.2f}%")


Flag: 1, EAR: 0.24, MAR: 0.35
Flag: 2, EAR: 0.24, MAR: 0.35
Flag: 3, EAR: 0.22, MAR: 0.35
Flag: 1, EAR: 0.23, MAR: 0.35
Flag: 2, EAR: 0.23, MAR: 0.36
Flag: 3, EAR: 0.23, MAR: 0.35
Flag: 4, EAR: 0.23, MAR: 0.35
Flag: 5, EAR: 0.24, MAR: 0.37
Flag: 6, EAR: 0.20, MAR: 0.35
Flag: 7, EAR: 0.20, MAR: 0.35
Flag: 8, EAR: 0.22, MAR: 0.35
Flag: 9, EAR: 0.22, MAR: 0.35
Flag: 10, EAR: 0.25, MAR: 0.34
Flag: 11, EAR: 0.22, MAR: 0.34
Flag: 12, EAR: 0.22, MAR: 0.34
Flag: 13, EAR: 0.21, MAR: 0.34
Flag: 14, EAR: 0.23, MAR: 0.35
Flag: 15, EAR: 0.22, MAR: 0.36
Flag: 16, EAR: 0.22, MAR: 0.36
Flag: 17, EAR: 0.21, MAR: 0.35
Flag: 18, EAR: 0.21, MAR: 0.35
Flag: 19, EAR: 0.24, MAR: 0.35
Flag: 20, EAR: 0.24, MAR: 0.35
Flag: 21, EAR: 0.24, MAR: 0.35
Flag: 22, EAR: 0.18, MAR: 0.37
Flag: 23, EAR: 0.18, MAR: 0.36
Flag: 24, EAR: 0.18, MAR: 0.36
Flag: 25, EAR: 0.20, MAR: 0.34
Flag: 26, EAR: 0.21, MAR: 0.35
Flag: 27, EAR: 0.20, MAR: 0.35
Flag: 28, EAR: 0.20, MAR: 0.35
Flag: 29, EAR: 0.21, MAR: 0.36
Flag: 30, EAR: 0.21,