In [None]:
import os
import re
import cv2
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score
from tqdm import tqdm
import dlib
import cv2
import numpy as np
from imutils import face_utils
from EAR import eye_aspect_ratio
from MAR import mouth_aspect_ratio


In [None]:

 
# Face detector and 68-landmark shape predictor shared by the cells below.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(
    './dlib_shape_predictor/shape_predictor_68_face_landmarks.dat'
)

# Hand-set starting thresholds. EYE_AR_THRESH and MOUTH_AR_THRESH are
# re-assigned from training-set percentiles in a later cell.
EYE_AR_THRESH = 0.16
MOUTH_AR_THRESH = 0.68
# Only referenced by the commented-out head-pose rule in predict_drowsiness.
HEADPOSE_THRESH = 25

# Slice bounds into the landmark array for each facial region.
lStart, lEnd = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
rStart, rEnd = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
# NOTE(review): imutils lists the mouth region as (48, 68); starting at 49
# drops the first mouth landmark. Confirm against the point indexing used by
# MAR.mouth_aspect_ratio before changing this.
mStart, mEnd = 49, 68


In [26]:
import os
import numpy as np
import cv2
import re
from tqdm import tqdm
from imutils import face_utils

train_dir = "../data/splitted_Data/train"
classes = ["Non Drowsy", "Drowsy"]

# Per-image EAR/MAR samples, pooled over both classes (labels are not kept).
# Collected in separate lists so ear_values / mar_values are only ever arrays.
ear_samples = []
mar_samples = []
unreadable_images = []

for cls in classes:
    folder = os.path.join(train_dir, cls)
    for img_name in tqdm(os.listdir(folder), desc=f"Processing {cls}"):
        if not img_name.lower().endswith(".png"):
            continue

        img_path = os.path.join(folder, img_name)
        frame = cv2.imread(img_path)
        if frame is None:
            # cv2.imread returns None (it does not raise) for a corrupt or
            # unreadable file; skip it instead of crashing in cvtColor.
            unreadable_images.append(img_path)
            continue

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        rects = detector(gray, 0)
        if len(rects) == 0:
            # No face found: this image contributes no sample.
            continue

        # Only the first detected face is used.
        shape = face_utils.shape_to_np(predictor(gray, rects[0]))

        # EAR is the mean of the two per-eye aspect ratios.
        leftEye = shape[lStart:lEnd]
        rightEye = shape[rStart:rEnd]
        ear = (eye_aspect_ratio(leftEye) + eye_aspect_ratio(rightEye)) / 2.0

        mouth = shape[mStart:mEnd]
        mar = mouth_aspect_ratio(mouth)

        ear_samples.append(ear)
        mar_samples.append(mar)

ear_values = np.array(ear_samples)
mar_values = np.array(mar_samples)

if unreadable_images:
    print(
        f"Skipped {len(unreadable_images)} unreadable image(s), "
        f"first: {unreadable_images[0]}"
    )


Processing Non Drowsy: 100%|██████████| 15556/15556 [01:22<00:00, 188.63it/s]
Processing Drowsy: 100%|██████████| 17878/17878 [01:32<00:00, 193.80it/s]


In [None]:
 
# Replace the hand-set thresholds with values learned from the training
# features (both classes pooled).
# predict_drowsiness labels a frame "Drowsy" when its EAR is below this value.
EYE_AR_THRESH = np.percentile(ear_values, q=10)
# predict_drowsiness labels a frame "Drowsy" when its MAR is above this value.
MOUTH_AR_THRESH = np.percentile(mar_values, q=90)

print("Learned EAR threshold:", EYE_AR_THRESH)
print("Learned MAR threshold:", MOUTH_AR_THRESH)

Learned EAR threshold: 0.1697924387701805
Learned MAR threshold: 0.6899959667111429


In [31]:
# Evaluation settings for the test split: one sub-folder per class.
test_root = "../data/splitted_Data/test"
classes = ["Non Drowsy", "Drowsy"]

# Sliding-window rule used at video level: a video is flagged as drowsy once
# any SEQ_LEN consecutive frames contain at least MIN_DROWSY_IN_SEQ frames
# predicted "Drowsy".
SEQ_LEN, MIN_DROWSY_IN_SEQ = 5, 3


In [None]:

def _frame_sort_key(path):
    """Sort key that orders frame files by the numbers embedded in their names."""
    name = os.path.basename(path)
    # re.split with a capturing group alternates text, digits, text, ...;
    # odd positions are therefore always digit runs, so keys built this way
    # never compare an int against a str.
    pieces = re.split(r"([0-9]+)", name)
    numeric_aware = [int(p) if i % 2 else p for i, p in enumerate(pieces)]
    # The raw name breaks ties such as "A01.png" vs "A1.png" deterministically.
    return numeric_aware, name


def group_frames_by_video(folder):
    """Group the image files in ``folder`` by video and order each group's frames.

    The video id is the run of ASCII letters at the start of the file name
    (e.g. ``"A"`` for ``"A0012.png"``). Files without a .png/.jpg/.jpeg
    extension, or whose name does not start with a letter, are ignored.

    Args:
        folder: Directory containing the frame images.

    Returns:
        dict mapping video id -> list of full frame paths, sorted so that
        embedded numbers are compared numerically (``A2`` before ``A10``).
        A plain string sort would misorder frames whose indices are not
        zero-padded, which would break consecutive-frame windows downstream.
    """
    video_dict = {}
    for img_name in os.listdir(folder):
        if not img_name.lower().endswith((".png", ".jpg", ".jpeg")):
            continue
        m = re.match(r"[A-Za-z]+", img_name)
        if not m:
            continue
        video_dict.setdefault(m.group(), []).append(os.path.join(folder, img_name))

    for frame_paths in video_dict.values():
        frame_paths.sort(key=_frame_sort_key)
    return video_dict

In [None]:

def predict_drowsiness(frame):
    """Classify a single BGR frame as "Drowsy" or "Non Drowsy".

    The first detected face is reduced to an eye aspect ratio (EAR, mean of
    both eyes) and a mouth aspect ratio (MAR); the frame is "Drowsy" when
    EAR is below EYE_AR_THRESH or MAR is above MOUTH_AR_THRESH.

    Args:
        frame: Image as returned by ``cv2.imread``. ``None`` (what
            ``cv2.imread`` returns for an unreadable file) is accepted.

    Returns:
        "Drowsy" or "Non Drowsy". Frames that are ``None`` or contain no
        detectable face fall back to "Non Drowsy".
    """
    if frame is None:
        # Unreadable image: same fallback as "no face found" rather than
        # letting cv2.cvtColor raise.
        return "Non Drowsy"

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    rects = detector(gray, 0)
    if len(rects) == 0:
        return "Non Drowsy"

    # Only the first detected face is evaluated.
    shape = face_utils.shape_to_np(predictor(gray, rects[0]))

    # EAR: average of the left- and right-eye aspect ratios.
    leftEye = shape[lStart:lEnd]
    rightEye = shape[rStart:rEnd]
    ear = (eye_aspect_ratio(leftEye) + eye_aspect_ratio(rightEye)) / 2.0

    # MAR
    mouth = shape[mStart:mEnd]
    mar = mouth_aspect_ratio(mouth)

    # Decision rules: either cue alone marks the frame as drowsy.
    if ear < EYE_AR_THRESH or mar > MOUTH_AR_THRESH:
        return "Drowsy"
    return "Non Drowsy"

In [None]:

def _video_is_drowsy(frame_preds, seq_len, min_drowsy):
    """Collapse ordered per-frame labels into one video-level drowsy decision."""
    flags = [p == "Drowsy" for p in frame_preds]
    if len(flags) < seq_len:
        # Too short for a full window: drowsy when at least half the frames are.
        return sum(flags) >= len(flags) / 2
    # Drowsy as soon as any window of seq_len consecutive frames contains
    # at least min_drowsy drowsy frames.
    return any(
        sum(flags[start:start + seq_len]) >= min_drowsy
        for start in range(len(flags) - seq_len + 1)
    )


def evaluate_sequences_for_class(class_name):
    """Produce video-level true/predicted labels for one class folder.

    Frames under ``test_root/class_name`` are grouped into videos with
    ``group_frames_by_video``, each frame is labelled by
    ``predict_drowsiness``, and the per-frame labels are reduced to a single
    label per video using SEQ_LEN and MIN_DROWSY_IN_SEQ.

    Args:
        class_name: "Non Drowsy" or "Drowsy"; used both as the sub-folder
            name and as the ground-truth label of every video in it.

    Returns:
        (y_true_video, y_pred_video): two equally long lists of label
        strings, one entry per video.
    """
    folder = os.path.join(test_root, class_name)
    video_dict = group_frames_by_video(folder)

    y_true_video = []
    y_pred_video = []

    for frame_paths in tqdm(video_dict.values(), desc=f"Eval {class_name} (video-level)"):
        frame_preds = []
        for fp in frame_paths:
            frame = cv2.imread(fp)
            if frame is None:
                # cv2.imread returns None for an unreadable file. Count the
                # frame as "Non Drowsy" so it keeps its position in the
                # sequence instead of aborting the whole evaluation.
                frame_preds.append("Non Drowsy")
                continue
            frame_preds.append(predict_drowsiness(frame))  # "Drowsy" or "Non Drowsy"

        drowsy = _video_is_drowsy(frame_preds, SEQ_LEN, MIN_DROWSY_IN_SEQ)

        y_true_video.append(class_name)
        y_pred_video.append("Drowsy" if drowsy else "Non Drowsy")

    return y_true_video, y_pred_video



In [None]:

 
# Evaluate every class folder and pool the video-level labels.
y_true_all, y_pred_all = [], []
for cls in classes:
    truths, preds = evaluate_sequences_for_class(cls)
    y_true_all += truths
    y_pred_all += preds

# Encode the string labels as integers for the sklearn metrics.
label_map = {"Non Drowsy": 0, "Drowsy": 1}
y_true_bin = [label_map[label] for label in y_true_all]
y_pred_bin = [label_map[label] for label in y_pred_all]

print("\n=== Video-level Evaluation (with Continuous Frames) ===")
print("Accuracy:", accuracy_score(y_true_bin, y_pred_bin))
print("F1 Score:", f1_score(y_true_bin, y_pred_bin, average="binary"))
print("\nClassification Report:")
# label_map's insertion order matches the 0/1 encoding, so its keys serve
# directly as the report's display names.
print(classification_report(y_true_bin, y_pred_bin, target_names=list(label_map)))

cm = confusion_matrix(y_true_bin, y_pred_bin)
print("Confusion Matrix:\n", cm)

Eval Non Drowsy (video-level): 100%|██████████| 26/26 [00:04<00:00,  5.32it/s]
Eval Drowsy (video-level): 100%|██████████| 28/28 [00:05<00:00,  5.00it/s]


=== Video-level Evaluation (with Continuous Frames) ===
Accuracy: 0.5740740740740741
F1 Score: 0.5106382978723404

Classification Report:
              precision    recall  f1-score   support

  Non Drowsy       0.54      0.73      0.62        26
      Drowsy       0.63      0.43      0.51        28

    accuracy                           0.57        54
   macro avg       0.59      0.58      0.57        54
weighted avg       0.59      0.57      0.56        54

Confusion Matrix:
 [[19  7]
 [16 12]]



