In [11]:
# Re-running the model training setup since kernel was reset

# Install required packages
!pip install opencv-python-headless mediapipe scikit-learn pandas joblib tqdm --quiet



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [6]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("ismailnasri20/driver-drowsiness-dataset-ddd")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/ismailnasri20/driver-drowsiness-dataset-ddd?dataset_version_number=1...


100%|██████████| 2.58G/2.58G [01:36<00:00, 28.8MB/s]

Extracting files...





Path to dataset files: /Users/chetan/.cache/kagglehub/datasets/ismailnasri20/driver-drowsiness-dataset-ddd/versions/1


In [16]:
# This script modifies the training pipeline to include:
# - Manual capture of 5 'alert' and 5 'drowsy' baseline samples from webcam
# - Uses these samples to personalize and extend the training dataset
# - Computes a personalized EAR threshold for PERCLOS
# - Trains a RandomForest model on all features

import cv2
import numpy as np
import joblib
import mediapipe as mp
from collections import deque
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report

# Load dataset with EAR, head pitch, PERCLOS columns
df = pd.read_csv("drowsiness_augmented_dataset.csv")
baseline_samples = []

# Setup MediaPipe
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5
)

LEFT_EYE = [33, 160, 158, 133, 153, 144]
RIGHT_EYE = [362, 387, 385, 263, 373, 380]
CHIN = 152
FOREHEAD = 10

def calculate_ear(landmarks, indices):
    pts = np.array([[landmarks[i].x, landmarks[i].y] for i in indices])
    v1 = np.linalg.norm(pts[1] - pts[5])
    v2 = np.linalg.norm(pts[2] - pts[4])
    h = np.linalg.norm(pts[0] - pts[3])
    return (v1 + v2) / (2.0 * h) if h > 0 else 0

def estimate_head_pitch(landmarks):
    chin = np.array([landmarks[CHIN].x, landmarks[CHIN].y])
    forehead = np.array([landmarks[FOREHEAD].x, landmarks[FOREHEAD].y])
    delta = chin - forehead
    return np.arctan2(delta[1], delta[0])

# Setup camera
cap = cv2.VideoCapture(0)
ear_history = deque(maxlen=5)
print("Press 'a' to record ALERT, 'd' for DROWSY. Need 5 of each.")

while len([x for x in baseline_samples if x[1] == 'alert']) < 5 or len([x for x in baseline_samples if x[1] == 'drowsy']) < 5:
    ret, frame = cap.read()
    if not ret:
        continue

    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = face_mesh.process(rgb)

    if result.multi_face_landmarks:
        landmarks = result.multi_face_landmarks[0].landmark
        left_ear = calculate_ear(landmarks, LEFT_EYE)
        right_ear = calculate_ear(landmarks, RIGHT_EYE)
        avg_ear = (left_ear + right_ear) / 2.0
        head_pitch = estimate_head_pitch(landmarks)

        ear_history.append(avg_ear)

        if len(ear_history) == ear_history.maxlen:
            ear_std = np.std(ear_history)
            perclos = sum(1 for e in ear_history if e < 0.21) / len(ear_history)

            feature_vector = [left_ear, right_ear, avg_ear, head_pitch, ear_std, perclos]

            key = cv2.waitKey(1) & 0xFF
            if key == ord('a') and len([x for x in baseline_samples if x[1] == 'alert']) < 5:
                baseline_samples.append((feature_vector, 'alert'))
                print("Added ALERT sample")
            elif key == ord('d') and len([x for x in baseline_samples if x[1] == 'drowsy']) < 5:
                baseline_samples.append((feature_vector, 'drowsy'))
                print("Added DROWSY sample")

        label_text = f"EAR: {avg_ear:.3f}"
        cv2.putText(frame, label_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2)

    else:
        cv2.putText(frame, "No face", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (128, 128, 128), 2)

    cv2.imshow("Capture Baseline", frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()

# Append baseline samples to dataset
baseline_df = pd.DataFrame([x[0] + [x[1]] for x in baseline_samples], columns=df.columns)
df = pd.concat([df, baseline_df], ignore_index=True)
df.to_csv("drowsiness_augmented_dataset_with_baseline.csv", index=False)

# Train model with added baseline data
X = df.drop("label", axis=1).values
y = df["label"].values

le = LabelEncoder()
y_encoded = le.fit_transform(y)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42)

model = RandomForestClassifier(n_estimators=100, max_depth=10, class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, target_names=le.classes_)

# Save updated model
joblib.dump(model, "rf_drowsiness_model.pkl")
joblib.dump(scaler, "rf_scaler.pkl")
joblib.dump(le, "rf_label_encoder.pkl")

report


I0000 00:00:1751556970.365575 1618214 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M1 Pro
W0000 00:00:1751556970.368295 1779033 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1751556970.377352 1779036 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Press 'a' to record ALERT, 'd' for DROWSY. Need 5 of each.
Added DROWSY sample
Added ALERT sample
Added ALERT sample
Added ALERT sample
Added ALERT sample
Added DROWSY sample
Added DROWSY sample
Added DROWSY sample
Added DROWSY sample
Added ALERT sample


'              precision    recall  f1-score   support\n\n       alert       0.76      0.87      0.81      3889\n      drowsy       0.87      0.76      0.81      4469\n\n    accuracy                           0.81      8358\n   macro avg       0.81      0.81      0.81      8358\nweighted avg       0.82      0.81      0.81      8358\n'

In [18]:
import cv2
import numpy as np
import joblib
import mediapipe as mp
from collections import deque

# Load model, scaler, and encoder
model = joblib.load("rf_drowsiness_model.pkl")
scaler = joblib.load("rf_scaler.pkl")
encoder = joblib.load("rf_label_encoder.pkl")

# MediaPipe setup
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5
)

# Facial landmark indices
LEFT_EYE = [33, 160, 158, 133, 153, 144]
RIGHT_EYE = [362, 387, 385, 263, 373, 380]
CHIN = 152
FOREHEAD = 10

def calculate_ear(landmarks, indices):
    pts = np.array([[landmarks[i].x, landmarks[i].y] for i in indices])
    v1 = np.linalg.norm(pts[1] - pts[5])
    v2 = np.linalg.norm(pts[2] - pts[4])
    h = np.linalg.norm(pts[0] - pts[3])
    return (v1 + v2) / (2.0 * h) if h > 0 else 0

def estimate_head_pitch(landmarks):
    chin = np.array([landmarks[CHIN].x, landmarks[CHIN].y])
    forehead = np.array([landmarks[FOREHEAD].x, landmarks[FOREHEAD].y])
    delta = chin - forehead
    return np.arctan2(delta[1], delta[0])  # vertical over horizontal

cap = cv2.VideoCapture(0)
ear_history = deque(maxlen=5)

print("Real-time drowsiness detection with fixed confidence threshold (20%)")

while True:
    ret, frame = cap.read()
    if not ret:
        continue

    threshold = 0.20  # fixed 20% confidence threshold

    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = face_mesh.process(rgb)

    status_text = "No Face"
    box_color = (128, 128, 128)

    if result.multi_face_landmarks:
        landmarks = result.multi_face_landmarks[0].landmark

        left_ear = calculate_ear(landmarks, LEFT_EYE)
        right_ear = calculate_ear(landmarks, RIGHT_EYE)
        avg_ear = (left_ear + right_ear) / 2.0
        ear_history.append(avg_ear)

        head_pitch = estimate_head_pitch(landmarks)

        # Temporal features
        if len(ear_history) == ear_history.maxlen:
            ear_std = np.std(ear_history)
            perclos = sum(1 for e in ear_history if e < 0.21) / len(ear_history)
        else:
            ear_std = 0
            perclos = 0

        # Final feature vector
        features = np.array([[left_ear, right_ear, avg_ear, head_pitch, ear_std, perclos]])
        features_scaled = scaler.transform(features)
        probs = model.predict_proba(features_scaled)[0]
        predicted_index = np.argmax(probs)
        confidence = probs[predicted_index]
        label = encoder.inverse_transform([predicted_index])[0]

        # Apply threshold
        if confidence >= threshold:
            status_text = f"{label} ({confidence:.2f})"
            box_color = (0, 255, 0) if label == "alert" else (0, 0, 255)
        else:
            status_text = f"Uncertain ({confidence:.2f})"
            box_color = (0, 255, 255)

    # Draw result on frame
    cv2.putText(frame, status_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, box_color, 2)
    cv2.imshow("Drowsiness Detection", frame)

    if cv2.waitKey(1) & 0xFF == 27:  # ESC
        break

cap.release()
cv2.destroyAllWindows()


I0000 00:00:1751557367.040957 1618214 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M1 Pro
W0000 00:00:1751557367.053100 1791830 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1751557367.062601 1791830 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Real-time drowsiness detection with fixed confidence threshold (20%)


KeyboardInterrupt: 