<a href="https://colab.research.google.com/github/Ummmahek/Driver-drowsiness-detection/blob/main/YOLOFACEMARK.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install opencv-python



In [None]:
# YOLOFaceMark on NTHU-DDD Dataset (with YOLOFaceMark model and EAR/MAR classification)

import os
import cv2
import glob
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
from tqdm import tqdm
import requests
import zipfile

# --- 1. DOWNLOAD AND EXTRACT DATASET ---
# NOTE(review): this URL embeds an access key; consider reading it from an
# environment variable before sharing or committing the notebook.
data_url = "https://universe.roboflow.com/ds/p8jjDsbFGl?key=Bwh2h28aje"
dataset_path = "nthu_ddd_dataset.zip"
extract_path = "nthu_ddd_dataset"

# Skip the whole step when the extraction directory already exists, so the cell
# can be re-run without downloading again.
if not os.path.exists(extract_path):
    print("Downloading dataset...")
    # Stream the archive to disk; the context manager releases the connection
    # even if writing fails, and the timeout prevents an indefinite hang.
    with requests.get(data_url, stream=True, timeout=60) as response:
        # Fail fast on HTTP errors instead of saving an error page as a .zip
        # and hitting a confusing BadZipFile during extraction.
        response.raise_for_status()
        with open(dataset_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                file.write(chunk)
    print("Extracting dataset...")
    with zipfile.ZipFile(dataset_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("Dataset extracted to:", extract_path)



Downloading dataset...
Extracting dataset...
Dataset extracted to: nthu_ddd_dataset


In [None]:
!pip install dlib



In [3]:
!unzip lmk5.zip -d /content/lmk

Archive:  lmk5.zip
   creating: /content/lmk/lmk5/
   creating: /content/lmk/lmk5/train2/
   creating: /content/lmk/lmk5/train2/images/
  inflating: /content/lmk/lmk5/train2/images/001_glasses_sleepyCombination_1465_drowsy_jpg.rf.6fe0486cc1cde212f79451cbb464ca2c.jpg  
  inflating: /content/lmk/lmk5/train2/images/002_glasses_yawning_644_drowsy_jpg.rf.fecd43ee58e0c25073ca454a6d437a16.jpg  
  inflating: /content/lmk/lmk5/train2/images/001_glasses_slowBlinkWithNodding_1592_drowsy_jpg.rf.3f5624c98c0b6da627f1b19feea0a6f5.jpg  
  inflating: /content/lmk/lmk5/train2/images/385_jpg.rf.7d4e0af3b7d1d733ab8e55f256862a93.jpg  
  inflating: /content/lmk/lmk5/train2/images/525_jpg.rf.5d1568f80401f1671092701c4811795c.jpg  
  inflating: /content/lmk/lmk5/train2/images/001_glasses_slowBlinkWithNodding_1124_drowsy_jpg.rf.facecc4cf545125785431ae4fb9e841f.jpg  
  inflating: /content/lmk/lmk5/train2/images/530_jpg.rf.7c1f5a72839ed090594fb579524248e3.jpg  
  inflating: /content/lmk/lmk5/train2/images/001_gla

In [None]:
import os
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import numpy as np
from tqdm import tqdm

# --- 1. Dataset Loader ---

class YOLOLandmarkDataset(Dataset):
    """Image / label pairs read from ``<root_dir>/<img_folder>`` and ``<root_dir>/<label_folder>``.

    Only ``*.jpg`` and ``*.png`` images that have a same-named ``.txt`` label file
    are kept. A label file is read as one whitespace-separated list of numbers:
    the first value is skipped (presumably the class id — confirm), the next four
    are the box, and the remainder must come in triples of which only the first
    two values are kept (the third is presumably a visibility flag — confirm).

    Each item is ``(image_tensor, target)`` where ``target`` is a float32 tensor
    holding the 4 box values followed by the kept landmark values.
    """

    def __init__(self, root_dir, img_folder="images", label_folder="labels", img_size=416):
        """Index the usable images under ``root_dir``.

        Raises:
            FileNotFoundError: if no image has a matching label file.
        """
        self.img_dir = os.path.join(root_dir, img_folder)
        self.label_dir = os.path.join(root_dir, label_folder)
        self.img_size = img_size

        jpg_paths = glob.glob(os.path.join(self.img_dir, "*.jpg"))
        png_paths = glob.glob(os.path.join(self.img_dir, "*.png"))
        candidates = sorted(jpg_paths + png_paths)

        # Keep only the images whose label file is present on disk.
        self.img_files = [
            path for path in candidates
            if os.path.exists(self._label_path_for(path))
        ]
        if len(self.img_files) == 0:
            raise FileNotFoundError(f"No image files with corresponding label files found in {self.img_dir} and {self.label_dir} after checking for .txt files.")

        resize = transforms.Resize((img_size, img_size))
        self.transform = transforms.Compose([resize, transforms.ToTensor()])

    def _label_path_for(self, img_path):
        """Return the label path for an image: same name with the last extension replaced by ``.txt``."""
        stem = os.path.basename(img_path).rsplit('.', 1)[0]
        return os.path.join(self.label_dir, stem + ".txt")

    def __len__(self):
        """Number of images that have a label file."""
        return len(self.img_files)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and its target.

        Raises:
            ValueError: if the label holds fewer than 5 values, or the values
                after the first 5 are not a multiple of 3.
        """
        img_path = self.img_files[idx]
        img = self.transform(Image.open(img_path).convert("RGB"))

        label_path = self._label_path_for(img_path)
        with open(label_path, "r") as f:
            vals = list(map(float, f.read().split()))
            if len(vals) < 5:
                raise ValueError(f"Label file {label_path} has insufficient data: {len(vals)} values found, expected at least 5.")
            box, lmk_vals = vals[1:5], vals[5:]
            if len(lmk_vals) % 3 != 0:
                raise ValueError(f"Landmark values in label file {label_path} are not a multiple of 3. Found {len(lmk_vals)} values.")
            # Drop every third value of each triple, keeping the first two.
            landmarks = [v for pos, v in enumerate(lmk_vals) if pos % 3 != 2]
            target = torch.tensor(box + landmarks, dtype=torch.float32)
        return img, target

# --- 2. YOLOFCEMark Baseline Model (with Stem, Bottleneck, Neck) ---

class Stem(nn.Module):
    """First stage of the network: 3x3 stride-2 convolution (no bias), batch norm, ReLU."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU and return the result."""
        return self.act(self.bn(self.conv(x)))

class Bottleneck(nn.Module):
    """Depthwise 3x3 conv followed by a pointwise 1x1 conv, each with batch norm.

    A ReLU follows the depthwise stage and the final output. The input is added
    back before the final ReLU only when ``stride == 1`` and the channel count
    is unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # Depthwise stage: one filter per input channel (groups == in_channels).
        self.dwconv = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=in_channels,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.act1 = nn.ReLU(inplace=True)
        # Pointwise stage: 1x1 conv that changes the channel count.
        self.pwconv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.act2 = nn.ReLU(inplace=True)
        # The skip connection needs matching input and output shapes.
        self.use_res_connect = (stride == 1 and in_channels == out_channels)

    def forward(self, x):
        """Run the depthwise and pointwise stages, with the skip connection when enabled."""
        features = self.act1(self.bn1(self.dwconv(x)))
        features = self.bn2(self.pwconv(features))
        if self.use_res_connect:
            features += x
        return self.act2(features)

class Neck(nn.Module):
    """Two stacked ``Bottleneck`` blocks: ``in_channels -> out_channels -> out_channels``."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.bneck1 = Bottleneck(in_channels, out_channels)
        self.bneck2 = Bottleneck(out_channels, out_channels)

    def forward(self, x):
        """Pass ``x`` through both bottlenecks in order."""
        return self.bneck2(self.bneck1(x))

class YOLOFCEMarkBaseline(nn.Module):
    """Single-prediction regressor: stem, two stride-2 bottlenecks, neck, global pooling, 1x1 head.

    For each image the network outputs a flat vector of ``5 + 2 * num_landmarks``
    values. The loss in this file reads them as 4 box values, 1 confidence value
    and then the landmark coordinates.
    """

    def __init__(self, num_landmarks=5):
        super().__init__()
        self.stem = Stem(3, 24)
        self.layer1 = Bottleneck(24, 48, stride=2)
        self.layer2 = Bottleneck(48, 96, stride=2)
        self.neck = Neck(96, 96)
        # Global average pooling collapses the feature map to 1x1 per channel.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        n_outputs = 5 + num_landmarks * 2
        self.head = nn.Conv2d(96, n_outputs, kernel_size=1, stride=1)

    def forward(self, x):
        """Return a ``(batch, 5 + 2 * num_landmarks)`` tensor of raw predictions."""
        stages = (self.stem, self.layer1, self.layer2, self.neck, self.avgpool, self.head)
        for stage in stages:
            x = stage(x)
        batch = x.size(0)
        return x.view(batch, -1)

# --- 3. Loss Function ---

def yoloface_loss(pred, target, num_landmarks=5):
    """Sum of three mean-squared-error terms: box, confidence and landmarks.

    Both tensors are indexed along dim 1 as columns 0-3 (box), column 4
    (confidence) and columns ``5 .. 5 + 2 * num_landmarks`` (landmarks).
    Each term is averaged over its own elements and the three are added
    with equal weight.
    """
    lmk_cols = slice(5, 5 + num_landmarks * 2)
    mse = nn.functional.mse_loss

    bbox_term = mse(pred[:, :4], target[:, :4])
    conf_term = mse(pred[:, 4], target[:, 4])
    lmk_term = mse(pred[:, lmk_cols], target[:, lmk_cols])

    return bbox_term + conf_term + lmk_term

# --- 4. Metrics Function ---

def _xywh_to_xyxy(box):
    """Convert a centre-format box ``(xc, yc, w, h)`` to corners ``(x1, y1, x2, y2)``."""
    xc, yc, w, h = box
    half_w, half_h = w / 2, h / 2
    return xc - half_w, yc - half_h, xc + half_w, yc + half_h


def _box_iou(box_a, box_b, eps=1e-6):
    """Intersection-over-union of two corner-format boxes.

    Widths and heights are clamped at zero so a degenerate box (negative
    predicted size) contributes zero area instead of a negative one.
    ``eps`` keeps the division defined when both boxes are empty.
    """
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b
    inter_w = max(min(ax2, bx2) - max(ax1, bx1), 0)
    inter_h = max(min(ay2, by2) - max(ay1, by1), 0)
    inter = inter_w * inter_h
    area_a = max(ax2 - ax1, 0) * max(ay2 - ay1, 0)
    area_b = max(bx2 - bx1, 0) * max(by2 - by1, 0)
    return inter / (area_a + area_b - inter + eps)


def compute_metrics(preds, targets, num_landmarks=5, iou_thresh=0.5, lmk_thresh=0.05):
    """Per-batch box and landmark metrics (not cumulative across batches).

    Args:
        preds, targets: 2-D tensors with one row per sample, laid out as 4 box
            values ``(xc, yc, w, h)``, 1 confidence value, then
            ``2 * num_landmarks`` landmark coordinates.
        num_landmarks: number of (x, y) landmark pairs per row.
        iou_thresh: minimum IoU for a predicted box to count as a match.
        lmk_thresh: accepted for interface compatibility; currently unused.

    Returns:
        dict with ``accuracy``, ``precision``, ``recall``, ``f1``, ``mean_iou``
        and ``mean_lmk_dist`` as Python floats. All values are 0.0 for an
        empty batch.

    Every sample is treated as having exactly one ground-truth box and one
    predicted box. A prediction below ``iou_thresh`` is therefore both a false
    positive (the prediction is wrong) and a false negative (the ground-truth
    box was missed), which makes recall meaningful instead of pinned near 1.
    """
    preds = preds.detach().cpu().numpy()
    targets = targets.detach().cpu().numpy()
    batch_size = preds.shape[0]
    if batch_size == 0:
        # Nothing to score; avoid dividing by zero or averaging empty lists.
        return {
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0,
            'mean_iou': 0.0,
            'mean_lmk_dist': 0.0
        }

    lmk_cols = slice(5, 5 + num_landmarks * 2)
    TP = FP = FN = 0
    iou_list = []
    lmk_dist_list = []
    for i in range(batch_size):
        iou = _box_iou(_xywh_to_xyxy(preds[i, :4]), _xywh_to_xyxy(targets[i, :4]))
        iou_list.append(iou)
        if iou >= iou_thresh:
            TP += 1
        else:
            # Wrong prediction AND missed ground truth.
            FP += 1
            FN += 1
        # Landmarks: Euclidean distance per point, in the same units as the inputs.
        pred_lmk = preds[i, lmk_cols].reshape(-1, 2)
        true_lmk = targets[i, lmk_cols].reshape(-1, 2)
        lmk_dist_list.extend(np.linalg.norm(pred_lmk - true_lmk, axis=1))

    accuracy = TP / batch_size
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
    return {
        'accuracy': float(accuracy),
        'precision': float(precision),
        'recall': float(recall),
        'f1': float(f1),
        'mean_iou': float(np.mean(iou_list)),
        'mean_lmk_dist': float(np.mean(lmk_dist_list)) if lmk_dist_list else 0.0
    }

def avg_metrics(metrics_list):
    """Average a list of metric dicts key by key.

    The key set is taken from the first dict. An empty (or falsy) input
    yields an empty dict.
    """
    averaged = {}
    if metrics_list:
        for key in metrics_list[0].keys():
            averaged[key] = np.mean([entry[key] for entry in metrics_list])
    return averaged

# --- 5. Training Loop ---

def _run_epoch(model, loader, device, num_landmarks, desc, optimizer=None):
    """Run one pass over ``loader``; trains when ``optimizer`` is given, otherwise evaluates.

    Returns ``(mean_loss, averaged_metrics, n_batches)``. When the loader yields
    no batches the result is ``(None, {}, 0)`` so the caller can decide what to do.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()
    loss_sum = 0.0
    n_batches = 0
    metrics_per_batch = []
    with torch.set_grad_enabled(training):
        for imgs, targets in tqdm(loader, desc=desc):
            imgs, targets = imgs.to(device), targets.to(device)
            # Insert a constant confidence of 1.0 after the four box values, so
            # the targets line up with the columns the loss and metrics read.
            confidence_target = torch.ones(targets.shape[0], 1, device=device)
            targets_padded = torch.cat([targets[:, :4], confidence_target, targets[:, 4:]], dim=1)
            preds = model(imgs)
            loss = yoloface_loss(preds, targets_padded, num_landmarks)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            loss_sum += loss.item()
            metrics_per_batch.append(compute_metrics(preds, targets_padded, num_landmarks))
            n_batches += 1
    if n_batches == 0:
        return None, {}, 0
    return loss_sum / n_batches, avg_metrics(metrics_per_batch), n_batches


def _format_metrics(metrics):
    """Render an averaged metrics dict as the one-line summary used in the epoch log."""
    return (f"Acc: {metrics['accuracy']:.3f} | Prec: {metrics['precision']:.3f} | "
            f"Rec: {metrics['recall']:.3f} | F1: {metrics['f1']:.3f} | "
            f"IoU: {metrics['mean_iou']:.3f} | LmkDist: {metrics['mean_lmk_dist']:.4f}")


def train_model(model, train_loader, valid_loader, device, num_epochs=10, lr=1e-3, num_landmarks=5):
    """Train ``model`` with Adam and print loss and metrics for each epoch.

    Each epoch runs one training pass over ``train_loader`` followed by one
    evaluation pass over ``valid_loader``. The model is moved to ``device``
    and updated in place; nothing is returned.

    Args:
        model: network whose output columns match ``yoloface_loss``.
        train_loader, valid_loader: iterables of ``(images, targets)`` batches.
        device: device the model and batches are moved to.
        num_epochs: number of passes over the training data.
        lr: Adam learning rate.
        num_landmarks: landmark pairs per target, forwarded to the loss and metrics.

    Raises:
        ValueError: if ``train_loader`` yields no batches.

    An empty ``valid_loader`` is reported and skipped rather than crashing
    with a division by zero.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.to(device)
    for epoch in range(num_epochs):
        tag = f"Epoch {epoch+1}/{num_epochs}"

        train_loss, train_metrics, n_train = _run_epoch(
            model, train_loader, device, num_landmarks, desc=f"{tag} [Train]", optimizer=optimizer
        )
        if n_train == 0:
            raise ValueError("train_loader produced no batches; nothing to train on.")
        print(f"{tag} | Train Loss: {train_loss:.4f} | " + _format_metrics(train_metrics))

        # Validation
        val_loss, val_metrics, n_val = _run_epoch(
            model, valid_loader, device, num_landmarks, desc=f"{tag} [Valid]"
        )
        if n_val == 0:
            print("  Val: skipped (validation loader produced no batches)")
            continue
        print(f"  Val Loss: {val_loss:.4f} | " + _format_metrics(val_metrics))

# --- 6. Putting it all together ---

if __name__ == "__main__":
    # Dataset locations and the square size images are resized to.
    train_dir = "/content/lmk/lmk5/train2"
    valid_dir = "/content/lmk/lmk5/valid2"
    img_size = 416

    # Infer the landmark count from the first training label with a valid layout:
    # 5 leading values followed by a whole number of 3-value groups.
    train_label_dir = os.path.join(train_dir, "labels")
    sample_label_files = glob.glob(os.path.join(train_label_dir, "*.txt"))
    if len(sample_label_files) == 0:
        raise FileNotFoundError(f"No label files found in {train_label_dir}")

    sample_label, num_landmarks = None, None
    for candidate in sample_label_files:
        with open(candidate, "r") as handle:
            n_values = len(handle.read().split())
        n_extra = n_values - 5
        # The n_extra >= 0 guard matters: a negative multiple of 3 would pass the modulo test.
        if n_extra >= 0 and n_extra % 3 == 0:
            sample_label, num_landmarks = candidate, n_extra // 3
            break
    if num_landmarks is None or sample_label is None:
        raise FileNotFoundError(f"Could not find a sample label file with a valid format in {train_label_dir}")
    print(f"Detected number of landmarks per object: {num_landmarks}")

    # Training configuration.
    batch_size = 8
    num_epochs = 10

    train_set = YOLOLandmarkDataset(train_dir, img_size=img_size)
    valid_set = YOLOLandmarkDataset(valid_dir, img_size=img_size)
    if not len(train_set):
        raise RuntimeError("Training dataset is empty after filtering.")
    if not len(valid_set):
        print("Warning: Validation dataset is empty after filtering.")

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False)

    # Use the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = YOLOFCEMarkBaseline(num_landmarks=num_landmarks)
    train_model(
        model,
        train_loader,
        valid_loader,
        device,
        num_epochs=num_epochs,
        num_landmarks=num_landmarks,
    )

Detected number of landmarks per object: 5


Epoch 1/10 [Train]:  32%|███▏      | 63/195 [00:32<01:15,  1.74it/s]