In [2]:
import cv2
import os
import glob
import pandas as pd
import numpy as np
import mediapipe as mp
from tqdm import tqdm

# --- CONFIGURATION ---
DATASET_ROOT = "data/raw/train/"  # Root folder of the dataset
EXTENSIONS = ['*.mp4', '*.avi', '*.mov', '*.mkv'] # Video file patterns to search for

# Init MediaPipe face detection, used only to check whether a face is present.
# NOTE(review): the original comment called this "fast mode"; confirm that
# model_selection=1 is the intended MediaPipe model for this camera setup.
mp_face_detection = mp.solutions.face_detection
face_detector = mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5)

def get_video_properties(video_path):
    """Collect technical properties and simple quality metrics for one video.

    One frame is sampled from the middle of the video (to avoid a black
    screen at the start); if that read fails, the first frame is tried
    instead. Brightness, blur and face presence are computed on that frame.

    Args:
        video_path: Path of the video file to open with OpenCV.

    Returns:
        ``None`` if the video cannot be opened, otherwise a dict with the keys
        "Resolution", "FPS", "Total Frames", "Duration (s)", "Brightness",
        "Blur Score" and "Face Found". The three quality values stay at
        0 / 0 / False when no frame could be read.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None

        # 1. Technical properties
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Clamp so a bogus negative frame count cannot produce a negative
        # duration or a negative seek position.
        total_frames = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0)
        duration = total_frames / fps if fps > 0 else 0

        # 2. Quality analysis on a frame sampled from the middle of the video
        cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames // 2)
        ret, frame = cap.read()
        if not ret:
            # Seeking can fail; fall back to the first frame rather than
            # silently reporting zero brightness/blur.
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            ret, frame = cap.read()
    finally:
        # Always free the capture handle, even if a property read raised.
        cap.release()

    avg_brightness = 0
    blur_score = 0
    face_detected = False

    if ret:
        # Brightness: mean of the grayscale pixels
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        avg_brightness = float(np.mean(gray))

        # Blur: variance of the Laplacian (lower value = blurrier)
        blur_score = float(cv2.Laplacian(gray, cv2.CV_64F).var())

        # Face presence check (MediaPipe expects RGB input)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_detector.process(frame_rgb)
        face_detected = bool(results.detections)

    return {
        "Resolution": f"{width}x{height}",
        "FPS": round(fps, 2),
        "Total Frames": total_frames,
        "Duration (s)": round(duration, 2),
        "Brightness": round(avg_brightness, 2),
        "Blur Score": round(blur_score, 2),
        "Face Found": face_detected
    }

def main():
    """Scan DATASET_ROOT for videos, analyse each one and write a CSV report.

    Category and subject are taken from the two directory levels above each
    video (expected layout: data/raw/train/CATEGORY/SUBJECT/video.mp4).
    Videos that cannot be opened are listed instead of being dropped
    silently, and the summary is skipped when nothing could be analysed.
    """
    # Find all videos recursively
    video_files = []
    for ext in EXTENSIONS:
        video_files.extend(glob.glob(os.path.join(DATASET_ROOT, "**", ext), recursive=True))

    print(f"Ditemukan {len(video_files)} video. Sedang menganalisis...")

    data_report = []
    unreadable = []

    for filepath in tqdm(video_files):
        # normpath makes the separators uniform before splitting on os.sep.
        parts = os.path.normpath(filepath).split(os.sep)
        category = parts[-3] if len(parts) >= 3 else "unknown"  # drowsiness / non-drowsiness
        subject = parts[-2] if len(parts) >= 2 else "unknown"   # subject_xx
        filename = os.path.basename(filepath)

        props = get_video_properties(filepath)
        if props is None:
            unreadable.append(filepath)
            continue

        data_report.append({
            "Category": category,
            "Subject": subject,
            "Video Name": filename,
            **props
        })

    if unreadable:
        print(f"\n{len(unreadable)} video tidak dapat dibuka dan dilewati:")
        for path in unreadable:
            print(f"  - {path}")

    if not data_report:
        # An empty DataFrame has no "Category" column, so groupby would raise.
        print("\nTidak ada video yang berhasil dianalisis; laporan tidak dibuat.")
        return

    # Build the DataFrame
    df = pd.DataFrame(data_report)

    # Short summary
    print("\n=== LAPORAN EDA VIDEO ===")
    print(df.head())
    print("\n--- Ringkasan per Kategori ---")
    print(df.groupby("Category")[["Duration (s)", "Brightness", "Blur Score"]].mean())
    print("\n--- Cek Validitas Wajah ---")
    print(df["Face Found"].value_counts())

    # Save to CSV
    output_csv = "video_eda_report.csv"
    df.to_csv(output_csv, index=False)
    print(f"\nLaporan lengkap disimpan ke: {output_csv}")

# Run the EDA only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

I0000 00:00:1763982941.418501  548915 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1763982941.422289  551248 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.0.7-0ubuntu0.24.04.2), renderer: Mesa Intel(R) HD Graphics 620 (KBL GT2)


Ditemukan 120 video. Sedang menganalisis...


  0%|          | 0/120 [00:00<?, ?it/s]W0000 00:00:1763982941.461112  551244 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
100%|██████████| 120/120 [00:50<00:00,  2.39it/s]


=== LAPORAN EDA VIDEO ===
         Category     Subject         Video Name Resolution    FPS  \
0  non-drowsiness  subject_13     side_focus.mp4   1280x720  30.04   
1  non-drowsiness  subject_13  front_talking.mp4  1920x1080  30.00   
2  non-drowsiness  subject_13    front_focus.mp4  1920x1080  30.00   
3  non-drowsiness  subject_13   side_talking.mp4   1280x720  30.04   
4  non-drowsiness  subject_06     side_focus.mp4   1280x720  30.00   

   Total Frames  Duration (s)  Brightness  Blur Score  Face Found  
0           435         14.48      117.33      432.39        True  
1           550         18.33      104.79      181.78        True  
2           481         16.03      102.71      156.95        True  
3           479         15.94      115.38      539.48        True  
4           489         16.30      150.03      188.37        True  

--- Ringkasan per Kategori ---
                Duration (s)  Brightness  Blur Score
Category                                            
drowsi




Ini adalah situasi "Paling Rumit" (Worst Case Scenario) dalam Computer Vision, tapi sangat umum terjadi.

Mari kita bedah strateginya satu per satu. Kuncinya adalah: **Kita harus membuat Script Auto-Labeling kita "Sadar Konteks"**. Script tidak boleh buta-tuli melabeli semua titik yang dikasih MediaPipe.

---

### 1. Masalah Side View (Kamera Samping)

Jika kamera dipasang di pilar A mobil (melihat profil wajah dari samping), atau sopir menoleh drastis:

*   **Masalah:** Salah satu mata dan separuh mulut akan tersembunyi. MediaPipe terkadang "halusinasi" (menebak posisi mata yang hilang itu ada di balik kepala), tapi secara visual mata itu tidak ada.
*   **Bahaya:** Jika kita melabeli mata yang "tidak terlihat" itu, Model YOLO akan bingung: *"Disuruh cari mata, tapi di gambar cuma ada pipi/telinga. Aku harus belajar apa?"*
*   **Solusi di Script:** Gunakan **Head Pose Estimation (Yaw)**.
    *   Hitung sudut toleh wajah.
    *   Jika menoleh ke **Kiri** > 20 derajat $\rightarrow$ **HANYA labeli Mata Kiri & Sudut Mulut Kiri**. Jangan labeli Mata Kanan.
    *   Jika menoleh ke **Kanan** > 20 derajat $\rightarrow$ **HANYA labeli Mata Kanan & Sudut Mulut Kanan**.
    *   Label Wajah (`face`) tetap dibuat utuh.

### 2. Masalah Kacamata Hitam (Sunglasses) & Kacamata Biasa

*   **Kacamata Biasa (Bening):**
    *   Biasanya aman. MediaPipe dan YOLO bisa melihat tembus pandang. Perlakuan: **Normal**.
*   **Kacamata Hitam (Gelap):**
    *   **Masalah:** Mata tidak terlihat. MediaPipe mungkin masih bisa tracking bentuk wajah, tapi akurasi kedipan (EAR) sangat rendah. YOLO tidak bisa melihat pola mata.
    *   **Solusi di Script:** Deteksi area mata. Hitung **Rata-rata Kecerahan Pixel** di area mata.
        *   Jika Pixel sangat gelap (Hitam) $\rightarrow$ **JANGAN buat label mata (`o_eyes`/`c_eyes`)**.
        *   Biarkan YOLO belajar bahwa di area itu ada Wajah, tapi tidak ada Mata yang bisa dideteksi.
        *   *(Opsional: Labeli sebagai kelas baru `sunglasses`)*.

### 3. Masalah Masker & Tangan (Covered Mouth)

*   **Masalah:** Mulut tertutup.
*   **Solusi di Script:**
    *   Gunakan logika **MediaPipe Hands** (seperti diskusi sebelumnya) untuk tangan.
    *   Untuk masker, gunakan logika **Pixel Variance/Color** (deteksi warna solid di area mulut) atau cukup pakai asumsi dataset: jika subject tertentu di `train/drowsiness` selalu memakai masker, lebih baik **folder subject itu di-exclude** dari labeling mulut, ATAU labeli semua frame videonya sebagai `covered_mouth`.

### 4. Masalah Kombinasi (Combo Maut)

Contoh: **Side View + Kacamata Hitam + Menguap Tutup Mulut.**
*   Apa yang terlihat? Hanya telinga, pipi, kacamata hitam, dan punggung tangan.
*   **Label apa yang harus dihasilkan?**
    *   `face`: YES.
    *   `o_eyes` / `c_eyes`: **NO** (Ketutup kacamata & angle).
    *   `o_mouth` / `c_mouth`: **NO** (Ketutup tangan).
    *   `covered_mouth`: **YES** (Deteksi tangan).
*   **Kesimpulan:** Model YOLO hanya akan mendeteksi `face` dan `covered_mouth`.

---

### Rencana Update Kode: `prepare_data_advanced.py`

Kita perlu meng-upgrade script kita menjadi sangat pintar. Script ini akan melakukan **Filtering Logika** sebelum menulis file `.txt`.

Berikut adalah **Kode Lengkap** untuk menangani Side View, Kacamata Hitam (Pixel Check), dan Tangan (Covered Mouth).

```python
import cv2
import mediapipe as mp
import os
import glob
import numpy as np
from tqdm import tqdm

# --- CONFIGURATION ---
ROOT_TRAIN_DIR = "data/raw/train/" 
OUTPUT_BASE_DIR = "data/yolo_dataset_advanced/"

# Only every FRAME_INTERVAL-th frame of a video is processed.
FRAME_INTERVAL = 10
# Videos of these subjects go to the "val" split; all others go to "train".
VAL_SUBJECTS = ['subject_13', 'subject_14', 'subject_15']

# Thresholds
EAR_THRESH = 0.25
MAR_THRESH = 0.50
DARKNESS_THRESH = 40  # Pixel darkness cut-off (0-255) used to detect sunglasses

# Class ID
CLS_FACE = 0
CLS_O_EYES = 1
CLS_C_EYES = 2
CLS_O_MOUTH = 3
CLS_C_MOUTH = 4
CLS_COVERED_MOUTH = 5

# Init MediaPipe
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, refine_landmarks=True, min_detection_confidence=0.5)

mp_hands = mp.solutions.hands
hands_detector = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)

def calc_dist(p1, p2):
    """Return the 2-D Euclidean distance between two points with ``.x``/``.y``."""
    first = np.array([p1.x, p1.y])
    second = np.array([p2.x, p2.y])
    return np.linalg.norm(first - second)

def get_bbox_str(landmarks, h, w):
    """Format the padded bounding box of ``landmarks`` as a YOLO box string.

    The landmark ``.x``/``.y`` values are used as-is; each axis range is
    widened by 20% of its extent on both sides and clipped to [0, 1].
    ``h`` and ``w`` are accepted but not used by this function.

    Returns:
        "x_center y_center width height", each with six decimals.
    """
    def _padded_range(values):
        # Extent of one axis, widened by 20% per side and clipped to [0, 1].
        lo, hi = min(values), max(values)
        margin = (hi - lo) * 0.2
        return max(0, lo - margin), min(1, hi + margin)

    left, right = _padded_range([pt.x for pt in landmarks])
    top, bottom = _padded_range([pt.y for pt in landmarks])

    cx = (left + right) / 2
    cy = (top + bottom) / 2
    return f"{cx:.6f} {cy:.6f} {right - left:.6f} {bottom - top:.6f}"

def is_region_dark(image, landmarks, h, w):
    """Tell whether the landmark region (an eye) is dark, e.g. behind sunglasses.

    The landmark ``.x``/``.y`` values are scaled by ``w``/``h`` to pixel
    coordinates, their bounding box is clipped to the image, and the mean
    grayscale value of that crop is compared with ``DARKNESS_THRESH``.

    Returns:
        False when the clipped box is empty; otherwise whether the mean
        brightness is below ``DARKNESS_THRESH``.
    """
    px = [int(pt.x * w) for pt in landmarks]
    py = [int(pt.y * h) for pt in landmarks]

    # Bounding box of the region, kept inside the image.
    left = max(0, min(px))
    top = max(0, min(py))
    right = min(w, max(px))
    bottom = min(h, max(py))

    if right <= left or bottom <= top:
        return False

    patch = cv2.cvtColor(image[top:bottom, left:right], cv2.COLOR_BGR2GRAY)
    # A very low mean brightness is treated as black/dark.
    return np.mean(patch) < DARKNESS_THRESH

def check_head_pose(landmarks):
    """Classify the head pose as 'frontal', 'left' or 'right'.

    Compares the nose-to-cheek distance on both sides (landmark 1 against
    landmarks 234 and 454). Returns 'left' when the 234-side distance is less
    than 0.2x the 454-side distance, 'right' when it is more than 5x, and
    'frontal' otherwise, including when the 454-side distance is zero.
    """
    nose_tip = landmarks[1]
    to_left = calc_dist(nose_tip, landmarks[234])
    to_right = calc_dist(nose_tip, landmarks[454])

    # Guard against dividing by zero.
    if to_right == 0:
        return 'frontal'

    balance = to_left / to_right
    if balance < 0.2:
        return 'left'    # 234-side cheek is close to the nose
    if balance > 5.0:
        return 'right'
    return 'frontal'

def is_hand_covering_mouth(face_lms, hand_results):
    """Tell whether any detected hand overlaps the mouth area.

    The mouth area is the bounding box of six mouth landmarks, widened by
    0.05 on every side. A hand counts as covering the mouth when at least one
    of the checked hand landmarks lies strictly inside that box.

    Returns:
        False when ``hand_results.multi_hand_landmarks`` is empty or None.
    """
    hands = hand_results.multi_hand_landmarks
    if not hands:
        return False

    mouth_pts = [face_lms[i] for i in (61, 291, 39, 181, 0, 17)]
    xs = [pt.x for pt in mouth_pts]
    ys = [pt.y for pt in mouth_pts]
    x_lo, x_hi = min(xs) - 0.05, max(xs) + 0.05
    y_lo, y_hi = min(ys) - 0.05, max(ys) + 0.05

    # Hand landmark indices that are tested against the mouth box.
    probe_ids = (0, 5, 9, 13, 17, 8, 12, 16, 20)
    return any(
        x_lo < hand.landmark[k].x < x_hi and y_lo < hand.landmark[k].y < y_hi
        for hand in hands
        for k in probe_ids
    )

def _eye_label(frame, landmarks, idxs, h, w):
    """Return the YOLO label line for one eye, or None when it must be skipped.

    ``idxs`` is ordered as [corner, corner, top1, bottom1, top2, bottom2].
    The eye is skipped when its region is dark (sunglasses check) or when the
    corner-to-corner width is zero, which would give an undefined EAR and a
    zero-width box.
    """
    pts = [landmarks[i] for i in idxs]
    if is_region_dark(frame, pts, h, w):
        return None

    corner_a, corner_b, top_1, bottom_1, top_2, bottom_2 = pts
    eye_width = calc_dist(corner_a, corner_b)
    if eye_width == 0:
        return None

    ear = (calc_dist(top_1, bottom_1) + calc_dist(top_2, bottom_2)) / (2 * eye_width)
    cls = CLS_C_EYES if ear < EAR_THRESH else CLS_O_EYES
    return f"{cls} {get_bbox_str(pts, h, w)}"


def _mouth_label(landmarks, results_hands, h, w):
    """Return the YOLO label line for the mouth, or None for degenerate landmarks.

    A hand over the mouth gives CLS_COVERED_MOUTH; otherwise the mouth aspect
    ratio (MAR) decides between open and closed.
    """
    mouth_pts = [landmarks[i] for i in (61, 291, 39, 181, 0, 17)]

    if is_hand_covering_mouth(landmarks, results_hands):
        return f"{CLS_COVERED_MOUTH} {get_bbox_str(mouth_pts, h, w)}"

    mouth_w = calc_dist(landmarks[61], landmarks[291])
    if mouth_w == 0:
        return None
    mar = calc_dist(landmarks[13], landmarks[14]) / mouth_w
    cls = CLS_O_MOUTH if mar > MAR_THRESH else CLS_C_MOUTH
    return f"{cls} {get_bbox_str(mouth_pts, h, w)}"


def process_dataset():
    """Build a YOLO dataset (images + label files) from the raw training videos.

    Every FRAME_INTERVAL-th frame of each video under ROOT_TRAIN_DIR is
    contrast-enhanced with CLAHE and passed to MediaPipe FaceMesh and Hands.
    Frames without a face are skipped. For the rest, face / eye / mouth labels
    are written to OUTPUT_BASE_DIR/labels/<split> and the ORIGINAL
    (non-enhanced) frame to OUTPUT_BASE_DIR/images/<split>. Videos whose
    subject folder is in VAL_SUBJECTS go to "val", all others to "train".
    """
    for split in ['train', 'val']:
        os.makedirs(os.path.join(OUTPUT_BASE_DIR, "images", split), exist_ok=True)
        os.makedirs(os.path.join(OUTPUT_BASE_DIR, "labels", split), exist_ok=True)

    video_files = glob.glob(os.path.join(ROOT_TRAIN_DIR, "**", "*.*"), recursive=True)
    # Keep only video files
    video_files = [f for f in video_files if f.lower().endswith(('.mp4', '.avi', '.mov', '.mkv'))]

    print(f"Total Video: {len(video_files)}")

    left_idxs = [33, 133, 160, 144, 158, 153]
    right_idxs = [362, 263, 385, 380, 387, 373]
    # One CLAHE object is enough; it does not need to be rebuilt per frame.
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))

    for video_path in tqdm(video_files):
        # Expected layout: data/raw/train/<category>/<subject>/<video>.
        # Adjust the indices below if the folder structure differs.
        path_parts = os.path.normpath(video_path).split(os.sep)
        try:
            category = path_parts[-3]
            subject = path_parts[-2]
        except IndexError:
            category = "unknown"
            subject = "unknown"

        filename = os.path.splitext(os.path.basename(video_path))[0]

        split_type = "val" if subject in VAL_SUBJECTS else "train"
        img_out_dir = os.path.join(OUTPUT_BASE_DIR, "images", split_type)
        lbl_out_dir = os.path.join(OUTPUT_BASE_DIR, "labels", split_type)
        file_prefix = f"{split_type}_{category}_{subject}_{filename}"

        cap = cv2.VideoCapture(video_path)
        frame_count = 0

        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame_count += 1
                if frame_count % FRAME_INTERVAL != 0:
                    continue

                h, w, _ = frame.shape

                # 1. CLAHE on the L channel (LAB) to help MediaPipe in dark footage
                lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
                l_chan, a_chan, b_chan = cv2.split(lab)
                enhanced_lab = cv2.merge((clahe.apply(l_chan), a_chan, b_chan))
                frame_enhanced = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
                frame_rgb_enhanced = cv2.cvtColor(frame_enhanced, cv2.COLOR_BGR2RGB)

                # Run MediaPipe on the enhanced image.
                # NOTE(review): face_mesh / hands_detector are module-level
                # objects shared across all videos; confirm that carrying
                # their state between videos is acceptable.
                results_face = face_mesh.process(frame_rgb_enhanced)
                results_hands = hands_detector.process(frame_rgb_enhanced)

                if not results_face.multi_face_landmarks:
                    continue

                landmarks = results_face.multi_face_landmarks[0].landmark

                # --- MAIN LOGIC ---

                # 1. Face (always labelled)
                labels = [f"{CLS_FACE} {get_bbox_str(landmarks, h, w)}"]

                # 2. Head pose (side view)
                pose = check_head_pose(landmarks)

                # 3. Eyes, filtered by pose and by the sunglasses check.
                # NOTE(review): left_idxs presumably sit on the same side of
                # the mesh as cheek landmark 234. 'left' means the
                # nose-to-234 distance is the SHORT one, which in a 2-D
                # projection may mean that side is the foreshortened one.
                # Verify that the eye kept here is the visible one.
                if pose in ['frontal', 'left']:
                    left_eye = _eye_label(frame, landmarks, left_idxs, h, w)
                    if left_eye is not None:
                        labels.append(left_eye)

                if pose in ['frontal', 'right']:
                    right_eye = _eye_label(frame, landmarks, right_idxs, h, w)
                    if right_eye is not None:
                        labels.append(right_eye)

                # 4. Mouth (covered-mouth check; no pose filtering is applied)
                mouth = _mouth_label(landmarks, results_hands, h, w)
                if mouth is not None:
                    labels.append(mouth)

                # --- Save ---
                save_name = f"{file_prefix}_fr{frame_count}"
                # The ORIGINAL frame (not the enhanced one) is saved so the
                # model is trained on the real lighting variation.
                cv2.imwrite(os.path.join(img_out_dir, save_name + ".jpg"), frame)

                with open(os.path.join(lbl_out_dir, save_name + ".txt"), "w") as f:
                    f.write("\n".join(labels))
        finally:
            # Release the capture even if processing a frame raised.
            cap.release()

    print("Selesai! Dataset Advanced Siap.")

# Build the dataset only when this code is executed directly, not when imported.
if __name__ == "__main__":
    process_dataset()
```

### Penjelasan Fitur di Script Ini:

1.  **CLAHE (`cv2.createCLAHE`)**:
    *   Meningkatkan kecerahan/kontras gambar sebelum masuk MediaPipe. Ini membantu mendeteksi wajah di video gelap.
    *   *Catatan:* Saya menyimpan gambar **ASLI** (`frame`) ke dataset, bukan gambar enhanced. Kenapa? Supaya YOLO belajar melihat dalam gelap. Enhanced image hanya dipakai untuk membantu MediaPipe membuat label (koordinat) yang akurat.

2.  **`check_head_pose` (Side View Logic)**:
    *   Jika wajah menoleh ke Kiri, script **TIDAK AKAN** membuat label untuk Mata Kanan (karena mungkin tersembunyi/distorsi).
    *   Ini mencegah model belajar dari data sampah.

3.  **`is_region_dark` (Sunglasses Logic)**:
    *   Mengecek rata-rata warna pixel di dalam kotak mata. Jika terlalu gelap (< threshold), script **TIDAK AKAN** membuat label mata.
    *   Jadi untuk orang berkacamata hitam, labelnya hanya `face` dan `mouth`.

4.  **`is_hand_covering_mouth`**:
    *   Melabeli `covered_mouth` jika tangan terdeteksi di depan bibir.

Script ini adalah "pabrik data" yang cerdas. Jalankan ini, dan dataset YOLO Anda akan berkualitas sangat tinggi.

In [None]:
import cv2
import mediapipe as mp
import os
import glob
import numpy as np
from tqdm import tqdm

# --- CONFIGURATION ---
ROOT_TRAIN_DIR = "data/raw/train/" 
OUTPUT_BASE_DIR = "data/yolo_dataset_advanced/"

# Only every FRAME_INTERVAL-th frame of a video is processed.
FRAME_INTERVAL = 10
# Videos of these subjects go to the "val" split; all others go to "train".
VAL_SUBJECTS = ['subject_13', 'subject_14', 'subject_15']

# Thresholds
EAR_THRESH = 0.25
MAR_THRESH = 0.50
DARKNESS_THRESH = 40  # Pixel darkness cut-off (0-255) used to detect sunglasses

# Class ID
CLS_FACE = 0
CLS_O_EYES = 1
CLS_C_EYES = 2
CLS_O_MOUTH = 3
CLS_C_MOUTH = 4
CLS_COVERED_MOUTH = 5

# Init MediaPipe
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, refine_landmarks=True, min_detection_confidence=0.5)

mp_hands = mp.solutions.hands
hands_detector = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)

def calc_dist(p1, p2):
    """Return the 2-D Euclidean distance between two points with ``.x``/``.y``."""
    first = np.array([p1.x, p1.y])
    second = np.array([p2.x, p2.y])
    return np.linalg.norm(first - second)

def get_bbox_str(landmarks, h, w):
    """Format the padded bounding box of ``landmarks`` as a YOLO box string.

    The landmark ``.x``/``.y`` values are used as-is; each axis range is
    widened by 20% of its extent on both sides and clipped to [0, 1].
    ``h`` and ``w`` are accepted but not used by this function.

    Returns:
        "x_center y_center width height", each with six decimals.
    """
    def _padded_range(values):
        # Extent of one axis, widened by 20% per side and clipped to [0, 1].
        lo, hi = min(values), max(values)
        margin = (hi - lo) * 0.2
        return max(0, lo - margin), min(1, hi + margin)

    left, right = _padded_range([pt.x for pt in landmarks])
    top, bottom = _padded_range([pt.y for pt in landmarks])

    cx = (left + right) / 2
    cy = (top + bottom) / 2
    return f"{cx:.6f} {cy:.6f} {right - left:.6f} {bottom - top:.6f}"

def is_region_dark(image, landmarks, h, w):
    """Tell whether the landmark region (an eye) is dark, e.g. behind sunglasses.

    The landmark ``.x``/``.y`` values are scaled by ``w``/``h`` to pixel
    coordinates, their bounding box is clipped to the image, and the mean
    grayscale value of that crop is compared with ``DARKNESS_THRESH``.

    Returns:
        False when the clipped box is empty; otherwise whether the mean
        brightness is below ``DARKNESS_THRESH``.
    """
    px = [int(pt.x * w) for pt in landmarks]
    py = [int(pt.y * h) for pt in landmarks]

    # Bounding box of the region, kept inside the image.
    left = max(0, min(px))
    top = max(0, min(py))
    right = min(w, max(px))
    bottom = min(h, max(py))

    if right <= left or bottom <= top:
        return False

    patch = cv2.cvtColor(image[top:bottom, left:right], cv2.COLOR_BGR2GRAY)
    # A very low mean brightness is treated as black/dark.
    return np.mean(patch) < DARKNESS_THRESH

def check_head_pose(landmarks):
    """Classify the head pose as 'frontal', 'left' or 'right'.

    Compares the nose-to-cheek distance on both sides (landmark 1 against
    landmarks 234 and 454). Returns 'left' when the 234-side distance is less
    than 0.2x the 454-side distance, 'right' when it is more than 5x, and
    'frontal' otherwise, including when the 454-side distance is zero.
    """
    nose_tip = landmarks[1]
    to_left = calc_dist(nose_tip, landmarks[234])
    to_right = calc_dist(nose_tip, landmarks[454])

    # Guard against dividing by zero.
    if to_right == 0:
        return 'frontal'

    balance = to_left / to_right
    if balance < 0.2:
        return 'left'    # 234-side cheek is close to the nose
    if balance > 5.0:
        return 'right'
    return 'frontal'

def is_hand_covering_mouth(face_lms, hand_results):
    """Tell whether any detected hand overlaps the mouth area.

    The mouth area is the bounding box of six mouth landmarks, widened by
    0.05 on every side. A hand counts as covering the mouth when at least one
    of the checked hand landmarks lies strictly inside that box.

    Returns:
        False when ``hand_results.multi_hand_landmarks`` is empty or None.
    """
    hands = hand_results.multi_hand_landmarks
    if not hands:
        return False

    mouth_pts = [face_lms[i] for i in (61, 291, 39, 181, 0, 17)]
    xs = [pt.x for pt in mouth_pts]
    ys = [pt.y for pt in mouth_pts]
    x_lo, x_hi = min(xs) - 0.05, max(xs) + 0.05
    y_lo, y_hi = min(ys) - 0.05, max(ys) + 0.05

    # Hand landmark indices that are tested against the mouth box.
    probe_ids = (0, 5, 9, 13, 17, 8, 12, 16, 20)
    return any(
        x_lo < hand.landmark[k].x < x_hi and y_lo < hand.landmark[k].y < y_hi
        for hand in hands
        for k in probe_ids
    )

def _eye_label(frame, landmarks, idxs, h, w):
    """Return the YOLO label line for one eye, or None when it must be skipped.

    ``idxs`` is ordered as [corner, corner, top1, bottom1, top2, bottom2].
    The eye is skipped when its region is dark (sunglasses check) or when the
    corner-to-corner width is zero, which would give an undefined EAR and a
    zero-width box.
    """
    pts = [landmarks[i] for i in idxs]
    if is_region_dark(frame, pts, h, w):
        return None

    corner_a, corner_b, top_1, bottom_1, top_2, bottom_2 = pts
    eye_width = calc_dist(corner_a, corner_b)
    if eye_width == 0:
        return None

    ear = (calc_dist(top_1, bottom_1) + calc_dist(top_2, bottom_2)) / (2 * eye_width)
    cls = CLS_C_EYES if ear < EAR_THRESH else CLS_O_EYES
    return f"{cls} {get_bbox_str(pts, h, w)}"


def _mouth_label(landmarks, results_hands, h, w):
    """Return the YOLO label line for the mouth, or None for degenerate landmarks.

    A hand over the mouth gives CLS_COVERED_MOUTH; otherwise the mouth aspect
    ratio (MAR) decides between open and closed.
    """
    mouth_pts = [landmarks[i] for i in (61, 291, 39, 181, 0, 17)]

    if is_hand_covering_mouth(landmarks, results_hands):
        return f"{CLS_COVERED_MOUTH} {get_bbox_str(mouth_pts, h, w)}"

    mouth_w = calc_dist(landmarks[61], landmarks[291])
    if mouth_w == 0:
        return None
    mar = calc_dist(landmarks[13], landmarks[14]) / mouth_w
    cls = CLS_O_MOUTH if mar > MAR_THRESH else CLS_C_MOUTH
    return f"{cls} {get_bbox_str(mouth_pts, h, w)}"


def process_dataset():
    """Build a YOLO dataset (images + label files) from the raw training videos.

    Every FRAME_INTERVAL-th frame of each video under ROOT_TRAIN_DIR is
    contrast-enhanced with CLAHE and passed to MediaPipe FaceMesh and Hands.
    Frames without a face are skipped. For the rest, face / eye / mouth labels
    are written to OUTPUT_BASE_DIR/labels/<split> and the ORIGINAL
    (non-enhanced) frame to OUTPUT_BASE_DIR/images/<split>. Videos whose
    subject folder is in VAL_SUBJECTS go to "val", all others to "train".
    """
    for split in ['train', 'val']:
        os.makedirs(os.path.join(OUTPUT_BASE_DIR, "images", split), exist_ok=True)
        os.makedirs(os.path.join(OUTPUT_BASE_DIR, "labels", split), exist_ok=True)

    video_files = glob.glob(os.path.join(ROOT_TRAIN_DIR, "**", "*.*"), recursive=True)
    # Keep only video files
    video_files = [f for f in video_files if f.lower().endswith(('.mp4', '.avi', '.mov', '.mkv'))]

    print(f"Total Video: {len(video_files)}")

    left_idxs = [33, 133, 160, 144, 158, 153]
    right_idxs = [362, 263, 385, 380, 387, 373]
    # One CLAHE object is enough; it does not need to be rebuilt per frame.
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))

    for video_path in tqdm(video_files):
        # Expected layout: data/raw/train/<category>/<subject>/<video>.
        # Adjust the indices below if the folder structure differs.
        path_parts = os.path.normpath(video_path).split(os.sep)
        try:
            category = path_parts[-3]
            subject = path_parts[-2]
        except IndexError:
            category = "unknown"
            subject = "unknown"

        filename = os.path.splitext(os.path.basename(video_path))[0]

        split_type = "val" if subject in VAL_SUBJECTS else "train"
        img_out_dir = os.path.join(OUTPUT_BASE_DIR, "images", split_type)
        lbl_out_dir = os.path.join(OUTPUT_BASE_DIR, "labels", split_type)
        file_prefix = f"{split_type}_{category}_{subject}_{filename}"

        cap = cv2.VideoCapture(video_path)
        frame_count = 0

        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame_count += 1
                if frame_count % FRAME_INTERVAL != 0:
                    continue

                h, w, _ = frame.shape

                # 1. CLAHE on the L channel (LAB) to help MediaPipe in dark footage
                lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
                l_chan, a_chan, b_chan = cv2.split(lab)
                enhanced_lab = cv2.merge((clahe.apply(l_chan), a_chan, b_chan))
                frame_enhanced = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
                frame_rgb_enhanced = cv2.cvtColor(frame_enhanced, cv2.COLOR_BGR2RGB)

                # Run MediaPipe on the enhanced image.
                # NOTE(review): face_mesh / hands_detector are module-level
                # objects shared across all videos; confirm that carrying
                # their state between videos is acceptable.
                results_face = face_mesh.process(frame_rgb_enhanced)
                results_hands = hands_detector.process(frame_rgb_enhanced)

                if not results_face.multi_face_landmarks:
                    continue

                landmarks = results_face.multi_face_landmarks[0].landmark

                # --- MAIN LOGIC ---

                # 1. Face (always labelled)
                labels = [f"{CLS_FACE} {get_bbox_str(landmarks, h, w)}"]

                # 2. Head pose (side view)
                pose = check_head_pose(landmarks)

                # 3. Eyes, filtered by pose and by the sunglasses check.
                # NOTE(review): left_idxs presumably sit on the same side of
                # the mesh as cheek landmark 234. 'left' means the
                # nose-to-234 distance is the SHORT one, which in a 2-D
                # projection may mean that side is the foreshortened one.
                # Verify that the eye kept here is the visible one.
                if pose in ['frontal', 'left']:
                    left_eye = _eye_label(frame, landmarks, left_idxs, h, w)
                    if left_eye is not None:
                        labels.append(left_eye)

                if pose in ['frontal', 'right']:
                    right_eye = _eye_label(frame, landmarks, right_idxs, h, w)
                    if right_eye is not None:
                        labels.append(right_eye)

                # 4. Mouth (covered-mouth check; no pose filtering is applied)
                mouth = _mouth_label(landmarks, results_hands, h, w)
                if mouth is not None:
                    labels.append(mouth)

                # --- Save ---
                save_name = f"{file_prefix}_fr{frame_count}"
                # The ORIGINAL frame (not the enhanced one) is saved so the
                # model is trained on the real lighting variation.
                cv2.imwrite(os.path.join(img_out_dir, save_name + ".jpg"), frame)

                with open(os.path.join(lbl_out_dir, save_name + ".txt"), "w") as f:
                    f.write("\n".join(labels))
        finally:
            # Release the capture even if processing a frame raised.
            cap.release()

    print("Selesai! Dataset Advanced Siap.")

# Build the dataset only when this code is executed directly, not when imported.
if __name__ == "__main__":
    process_dataset()

### **KODE YANG SUDAH FINAL**

In [None]:
import cv2
import mediapipe as mp
import os
import glob
import numpy as np
from tqdm import tqdm

# ==========================================
# MAIN CONFIGURATION
# ==========================================
ROOT_TRAIN_DIR = "data/raw/train/" 
OUTPUT_BASE_DIR = "data/processed" 

FRAME_INTERVAL = 5 
VAL_SUBJECTS = ['subject_13', 'subject_14', 'subject_15'] 

EAR_THRESH = 0.10
MAR_THRESH = 0.50

# --- POSE & OCCLUSION SETTINGS ---
SIDE_VIEW_RATIO_THRESH = 0.40 
HEAD_DOWN_RATIO_THRESH = 0.35
# If eye brightness < 50% of cheek brightness -> sunglasses
SUNGLASSES_RATIO_THRESH = 0.50
MASK_HIST_DIFF_THRESH = 0.50    # Limit for the forehead-vs-chin colour difference (0=same, 1=very different) 

# --- CLASS DEFINITIONS (7 CLASSES) ---
CLS_FACE = 0
CLS_O_EYES = 1
CLS_C_EYES = 2
CLS_O_MOUTH = 3
CLS_C_MOUTH = 4
CLS_COVERED_MOUTH = 5
CLS_SUNGLASSES = 6  

# ==========================================
# MEDIAPIPE INITIALISATION
# ==========================================
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False, max_num_faces=1, 
    refine_landmarks=True, min_detection_confidence=0.5
)

mp_hands = mp.solutions.hands
hands_detector = mp_hands.Hands(
    static_image_mode=False, max_num_hands=2,
    min_detection_confidence=0.5
)




# ==========================================
#             FUNGSI UTILITAS
# ==========================================

def apply_clahe(image):
    """Return a lighting-corrected copy of a BGR image.

    The image is converted to LAB, CLAHE (clipLimit=2.0, 8x8 tiles) is applied
    to the L channel only, and the result is converted back to BGR.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    merged = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)

def calc_dist(p1, p2):
    """Return the 2-D Euclidean distance between two points with ``.x``/``.y``."""
    first = np.array([p1.x, p1.y])
    second = np.array([p2.x, p2.y])
    return np.linalg.norm(first - second)

def get_bbox_str(landmarks, h, w):
    """Format the padded bounding box of ``landmarks`` as a YOLO box string.

    The landmark ``.x``/``.y`` values are used as-is; each axis range is
    widened by 20% of its extent on both sides and clipped to [0, 1].
    ``h`` and ``w`` are accepted but not used by this function.

    Returns:
        "x_center y_center width height", each with six decimals.
    """
    def _padded_range(values):
        # Extent of one axis, widened by 20% per side and clipped to [0, 1].
        lo, hi = min(values), max(values)
        margin = (hi - lo) * 0.2
        return max(0, lo - margin), min(1, hi + margin)

    left, right = _padded_range([pt.x for pt in landmarks])
    top, bottom = _padded_range([pt.y for pt in landmarks])

    cx = (left + right) / 2
    cy = (top + bottom) / 2

    return f"{cx:.6f} {cy:.6f} {right - left:.6f} {bottom - top:.6f}"

def get_roi_brightness(image, landmarks, indices, h, w):
    """Menghitung rata-rata kecerahan (V channel HSV) pada area landmark tertentu"""
    try:
        x_c = [int(landmarks[i].x * w) for i in indices]
        y_c = [int(landmarks[i].y * h) for i in indices]
        x_min, x_max = min(x_c), max(x_c)
        y_min, y_max = min(y_c), max(y_c)
        
        # Crop area
        roi = image[y_min:y_max, x_min:x_max]
        if roi.size == 0: return 0
        
        # Convert ke HSV dan ambil V (Brightness)
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        brightness = np.mean(hsv[:, :, 2])
        return brightness
    except:
        return 0

def is_wearing_sunglasses(image, landmarks, h, w):
    """Cek Kacamata Hitam (Pixel Brightness Ratio)"""
    try:
        # Area Mata Kiri & Pipi Kiri
        eye_idx = [33, 133, 159, 145]
        cheek_idx = [234, 93, 132, 58]
        
        def get_val(indices):
            x_c = [int(landmarks[i].x * w) for i in indices]
            y_c = [int(landmarks[i].y * h) for i in indices]
            roi = image[min(y_c):max(y_c), min(x_c):max(x_c)]
            if roi.size == 0: return 0
            return np.mean(cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)[:,:,2])

        eye_v = get_val(eye_idx)
        cheek_v = get_val(cheek_idx)
        
        if cheek_v > 50 and (eye_v / cheek_v) < SUNGLASSES_RATIO_THRESH:
            return True
        return False
    except: return False

def analyze_head_pose(landmarks):
    """Classify the head pose as head-down and/or turned to one side.

    Uses landmark 1 (nose), 10 (forehead), 152 (chin), 234 (left cheek) and
    454 (right cheek), measuring distances with ``calc_dist``.

    Args:
        landmarks: Indexable face landmarks accepted by ``calc_dist``.

    Returns:
        A tuple ``(is_head_down, side_status)`` where ``is_head_down`` is a
        bool and ``side_status`` is one of ``'frontal'``, ``'look_left'`` or
        ``'look_right'``.
    """
    nose_tip, brow_pt, chin_pt = landmarks[1], landmarks[10], landmarks[152]
    cheek_l, cheek_r = landmarks[234], landmarks[454]

    # 1. Head-down check: nose-to-chin length relative to the full face height.
    full_len = calc_dist(brow_pt, chin_pt)
    lower_len = calc_dist(nose_tip, chin_pt)
    head_down = False
    if full_len > 0 and (lower_len / full_len) < HEAD_DOWN_RATIO_THRESH:
        head_down = True

    # 2. Yaw check: compare the nose-to-cheek distance on each side.
    to_left = calc_dist(nose_tip, cheek_l)
    to_right = calc_dist(nose_tip, cheek_r)
    if not (to_right > 0 and to_left > 0):
        orientation = 'frontal'
    elif to_left < to_right * SIDE_VIEW_RATIO_THRESH:
        orientation = 'look_left'
    elif to_right < to_left * SIDE_VIEW_RATIO_THRESH:
        orientation = 'look_right'
    else:
        orientation = 'frontal'

    return head_down, orientation

def is_mouth_occluded(image, landmarks, hand_results, h, w):
    """Check whether the mouth is covered by a hand or by a mask.

    Two independent checks are performed:
      1. Hand (geometry): any selected hand landmark falls inside the mouth
         bounding box, enlarged by a tolerance of 0.05 in landmark units.
      2. Mask (colour): the Hue/Saturation histograms of the forehead and chin
         regions differ by more than MASK_HIST_DIFF_THRESH (Bhattacharyya
         distance).

    Args:
        image: BGR frame indexed as [row, column, channel].
        landmarks: Indexable face landmarks exposing ``.x`` and ``.y``.
        hand_results: Hand-detection result exposing ``multi_hand_landmarks``.
        h: Frame height in pixels.
        w: Frame width in pixels.

    Returns:
        True when either check reports an occlusion, otherwise False.
    """
    mouth_idxs = [61, 291, 39, 181, 0, 17]

    # --- CHECK 1: HAND ---
    if hand_results.multi_hand_landmarks:
        m_x = [landmarks[i].x for i in mouth_idxs]
        m_y = [landmarks[i].y for i in mouth_idxs]
        min_mx, max_mx = min(m_x), max(m_x)
        min_my, max_my = min(m_y), max(m_y)
        tol = 0.05

        # Fingertips & knuckles (original author's description of these ids)
        hand_point_idxs = (0, 5, 9, 13, 17, 8, 12, 16, 20)
        for hand_lms in hand_results.multi_hand_landmarks:
            for idx in hand_point_idxs:
                point = hand_lms.landmark[idx]
                if (min_mx - tol < point.x < max_mx + tol) and (min_my - tol < point.y < max_my + tol):
                    return True  # Touched by a hand

    # --- CHECK 2: MASK (colour) ---
    # Forehead vs chin
    forehead_idx = [10, 338, 297, 67]
    chin_idx = [164, 18, 200, 152]

    def get_roi(indices):
        """Crop the landmarks' bounding box, clamped to the frame."""
        xs = [int(landmarks[i].x * w) for i in indices]
        ys = [int(landmarks[i].y * h) for i in indices]
        # Clamp so out-of-frame landmarks cannot produce negative slice bounds
        # (which NumPy would interpret as offsets from the end).
        x0, x1 = min(max(min(xs), 0), w), min(max(max(xs), 0), w)
        y0, y1 = min(max(min(ys), 0), h), min(max(max(ys), 0), h)
        return image[y0:y1, x0:x1]

    try:
        f_roi = get_roi(forehead_idx)
        c_roi = get_roi(chin_idx)
        if f_roi.size == 0 or c_roi.size == 0:
            return False

        f_hsv = cv2.cvtColor(f_roi, cv2.COLOR_BGR2HSV)
        c_hsv = cv2.cvtColor(c_roi, cv2.COLOR_BGR2HSV)

        # Compare Hue & Saturation
        hist_f = cv2.calcHist([f_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
        hist_c = cv2.calcHist([c_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
        cv2.normalize(hist_f, hist_f, 0, 1, cv2.NORM_MINMAX)
        cv2.normalize(hist_c, hist_c, 0, 1, cv2.NORM_MINMAX)

        diff = cv2.compareHist(hist_f, hist_c, cv2.HISTCMP_BHATTACHARYYA)
    except (IndexError, ValueError, TypeError, AttributeError, cv2.error):
        # Best-effort check: if the regions cannot be analysed, report
        # "not occluded" instead of aborting the frame. Unrelated errors
        # (e.g. a missing threshold constant) are deliberately not swallowed.
        return False

    # A large colour difference between forehead and chin suggests a mask.
    return bool(diff > MASK_HIST_DIFF_THRESH)

def save_data(img_dir, lbl_dir, prefix, count, img, labels):
    """Write one frame and its label file as ``<prefix>_fr<count>.jpg/.txt``.

    Args:
        img_dir: Directory that receives the JPEG image.
        lbl_dir: Directory that receives the label text file.
        prefix: File-name prefix shared by the image and the label file.
        count: Frame number appended to the prefix.
        img: Image passed to ``cv2.imwrite``.
        labels: Iterable of label strings, written one per line.

    Returns:
        True when both files were written, False when the image could not be
        written (in which case no label file is created).
    """
    save_name = f"{prefix}_fr{count}"
    img_path = os.path.join(img_dir, save_name + ".jpg")

    # cv2.imwrite signals failure through its return value rather than an
    # exception; without this check a label file would be written for an
    # image that does not exist.
    if not cv2.imwrite(img_path, img):
        print(f"WARNING: failed to write image '{img_path}', label file skipped")
        return False

    with open(os.path.join(lbl_dir, save_name + ".txt"), "w") as f:
        f.write("\n".join(labels))
    return True







# ==========================================
# MAIN PROCESS
# ==========================================
def _ratio_or_none(numerator, denominator):
    """Return numerator / denominator, or None when the denominator is not positive."""
    if denominator > 0:
        return numerator / denominator
    return None


def process_dataset():
    """Sample frames from the source videos and write a labelled dataset.

    For every video found under ROOT_TRAIN_DIR, every FRAME_INTERVAL-th frame
    is enhanced with ``apply_clahe``, run through the face-mesh and hand
    detectors, and saved via ``save_data`` together with one label line per
    detected object (face, eyes/sunglasses, mouth). Frames without a detected
    face are skipped. The subject folder name decides whether a video goes to
    the ``train`` or ``val`` split (membership in VAL_SUBJECTS).

    Eye or mouth labels are omitted for a frame when the landmark distance
    used as the ratio denominator is zero, because the aspect ratio is then
    undefined.
    """
    # Create the output folder structure
    for split in ['train', 'val']:
        os.makedirs(os.path.join(OUTPUT_BASE_DIR, "images", split), exist_ok=True)
        os.makedirs(os.path.join(OUTPUT_BASE_DIR, "labels", split), exist_ok=True)

    # Find the videos
    valid_exts = {'.mp4', '.avi', '.mov', '.mkv'}
    candidates = glob.glob(os.path.join(ROOT_TRAIN_DIR, "**", "*.*"), recursive=True)
    video_files = [f for f in candidates if os.path.splitext(f)[1].lower() in valid_exts]

    print(f"Memproses {len(video_files)} video...")

    # Landmark index sets are constant, so build them once instead of per frame.
    left_idxs = [33, 133, 160, 144, 158, 153]
    right_idxs = [362, 263, 385, 380, 387, 373]
    mouth_idxs = [61, 291, 39, 181, 0, 17]

    for video_path in tqdm(video_files):
        # The two parent folders of the video are used as category and subject.
        path_parts = video_path.split(os.sep)
        category = path_parts[-3]
        subject = path_parts[-2]
        filename = os.path.splitext(os.path.basename(video_path))[0]

        # Decide the split
        split_type = "val" if subject in VAL_SUBJECTS else "train"
        img_out_dir = os.path.join(OUTPUT_BASE_DIR, "images", split_type)
        lbl_out_dir = os.path.join(OUTPUT_BASE_DIR, "labels", split_type)
        file_prefix = f"{split_type}_{category}_{subject}_{filename}"

        cap = cv2.VideoCapture(video_path)
        frame_count = 0

        # try/finally guarantees the capture is released even if a frame
        # raises part-way through the video.
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame_count += 1
                if frame_count % FRAME_INTERVAL != 0:
                    continue

                h, w, _ = frame.shape

                # 1. Preprocessing
                frame_enhanced = apply_clahe(frame)
                frame_rgb = cv2.cvtColor(frame_enhanced, cv2.COLOR_BGR2RGB)

                # 2. MediaPipe inference
                results_face = face_mesh.process(frame_rgb)
                results_hands = hands_detector.process(frame_rgb)

                if not results_face.multi_face_landmarks:
                    continue
                landmarks = results_face.multi_face_landmarks[0].landmark

                def bbox_for(indices):
                    """Bounding-box string for the given landmark subset."""
                    return get_bbox_str([landmarks[i] for i in indices], h, w)

                labels = []

                # 3. FACE label (always present)
                labels.append(f"{CLS_FACE} {get_bbox_str(landmarks, h, w)}")

                # 4. Head pose check
                is_head_down, side_status = analyze_head_pose(landmarks)

                if is_head_down:
                    # Head strongly tilted down: do not label eyes/mouth
                    # (the data would not be valid).
                    save_data(img_out_dir, lbl_out_dir, file_prefix, frame_count, frame_enhanced, labels)
                    continue

                # 5. EYE labels (sunglasses & side-view checks)
                if is_wearing_sunglasses(frame_enhanced, landmarks, h, w):
                    # Sunglasses -> label both eye regions as sunglasses.
                    # NOTE(review): unlike the branch below, this labels both
                    # eyes even in a side view - confirm that is intended.
                    labels.append(f"{CLS_SUNGLASSES} {bbox_for(left_idxs)}")
                    labels.append(f"{CLS_SUNGLASSES} {bbox_for(right_idxs)}")
                else:
                    # Normal -> compute the eye aspect ratio (EAR) per eye.
                    # A zero eye width would divide by zero, so the ratio is
                    # None in that case and the eye is left unlabelled.
                    l_ear = _ratio_or_none(
                        calc_dist(landmarks[160], landmarks[144]) + calc_dist(landmarks[158], landmarks[153]),
                        2 * calc_dist(landmarks[33], landmarks[133]),
                    )
                    r_ear = _ratio_or_none(
                        calc_dist(landmarks[385], landmarks[380]) + calc_dist(landmarks[387], landmarks[373]),
                        2 * calc_dist(landmarks[362], landmarks[263]),
                    )

                    # Only label eyes that are clearly visible: both when
                    # frontal, only the left for 'look_left', only the right
                    # for 'look_right'.
                    if side_status in ('frontal', 'look_left') and l_ear is not None:
                        l_cls = CLS_C_EYES if l_ear < EAR_THRESH else CLS_O_EYES
                        labels.append(f"{l_cls} {bbox_for(left_idxs)}")
                    if side_status in ('frontal', 'look_right') and r_ear is not None:
                        r_cls = CLS_C_EYES if r_ear < EAR_THRESH else CLS_O_EYES
                        labels.append(f"{r_cls} {bbox_for(right_idxs)}")

                # 6. MOUTH label (hand & mask occlusion checks)
                if is_mouth_occluded(frame_enhanced, landmarks, results_hands, h, w):
                    # Covered -> label as covered mouth
                    labels.append(f"{CLS_COVERED_MOUTH} {bbox_for(mouth_idxs)}")
                else:
                    # Normal -> compute the mouth aspect ratio (MAR); skip the
                    # label when the mouth width is zero.
                    mar = _ratio_or_none(
                        calc_dist(landmarks[13], landmarks[14]),
                        calc_dist(landmarks[61], landmarks[291]),
                    )
                    if mar is not None:
                        m_cls = CLS_O_MOUTH if mar > MAR_THRESH else CLS_C_MOUTH
                        labels.append(f"{m_cls} {bbox_for(mouth_idxs)}")

                # Save
                save_data(img_out_dir, lbl_out_dir, file_prefix, frame_count, frame_enhanced, labels)
        finally:
            cap.release()

    print("PROSES SELESAI: Dataset Final telah dibuat")

# Run the dataset-generation pipeline when this code is executed as the main module.
if __name__ == "__main__":
    process_dataset()

I0000 00:00:1764033751.750046  548915 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1764033751.755308  573808 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.0.7-0ubuntu0.24.04.2), renderer: Mesa Intel(R) HD Graphics 620 (KBL GT2)
I0000 00:00:1764033751.781885  548915 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
W0000 00:00:1764033751.790367  573802 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1764033751.791812  573815 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.0.7-0ubuntu0.24.04.2), renderer: Mesa Intel(R) HD Graphics 620 (KBL GT2)


Memproses 120 video...


  0%|          | 0/120 [00:00<?, ?it/s]W0000 00:00:1764033751.874840  573803 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1764033751.901846  573810 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1764033751.972518  573810 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
100%|██████████| 120/120 [31:22<00:00, 15.69s/it]

PROSES SELESAI: Dataset Final telah dibuat di 'data/yolo_dataset_master/'





In [None]:
import cv2
import os
import glob

# ==========================================
# CONFIGURATION
# ==========================================
# Point these at the dataset folder that was just generated
DATASET_DIR = "data/yolo_dataset_master/images/train" 
LABEL_DIR   = "data/yolo_dataset_master/labels/train"

# Class definitions (order must match prepare_data.py); the list index is the
# class id read from the first field of each label line.
CLASS_NAMES = [
    'Face',           # 0
    'Open Eyes',      # 1
    'Closed Eyes',    # 2
    'Open Mouth',     # 3
    'Closed Mouth',   # 4
    'Covered Mouth',  # 5 (hand/mask)
    'Sunglasses'      # 6 (dark sunglasses)
]

# Drawing colour for each class, in B-G-R order, indexed by class id
COLORS = [
    (255, 255, 255), # Face (white)
    (0, 255, 0),     # Open Eyes (green)
    (0, 0, 255),     # Closed Eyes (red)
    (0, 255, 255),   # Open Mouth (yellow)
    (255, 0, 0),     # Closed Mouth (blue)
    (0, 165, 255),   # Covered (orange)
    (50, 50, 50)     # Sunglasses (black/grey)
]

def view_dataset():
    """Browse the generated dataset interactively with its labels drawn on top.

    Every ``*.jpg`` in DATASET_DIR is shown in an OpenCV window together with
    the boxes read from the same-named ``.txt`` file in LABEL_DIR. Each label
    line is expected to hold ``class_id x_center y_center width height`` with
    the four box values normalised to the image size.

    Keys: ``d`` next image, ``a`` previous image, ``q`` quit.

    Images that cannot be read are reported and dropped from the browsing
    list; label lines that do not have five parseable fields are ignored.
    """
    # Collect all image files
    image_paths = sorted(glob.glob(os.path.join(DATASET_DIR, "*.jpg")))

    if not image_paths:
        print(f"Tidak ada gambar ditemukan di {DATASET_DIR}")
        return

    print(f"Ditemukan {len(image_paths)} gambar.")
    print("NAVIGASI: [D] Next | [A] Previous | [Q] Quit")

    idx = 0
    try:
        while image_paths:
            img_path = image_paths[idx]

            # cv2.imread returns None (no exception) for unreadable files.
            frame = cv2.imread(img_path)
            if frame is None:
                print(f"WARNING: cannot read image, skipping: {img_path}")
                image_paths.pop(idx)
                if image_paths:
                    idx %= len(image_paths)
                continue
            h, w = frame.shape[:2]

            # Locate the matching label file:
            # images/train/x.jpg -> labels/train/x.txt
            # splitext swaps only the extension, never a ".jpg" inside the name.
            stem = os.path.splitext(os.path.basename(img_path))[0]
            lbl_path = os.path.join(LABEL_DIR, stem + ".txt")

            # Read the labels
            if os.path.exists(lbl_path):
                with open(lbl_path, 'r') as f:
                    lines = f.readlines()

                for line in lines:
                    parts = line.split()
                    if len(parts) != 5:
                        continue  # blank or malformed line
                    try:
                        cls_id = int(parts[0])
                        x_c, y_c, bw, bh = map(float, parts[1:])
                    except ValueError:
                        continue  # non-numeric field

                    # Convert YOLO (normalised centre/size) to pixel corners
                    x1 = int((x_c - bw / 2) * w)
                    y1 = int((y_c - bh / 2) * h)
                    x2 = int((x_c + bw / 2) * w)
                    y2 = int((y_c + bh / 2) * h)

                    # Draw box & text. The lower bound matters: a negative id
                    # would otherwise index the lists from the end.
                    color = COLORS[cls_id] if 0 <= cls_id < len(COLORS) else (255, 255, 255)
                    label_text = CLASS_NAMES[cls_id] if 0 <= cls_id < len(CLASS_NAMES) else str(cls_id)

                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                    cv2.putText(frame, label_text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            else:
                cv2.putText(frame, "NO LABEL FILE", (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # Show file position
            cv2.putText(frame, f"File: {idx+1}/{len(image_paths)}", (10, h - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            cv2.imshow("Dataset Viewer", frame)

            # Keyboard input (upper case accepted too, e.g. with Caps Lock on)
            key = cv2.waitKey(0) & 0xFF
            if key in (ord('d'), ord('D')):  # Next
                idx = (idx + 1) % len(image_paths)
            elif key in (ord('a'), ord('A')):  # Previous
                idx = (idx - 1) % len(image_paths)
            elif key in (ord('q'), ord('Q')):  # Quit
                break
    finally:
        # Always close the viewer window, even if drawing raised.
        cv2.destroyAllWindows()

# Launch the interactive dataset viewer when this code is executed as the main module.
if __name__ == "__main__":
    view_dataset()