# Data Exploration - Driver Drowsiness Dataset

This notebook explores the Driver Drowsiness Dataset (DDD) to understand:
- Dataset structure and organization
- Image statistics and properties
- Class distribution
- Sample images from each class


In [None]:
import os
from pathlib import Path
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from collections import Counter

# Locate the project: the root is taken to be the parent of the current
# working directory, and the dataset is expected in its "Data" sub-folder.
_working_dir = Path(".").resolve()
PROJECT_ROOT = _working_dir.parent
DATA_DIR = PROJECT_ROOT.joinpath("Data")

# Echo the resolved locations so a wrong launch directory is obvious at a glance.
data_dir_found = DATA_DIR.exists()
print(f"Project root: {PROJECT_ROOT}")
print(f"Data directory: {DATA_DIR}")
print(f"Data directory exists: {data_dir_found}")


In [None]:
# Explore dataset structure
# Each class is stored in its own sub-folder of DATA_DIR.
drowsy_dir = DATA_DIR / "Drowsy"
non_drowsy_dir = DATA_DIR / "Non Drowsy"

print(f"Drowsy directory: {drowsy_dir}")
print(f"Non Drowsy directory: {non_drowsy_dir}")

# A missing class folder would otherwise just show up as "0 images" below;
# call it out explicitly so a wrong DATA_DIR is easy to diagnose.
for _class_dir in (drowsy_dir, non_drowsy_dir):
    if not _class_dir.is_dir():
        print(f"WARNING: class directory not found: {_class_dir}")

# Count images.
# glob() yields entries in a filesystem-dependent order, so sort the paths to
# make every later "first N images" selection reproducible across runs/machines.
drowsy_images = sorted(drowsy_dir.glob("*.png"))
non_drowsy_images = sorted(non_drowsy_dir.glob("*.png"))

print(f"\nNumber of Drowsy images: {len(drowsy_images)}")
print(f"Number of Non Drowsy images: {len(non_drowsy_images)}")
print(f"Total images: {len(drowsy_images) + len(non_drowsy_images)}")


In [None]:
# Analyze image properties
SIZE_SAMPLE_COUNT = 100  # number of images per class inspected for dimensions


def _image_size(path):
    """Return the ``(width, height)`` of the image at ``path``.

    The file is opened in a ``with`` block so its handle is released
    immediately instead of lingering until garbage collection.
    """
    with Image.open(path) as img:
        return img.size


# Indexing [0] on an empty list would raise an opaque IndexError; fail with a
# message that points at the actual problem instead.
if not drowsy_images or not non_drowsy_images:
    raise FileNotFoundError(
        "No images found for at least one class "
        f"(Drowsy: {len(drowsy_images)}, Non Drowsy: {len(non_drowsy_images)})"
    )

# Load one representative image per class. load() reads the pixel data before
# the `with` block closes the underlying file, so the image objects are
# expected to stay usable afterwards while no file handle is left open.
with Image.open(drowsy_images[0]) as sample_drowsy:
    sample_drowsy.load()
with Image.open(non_drowsy_images[0]) as sample_non_drowsy:
    sample_non_drowsy.load()

print(f"Drowsy sample image size: {sample_drowsy.size}")
print(f"Drowsy sample image mode: {sample_drowsy.mode}")
print(f"\nNon Drowsy sample image size: {sample_non_drowsy.size}")
print(f"Non Drowsy sample image mode: {sample_non_drowsy.mode}")

# Collect the dimensions of the first SIZE_SAMPLE_COUNT images of each class.
# This is a spot check of a sample, not a scan of the whole dataset.
sizes_drowsy = [_image_size(path) for path in drowsy_images[:SIZE_SAMPLE_COUNT]]
sizes_non_drowsy = [_image_size(path) for path in non_drowsy_images[:SIZE_SAMPLE_COUNT]]

print(f"\nDrowsy image sizes (sample): {set(sizes_drowsy)}")
print(f"Non Drowsy image sizes (sample): {set(sizes_non_drowsy)}")


In [None]:
# Visualize sample images
SAMPLES_PER_CLASS = 4  # number of images shown in each row

# One grid row per class: (title prefix, image paths).
_class_rows = (
    ("Drowsy", drowsy_images),
    ("Non Drowsy", non_drowsy_images),
)

# squeeze=False keeps `axes` two-dimensional regardless of the grid size.
fig, axes = plt.subplots(
    len(_class_rows), SAMPLES_PER_CLASS, figsize=(16, 8), squeeze=False
)

for _row_axes, (_class_name, _image_paths) in zip(axes, _class_rows):
    for _col, _ax in enumerate(_row_axes):
        _ax.axis("off")
        # Leave the panel blank when the class has fewer images than columns
        # instead of raising IndexError.
        if _col >= len(_image_paths):
            continue
        # imshow is handed the image while the file is still open; the handle
        # is released as soon as the panel has been drawn.
        with Image.open(_image_paths[_col]) as _img:
            _ax.imshow(_img)
        _ax.set_title(f"{_class_name} Sample {_col + 1}")

plt.tight_layout()
plt.show()
