# Driver Drowsiness Detection - CNN Model
## Complete Data Science Lifecycle Implementation

This notebook implements a comprehensive CNN-based driver drowsiness detection system covering:
- Complete EDA and data visualization
- Data preprocessing and augmentation
- Model development with MobileNetV2
- Training and evaluation
- Interactive dashboard with Gradio
- Model interpretation and deployment


## 1. Installation and Setup


In [None]:
# Install required packages
!pip install tensorflow matplotlib scikit-learn keras_cv gradio pandas seaborn plotly opencv-python -q


In [None]:
# Mount Google Drive (if using Colab), otherwise fall back to a local dataset folder.
import os

try:
    from google.colab import drive
except ImportError:
    # Not running on Colab. The folder can be overridden with the
    # DROWSINESS_DATA_DIR environment variable; the default is the original local path.
    BASE_DIR = os.environ.get(
        'DROWSINESS_DATA_DIR',
        '/Users/spartan/Downloads/cs163 Modules/project/cs163_ds',
    )
else:
    # Only a missing `google.colab` module triggers the local fallback; a failing
    # mount now raises instead of silently pointing BASE_DIR at a non-Colab path.
    drive.mount('/content/drive')
    BASE_DIR = '/content/drive/MyDrive/cs163_ds'

print(f"Dataset directory: {BASE_DIR}")


## 2. Data Loading and Exploration


In [None]:
# Import libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from collections import Counter
import cv2
from pathlib import Path
import warnings
# Silence library warnings so the notebook output stays readable.
# NOTE(review): this hides every warning category, including deprecation notices.
warnings.filterwarnings('ignore')

# Set style
# 'seaborn-v0_8' is only registered by newer Matplotlib releases; older releases
# ship the same style sheet as 'seaborn'. Pick whichever is available so this
# cell does not raise on an older install.
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)
sns.set_palette("husl")


In [None]:
# Dataset exploration
def explore_dataset(base_dir):
    """Summarise an image dataset laid out as one sub-folder per class.

    Args:
        base_dir: Directory whose immediate sub-directories are treated as classes.

    Returns:
        A tuple ``(folders, dataset_info, total_images)`` where ``folders`` is the
        sorted list of sub-directory names, ``dataset_info`` maps each folder name
        to ``{'count': <number of images>, 'images': <up to 5 sample file names>}``
        and ``total_images`` is the sum of all counts. Only files ending in
        .jpg, .jpeg or .png (case-insensitive) are counted.
    """
    image_exts = ('.jpg', '.jpeg', '.png')

    # os.listdir() returns entries in arbitrary order; sort so the printed report,
    # the plots built from `folders`, and the sample names are reproducible.
    folders = sorted(
        name for name in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, name))
    )

    dataset_info = {}
    for folder in folders:
        folder_path = os.path.join(base_dir, folder)
        images = sorted(
            name for name in os.listdir(folder_path)
            if name.lower().endswith(image_exts)
        )
        dataset_info[folder] = {
            'count': len(images),
            'images': images[:5],  # Sample image names
        }

    total_images = sum(info['count'] for info in dataset_info.values())
    return folders, dataset_info, total_images

# Run the exploration once and print a per-class summary table.
folders, dataset_info, total_images = explore_dataset(BASE_DIR)

rule = "=" * 60
print(rule)
print("DATASET EXPLORATION")
print(rule)
for class_name in dataset_info:
    n_images = dataset_info[class_name]['count']
    print(f"{class_name:20s}: {n_images:5d} images")
print(rule)
print(f"Total Images: {total_images}")
print(f"Number of Classes: {len(folders)}")


## 3. Exploratory Data Analysis (EDA)


In [None]:
# 3.1 Class Distribution Visualization
# Bar chart of the number of images per class, saved to class_distribution.png.
per_class_counts = [dataset_info[name]['count'] for name in folders]
# Tally of how many classes share each image count; not used further in this cell.
counts = Counter(per_class_counts)
bar_colors = sns.color_palette("husl", len(folders))

plt.figure(figsize=(14, 6))
plt.bar(folders, per_class_counts, color=bar_colors)
plt.title("Class Distribution in Dataset", fontsize=14, fontweight='bold')
plt.xlabel("Class")
plt.ylabel("Number of Images")
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('class_distribution.png', dpi=300, bbox_inches='tight')
plt.show()


## 4. Data Preprocessing and Augmentation


In [None]:
# Data preprocessing and augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils.class_weight import compute_class_weight

img_height, img_width = 224, 224
batch_size = 32
val_split = 0.2  # fraction of each class held out for validation

# Training generator: rescale to [0, 1] plus random augmentation.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=val_split,
    horizontal_flip=True,
    rotation_range=25,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    brightness_range=[0.7,1.3],
    fill_mode='nearest'
)

# Validation generator: rescale only. Validation images must not be randomly
# flipped/rotated/shifted, otherwise val_loss / val_accuracy (which drive early
# stopping, checkpointing and the final reports) change from run to run and no
# longer measure performance on the real images.
# It uses the same directory and the same validation_split as `datagen`, so the
# 'validation' subset is the complement of the 'training' subset taken below.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=val_split
)

train_data = datagen.flow_from_directory(
    BASE_DIR, target_size=(img_height, img_width), batch_size=batch_size,
    class_mode='categorical', subset='training', shuffle=True
)
# shuffle=False keeps the prediction order aligned with val_data.classes.
val_data = val_datagen.flow_from_directory(
    BASE_DIR, target_size=(img_height, img_width), batch_size=batch_size,
    class_mode='categorical', subset='validation', shuffle=False
)
labels = list(train_data.class_indices.keys())
print(f"Classes: {labels}")
print(f"Training samples: {train_data.samples}")
print(f"Validation samples: {val_data.samples}")


In [None]:
# Class weights for binary drowsiness
# Collapse the multiclass training labels into drowsy (1) / not drowsy (0) and
# compute 'balanced' weights for those two groups.
# NOTE(review): membership is a case-sensitive match against the class folder
# names - confirm these spellings match the classes printed by the previous cell.
drowsy_labels = ['Yawn', 'closed']
train_bin = np.array([int(labels[class_idx] in drowsy_labels) for class_idx in train_data.classes])
binary_classes = np.array([0, 1])
class_weights = compute_class_weight(class_weight='balanced', classes=binary_classes, y=train_bin)
cw_dict = {cls: class_weights[cls] for cls in (0, 1)}
print("Class weights for binary drowsiness:", cw_dict)


## 5. CNN Model Development (MobileNetV2)


In [None]:
# Build CNN model with MobileNetV2
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models, optimizers, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# MobileNetV2 backbone with ImageNet weights, frozen so only the new head is trained.
base_model = MobileNetV2(input_shape=(img_height, img_width, 3), include_top=False, weights='imagenet')
base_model.trainable = False

model_cnn = models.Sequential([
    layers.Input(shape=(img_height, img_width, 3)),
    # The data generators and predict_image() feed pixels scaled to [0, 1], but the
    # ImageNet MobileNetV2 weights expect inputs in [-1, 1] (what
    # mobilenet_v2.preprocess_input produces). Map [0, 1] -> [-1, 1] inside the
    # model so every caller can keep sending [0, 1] images.
    layers.Rescaling(scale=2.0, offset=-1.0),
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.5),
    layers.Dense(96, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.5),
    # One softmax output per class folder found by the training generator.
    layers.Dense(train_data.num_classes, activation='softmax')
])
model_cnn.compile(optimizer=optimizers.Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
model_cnn.summary()


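## 6. Model Training

> **Note:** The training cell (callbacks and `model_cnn.fit`) appears further down in this notebook, after section 5.5. Run it before sections 6.5 and 6.6, which depend on `cnn_hist` and `checkpoint_path`.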


## 6.5 Training History Visualization & Overfitting Analysis


In [None]:
# Visualize training history and detect overfitting
def plot_training_history(history):
    """Plot accuracy/loss curves and print a heuristic overfitting report.

    Args:
        history: Object whose ``history`` attribute is a dict holding equal-length
            per-epoch sequences under the keys 'accuracy', 'val_accuracy', 'loss'
            and 'val_loss' (e.g. the value returned by ``model.fit``).

    Returns:
        ``(best_epoch, best_val_acc)``: the 1-based epoch with the highest
        validation accuracy and that accuracy as a fraction in [0, 1].

    Side effects:
        Saves the figure to 'training_history.png', shows it, and prints the report.

    Note:
        "Best" here means highest validation *accuracy*. A checkpoint selected on a
        different metric (such as val_loss) may come from a different epoch.
    """
    train_acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    train_loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(train_acc) + 1)

    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    # Accuracy plot
    axes[0].plot(epochs, train_acc, 'o-', label='Train Accuracy', linewidth=2, markersize=8)
    axes[0].plot(epochs, val_acc, 's-', label='Val Accuracy', linewidth=2, markersize=8)
    axes[0].set_title('Model Accuracy Over Epochs', fontsize=14, fontweight='bold')
    axes[0].set_xlabel('Epoch', fontsize=12)
    axes[0].set_ylabel('Accuracy', fontsize=12)
    axes[0].legend(fontsize=11)
    axes[0].grid(alpha=0.3)
    # Keep the zoomed-in [0.95, 1.01] view when every value fits in it, but lower
    # the floor otherwise so epochs below 95% accuracy are not clipped out of view.
    acc_floor = min(0.95, min(list(train_acc) + list(val_acc)) - 0.01)
    axes[0].set_ylim([acc_floor, 1.01])

    # Loss plot
    axes[1].plot(epochs, train_loss, 'o-', label='Train Loss', linewidth=2, markersize=8)
    axes[1].plot(epochs, val_loss, 's-', label='Val Loss', linewidth=2, markersize=8)
    axes[1].set_title('Model Loss Over Epochs', fontsize=14, fontweight='bold')
    axes[1].set_xlabel('Epoch', fontsize=12)
    axes[1].set_ylabel('Loss', fontsize=12)
    axes[1].legend(fontsize=11)
    axes[1].grid(alpha=0.3)

    plt.tight_layout()
    plt.savefig('training_history.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Analyze overfitting
    print("="*60)
    print("OVERFITTING ANALYSIS")
    print("="*60)

    # Best epoch = first epoch with the highest validation accuracy.
    best_idx = int(np.argmax(val_acc))
    best_epoch = best_idx + 1
    best_val_acc = val_acc[best_idx]
    best_train_acc = train_acc[best_idx]
    best_val_loss = val_loss[best_idx]
    best_train_loss = train_loss[best_idx]

    # Final epoch metrics
    final_train_acc = train_acc[-1]
    final_val_acc = val_acc[-1]
    final_train_loss = train_loss[-1]
    final_val_loss = val_loss[-1]

    # Train/validation gaps at the best and at the final epoch.
    acc_gap_best = best_train_acc - best_val_acc
    acc_gap_final = final_train_acc - final_val_acc
    loss_gap_best = abs(best_train_loss - best_val_loss)
    loss_gap_final = abs(final_train_loss - final_val_loss)

    print(f"Best Model Performance (Epoch {best_epoch}):")
    print(f"  Train Accuracy: {best_train_acc*100:.2f}%")
    print(f"  Val Accuracy:   {best_val_acc*100:.2f}%")
    print(f"  Accuracy Gap:   {acc_gap_best*100:.2f}%")
    print(f"  Train Loss:     {best_train_loss:.4f}")
    print(f"  Val Loss:       {best_val_loss:.4f}")
    print(f"  Loss Gap:       {loss_gap_best:.4f}")

    print(f"\nFinal Epoch Performance:")
    print(f"  Train Accuracy: {final_train_acc*100:.2f}%")
    print(f"  Val Accuracy:   {final_val_acc*100:.2f}%")
    print(f"  Accuracy Gap:   {acc_gap_final*100:.2f}%")
    print(f"  Train Loss:     {final_train_loss:.4f}")
    print(f"  Val Loss:       {final_val_loss:.4f}")
    print(f"  Loss Gap:       {loss_gap_final:.4f}")

    # Overfitting indicators
    print("\n" + "="*60)
    print("OVERFITTING INDICATORS:")
    print("="*60)

    overfitting_signs = []

    if final_train_acc >= 1.0:
        overfitting_signs.append("⚠️  Training accuracy reached 100% (perfect memorization)")

    # The gap widened by more than one percentage point after the best epoch.
    if acc_gap_final > acc_gap_best + 0.01:
        overfitting_signs.append(f"⚠️  Accuracy gap increased by {abs(acc_gap_final - acc_gap_best)*100:.2f}%")

    if final_val_loss > best_val_loss:
        overfitting_signs.append(f"⚠️  Validation loss increased from {best_val_loss:.4f} to {final_val_loss:.4f}")

    if final_val_acc < best_val_acc:
        overfitting_signs.append(f"⚠️  Validation accuracy decreased from {best_val_acc*100:.2f}% to {final_val_acc*100:.2f}%")

    if not overfitting_signs:
        print("✓ No significant overfitting detected!")
        print("  The model generalizes well to validation data.")
    else:
        print(f"Found {len(overfitting_signs)} indicator(s) of overfitting:")
        for sign in overfitting_signs:
            print(f"  {sign}")

        severity = "MODERATE" if len(overfitting_signs) >= 2 else "MILD"

        print(f"\n📊 Overfitting Severity: {severity}")
        print(f"\n💡 Recommendations:")
        print("  1. Use the model from Epoch {} (best validation performance)".format(best_epoch))
        print("  2. Increase dropout rates (currently 0.5)")
        print("  3. Add more data augmentation")
        print("  4. Increase L2 regularization")
        print("  5. Reduce model capacity if needed")
        print("  6. Use early stopping (already implemented)")

    print("="*60)

    return best_epoch, best_val_acc

# Plot and analyze
# Needs `cnn_hist`, which is assigned by the `model_cnn.fit(...)` training cell;
# run that cell before this one.
best_epoch, best_val_acc = plot_training_history(cnn_hist)


## 6.6 Load Best Model (Non-Overfit Version)


In [None]:
# The ModelCheckpoint callback saved the epoch with the lowest val_loss to
# `checkpoint_path`. Actually reload it here so every later cell uses those
# weights. NOTE(review): depending on the Keras version,
# EarlyStopping(restore_best_weights=True) may only rewind the in-memory weights
# when it stops training early, so reloading is the safe option.
# Requires `checkpoint_path` from the training cell and `best_epoch` /
# `best_val_acc` from plot_training_history().
print(f"Loading best model saved at: {checkpoint_path}")
model_cnn = models.load_model(checkpoint_path)

# The checkpoint is chosen by val_loss while best_epoch/best_val_acc are chosen by
# val_accuracy, so report them separately instead of implying they are the same epoch.
print("Checkpoint selection criterion: lowest val_loss")
print(f"Highest validation accuracy during training: {best_val_acc*100:.2f}% (Epoch {best_epoch})")


## 5.5 Alternative: Improved Model with Anti-Overfitting Techniques


In [None]:
# OPTIONAL: Improved model with stronger regularization to reduce overfitting.
# The code below is wrapped in a triple-quoted string, so it is NOT executed.
# To retrain with these anti-overfitting measures, remove the surrounding triple
# quotes. It references img_height, img_width and train_data, so the
# preprocessing cell must have been run first.
# As the last expression of the cell, the string itself is echoed as cell output.

"""
# Build improved CNN model with stronger regularization
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models, optimizers, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

base_model_improved = MobileNetV2(input_shape=(img_height, img_width, 3), include_top=False, weights='imagenet')
base_model_improved.trainable = False

model_cnn_improved = models.Sequential([
    base_model_improved,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.6),  # Increased from 0.5
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.02)),  # Stronger L2
    layers.BatchNormalization(),  # Added batch normalization
    layers.Dropout(0.6),  # Increased dropout
    layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.02)),
    layers.Dropout(0.5),
    layers.Dense(train_data.num_classes, activation='softmax')
])

model_cnn_improved.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model_cnn_improved.summary()

# This improved model uses:
# - Higher dropout rates (0.6 instead of 0.5)
# - Stronger L2 regularization (0.02 instead of 0.01)
# - Batch normalization for stability
# - Additional dense layer for better feature extraction
"""


In [None]:
# Training callbacks
checkpoint_path = 'best_cnn_model.keras'

# All three callbacks monitor validation loss.
early_stopping = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True, verbose=1)
best_checkpoint = ModelCheckpoint(filepath=checkpoint_path, save_best_only=True, monitor='val_loss', verbose=1)
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6, verbose=1)
callbacks = [early_stopping, best_checkpoint, lr_on_plateau]

# Train model
fit_options = dict(
    validation_data=val_data,
    epochs=20,
    callbacks=callbacks,
    verbose=1,
)
cnn_hist = model_cnn.fit(train_data, **fit_options)


## 7. Model Evaluation and Metrics


In [None]:
# Evaluation
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Restart the validation iterator from its first batch before predicting.
val_data.reset()
Y_pred = model_cnn.predict(val_data, verbose=1)
y_pred = Y_pred.argmax(axis=1)  # most probable class index per sample
y_true = val_data.classes

multiclass_report = classification_report(y_true, y_pred, target_names=labels)
print("Multiclass Classification Report:")
print(multiclass_report)

# Binary drowsiness evaluation: a class index maps to 1 when its label is in drowsy_labels.
y_true_bin = [int(labels[idx] in drowsy_labels) for idx in y_true]
y_pred_bin = [int(labels[idx] in drowsy_labels) for idx in y_pred]
binary_report = classification_report(y_true_bin, y_pred_bin, target_names=['Not Drowsy', 'Drowsy'])
print("\nBinary Drowsiness Classification Report:")
print(binary_report)


## 8. Interactive Dashboard with Gradio


In [None]:
# Install Gradio if needed
%pip install gradio -q

import gradio as gr
from tensorflow.keras.preprocessing import image

def predict_image(model, img_path):
    """Classify a single image file with ``model``.

    The image is loaded at (img_height, img_width), scaled by 1/255 and passed to
    ``model.predict`` as a batch of one.

    Returns:
        ``(label, pred)``: the entry of ``labels`` at the arg-max of the prediction,
        and the raw ``model.predict`` output.
    """
    pil_img = image.load_img(img_path, target_size=(img_height, img_width))
    pixels = image.img_to_array(pil_img) / 255.
    batch = np.expand_dims(pixels, axis=0)  # add the leading batch dimension
    pred = model.predict(batch, verbose=0)
    top_idx = np.argmax(pred)
    return labels[top_idx], pred

def classify_image_gradio(img_path):
    """Gradio callback: classify an uploaded image and format the results.

    Args:
        img_path: Path of the uploaded image, or None when nothing was uploaded.

    Returns:
        ``(output_text, pred_class, class_probs)``: a Markdown summary, the
        predicted label, and a dict mapping every label to its probability.
        All three are None when ``img_path`` is None.
    """
    if img_path is None:
        return None, None, None

    pred_class, raw_predictions = predict_image(model_cnn, img_path)
    binary_status = "Drowsy" if pred_class in drowsy_labels else "Not Drowsy"
    # raw_predictions holds one row of per-class scores for the single image.
    class_probs = {label: float(prob) for label, prob in zip(labels, raw_predictions[0])}

    # A blank line separates the two Markdown paragraphs. The previous "\\n"
    # produced a literal backslash followed by "n" in the rendered text.
    output_text = f"**Predicted Class:** {pred_class}\n\n**Drowsiness Status:** {binary_status}"
    return output_text, pred_class, class_probs

# Create Gradio interface
# One image input (passed to the callback as a file path) and three outputs that
# match the tuple returned by classify_image_gradio().
iface = gr.Interface(
    fn=classify_image_gradio,
    inputs=gr.Image(type="filepath"),
    outputs=[
        gr.Markdown(label="Prediction Results"),
        gr.Textbox(label="Predicted Class"),
        gr.Label(label="Class Probabilities")
    ],
    # Title emoji restored; it was stored as mis-encoded bytes.
    title="🚗 Driver Drowsiness Detection Dashboard",
    description="Upload an image to predict the driver's activity and drowsiness status."
)

# NOTE(review): server_name="0.0.0.0" listens on every network interface, so the
# dashboard is reachable from other machines on the network. Use "127.0.0.1" if
# it should only be available locally.
iface.launch(share=False, server_name="0.0.0.0")
