# PPE Compliance Detection using YOLOv8

Train YOLOv8 to detect PPE violations on Kaggle or Colab. The dataset stays remote; only the trained model weights are saved locally.

## Cell 0: Colab Setup (Optional - Run First if Using Colab)

In [None]:
import os
import sys

# Detect Colab by inspecting the active IPython shell's string representation.
IS_COLAB = 'google.colab' in str(get_ipython())

# Pick the base directory: Google Drive on Colab, the working directory otherwise.
if IS_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    BASE_DIR = '/content/drive/My Drive/safety-surveillance'
else:
    BASE_DIR = os.getcwd()

# The three working directories hang off BASE_DIR in both environments.
DATASET_DIR = os.path.join(BASE_DIR, 'datasets')
MODELS_DIR = os.path.join(BASE_DIR, 'models')
RESULTS_DIR = os.path.join(BASE_DIR, 'results')

if IS_COLAB:
    # Only the Colab branch creates the directories up front.
    for _directory in (BASE_DIR, DATASET_DIR, MODELS_DIR, RESULTS_DIR):
        os.makedirs(_directory, exist_ok=True)

    print("Colab setup complete")
    print(f"Base directory: {BASE_DIR}")
else:
    print("Running locally")

In [None]:
import subprocess
import sys

# Install each dependency with the interpreter that runs this notebook, so the
# packages land in the active environment. check_call raises if pip fails.
for _pip_args in (["install", "-U", "ultralytics"], ["install", "opencv-python"]):
    subprocess.check_call([sys.executable, "-m", "pip", *_pip_args])

print("Packages installed")

In [None]:
import os
import cv2
import numpy as np
import torch
import yaml
from ultralytics import YOLO
from PIL import Image
import matplotlib.pyplot as plt

# Report whether PyTorch can see a CUDA device; the training, validation and
# inference cells use the same check to choose between device 0 and 'cpu'.
print(f"GPU: {torch.cuda.is_available()}")

## Cell 2: Dataset Verification

In [None]:
# Kaggle is recognised by its read-only input mount, Colab by the IPython shell.
IS_KAGGLE = os.path.exists("/kaggle/input")
IS_COLAB = 'google.colab' in str(get_ipython())

# Kaggle takes precedence over Colab; anything else is treated as a local run.
if IS_KAGGLE:
    DATASET_PATH, _platform_message = "/kaggle/input/ppe-dataset-yolov8", "Running on Kaggle"
elif IS_COLAB:
    DATASET_PATH, _platform_message = "/content/datasets/ppe", "Running on Colab"
else:
    DATASET_PATH, _platform_message = "./datasets/ppe", "Running locally"
print(_platform_message)

print(f"Dataset path: {DATASET_PATH}")
print(f"Dataset exists: {os.path.exists(DATASET_PATH)}")

In [None]:
# Dataset config lookup: prefer the data.yaml shipped with the dataset.
yaml_path = os.path.join(DATASET_PATH, 'data.yaml')

if os.path.exists(yaml_path):
    with open(yaml_path, 'r', encoding='utf-8') as f:
        # safe_load returns None for an empty file; normalise that to a dict.
        data_yaml = yaml.safe_load(f) or {}
    print("data.yaml loaded")
else:
    data_yaml = {
        # Absolute path so the dataset root does not depend on how the
        # training library resolves relative paths.
        'path': os.path.abspath(DATASET_PATH),
        'train': 'images/train',
        'val': 'images/val',
        'test': 'images/test',
        'nc': 6,
        'names': ['helmet', 'vest', 'gloves', 'safety_shoes', 'goggles', 'face_shield']
    }
    # The training and evaluation cells pass a file path, not this dict, so the
    # default config has to exist on disk. It is written to the working
    # directory because the dataset directory may be missing or read-only
    # (e.g. /kaggle/input), and yaml_path is repointed at the written file.
    yaml_path = os.path.abspath('ppe_data.yaml')
    with open(yaml_path, 'w', encoding='utf-8') as f:
        yaml.safe_dump(data_yaml, f, sort_keys=False)
    print("Using default PPE structure")
    print(f"Default config written to: {yaml_path}")

## Cell 3: Configuration

In [None]:
# Class id -> label; ids are assigned by position in the list below.
CLASS_NAMES = dict(enumerate([
    'helmet',
    'vest',
    'gloves',
    'safety_shoes',
    'goggles',
    'face_shield',
]))

# Bundle the dataset location with the class table for later reference.
DATASET_CONFIG = dict(
    path=DATASET_PATH,
    class_names=CLASS_NAMES,
    num_classes=len(CLASS_NAMES),
)

print(f"Classes: {list(CLASS_NAMES.values())}")

## Cell 4: Load Model

In [None]:
# Size suffix of the pretrained YOLOv8 checkpoint to start from.
MODEL_SIZE = 's'

_pretrained_weights = f'yolov8{MODEL_SIZE}.pt'
model = YOLO(_pretrained_weights)
print(f"Model loaded: YOLOv8{MODEL_SIZE}")

## Cell 5: Train

In [None]:
# Resolve the dataset config file and the compute device before the call.
_train_data = yaml_path if os.path.exists(yaml_path) else f'{DATASET_PATH}/data.yaml'
_train_device = 0 if torch.cuda.is_available() else 'cpu'

# Run-level settings: data, schedule, hardware and output location.
_run_settings = dict(
    data=_train_data,
    epochs=100,
    imgsz=640,
    batch=16,
    patience=20,
    device=_train_device,
    save=True,
    augment=True,
    name='ppe_detector',
    project='runs/detect',
    exist_ok=True,
)

# Optimizer and learning-rate settings.
_optimizer_settings = dict(
    optimizer='SGD',
    lr0=0.01,
    lrf=0.01,
    momentum=0.937,
    weight_decay=0.0005,
    warmup_epochs=3,
)

# Loss weighting arguments (box / cls / dfl), passed through unchanged.
_loss_settings = dict(box=7.5, cls=0.5, dfl=1.5)

results = model.train(**_run_settings, **_optimizer_settings, **_loss_settings)

print("Training completed")

## Cell 6: Evaluate

In [None]:
# Reload the best checkpoint written by the training run before validating.
best_model_path = 'runs/detect/ppe_detector/weights/best.pt'
model = YOLO(best_model_path)

# Same dataset-config and device selection as the training cell.
_eval_data = yaml_path if os.path.exists(yaml_path) else f'{DATASET_PATH}/data.yaml'
_eval_device = 0 if torch.cuda.is_available() else 'cpu'

val_results = model.val(data=_eval_data, imgsz=640, batch=16, device=_eval_device)

# Metrics are printed only when the result object exposes a `box` attribute.
if hasattr(val_results, 'box'):
    print(f"mAP50: {val_results.box.map50:.4f}")
    print(f"mAP50-95: {val_results.box.map:.4f}")

## Cell 7: Save Model

In [None]:
import shutil
import json
from datetime import datetime

# Copy the best checkpoint from the training run into the models directory and
# write a small JSON metadata file next to it.
TRAINED_MODEL_SOURCE = 'runs/detect/ppe_detector/weights/best.pt'

# MODELS_DIR is rebuilt from its own previous value on Colab, so the 'ppe'
# suffix is appended only when it is not already there. Without the guard,
# re-running this cell would produce .../models/ppe/ppe, then .../ppe/ppe/ppe.
if not IS_COLAB:
    MODELS_DIR = 'models/ppe'
elif os.path.basename(os.path.normpath(MODELS_DIR)) != 'ppe':
    MODELS_DIR = os.path.join(MODELS_DIR, 'ppe')
FINAL_MODEL_PATH = os.path.join(MODELS_DIR, 'best.pt')

os.makedirs(MODELS_DIR, exist_ok=True)

if os.path.exists(TRAINED_MODEL_SOURCE):
    # copy2 also preserves file metadata such as timestamps.
    shutil.copy2(TRAINED_MODEL_SOURCE, FINAL_MODEL_PATH)
    file_size_mb = os.path.getsize(FINAL_MODEL_PATH) / (1024 * 1024)
    print(f"Model saved: {FINAL_MODEL_PATH} ({file_size_mb:.2f} MB)")

    metadata = {
        'model_name': 'PPE Detection YOLOv8s',
        # json.dump writes the integer class ids as string keys.
        'classes': CLASS_NAMES,
        'training_date': datetime.now().isoformat(),
        'framework': 'YOLOv8'
    }

    with open(os.path.join(MODELS_DIR, 'model_metadata.json'), 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)
else:
    # Say so explicitly: the inference cells load FINAL_MODEL_PATH and would
    # otherwise fail later with a less obvious error.
    print(f"Trained weights not found: {TRAINED_MODEL_SOURCE} (nothing copied)")

## Cell 8: Image Inference

In [None]:
# Load the weights at FINAL_MODEL_PATH into a module-level model; both
# predict_ppe_on_image and process_video_for_ppe_detection read this global.
inference_model = YOLO(FINAL_MODEL_PATH)

def predict_ppe_on_image(image_path, conf_threshold=0.5):
    """Run the module-level ``inference_model`` on a single image file.

    Args:
        image_path: Path of the image to analyse.
        conf_threshold: Confidence threshold forwarded to ``predict``.

    Returns:
        Whatever ``inference_model.predict`` returns, or ``None`` when
        ``image_path`` does not exist.
    """
    if os.path.exists(image_path):
        # Use the first CUDA device when available, otherwise the CPU.
        target_device = 0 if torch.cuda.is_available() else 'cpu'
        return inference_model.predict(
            source=image_path,
            conf=conf_threshold,
            imgsz=640,
            device=target_device,
        )

    return None

# Smoke-test the saved model on one image from the validation split.
test_dir = os.path.join(DATASET_PATH, 'images', 'val')
if os.path.isdir(test_dir):
    # Sorted so the same sample is chosen on every run (os.listdir order is
    # arbitrary); '.jpeg' is accepted alongside '.jpg' and '.png'.
    sample_images = sorted(
        f for f in os.listdir(test_dir)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    if sample_images:
        results = predict_ppe_on_image(os.path.join(test_dir, sample_images[0]))
        if results and len(results[0].boxes) > 0:
            print(f"Detections: {len(results[0].boxes)}")
            for box in results[0].boxes:
                class_id = int(box.cls.item())
                # Fall back to a generic label rather than printing "None"
                # when the model emits an id missing from CLASS_NAMES.
                label = CLASS_NAMES.get(class_id, f"class_{class_id}")
                print(f"  {label}: {box.conf.item():.2%}")
        else:
            print(f"No detections in {sample_images[0]}")
    else:
        print(f"No sample images found in {test_dir}")
else:
    print(f"Validation image directory not found: {test_dir}")

## Cell 9: Video Inference

In [None]:
def process_video_for_ppe_detection(video_path, output_path, conf_threshold=0.5, frame_skip=1, max_frames=None):
    """Annotate a video with PPE detections and write the result to disk.

    Every ``frame_skip``-th frame is run through the module-level
    ``inference_model`` and written with its detections drawn; the frames in
    between are copied to the output unchanged.

    Args:
        video_path: Path of the input video.
        output_path: Path of the output video (written with the 'mp4v' codec).
        conf_threshold: Confidence threshold forwarded to ``predict``.
        frame_skip: Run inference on every N-th frame. Values below 1 are
            treated as 1 (every frame).
        max_frames: Stop after this many input frames. ``None`` or 0 means
            no limit.

    Returns:
        True when the video was processed, False when the input is missing or
        cannot be opened, or when the output writer cannot be created.
    """
    if not os.path.exists(video_path):
        print(f"Video not found: {video_path}")
        return False

    # A frame_skip of 0 would raise ZeroDivisionError in the modulo below.
    frame_skip = max(1, int(frame_skip))

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Failed to open video")
        cap.release()
        return False

    out = None
    frame_count = 0
    detection_count = 0
    frame_with_detections = 0

    try:
        # Keep the frame rate as a float: truncating 29.97 to 29 changes the
        # playback speed, and truncating a sub-1 value gives an unusable 0.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some containers report 0 (or NaN); fall back to a common default.
            fps = 30.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        print(f"Video: {width}x{height} @ {fps:.2f}fps ({total_frames} frames)")

        # Create the output directory so the writer does not fail on it.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            print(f"Failed to open video writer: {output_path}")
            return False

        # Loop-invariant, so resolved once instead of per frame.
        device = 0 if torch.cuda.is_available() else 'cpu'

        while True:
            # Check the limit before reading so that exactly max_frames frames
            # are consumed and the final count is accurate.
            if max_frames and frame_count >= max_frames:
                break

            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            if frame_count % frame_skip != 0:
                # Skipped frames are passed through without annotation.
                out.write(frame)
                continue

            try:
                results = inference_model.predict(
                    source=frame,
                    conf=conf_threshold,
                    imgsz=640,
                    device=device,
                    verbose=False
                )

                annotated_frame = results[0].plot()
                num_detections = len(results[0].boxes)

                if num_detections > 0:
                    detection_count += num_detections
                    frame_with_detections += 1

                if frame_count % 30 == 0:
                    print(f"Processed {frame_count}/{total_frames}")
            except Exception as e:
                # Deliberately best-effort: one bad frame should not abort the
                # whole video, so the raw frame is written instead.
                print(f"Error at frame {frame_count}: {e}")
                annotated_frame = frame

            out.write(annotated_frame)
    finally:
        # Release both handles even if something above raised.
        cap.release()
        if out is not None:
            out.release()

    print(f"Done: {frame_count} frames, {frame_with_detections} with detections")
    print(f"Total detections: {detection_count}")
    print(f"Saved: {output_path}")
    return True

# RESULTS_DIR is rebuilt from its own previous value on Colab, so the 'videos'
# suffix is appended only when it is not already there. Without the guard,
# re-running this cell would produce .../results/videos/videos.
if not IS_COLAB:
    RESULTS_DIR = 'results/videos'
elif os.path.basename(os.path.normpath(RESULTS_DIR)) != 'videos':
    RESULTS_DIR = os.path.join(RESULTS_DIR, 'videos')
os.makedirs(RESULTS_DIR, exist_ok=True)

# Run the detector on the first video (by name) found in ./videos, if any.
video_dir = 'videos'
if os.path.isdir(video_dir):
    # Sorted so the same video is chosen on every run (os.listdir order is
    # arbitrary).
    videos = sorted(
        f for f in os.listdir(video_dir)
        if f.lower().endswith(('.mp4', '.avi', '.mov', '.mkv'))
    )
    if videos:
        sample_video = os.path.join(video_dir, videos[0])
        output_video = os.path.join(RESULTS_DIR, 'ppe_detection_output.mp4')
        process_video_for_ppe_detection(sample_video, output_video, conf_threshold=0.5, frame_skip=2, max_frames=300)

## Summary

Complete PPE detection pipeline:
1. Environment setup
2. Dataset verification
3. Configuration
4. Model loading
5. Training
6. Evaluation
7. Model saving
8. Image inference
9. Video inference

Outputs: trained model at `models/ppe/best.pt`, metadata, annotated results