# PPE Detection with YOLOv8 — End-to-End

This notebook trains and tests a YOLOv8 model for PPE (Personal Protective Equipment) detection on a **local Windows dataset**, and includes **real‑time** testing (webcam / video).

### What you need
- A dataset in **YOLO format** (images + labels) at the path you provided:
  - `C:\Users\gopeami\OneDrive - Vesuvius\Desktop\PhD13- 2025-2026\ML Practice\PPE-Detection\YOLO_V8`
  - Expected structure (adjust if yours differs):
    ```text
    YOLO_V8/
      images/
        train/ ... .jpg|.png
        val/   ... .jpg|.png
        # (optional) test/ ... .jpg|.png
      labels/
        train/ ... .txt   # YOLO labels
        val/   ... .txt
        # (optional) test/ ... .txt
    ```

> If your folders or class names are different, just edit the **Data YAML** cell in this notebook before training.


In [None]:
# 1) Install dependencies (run once)
%pip -q install ultralytics opencv-python==4.* roboflow  # roboflow optional; handy utils

import os, sys, shutil, textwrap, yaml, glob
from pathlib import Path
from ultralytics import YOLO

# The version string lives on the `ultralytics` package, not on the YOLO class,
# so probing `YOLO.__version__` always fell through to the "OK" placeholder.
import ultralytics

print("Ultralytics version:", ultralytics.__version__)
print("Python:", sys.version)


In [None]:
# 2) Define paths and create data.yaml

# A raw string keeps the backslashes of the Windows path literal (no escape
# processing). Spaces in the path need no special treatment.
WINDOWS_DATASET_ROOT = r"""C:\Users\gopeami\OneDrive - Vesuvius\Desktop\PhD13- 2025-2026\ML Practice\PPE-Detection\YOLO_V8"""

# Absolute image folders for each split, kept as plain strings so they
# serialise directly into the YAML file.
dataset_root = Path(WINDOWS_DATASET_ROOT)
images_train = str(dataset_root / "images" / "train")
images_val   = str(dataset_root / "images" / "val")
images_test  = str(dataset_root / "images" / "test")  # optional split

# Working directory (under the notebook's current directory) for the YAML and run outputs.
WORKDIR = Path.cwd() / "yolo_ppe_work"
WORKDIR.mkdir(exist_ok=True, parents=True)

# === EDIT YOUR CLASS NAMES HERE IF NEEDED ===
# The list position of each name is the class ID used in the label files.
names = ['person', 'helmet', 'vest', 'gloves', 'boots', 'mask']

data_yaml = {
    "path": None,                      # not used when absolute paths provided
    "train": images_train,
    "val": images_val,
    # Only advertise a test split when the folder is actually present.
    "test": images_test if Path(images_test).exists() else None,
    "names": names
}

data_yaml_path = WORKDIR / "ppe_data.yaml"
with open(data_yaml_path, "w", encoding="utf-8") as f:
    yaml.safe_dump(data_yaml, f, sort_keys=False, allow_unicode=True)

print("Wrote data YAML ->", data_yaml_path)
# Read back with the same encoding used for writing: the Windows default
# (locale code page) would garble or reject non-ASCII paths and class names.
print("\nYAML preview:\n", data_yaml_path.read_text(encoding="utf-8"))


In [None]:
# 3) Quick dataset sanity checks
from pathlib import Path

def count_files(folder, exts=frozenset({".jpg", ".jpeg", ".png", ".bmp"})):
    """Count image files under *folder*, searching subfolders recursively.

    Args:
        folder: Directory to scan (``str`` or ``Path``).
        exts: Lower-case file suffixes (with leading dot) that count as
            images. Any container supporting ``in`` works.

    Returns:
        Number of regular files whose lower-cased suffix is in *exts*;
        ``0`` when *folder* is missing or is not a directory.
    """
    root = Path(folder)
    if not root.is_dir():
        return 0
    # is_file() keeps directories that merely *look* like images
    # (e.g. a folder named "crops.jpg") out of the count.
    return sum(
        1
        for entry in root.rglob("*")
        if entry.suffix.lower() in exts and entry.is_file()
    )

def count_labels(folder):
    """Count ``.txt`` label files under *folder*, searching subfolders recursively.

    Args:
        folder: Directory to scan (``str`` or ``Path``).

    Returns:
        Number of regular files matching ``*.txt``; ``0`` when *folder*
        is missing or is not a directory.
    """
    root = Path(folder)
    if not root.is_dir():
        return 0
    # Skip directories that happen to be named like a label file.
    return sum(1 for entry in root.rglob("*.txt") if entry.is_file())

def _labels_dir_for(images_dir):
    """Return the labels folder paired with *images_dir*.

    The last path component named ``images`` is swapped for ``labels``, so
    ``<root>/images/train`` maps to ``<root>/labels/train``.

    Raises:
        ValueError: If no path component is named ``images``.
    """
    parts = list(Path(images_dir).parts)
    for idx in reversed(range(len(parts))):
        if parts[idx] == "images":
            parts[idx] = "labels"
            return Path(*parts)
    raise ValueError(f"Expected an 'images' folder in the path: {images_dir}")


# Count once and reuse the value for both the report and the check below.
num_train_images = count_files(data_yaml["train"])
print("Train images:", num_train_images)
print("Val images  :", count_files(data_yaml["val"]))
if data_yaml.get("test"):
    print("Test images :", count_files(data_yaml["test"]))

# The previous `with_name("labels") / "train"` resolved to
# <root>/images/labels/train, so the label counts were always 0 for the
# documented <root>/labels/<split> layout.
lbl_train = count_labels(_labels_dir_for(data_yaml["train"]))
lbl_val   = count_labels(_labels_dir_for(data_yaml["val"]))
print("Train labels:", lbl_train)
print("Val labels  :", lbl_val)

assert num_train_images > 0, "No training images found — check the path in data.yaml"
assert lbl_train > 0, "No training labels found — check YOLO `labels/train` files"


In [None]:
# 4) Train YOLOv8 (choose model size: n, s, m, l, x)
import torch  # already required by Ultralytics; used here only to probe for CUDA

model = YOLO("yolov8n.pt")  # start from the pretrained YOLOv8-nano checkpoint

# Ask for GPU 0 only when CUDA is actually usable. Requesting device=0 on a
# CPU-only machine makes training fail instead of falling back to the CPU.
# Kaggle kernels keep the CPU setting they had before.
on_kaggle = bool(os.environ.get("KAGGLE_KERNEL_RUN_TYPE"))
train_device = 0 if (torch.cuda.is_available() and not on_kaggle) else "cpu"

results = model.train(
    data=str(data_yaml_path),
    epochs=50,            # adjust based on dataset size
    imgsz=640,            # 640 is standard; increase for more accuracy / more VRAM
    batch=16,             # tune for your GPU VRAM; on CPU keep smaller
    device=train_device,  # GPU 0 when available, otherwise CPU
    patience=20,          # early stopping
    name="ppe_yolov8",
    project=str(WORKDIR / "runs"),
)


In [None]:
# 5) Validate / metrics
# Evaluate the current model on the dataset described by the YAML file,
# then print the resulting metrics dictionary.
val_settings = {"data": str(data_yaml_path), "imgsz": 640}
metrics = model.val(**val_settings)
print(metrics.results_dict)


In [None]:
# 6) Predict on a few validation images and show results
import random, shutil
from IPython.display import display
from PIL import Image

# Suffixes treated as images, both when sampling inputs and when listing outputs.
image_exts = {".jpg", ".jpeg", ".png"}

val_images = list(Path(data_yaml["val"]).rglob("*"))
sample_images = [str(p) for p in val_images if p.suffix.lower() in image_exts]
if not sample_images:
    # Fail early with a clear message instead of passing an empty source to
    # predict() and indexing into an empty result list below.
    raise FileNotFoundError(f"No validation images found under {data_yaml['val']}")
random.shuffle(sample_images)
sample_images = sample_images[:6]  # pick a few

pred = model.predict(
    source=sample_images,
    imgsz=640,
    conf=0.25,
    save=True,
    project=str(WORKDIR / "runs"),
    name="ppe_preds",
)

# Display first few results inline
pred_dir = Path(pred[0].save_dir)
display_images = sorted(p for p in pred_dir.glob("*") if p.suffix.lower() in image_exts)[:6]
for p in display_images:
    # Close each file once rendered so the run folder is not left locked on Windows.
    with Image.open(p) as img:
        display(img)
print("Predictions saved in:", pred_dir)


In [None]:
# 7) Real‑time detection — Webcam (source=0)
# NOTE: This opens a native OpenCV window; press 'q' to quit.
# If running on a remote notebook without a camera, skip this cell.
import cv2
from ultralytics.utils.plotting import Annotator, colors

def webcam_inference(model, source=0, conf=0.25):
    """Run detection frame by frame on a video source and show the annotated frames.

    Args:
        model: Object exposing ``predict(...)`` and a ``names`` mapping with ``.get``.
        source: Value handed to ``cv2.VideoCapture``.
        conf: Confidence threshold forwarded to ``model.predict``.

    Raises:
        RuntimeError: If the capture cannot be opened.

    The loop stops when a frame cannot be read or when 'q' is pressed. The
    capture is released and all OpenCV windows are destroyed on exit.
    """
    capture = cv2.VideoCapture(source)
    if not capture.isOpened():
        raise RuntimeError("Could not open video source (webcam).")

    window_title = "YOLOv8 PPE — Webcam (press q to quit)"
    quit_key = ord('q')

    try:
        while True:
            ok, frame = capture.read()
            if not ok:
                break

            # One frame in, one result object out.
            detections = model.predict(frame, imgsz=640, conf=conf, verbose=False)[0]
            painter = Annotator(frame, line_width=2)

            found = detections.boxes
            if found is not None and len(found) > 0:
                for det in found:
                    corners = det.xyxy[0].cpu().numpy().astype(int)
                    class_index = int(det.cls[0].item())
                    score = float(det.conf[0].item())
                    # Fall back to the numeric ID when the class has no name.
                    class_name = model.names.get(class_index, class_index)
                    caption = f"{class_name} {score:.2f}"
                    painter.box_label(corners, caption, color=colors(class_index, True))

            cv2.imshow(window_title, painter.result())
            pressed = cv2.waitKey(1) & 0xFF
            if pressed == quit_key:
                break
    finally:
        capture.release()
        cv2.destroyAllWindows()

# Uncomment to run locally:
# webcam_inference(model, source=0, conf=0.25)


In [None]:
# 8) Real‑time from video file or RTSP stream
# Examples:
#   video_path = r"C:\path\to\your\ppe_video.mp4"
#   video_path = "rtsp://user:pass@<ip>:<port>/stream"
# Uncomment and set your source below, then run.

# video_path = r"C:\path\to\your\ppe_video.mp4"
# webcam_inference(model, source=video_path, conf=0.25)


In [None]:
# 9) Export model (ONNX, TensorRT, etc.) — optional
# See https://docs.ultralytics.com/modes/export/
# Example: ONNX export
# model.export(format="onnx", opset=12, imgsz=640)


### Tips & Gotchas
- **Class order** in `names` **must match** your label IDs in the `.txt` files.
- Write Windows paths as **raw strings** like `r"C:\Users\...\OneDrive - Vesuvius\..."` so the backslashes are not treated as escape sequences; spaces in the path need no special handling.
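- If your dataset folders differ, **edit** the `data_yaml` cell to point to the correct `images/*` folders. Ultralytics looks for each label file by replacing `images` with `labels` in the image path, so keep a parallel `labels/*` tree.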
- Start with `yolov8n.pt` for quick feedback; then try `yolov8s.pt` / `yolov8m.pt` for higher accuracy.
