# YOLOv8 PPE — Train Using Existing `data.yaml`

This notebook **uses your own `data.yaml` directly**, reading it from its existing location on your Windows machine, so you don't have to recreate it.

**Your YAML path (edit if needed):**
```
C:\Users\gopeami\OneDrive - Vesuvius\Desktop\PhD13- 2025-2026\ML Practice\PPE-Detection\YOLO_V8\data.yaml
```


In [None]:
# 0) Install dependencies
%pip -q install ultralytics opencv-python==4.* pyyaml

import os, sys, yaml
from pathlib import Path
from ultralytics import YOLO

DATA_YAML = r"""C:\Users\gopeami\OneDrive - Vesuvius\Desktop\PhD13- 2025-2026\ML Practice\PPE-Detection\YOLO_V8\data.yaml"""
print("Ultralytics:", getattr(YOLO, "__version__", "OK"))
print("Python     :", sys.version)
print("DATA_YAML  :", DATA_YAML)
assert Path(DATA_YAML).exists(), "data.yaml not found — check the path above."


In [None]:
# 1) Inspect YAML and sanity checks
from pathlib import Path

# Parse the dataset description; safe_load only builds plain Python objects.
with open(DATA_YAML, "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# An empty file parses to None and a top-level list parses to a list; both would make the
# cfg.get(...) calls further down fail with an unhelpful AttributeError.
assert isinstance(cfg, dict), (
    "data.yaml must contain a top-level mapping (train / val / names) — "
    f"got {type(cfg).__name__} instead."
)

print("YAML loaded:")
print(cfg)

def count_imgs(p):
    """Count entries below ``p`` (searched recursively) that have an image suffix.

    The suffix is compared case-insensitively against .jpg, .jpeg, .png and .bmp.
    Returns 0 when ``p`` does not exist.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".bmp")
    root = Path(p)
    if root.exists():
        matches = [entry for entry in root.rglob("*") if entry.suffix.lower() in image_suffixes]
        return len(matches)
    return 0

def count_lbls(p):
    """Count ``*.txt`` entries below ``p`` (searched recursively); 0 when ``p`` does not exist."""
    root = Path(p)
    return len(list(root.rglob("*.txt"))) if root.exists() else 0

# Resolve the split folders relative to the dataset root (not the notebook's working
# directory), then count images and labels for each split.
def _resolve_split(entry, root):
    """Return the absolute folder for one split entry of data.yaml, or None when it is unset.

    entry: raw 'train' / 'val' / 'test' value from the YAML; a single string path is expected
           (list or .txt-file entries are not expanded by this sanity check).
    root:  dataset root that relative entries are joined to.
    """
    if not entry:
        return None
    entry = str(entry)
    # Joining an absolute entry to root yields the entry itself, so absolute paths pass through.
    candidates = [root / entry]
    if entry.startswith(("../", "..\\")):
        # Exported YAMLs often write '../train/images' although the folder sits next to the YAML.
        candidates.append(root / entry[3:])
    candidates.append(Path(entry))  # last resort: relative to the current working directory
    for cand in candidates:
        if cand.exists():
            return cand.resolve()
    return candidates[0].resolve()


def _labels_dir_for(img_dir):
    """Return the label folder paired with an image folder.

    The last path component named 'images' is swapped for 'labels'; when there is no such
    component the labels are expected to sit beside the images, so the folder is returned as is.
    """
    parts = list(Path(img_dir).parts)
    for i in range(len(parts) - 1, -1, -1):
        if parts[i] == "images":
            parts[i] = "labels"
            return Path(*parts)
    return Path(img_dir)


# Dataset root: the YAML's optional 'path' key, otherwise the folder that holds the YAML.
yaml_dir = Path(DATA_YAML).resolve().parent
dataset_root = Path(cfg["path"]) if cfg.get("path") else yaml_dir
if not dataset_root.is_absolute() and not dataset_root.exists():
    # Best effort for a relative 'path' that does not exist from the working directory.
    dataset_root = yaml_dir / dataset_root

train_dir = _resolve_split(cfg.get("train"), dataset_root)
val_dir   = _resolve_split(cfg.get("val") or cfg.get("validation"), dataset_root)
test_dir  = _resolve_split(cfg.get("test"), dataset_root)

assert train_dir is not None, "data.yaml has no 'train' entry."
assert val_dir is not None, "data.yaml has no 'val' entry."

# Count once and reuse: each count walks the whole folder tree.
n_train = count_imgs(train_dir)
n_val   = count_imgs(val_dir)
print("\nTrain images:", n_train, f"({train_dir})")
print("Val images  :", n_val, f"({val_dir})")
if test_dir:
    print("Test images :", count_imgs(test_dir), f"({test_dir})")

# Infer the label folder for each split from its image folder; warn if counts look off.
lbl_train_dir = _labels_dir_for(train_dir)
lbl_val_dir   = _labels_dir_for(val_dir)
labels_root   = lbl_train_dir.parent  # name kept so later cells that reference it still run
lbl_train = count_lbls(lbl_train_dir)
lbl_val   = count_lbls(lbl_val_dir)
print("\nTrain labels:", lbl_train, f"({lbl_train_dir})")
print("Val labels  :", lbl_val, f"({lbl_val_dir})")

for split, n_img, n_lbl in (("train", n_train, lbl_train), ("val", n_val, lbl_val)):
    if n_lbl < n_img:
        # Unlabelled background images are legitimate, so this is a hint rather than an error.
        print(f"WARNING: {split} has {n_img} images but only {n_lbl} label files — check the paths above.")

assert n_train > 0, "No training images found — verify 'train' path in YAML."
assert lbl_train > 0, f"No training labels found — verify YOLO labels exist under {lbl_train_dir}."
names = cfg.get("names", [])
print("\nClasses (names):", names, " (n=", len(names), ")")


In [None]:
# 2) Train — choose a model size (n, s, m, l, x). Start with 'n' for speed.
import torch  # already installed as a dependency of ultralytics

# Use the first CUDA GPU when one is available and fall back to the CPU otherwise.
# Hard-coding device=0 fails on machines without a usable CUDA device.
TRAIN_DEVICE = 0 if torch.cuda.is_available() else "cpu"
print("Training device:", TRAIN_DEVICE)

model = YOLO("yolov8n.pt")
results = model.train(
    data=DATA_YAML,
    epochs=50,
    imgsz=640,
    batch=16,
    device=TRAIN_DEVICE,
    patience=20,  # early-stopping patience, in epochs
    name="ppe_yolov8_yaml",
    project="yolo_ppe_work/runs"
)

# Re-running this cell does not overwrite an existing run folder; a numeric suffix is added
# to the name instead. Print the folder actually used so later cells load the right weights.
print("Run directory:", model.trainer.save_dir)


In [None]:
# 3) Validate
# Evaluate the model from the training cell on the dataset described by DATA_YAML.
val_kwargs = {"data": DATA_YAML, "imgsz": 640}
metrics = model.val(**val_kwargs)
print(metrics.results_dict)


In [None]:
# 4) Predict a few samples from the validation set
import random
from pathlib import Path
from IPython.display import display
from PIL import Image

IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".bmp"}  # same suffixes the sanity-check cell counts
N_SAMPLES = 6
SAMPLE_SEED = 0  # fixed so a re-run shows the same images; set to None for a fresh draw

# val_dir is defined in the sanity-check cell (step 1); sorting makes the draw independent
# of the order in which the filesystem lists the files.
val_images = sorted(p for p in Path(val_dir).rglob("*") if p.suffix.lower() in IMAGE_EXTS)
assert val_images, f"No validation images found under {val_dir} — verify 'val' path in YAML."

# A private Random instance leaves the global random state untouched.
picker = random.Random(SAMPLE_SEED)
sample_images = [str(p) for p in picker.sample(val_images, min(N_SAMPLES, len(val_images)))]

pred = model.predict(
    source=sample_images,
    imgsz=640,
    conf=0.25,
    save=True,
    project="yolo_ppe_work/runs",
    name="ppe_preds_yaml",
)

# Show the annotated images that predict() wrote to its output folder.
pred_dir = Path(pred[0].save_dir)
display_imgs = sorted(p for p in pred_dir.glob("*") if p.suffix.lower() in IMAGE_EXTS)[:N_SAMPLES]
for p in display_imgs:
    # Context manager closes the file handle once the image has been rendered.
    with Image.open(p) as im:
        display(im)

print("Predictions saved in:", pred_dir)


In [None]:
# 5) Realtime — webcam/video
import cv2
from ultralytics.utils.plotting import Annotator, colors

def stream_inference(weights, source=0, conf=0.25):
    """Run a YOLO model on a webcam or video source and show annotated frames in an OpenCV window.

    Args:
        weights: path to the weights file handed to YOLO().
        source: anything cv2.VideoCapture accepts (device index or video file path).
        conf: confidence threshold forwarded to model.predict.

    Raises:
        RuntimeError: if the source cannot be opened.

    The loop ends when the source stops delivering frames, when 'q' is pressed, or when
    the window is closed. The capture and all OpenCV windows are released on every exit path.
    """
    # ASCII-only title: non-ASCII window names can render garbled in OpenCV's GUI on Windows.
    window = "YOLOv8 PPE - Realtime (q to quit)"
    model = YOLO(weights)
    cap = cv2.VideoCapture(source)
    try:
        # Checked inside the try so the capture object is released even when opening failed.
        if not cap.isOpened():
            raise RuntimeError(f"Could not open source: {source}")
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            res = model.predict(frame, imgsz=640, conf=conf, verbose=False)[0]
            annotator = Annotator(frame, line_width=2)
            boxes = res.boxes
            if boxes is not None and len(boxes) > 0:
                # Move each tensor to the CPU once per frame instead of once per box.
                xyxy = boxes.xyxy.cpu().numpy().astype(int)
                cls_ids = boxes.cls.cpu().numpy().astype(int)
                confs = boxes.conf.cpu().numpy()
                for b, cls_id, confv in zip(xyxy, cls_ids, confs):
                    cls_id = int(cls_id)
                    # Fall back to the numeric id when the model has no name for the class.
                    label = f"{model.names.get(cls_id, cls_id)} {float(confv):.2f}"
                    annotator.box_label(b, label, color=colors(cls_id, True))

            cv2.imshow(window, annotator.result())
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            # Closing the window would otherwise leave the loop running until the kernel is interrupted.
            if cv2.getWindowProperty(window, cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Example usage after training:
# stream_inference("yolo_ppe_work/runs/ppe_yolov8_yaml/weights/best.pt", source=0, conf=0.25)
# stream_inference("yolo_ppe_work/runs/ppe_yolov8_yaml/weights/best.pt", source=r"C:\path\to\video.mp4")
