# Horizon-HUD: YOLOv8n Safety Detection Training

**Setup:**
1. Upload BDD100K images as a Kaggle dataset (e.g. `bdd100k-images`) with `train/` and `val/` folders
2. Upload BDD100K labels as a Kaggle dataset (e.g. `bdd100k-labels`) with `train/` and `val/` folders
3. Add both as inputs to this notebook (right sidebar > Add Data)
4. Set Accelerator to **GPU P100** or **T4 x2** in Settings
5. Enable **Internet** in Settings
6. Update the paths in Cell 2 below

**Resume after timeout:** Set `RESUME_FROM` in Cell 2 to your saved `last.pt` (the steps are listed in that cell).

In [4]:
!pip install -q ultralytics

In [None]:
# ============================================================
# CONFIGURATION
# ============================================================
import os
from pathlib import Path

# Show available datasets (top 3 levels only, no deep scan).
# The listing is only a convenience for filling in the paths below, so a
# missing input folder must not stop the configuration values from being set.
print("Available inputs:")
base = Path("/kaggle/input")
if not base.is_dir():
    print(f"  (not found: {base} -- add your datasets via Add Data)")
else:
    for d1 in sorted(base.iterdir()):
        print(f"  {d1}/")
        if not d1.is_dir():
            continue
        for d2 in sorted(d1.iterdir()):
            print(f"    {d2.name}/")
            if not d2.is_dir():
                continue
            for d3 in sorted(d2.iterdir()):
                if d3.is_dir():
                    # Preview at most five entries of each third-level folder.
                    print(f"      {d3.name}/  -> {sorted(os.listdir(d3))[:5]}")
print()

# ---- SET THESE TO MATCH THE OUTPUT ABOVE ----
# Each root must contain train/ and val/ subfolders.
IMAGES_ROOT = "/kaggle/input/100k-labels/100k/100k"
LABELS_ROOT = "/kaggle/input/100k-labels/100k_labels/100k_labels"

# Resume: to continue training after a timeout:
# 1. Download last.pt from previous run output
# 2. Upload it as a new Kaggle dataset (e.g. "horizon-checkpoint")
# 3. Add it as input and set the path here:
RESUME_FROM = None  # e.g. "/kaggle/input/horizon-checkpoint/last.pt"

# ============================================================
# ADVANCED (usually no need to change)
# ============================================================
EPOCHS = 100
IMGSZ = 640
BATCH = -1       # -1 = auto-detect best batch size for your GPU
WORKERS = 2

Scanning /kaggle/input/ ...


In [None]:
# ============================================================
# PREFLIGHT CHECKS
# ============================================================
import os, shutil, json, torch
from pathlib import Path

# Working directories shared with the cells below.
WORK = Path("/kaggle/working")
DATASET = WORK / "dataset"
MODELS = WORK / "models"
DOWNLOAD = WORK / "download"

# Problems are collected first and reported together further down.
errors = []

# GPU
if not torch.cuda.is_available():
    errors.append("No GPU detected. Go to Settings > Accelerator and select GPU.")
else:
    gpu = torch.cuda.get_device_name(0)
    vram = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU: {gpu} ({vram:.1f} GB)")

# Disk
free_gb = shutil.disk_usage(str(WORK)).free / 1e9
print(f"Disk free: {free_gb:.1f} GB")
if free_gb < 5:
    errors.append(f"Low disk space: {free_gb:.1f} GB free, need at least 5 GB.")

# Inputs: a resume run only needs the checkpoint, a fresh run needs both roots.
if RESUME_FROM:
    ckpt = Path(RESUME_FROM)
    if not ckpt.exists():
        errors.append(f"Resume checkpoint not found: {ckpt}")
else:
    for name, root in (("Images", IMAGES_ROOT), ("Labels", LABELS_ROOT)):
        root_dir = Path(root)
        if not root_dir.exists():
            errors.append(f"{name} root not found: {root_dir}  -- Check the dataset name in Add Data.")
            continue
        errors.extend(
            f"{name} missing {split}/ subfolder in {root_dir}"
            for split in ("train", "val")
            if not (root_dir / split).exists()
        )

if errors:
    print("\n=== ERRORS ===")
    print("\n".join(f"  - {problem}" for problem in errors))
    raise RuntimeError("Fix the errors above before continuing.")

if RESUME_FROM:
    print(f"Resuming from: {RESUME_FROM}")
else:
    imgs_root = Path(IMAGES_ROOT)
    lbls_root = Path(LABELS_ROOT)
    # Count only direct children of each split folder.
    n_train_img = sum(1 for _ in (imgs_root / "train").glob("*.jpg"))
    n_val_img = sum(1 for _ in (imgs_root / "val").glob("*.jpg"))
    n_train_lbl = sum(1 for _ in (lbls_root / "train").glob("*.json"))
    n_val_lbl = sum(1 for _ in (lbls_root / "val").glob("*.json"))
    print(f"Train: {n_train_img} images, {n_train_lbl} label files")
    print(f"Val:   {n_val_img} images, {n_val_lbl} label files")
    if n_train_img == 0:
        raise RuntimeError("No training images found. Check that your images dataset has train/*.jpg")

print("\nAll checks passed.")

In [None]:
# ============================================================
# PREPARE YOLO DATASET (skipped on resume)
# ============================================================
from concurrent.futures import ProcessPoolExecutor, as_completed

# Maps lower-cased BDD category names onto the four Horizon class ids
# (0 vehicle, 1 pedestrian, 2 cyclist, 3 road_obstacle).
BDD_TO_HORIZON = {
    "car": 0, "bus": 0, "truck": 0, "train": 0,
    "person": 1,
    "rider": 2, "bike": 2, "motor": 2, "motorcycle": 2, "bicycle": 2,
    "traffic sign": 3, "traffic light": 3,
}
# Pixel size used to clamp and normalise every box.
IMG_W, IMG_H = 1280, 720


def _to_yolo_line(obj):
    """Return the YOLO label line for one annotated object, or None to skip it.

    An object is skipped when it is not a dict, its category is unmapped, it
    has no usable ``box2d``, a coordinate is not numeric, or the box is empty
    after clamping to the image bounds.
    """
    if not isinstance(obj, dict):
        return None
    category = obj.get("category")
    if not isinstance(category, str):
        return None
    cls_id = BDD_TO_HORIZON.get(category.lower())
    if cls_id is None:
        return None
    box = obj.get("box2d")
    if not isinstance(box, dict) or not box:
        return None
    try:
        x1 = max(0.0, float(box.get("x1", 0)))
        y1 = max(0.0, float(box.get("y1", 0)))
        x2 = min(float(IMG_W), float(box.get("x2", 0)))
        y2 = min(float(IMG_H), float(box.get("y2", 0)))
    except (TypeError, ValueError):
        # One malformed box should not discard the rest of the file.
        return None
    if x2 <= x1 or y2 <= y1:
        return None
    cx = ((x1 + x2) / 2.0) / IMG_W
    cy = ((y1 + y2) / 2.0) / IMG_H
    w = (x2 - x1) / IMG_W
    h = (y2 - y1) / IMG_H
    return f"{cls_id} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}"


def convert_one(args):
    """Convert one per-image label JSON into a YOLO ``.txt`` label file.

    Args:
        args: ``(label_path, out_dir)`` tuple; a single argument so the
            function can be handed to an executor. The output file is
            ``out_dir/<label stem>.txt``.

    Returns:
        The number of boxes written (0 writes an empty label file), or -1
        when the file cannot be read or does not have the expected
        ``{"frames": [{"objects": [...]}]}`` layout. Nothing is written in
        the -1 case.
    """
    label_path, out_dir = args
    label_path = Path(label_path)
    out_file = Path(out_dir) / (label_path.stem + ".txt")

    try:
        with open(label_path, encoding="utf-8") as f:
            ann = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both invalid JSON and undecodable bytes.
        return -1

    # Structural problems are reported like parse failures; raising here
    # would propagate out of the worker and abort the whole conversion.
    if not isinstance(ann, dict):
        return -1
    frames = ann.get("frames") or []
    if not isinstance(frames, list):
        return -1
    if not frames:
        out_file.write_text("")
        return 0
    first = frames[0]
    if not isinstance(first, dict):
        return -1
    objects = first.get("objects") or []
    if not isinstance(objects, list):
        return -1

    # Only the first frame is converted.
    lines = [line for line in map(_to_yolo_line, objects) if line is not None]
    out_file.write_text("\n".join(lines) + ("\n" if lines else ""))
    return len(lines)

# The YOLO-format copy lives under /kaggle/working. When resuming in a new
# session that folder may be empty, so prep is skipped only if the copy is
# actually there. NOTE(review): assumes a resumed run still reads images and
# labels from DATASET -- confirm against the Ultralytics resume docs.
dataset_ready = all(
    (DATASET / kind / split).is_dir() and any((DATASET / kind / split).iterdir())
    for kind in ("images", "labels")
    for split in ("train", "val")
)

if RESUME_FROM and dataset_ready:
    print("Skipping dataset prep (resuming).")
else:
    if RESUME_FROM:
        print(f"Resuming, but {DATASET} is incomplete -- rebuilding it from IMAGES_ROOT / LABELS_ROOT.")
    imgs_root = Path(IMAGES_ROOT)
    lbls_root = Path(LABELS_ROOT)
    total_objects = 0
    total_errors = 0

    for split in ["train", "val"]:
        img_dst = DATASET / "images" / split
        lbl_dst = DATASET / "labels" / split
        img_dst.mkdir(parents=True, exist_ok=True)
        lbl_dst.mkdir(parents=True, exist_ok=True)

        # Symlink each image file (keeps /images/ in the path for YOLO)
        src_dir = imgs_root / split
        existing = set(os.listdir(img_dst))
        jpgs = sorted(src_dir.glob("*.jpg"))
        linked = 0
        for jpg in jpgs:
            if jpg.name not in existing:
                os.symlink(str(jpg), str(img_dst / jpg.name))
                linked += 1
        # Report the images that were really skipped, not every entry that
        # happened to be in the destination folder.
        print(f"{split}: linked {linked} images (skipped {len(jpgs) - linked} existing)")

        # Convert labels
        json_files = sorted((lbls_root / split).glob("*.json"))
        print(f"{split}: converting {len(json_files)} labels...")
        work = [(p, lbl_dst) for p in json_files]
        with ProcessPoolExecutor(max_workers=4) as pool:
            # Each task is tiny; chunksize sends them to the workers in
            # batches instead of creating one future per label file.
            for n in pool.map(convert_one, work, chunksize=256):
                if n < 0:
                    total_errors += 1
                else:
                    total_objects += n

    print(f"\nConverted: {total_objects} objects")
    if total_errors > 0:
        print(f"Warning: {total_errors} label files failed to parse (skipped)")

    # Verify
    n_img = len(list((DATASET / "images/train").glob("*.jpg")))
    n_lbl = len(list((DATASET / "labels/train").glob("*.txt")))
    print(f"Verification: {n_img} train images, {n_lbl} train labels")
    if n_lbl == 0:
        raise RuntimeError("No labels were created. Check your labels dataset structure.")

In [None]:
# ============================================================
# DATASET YAML (always recreated so resume works even if /working was cleared)
# ============================================================
dataset_yaml = DATASET / "dataset.yaml"

# Written unconditionally, including on resume: the file is tiny, and a fresh
# session starts with an empty /kaggle/working.
# NOTE(review): assumes a resumed run looks the dataset up through this same
# path -- confirm against the Ultralytics resume docs.
dataset_yaml.parent.mkdir(parents=True, exist_ok=True)
dataset_yaml.write_text(f"""path: {DATASET}
train: images/train
val: images/val

nc: 4
names:
  0: vehicle
  1: pedestrian
  2: cyclist
  3: road_obstacle
""")
print(f"Created: {dataset_yaml}")

In [None]:
# ============================================================
# TRAIN
# ============================================================
from ultralytics import YOLO

# train_ok is read by the final summary cell to tell a finished run from an
# interrupted one.
train_ok = False
try:
    if not RESUME_FROM:
        model = YOLO("yolov8n.pt")

        run_args = dict(
            data=str(dataset_yaml),
            epochs=EPOCHS,
            imgsz=IMGSZ,
            batch=BATCH,
            device=0,
            project=str(MODELS),
            name="horizon_v1",
            exist_ok=True,
        )
        # Safety-critical: maximize recall
        recall_args = dict(conf=0.001, iou=0.6)
        # Augmentation for road scenes
        augment_args = dict(
            hsv_h=0.015,
            hsv_s=0.7,
            hsv_v=0.4,
            degrees=0.0,
            translate=0.1,
            scale=0.5,
            fliplr=0.5,
            flipud=0.0,
            mosaic=1.0,
            mixup=0.1,
        )
        # Training
        optim_args = dict(
            optimizer="AdamW",
            lr0=0.001,
            lrf=0.01,
            warmup_epochs=3,
            weight_decay=0.0005,
            patience=20,
            save=True,
            save_period=5,
            val=True,
            plots=True,
            verbose=True,
            workers=WORKERS,
        )
        model.train(**run_args, **recall_args, **augment_args, **optim_args)
    else:
        print(f"Resuming from {RESUME_FROM}")
        model = YOLO(RESUME_FROM)
        model.train(resume=True)
    train_ok = True
    print("\nTraining completed successfully.")
except Exception as exc:
    # Deliberately not re-raised so the following cells can still collect
    # whatever checkpoints were written.
    print(f"\nTraining stopped: {exc}")
    print("Attempting to save whatever checkpoint exists...")

In [None]:
# ============================================================
# SAVE OUTPUTS (runs even if training was interrupted)
# ============================================================
DOWNLOAD.mkdir(exist_ok=True)

# Use the configured project folder; if it is absent, try the default
# ultralytics save location instead.
weights_dir = MODELS / "horizon_v1" / "weights"
if not weights_dir.exists():
    fallback_dir = Path("/kaggle/working/runs/detect/horizon_v1/weights")
    if fallback_dir.exists():
        weights_dir = fallback_dir

saved = []
have_weights = weights_dir.exists()
if not have_weights:
    print("No weights directory found.")
else:
    for name in ("best.pt", "last.pt"):
        checkpoint = weights_dir / name
        if not checkpoint.exists():
            continue
        shutil.copy(checkpoint, DOWNLOAD / name)
        saved.append(f"{name} ({checkpoint.stat().st_size / 1e6:.1f} MB)")

# Training plots and CSV logs sit next to the weights folder.
results_dir = weights_dir.parent if have_weights else None
if results_dir is not None and results_dir.exists():
    for pattern in ("*.png", "*.csv"):
        for artifact in results_dir.glob(pattern):
            shutil.copy(artifact, DOWNLOAD / artifact.name)
            saved.append(artifact.name)

print(f"Saved to {DOWNLOAD}:")
for entry in saved:
    print(f"  {entry}")

if not saved:
    print("Nothing saved - training may not have produced any checkpoints.")
elif any(entry.split()[0] == "last.pt" for entry in saved):
    print("\nTo resume: upload last.pt as a Kaggle dataset and set RESUME_FROM in Cell 2.")

In [None]:
# ============================================================
# EXPORT TO TFLITE (only if training completed)
# ============================================================
# best_path is also read by the visual-test cell below.
best_path = DOWNLOAD / "best.pt"
if best_path.exists():
    try:
        best = YOLO(str(best_path))
        # The export folder can hold several .tflite variants, so grabbing
        # the first one found may copy a non-int8 model under the int8 name.
        # NOTE(review): assumes export() returns the path of the file it
        # produced -- confirm against the Ultralytics export docs.
        exported = best.export(format="tflite", imgsz=320, int8=True)

        tflite_src = None
        if isinstance(exported, (str, Path)) and str(exported).endswith(".tflite"):
            if Path(exported).is_file():
                tflite_src = Path(exported)

        if tflite_src is None:
            # Fallback: deterministic search of the export folders,
            # int8-named files first.
            search_roots = (best_path.parent, Path("."))
            candidates = sorted(
                {p.resolve() for root in search_roots for p in root.rglob("*_saved_model/*.tflite")}
            )
            candidates.sort(key=lambda p: "int8" not in p.name)
            tflite_src = candidates[0] if candidates else None

        if tflite_src is None:
            print("TFLite export finished but no .tflite file was found.")
        else:
            # Only a file that is actually int8 gets the int8 name.
            dst_name = "horizon_v1_int8.tflite" if "int8" in tflite_src.name else tflite_src.name
            shutil.copy(tflite_src, DOWNLOAD / dst_name)
            print(f"TFLite exported: {DOWNLOAD / dst_name}")
    except Exception as e:
        # Export is optional; report and carry on so later cells still run.
        print(f"TFLite export failed: {e}")
        print("You can export locally: yolo export model=best.pt format=tflite imgsz=320 int8")
else:
    print("No best.pt found, skipping export.")

In [None]:
# ============================================================
# QUICK VISUAL TEST (only if best.pt exists)
# ============================================================
if not best_path.exists():
    print("No best.pt, skipping test.")
else:
    import glob

    # Use the prepared dataset when present, otherwise the raw val images.
    val_dir = DATASET / "images" / "val"
    if not val_dir.exists():
        val_dir = Path(IMAGES_ROOT) / "val"

    # First five validation images in name order.
    test_imgs = sorted(glob.glob(str(val_dir / "*.jpg")))[:5]
    if not test_imgs:
        print("No val images found for testing.")
    else:
        best = YOLO(str(best_path))
        predict_args = dict(
            source=test_imgs,
            conf=0.25,
            iou=0.6,
            imgsz=640,
            save=True,
            project=str(DOWNLOAD),
            name="test_predictions",
            exist_ok=True,
        )
        results = best.predict(**predict_args)
        for r in results:
            count = 0 if r.boxes is None else len(r.boxes)
            print(f"{Path(r.path).name}: {count} detections")

In [None]:
# ============================================================
# FINAL SUMMARY
# ============================================================
rule = "=" * 50
print(rule)
print("DOWNLOAD FILES:")
if DOWNLOAD.exists():
    # List every file under the download folder with its size.
    for item in sorted(DOWNLOAD.rglob("*")):
        if not item.is_file():
            continue
        size = item.stat().st_size / 1e6
        print(f"  {item.relative_to(DOWNLOAD)}  ({size:.1f} MB)")
print(rule)

if train_ok:
    print("\nTraining completed! Download best.pt and the .tflite for your Pi5.")
elif (DOWNLOAD / "last.pt").exists():
    # A checkpoint without a clean finish means the run can be resumed.
    for line in (
        "\nTraining was interrupted!",
        "To resume:",
        "  1. Download last.pt from this notebook's output",
        "  2. Create a new Kaggle dataset from it",
        "  3. Set RESUME_FROM in Cell 2 to the path",
        "  4. Run all cells again",
    ):
        print(line)