# Billboard Segmentation v2 - Curated Dataset

- 458 hand-picked images with SAM segmentation masks
- YOLOv8l-seg (large model) at 1280px resolution
- Colab Pro GPU (A100/V100)

**Set runtime:** Runtime > Change runtime type > A100 GPU

In [None]:
# Step 1: Setup
# Checks that a CUDA GPU is visible, installs ultralytics, then downloads and
# unpacks the curated dataset archive into the current working directory.
import os
import subprocess
import sys
import zipfile

import torch

assert torch.cuda.is_available(), "No GPU!"
print(f"GPU: {torch.cuda.get_device_name(0)}")
print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

# check=True makes a failed install stop the cell instead of being ignored
# (os.system only returns the exit status, which was previously discarded).
# Using `sys.executable -m pip` installs into the interpreter running this kernel.
subprocess.run([sys.executable, '-m', 'pip', 'install', '-q', 'ultralytics'], check=True)

# Download curated dataset
DATASET_URL = 'https://github.com/fxsBulqit/billboard-segmentation/releases/download/v2.0-curated/curated_dataset.zip'
DATASET_ZIP = 'curated_dataset.zip'
# -O pins the output name so re-running the cell overwrites the archive rather
# than saving a numbered copy next to a stale one; check=True aborts before
# extraction if the download fails.
subprocess.run(['wget', '-q', '-O', DATASET_ZIP, DATASET_URL], check=True)

with zipfile.ZipFile(DATASET_ZIP, 'r') as z:
    z.extractall('.')

# The archive is expected to unpack to data/curated_dataset/{images,labels};
# os.listdir raises FileNotFoundError here if that layout is not present.
n_img = len(os.listdir('data/curated_dataset/images'))
n_lbl = len(os.listdir('data/curated_dataset/labels'))
print(f"Images: {n_img}, Labels: {n_lbl}")
print("Done!")

In [None]:
# Step 2: Split into train/val/test
# Pairs every label file with its .jpg image, shuffles the pairs with a fixed
# seed, copies them into an 80/15/5 train/val/test layout under
# billboard_seg_v2/, and writes the data.yaml that describes that layout.
import shutil, random
from pathlib import Path

SRC_IMG = Path('data/curated_dataset/images')
SRC_LBL = Path('data/curated_dataset/labels')
DS = Path('billboard_seg_v2')

# Keep only labels that have a matching image, so no split receives an orphan
# label and the counts printed below reflect what was actually copied.
label_stems = [f.stem for f in sorted(SRC_LBL.glob('*.txt'))]
names = [stem for stem in label_stems if (SRC_IMG / f'{stem}.jpg').exists()]

n_skipped = len(label_stems) - len(names)
if n_skipped:
    print(f"Warning: skipped {n_skipped} label(s) with no matching .jpg image")
if not names:
    # Fail before touching DS so an existing split is not wiped for nothing.
    raise FileNotFoundError(
        f"No image/label pairs found in {SRC_IMG} and {SRC_LBL}"
    )

# Start from a clean output tree so files from a previous split cannot linger.
if DS.exists():
    shutil.rmtree(DS)

for split in ['train', 'val', 'test']:
    (DS / split / 'images').mkdir(parents=True)
    (DS / split / 'labels').mkdir(parents=True)

# Fixed seed over a sorted list keeps the split reproducible across runs.
random.seed(42)
random.shuffle(names)

n = len(names)
train_end = int(0.80 * n)
val_end = int(0.95 * n)
splits = {
    'train': names[:train_end],
    'val': names[train_end:val_end],
    'test': names[val_end:],
}

for split, split_names in splits.items():
    for name in split_names:
        shutil.copy2(SRC_IMG / f'{name}.jpg', DS / split / 'images' / f'{name}.jpg')
        shutil.copy2(SRC_LBL / f'{name}.txt', DS / split / 'labels' / f'{name}.txt')
    print(f"{split}: {len(split_names)}")

# data.yaml
# `path` is the resolved location of DS, so it matches where the files were
# just written (this is /content/billboard_seg_v2 when the working directory
# is /content).
data_yaml = f"""path: {DS.resolve()}
train: train/images
val: val/images
test: test/images

nc: 1
names: ['billboard']
"""
with open(DS / 'data.yaml', 'w') as f:
    f.write(data_yaml)
print("Dataset ready!")

In [None]:
# Step 3: Train YOLOv8l-seg at 1280px
# Loads the yolov8l-seg.pt checkpoint and calls model.train with the run
# settings and augmentation settings below, forwarded as keyword arguments.
from ultralytics import YOLO

# Dataset, schedule, and hardware settings for the run.
run_settings = dict(
    data='/content/billboard_seg_v2/data.yaml',
    epochs=150,
    imgsz=1280,
    batch=8,
    patience=20,
    device=0,
    workers=4,
    name='billboard_seg_v2',
)

# Augmentation
augmentation_settings = dict(
    hsv_h=0.015,
    hsv_s=0.5,
    hsv_v=0.3,
    degrees=5.0,
    translate=0.1,
    scale=0.3,
    flipud=0.0,
    fliplr=0.5,
    mosaic=0.8,
    copy_paste=0.1,
)

model = YOLO('yolov8l-seg.pt')
results = model.train(**run_settings, **augmentation_settings)

In [None]:
# Step 4: View results
# Locates the most recent 'billboard_seg_v2*' run directory under runs/segment
# and displays its training curves and validation prediction previews, when
# those image files exist.
from IPython.display import Image, display
from pathlib import Path

run_candidates = [
    d for d in Path('runs/segment').glob('billboard_seg_v2*') if d.is_dir()
]
if not run_candidates:
    raise FileNotFoundError(
        "No 'billboard_seg_v2*' run found under runs/segment - run Step 3 first"
    )
# Pick by modification time rather than by name: a plain string sort places
# e.g. 'billboard_seg_v210' before 'billboard_seg_v22', so the last sorted
# name is not necessarily the newest run once training has been repeated.
# NOTE(review): assumes repeated runs get a numeric suffix appended to the
# run name - confirm against the installed ultralytics version.
run_dir = max(run_candidates, key=lambda d: d.stat().st_mtime)
print(f"Results: {run_dir}")

results_png = run_dir / 'results.png'
if results_png.exists():
    display(Image(filename=str(results_png), width=900))

for name in ['val_batch0_pred.jpg', 'val_batch1_pred.jpg']:
    p = run_dir / name
    if p.exists():
        print(f"\n{name}:")
        display(Image(filename=str(p), width=900))

In [None]:
# Step 5: Test on held-out images
# Runs the best checkpoint from run_dir on up to 8 test images and shows the
# annotated predictions in a 2x4 grid, titled with the number of masks found.
import cv2, numpy as np
import matplotlib.pyplot as plt

best = YOLO(str(run_dir / 'weights' / 'best.pt'))
test_imgs = sorted(Path('billboard_seg_v2/test/images').glob('*.jpg'))[:8]

fig, axes = plt.subplots(2, 4, figsize=(24, 12))
# axes.flat walks the grid row by row, the same order as idx // 4, idx % 4.
for ax, img_path in zip(axes.flat, test_imgs):
    r = best(str(img_path), verbose=False, imgsz=1280)[0]
    # The plotted array is converted BGR -> RGB before handing it to matplotlib.
    ann = cv2.cvtColor(r.plot(), cv2.COLOR_BGR2RGB)
    ax.imshow(ann)
    # Explicit None check: a result without masks contributes a count of 0.
    n = len(r.masks.xy) if r.masks is not None else 0
    ax.set_title(f"{n} billboard(s)")
# Hide the frame and ticks on every panel, including panels left unused when
# fewer than 8 test images are available.
for ax in axes.flat:
    ax.axis('off')
plt.suptitle('v2 Segmentation Results (1280px)', fontsize=16)
plt.tight_layout()
plt.show()

In [None]:
# Step 6: Download model
# Copies the best checkpoint next to the notebook, zips the whole run
# directory, and hands both files to the Colab download helper.
import shutil

WEIGHTS_COPY = 'billboard_seg_v2_best.pt'
RESULTS_ARCHIVE = 'training_results_v2.zip'

best_weights = run_dir / 'weights' / 'best.pt'
shutil.copy2(best_weights, WEIGHTS_COPY)

# Report the checkpoint size in megabytes (bytes / 1e6).
size = os.path.getsize(WEIGHTS_COPY) / 1e6
print(f"Model: {size:.1f} MB")

# make_archive appends the '.zip' suffix to the base name itself.
shutil.make_archive('training_results_v2', 'zip', str(run_dir))

from google.colab import files
for artifact in (WEIGHTS_COPY, RESULTS_ARCHIVE):
    files.download(artifact)
print("Downloading!")