# SAM Billboard Mask Generator
Runs SAM (Segment Anything Model) on 1200 billboard images, saves overlay previews and segmentation labels, and downloads the results as a zip.

**Set runtime to T4 GPU first:** Runtime > Change runtime type > T4 GPU

In [None]:
# Step 1: Setup everything
# Verifies a GPU is attached, installs SAM, and fetches the model checkpoint
# and the dataset. Every external command is run with check=True so a failed
# install or download stops the cell immediately instead of surfacing later
# as a confusing import or zip error.
import os
import subprocess
import sys
import zipfile

import torch

print(f"GPU: {torch.cuda.is_available()}")
if not torch.cuda.is_available():
    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    raise RuntimeError("No GPU! Change runtime to T4.")
print(f"GPU: {torch.cuda.get_device_name(0)}")

# Install SAM into the interpreter that is running this notebook
subprocess.run(
    [
        sys.executable, '-m', 'pip', 'install', '-q',
        'git+https://github.com/facebookresearch/segment-anything.git',
    ],
    check=True,
)

# Download SAM model
subprocess.run(
    [
        'wget', '-q',
        'https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth',
        '-O', 'sam_vit_b.pth',
    ],
    check=True,
)

# Download dataset
# -O pins the output name; without it a re-run saves "clean_dataset.zip.1"
# and the stale archive would be the one extracted below.
subprocess.run(
    [
        'wget', '-q',
        'https://github.com/fxsBulqit/billboard-segmentation/releases/download/v1.0-dataset/clean_dataset.zip',
        '-O', 'clean_dataset.zip',
    ],
    check=True,
)
with zipfile.ZipFile('clean_dataset.zip', 'r') as z:
    z.extractall('.')
print(f"Images: {len(os.listdir('clean_dataset/images'))}")
print(f"Labels: {len(os.listdir('clean_dataset/labels'))}")
print("Setup done!")

In [None]:
# Step 2: Run SAM on all images
import cv2
import numpy as np
from pathlib import Path
from segment_anything import sam_model_registry, SamPredictor
from tqdm.notebook import tqdm

# Build the ViT-B SAM model from the downloaded checkpoint, move it to the
# GPU, and wrap it in a predictor for box-prompted inference.
sam = sam_model_registry["vit_b"](checkpoint="sam_vit_b.pth")
sam.to("cuda")
predictor = SamPredictor(sam)
print("SAM loaded!")

# Input folders (extracted dataset)
IMAGES = Path("clean_dataset") / "images"
LABELS = Path("clean_dataset") / "labels"

# Output folders (created up front; safe to re-run)
OVERLAYS = Path("output") / "overlays"
SEG_LABELS = Path("output") / "seg_labels"
for out_dir in (OVERLAYS, SEG_LABELS):
    out_dir.mkdir(parents=True, exist_ok=True)


def bbox_yolo_to_xyxy(bbox, w, h):
    """Convert a normalized YOLO box to pixel corner coordinates.

    Args:
        bbox: ``(cx, cy, bw, bh)`` -- box center and box size, each given as
            a fraction of the image width/height.
        w: Image width in pixels.
        h: Image height in pixels.

    Returns:
        ``[x1, y1, x2, y2]`` as ints, with every coordinate clamped to the
        image bounds (``0..w`` horizontally, ``0..h`` vertically).
    """
    cx, cy, bw, bh = bbox

    def clamp(value, upper):
        # Truncate to int first, then clamp on BOTH sides: a box centered
        # outside the image must not yield x1 > w or x2 < 0.
        return min(upper, max(0, int(value)))

    return [
        clamp((cx - bw / 2) * w, w),
        clamp((cy - bh / 2) * h, h),
        clamp((cx + bw / 2) * w, w),
        clamp((cy + bh / 2) * h, h),
    ]


def mask_to_polygon(mask, eps=0.005):
    """Trace the largest region of a mask as a simplified, normalized polygon.

    Args:
        mask: 2-D array; it is cast to uint8 before contour extraction.
        eps: Simplification tolerance, as a fraction of the contour perimeter.

    Returns:
        An (N, 2) float array of (x, y) vertices divided by the mask
        width/height and clipped to [0, 1], or None when no contour is found
        or the simplified outline has fewer than four vertices.
    """
    height, width = mask.shape
    found, _ = cv2.findContours(
        mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not found:
        return None

    # Keep only the biggest external contour and simplify it.
    largest = max(found, key=cv2.contourArea)
    tolerance = eps * cv2.arcLength(largest, True)
    outline = cv2.approxPolyDP(largest, tolerance, True)

    if len(outline) >= 4:
        vertices = outline.reshape(-1, 2).astype(float)
        # Column 0 is x (scaled by width), column 1 is y (scaled by height).
        vertices /= np.array([width, height], dtype=float)
        return np.clip(vertices, 0, 1)
    return None


# Process
# For every .jpg with a matching YOLO bbox label file: prompt SAM with each
# box, turn the best-scoring mask into a polygon, draw it on an overlay
# preview, and write the polygons out as a segmentation label file.
images = sorted(IMAGES.glob("*.jpg"))
print(f"Processing {len(images)} images...")

for img_path in tqdm(images, desc="SAM"):
    # Build the label name from the stem: str.replace('.jpg', '.txt') would
    # rewrite every '.jpg' occurrence in the file name, not just the suffix.
    label_name = img_path.stem + '.txt'
    lbl_path = LABELS / label_name
    if not lbl_path.exists():
        continue

    img = cv2.imread(str(img_path))
    if img is None:
        # Unreadable/corrupt image: nothing to segment.
        continue
    h, w = img.shape[:2]
    # cv2 loads BGR; the predictor is given an RGB image.
    predictor.set_image(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    with open(lbl_path) as f:
        lines = f.read().splitlines()

    overlay = img.copy()
    seg_lines = []

    for line in lines:
        parts = line.split()
        if len(parts) < 5:
            continue
        try:
            bbox = [float(x) for x in parts[1:5]]
        except ValueError:
            # One bad label line should not abort the whole run; skip it the
            # same way too-short lines are skipped, but say so.
            print(f"Skipping malformed label line in {lbl_path.name}: {line!r}")
            continue
        x1, y1, x2, y2 = bbox_yolo_to_xyxy(bbox, w, h)

        masks, scores, _ = predictor.predict(
            box=np.array([x1, y1, x2, y2]),
            multimask_output=True
        )
        # Keep the candidate mask with the highest predicted score.
        mask = masks[scores.argmax()]
        polygon = mask_to_polygon(mask)

        if polygon is not None:
            # Polygon is normalized; scale back to pixels for drawing.
            pts_px = (polygon * [w, h]).astype(np.int32)
            cv2.fillPoly(overlay, [pts_px], (0, 255, 0))
            cv2.polylines(overlay, [pts_px], True, (0, 0, 255), 2)
            pts_str = " ".join(f"{p[0]:.6f} {p[1]:.6f}" for p in polygon)
            seg_lines.append(f"{parts[0]} {pts_str}")

    # Blend the drawn overlay with the original so the image stays visible.
    result = cv2.addWeighted(img, 0.6, overlay, 0.4, 0)
    cv2.imwrite(str(OVERLAYS / img_path.name), result)

    if seg_lines:
        with open(SEG_LABELS / label_name, 'w') as f:
            f.write('\n'.join(seg_lines) + '\n')

n_ov = sum(1 for _ in OVERLAYS.glob('*.jpg'))
n_lb = sum(1 for _ in SEG_LABELS.glob('*.txt'))
print(f"\nDone! {n_ov} overlays, {n_lb} seg labels")

In [None]:
# Step 3: Zip and download
import os
import shutil

# Build the archive with the standard library instead of shelling out to
# `zip`: failures raise instead of being silently ignored, and the archive is
# rebuilt from scratch on every run (`zip -r` updates an existing archive, so
# entries from a previous run would linger). Entries keep the "output/" prefix.
archive_path = shutil.make_archive('sam_results', 'zip', root_dir='.', base_dir='output')
size_mb = os.path.getsize(archive_path) / 1e6
print(f"Zip size: {size_mb:.1f} MB")

# Colab-only helper that triggers a browser download of the archive.
from google.colab import files
files.download('sam_results.zip')
print("Downloading!")