# Costco Label OBB Detection — Training on Colab

This notebook trains a YOLO26-OBB (oriented bounding box) model to detect Costco price labels.

**Runtime → Change runtime type → T4 GPU** before running.

In [None]:
# Check GPU: run nvidia-smi in the runtime's shell to confirm a GPU is attached
!nvidia-smi

In [None]:
# Clone repo and set working directory
import os

# Remote repository to fetch and the local checkout location inside the runtime
REPO_URL = "https://github.com/ankurkhurana03/Label_extractor_costco.git"
REPO_DIR = "/content/Label_extractor_costco"

# First run clones the repo; later runs of this cell only pull new commits.
# `$NAME` inside a `!` shell line is replaced with the Python variable's value.
if not os.path.exists(REPO_DIR):
    !git clone $REPO_URL $REPO_DIR
else:
    print("Repo already cloned, pulling latest...")
    !git -C $REPO_DIR pull

# Make the checkout the current directory and report it
os.chdir(REPO_DIR)
print(f"Working directory: {os.getcwd()}")

In [None]:
# Install latest ultralytics (YOLO26 requires a recent version)
# `-q` keeps pip output short; `--upgrade` moves both packages to their newest release.
!pip install -q --upgrade ultralytics albumentations
# Import only after the install so the version printed below is the one in use.
# NOTE(review): if ultralytics was already imported in this kernel before the
# upgrade, a runtime restart is presumably needed for the new version to load.
import ultralytics
print(f"ultralytics version: {ultralytics.__version__}")

In [None]:
from pathlib import Path
from ultralytics import YOLO

# Repository checkout that holds dataset.yaml and the dataset/ folder
ROOT = Path("/content/Label_extractor_costco")

# Verify dataset: stop right away when the dataset config is missing
dataset_yaml = ROOT / "dataset.yaml"
assert dataset_yaml.exists(), f"dataset.yaml not found in {ROOT}"

# Collect every file that has an extension in each image split
images_root = ROOT / "dataset" / "images"
train_imgs = [path for path in (images_root / "train").glob("*.*")]
test_imgs = [path for path in (images_root / "test").glob("*.*")]

print(f"ROOT: {ROOT}")
print(f"Train images: {len(train_imgs)} (before augmentation), Test images: {len(test_imgs)}")

In [None]:
# Offline augmentation: generate 4x copies of each labeled training image
# This multiplies the dataset from ~64 labeled images to ~320, giving the
# model more gradient updates per epoch and smoother learning.

import cv2
import numpy as np
from pathlib import Path
import shutil

MULTIPLIER = 4  # number of augmented copies per image
IMG_DIR = ROOT / "dataset" / "images" / "train"  # training images; augmented copies are written here as well
LBL_DIR = ROOT / "dataset" / "labels" / "train"  # label .txt files, paired with images by file stem

# Seeded generator: every random augmentation draw in this cell comes from it,
# so the same sequence of draws is produced each time the cell is run.
rng = np.random.default_rng(42)

def augment_image(img):
    """Return a randomly flipped, rotated, recoloured and possibly blurred copy of ``img``.

    All random draws come from the notebook-level ``rng``. The image is run
    through ``cv2.COLOR_BGR2HSV``, i.e. it is handled as a BGR array. The
    input array is left untouched. The sampled flip and rotation values are
    not returned to the caller.
    """
    height, width = img.shape[:2]
    result = img.copy()

    # Mirror left-right (code 1), then top-bottom (code 0), each with probability 0.5
    for flip_code in (1, 0):
        if rng.random() > 0.5:
            result = cv2.flip(result, flip_code)

    # Rotate about the image centre by an angle drawn uniformly from -25..25 degrees;
    # pixels pulled in from outside the frame are filled by reflection.
    theta = rng.uniform(-25, 25)
    rotation = cv2.getRotationMatrix2D((width / 2, height / 2), theta, 1.0)
    result = cv2.warpAffine(result, rotation, (width, height), borderMode=cv2.BORDER_REFLECT_101)

    # Contrast (gain) and brightness (bias) jitter, clamped back into uint8 range
    gain = rng.uniform(0.7, 1.3)
    bias = rng.uniform(-30, 30)
    result = np.clip(gain * result.astype(np.float32) + bias, 0, 255).astype(np.uint8)

    # HSV jitter: hue is shifted and wrapped at 180, saturation/value are scaled
    hsv = cv2.cvtColor(result, cv2.COLOR_BGR2HSV).astype(np.float32)
    hue_shift = rng.uniform(-10, 10)
    sat_gain = rng.uniform(0.7, 1.3)
    val_gain = rng.uniform(0.7, 1.3)
    hsv[:, :, 0] = (hsv[:, :, 0] + hue_shift) % 180
    hsv[:, :, 1] = np.clip(hsv[:, :, 1] * sat_gain, 0, 255)
    hsv[:, :, 2] = np.clip(hsv[:, :, 2] * val_gain, 0, 255)
    result = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)

    # Gaussian blur with a 3x3 or 5x5 kernel, applied half of the time
    if rng.random() > 0.5:
        kernel = rng.choice([3, 5])
        result = cv2.GaussianBlur(result, (kernel, kernel), 0)

    return result

def rotate_obb_points(points, angle_deg, cx, cy):
    """Rotate ``(x, y)`` points about ``(cx, cy)`` and clamp each coordinate to [0, 1].

    The angle is negated before use so that a positive ``angle_deg`` moves
    points the same way as the matrix from ``cv2.getRotationMatrix2D``
    (counter-clockwise on screen, with the y axis pointing down).

    x and y are rotated as if they shared one unit length. Coordinates
    normalised separately by width and height only satisfy that when the
    image is square.

    Args:
        points: Iterable of ``(x, y)`` pairs.
        angle_deg: Rotation angle in degrees.
        cx, cy: Centre of rotation, in the same units as ``points``.

    Returns:
        A list of ``(x, y)`` tuples, one per input point, each coordinate
        clipped to the range 0..1 independently.
    """
    theta = np.radians(-angle_deg)
    cos_t, sin_t = np.cos(theta), np.sin(theta)

    def _turn(px, py):
        # Offset from the centre, rotate, shift back, then clamp
        off_x, off_y = px - cx, py - cy
        new_x = cos_t * off_x - sin_t * off_y + cx
        new_y = sin_t * off_x + cos_t * off_y + cy
        return (np.clip(new_x, 0, 1), np.clip(new_y, 0, 1))

    return [_turn(px, py) for px, py in points]

def augment_label(label_path, h_flip, v_flip, angle, img_size=None):
    """Transform OBB label coordinates to match an augmented image.

    Each usable row of the label file has 9 whitespace-separated fields:
    a class id followed by four ``x y`` corner pairs normalised to [0, 1].
    Rows with any other field count are dropped. The flips and the rotation
    are applied in the same order as they are applied to the image.

    Args:
        label_path: ``Path`` of the source label file.
        h_flip: When true, mirror x coordinates (``x -> 1 - x``).
        v_flip: When true, mirror y coordinates (``y -> 1 - y``).
        angle: Rotation in degrees about the image centre, with the same
            sign convention as the angle given to ``cv2.getRotationMatrix2D``.
            Angles with magnitude <= 0.1 are treated as no rotation.
        img_size: Optional ``(width, height)`` of the image in pixels. The
            image is rotated in pixel space, so for a non-square image the
            normalised corners must be rotated with the aspect ratio taken
            into account. When omitted, corners are rotated directly in
            normalised space, which is exact only for square images.

    Returns:
        The transformed rows joined by newlines, or ``""`` when the file is
        missing, empty, or has no usable rows.
    """
    if not label_path.exists():
        return ""
    text = label_path.read_text().strip()
    if not text:
        return ""

    # Per-axis factors that map normalised coordinates into a space where x
    # and y share one unit length (the longer image side spans exactly 1).
    if img_size is None:
        scale_x = scale_y = 1.0
    else:
        img_w, img_h = img_size
        longest = float(max(img_w, img_h))
        scale_x, scale_y = img_w / longest, img_h / longest
    rotate = abs(angle) > 0.1

    lines = []
    for line in text.splitlines():
        parts = line.strip().split()
        if len(parts) != 9:
            continue
        cls_id = parts[0]
        coords = list(map(float, parts[1:]))
        points = [(coords[i], coords[i + 1]) for i in range(0, 8, 2)]

        if h_flip:
            points = [(1.0 - x, y) for x, y in points]
        if v_flip:
            points = [(x, 1.0 - y) for x, y in points]
        if rotate and scale_x == scale_y:
            # Square image (or size unknown): normalised space is already isotropic
            points = rotate_obb_points(points, angle, 0.5, 0.5)
        elif rotate:
            # Squeeze the shorter axis towards the centre so both axes share a
            # unit, rotate there, then stretch back. The squeezed coordinates
            # stay inside [0, 1], so the clamp inside rotate_obb_points never
            # cuts tighter than the final clamp below.
            squared = [(0.5 + (x - 0.5) * scale_x, 0.5 + (y - 0.5) * scale_y) for x, y in points]
            turned = rotate_obb_points(squared, angle, 0.5, 0.5)
            points = [
                (
                    float(np.clip(0.5 + (u - 0.5) / scale_x, 0.0, 1.0)),
                    float(np.clip(0.5 + (v - 0.5) / scale_y, 0.0, 1.0)),
                )
                for u, v in turned
            ]

        coord_str = " ".join(f"{x:.6f} {y:.6f}" for x, y in points)
        lines.append(f"{cls_id} {coord_str}")
    return "\n".join(lines)


def _is_augmented_copy(name):
    """Return True for file names of the form ``aug<digits>_...`` written by this cell."""
    head, sep, _ = name.partition("_")
    return bool(sep) and head.startswith("aug") and head[3:].isdigit()


# Only augment labeled images (skip neg_ and synth_ prefixes). Copies written
# by an earlier run of this cell are skipped as well, so running the cell
# again overwrites the same aug<N>_ files instead of augmenting them again.
labeled_imgs = sorted(
    p for p in IMG_DIR.glob("*.*")
    if p.suffix.lower() in {".jpg", ".jpeg", ".png", ".webp"}
    and not p.name.startswith(("neg_", "synth_"))
    and not _is_augmented_copy(p.name)
)

print(f"Found {len(labeled_imgs)} labeled training images")
print(f"Generating {MULTIPLIER}x augmented copies...")

generated = 0
for img_path in labeled_imgs:
    lbl_path = LBL_DIR / (img_path.stem + ".txt")
    img = cv2.imread(str(img_path))
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded
        print(f"  Skipping unreadable image: {img_path.name}")
        continue
    h, w = img.shape[:2]

    for i in range(MULTIPLIER):
        # Track transforms for label adjustment
        h_flip = rng.random() > 0.5
        v_flip = rng.random() > 0.5
        angle = rng.uniform(-25, 25)

        # Apply transforms to image: flips first, then rotation about the centre
        out = img.copy()
        if h_flip:
            out = cv2.flip(out, 1)
        if v_flip:
            out = cv2.flip(out, 0)
        M = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
        out = cv2.warpAffine(out, M, (w, h), borderMode=cv2.BORDER_REFLECT_101)

        # Color augmentation (doesn't affect labels)
        alpha = rng.uniform(0.7, 1.3)   # contrast
        beta = rng.uniform(-30, 30)     # brightness
        out = np.clip(alpha * out.astype(np.float32) + beta, 0, 255).astype(np.uint8)
        hsv = cv2.cvtColor(out, cv2.COLOR_BGR2HSV).astype(np.float32)
        hsv[:, :, 0] = (hsv[:, :, 0] + rng.uniform(-10, 10)) % 180
        hsv[:, :, 1] = np.clip(hsv[:, :, 1] * rng.uniform(0.7, 1.3), 0, 255)
        hsv[:, :, 2] = np.clip(hsv[:, :, 2] * rng.uniform(0.7, 1.3), 0, 255)
        out = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
        if rng.random() > 0.5:
            # Plain int: rng.choice returns a NumPy integer
            ksize = int(rng.choice([3, 5]))
            out = cv2.GaussianBlur(out, (ksize, ksize), 0)

        # Save augmented image and label; the label gets the pixel size so its
        # rotation matches the pixel-space rotation applied to the image above.
        aug_name = f"aug{i}_{img_path.stem}"
        if not cv2.imwrite(str(IMG_DIR / f"{aug_name}.jpg"), out):
            print(f"  Failed to write {aug_name}.jpg, skipping its label")
            continue
        aug_label = augment_label(lbl_path, h_flip, v_flip, angle, img_size=(w, h))
        (LBL_DIR / f"{aug_name}.txt").write_text(aug_label)
        generated += 1

total = len(list(IMG_DIR.glob("*.*")))
print(f"Generated {generated} augmented images")
print(f"Total training images now: {total}")

In [None]:
# Train with improved hyperparameters
model = YOLO("yolo26n-obb.pt")

# All training options gathered in one place, then passed as keyword arguments
train_args = {
    # Dataset config and output location: <ROOT>/runs/costco_label_obb
    "data": str(ROOT / "dataset.yaml"),
    "project": str(ROOT / "runs"),
    "name": "costco_label_obb",
    "exist_ok": True,
    # Schedule and hardware
    "epochs": 300,
    "imgsz": 640,
    "batch": 16,         # T4 has 16GB VRAM — can handle batch=16
    "patience": 50,
    "device": 0,         # GPU 0
    # Augmentation tuned for small dataset
    "mosaic": 1.0,
    "flipud": 0.5,
    "fliplr": 0.5,
    "degrees": 30.0,
    "scale": 0.5,
    "translate": 0.2,
    "mixup": 0.3,
    "copy_paste": 0.3,
    # Color augmentation
    "hsv_h": 0.015,
    "hsv_s": 0.7,
    "hsv_v": 0.4,
    # Regularization
    # NOTE(review): confirm that `dropout` has an effect for the OBB task
    "weight_decay": 0.001,
    "dropout": 0.1,
}

results = model.train(**train_args)

print(f"\nBest model: {ROOT / 'runs' / 'costco_label_obb' / 'weights' / 'best.pt'}")

In [None]:
# View training curves
from IPython.display import Image, display

# results.png is looked up in the run folder named by project/name in the training call
results_img = ROOT / "runs" / "costco_label_obb" / "results.png"
# Nothing is displayed (and no error is raised) when the file is absent
if results_img.exists():
    display(Image(filename=str(results_img), width=800))

In [None]:
# Validate on test set
# NOTE(review): no `split` argument is passed to val(), so which images are
# evaluated depends on dataset.yaml and the library default — confirm that this
# really is the test split.
# Load the best checkpoint saved by the training run
best = YOLO(str(ROOT / "runs" / "costco_label_obb" / "weights" / "best.pt"))
metrics = best.val(data=str(ROOT / "dataset.yaml"), device=0)
# Report the two mAP figures read from the returned metrics object
print(f"\nmAP50: {metrics.box.map50:.4f}")
print(f"mAP50-95: {metrics.box.map:.4f}")

In [None]:
# Run predictions on a few test images
import random

test_dir = ROOT / "dataset" / "images" / "test"

# List the folder once, in sorted order, and draw with a fixed seed so the
# same (up to 6) files are picked every time the notebook is run.
test_candidates = sorted(test_dir.glob("*.*"))
sample_imgs = random.Random(42).sample(test_candidates, min(6, len(test_candidates)))

pred_dir = ROOT / "runs" / "predict_samples"
if sample_imgs:
    preds = best.predict(source=sample_imgs, imgsz=640, device=0, save=True,
                         project=str(ROOT / "runs"), name="predict_samples", exist_ok=True)

    # Display predictions
    # NOTE: the folder is reused across runs (exist_ok=True), so this listing
    # can include .jpg files left over from an earlier run.
    for img_path in sorted(pred_dir.glob("*.jpg"))[:6]:
        display(Image(filename=str(img_path), width=500))
        print(img_path.name)
        print("---")
else:
    # Nothing to predict on: skip the model call rather than passing an empty source
    preds = []
    print(f"No files found in {test_dir}")

In [None]:
# Sanity check: model should NOT fire on blank/noise images
import numpy as np

def count_detections(results, conf=0.5):
    """Count detections at or above ``conf`` and report the highest confidence seen.

    Args:
        results: Iterable of prediction results. Each item exposes ``.obb``,
            which is either ``None`` or a sized object whose ``.conf`` can be
            converted with ``.cpu().numpy()``.
        conf: Minimum confidence for a detection to be counted.

    Returns:
        ``(count, max_conf)``: the number of confidences ``>= conf`` summed
        over all results, and the largest confidence among all detections
        (counted or not), or ``0.0`` when there are none.
    """
    total_hits = 0
    top_score = 0.0
    for result in results:
        boxes = result.obb
        # Results without any oriented boxes contribute nothing
        if boxes is None or len(boxes) == 0:
            continue
        scores = boxes.conf.cpu().numpy()
        total_hits += int((scores >= conf).sum())
        if len(scores):
            top_score = max(top_score, float(scores.max()))
    return total_hits, top_score

print("Synthetic sanity checks (conf=0.5):")
# Three 640x640 images that contain no label: all white, all black, and seeded uniform noise
for name, img in [
    ("Blank white", np.ones((640,640,3), dtype=np.uint8)*255),
    ("Blank black", np.zeros((640,640,3), dtype=np.uint8)),
    ("Random noise", np.random.default_rng(42).integers(0,256,(640,640,3), dtype=np.uint8)),
]:
    res = best.predict(img, conf=0.5, verbose=False)
    n, mc = count_detections(res)
    # Any detection on these images counts as a failure
    status = "PASS" if n == 0 else "FAIL"
    print(f"  [{status}] {name}: {n} detections (max conf {mc:.3f})")

# Real test images summary: how many files produce at least one detection
test_imgs = sorted(test_dir.glob("*.*"))
detected = 0
for p in test_imgs:
    res = best.predict(str(p), conf=0.5, verbose=False)
    n, _ = count_detections(res)
    if n > 0: detected += 1
if test_imgs:
    print(f"\nReal test set: {detected}/{len(test_imgs)} images had detections ({detected/len(test_imgs)*100:.0f}%)")
else:
    # An empty folder would otherwise raise ZeroDivisionError in the percentage
    print(f"\nReal test set: no files found in {test_dir}")

In [None]:
# Download the best model
from google.colab import files

# Path of the best checkpoint inside the training run folder
best_pt = ROOT / "runs" / "costco_label_obb" / "weights" / "best.pt"
# Hand the file to the Colab `files` helper to download it from the runtime
files.download(str(best_pt))