In [2]:
import json
import logging
import random
import re
import shutil
from pathlib import Path
from typing import Any, Dict, List, Optional

import cv2
import numpy as np

# Configure logging for better debug information.
# INFO level shows the per-batch progress messages; the logger.debug(...) calls
# below stay hidden unless the level is lowered to DEBUG.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Base directory for batches.
# The path is relative, so it is resolved against the current working directory.
BATCHES_DIR = Path("../data/rotation/batches")
# Predefined angles for rotation, in degrees. These values are also used as the
# filename suffixes of rotated patches and as the class-folder names.
ANGLES: List[int] = [0, 90, 180, 270]

In [None]:


def load_coco(json_path: Path) -> Dict[str, Any]:
    """
    Parse the COCO-format JSON file at ``json_path``.

    The file is read as UTF-8 and the decoded JSON object is returned as-is;
    no schema validation is performed.
    """
    logger.info(f"Loading COCO JSON from {json_path}")
    with open(json_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)


def create_obb_tuple(anns: Dict[str, Any]) -> None:
    """
    Convert an annotation's axis-aligned bbox to an oriented bbox, in place.

    ``anns["bbox"]`` is rewritten from ``[x, y, w, h]`` (top-left corner plus
    size) to ``[cx, cy, w, h, angle]`` (centre plus size plus rotation).
    The angle is read from ``anns["attributes"]["rotation"]`` and defaults to
    ``0.0`` when the attributes entry, or the rotation inside it, is missing
    or ``None`` (JSON ``null``).

    Annotations whose bbox is not a 4-element list/tuple are left untouched
    and a warning is logged.

    Args:
        anns: A single annotation dict; it is mutated.
    """
    bbox = anns.get("bbox")
    if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
        logger.warning(f"Unexpected bbox format for annotation id {anns.get('id')}: {bbox}")
        return

    x, y, w, h = bbox

    # "attributes" may be present but null in the JSON; treat anything that is
    # not a dict as "no attributes" instead of crashing on `.get`.
    attributes = anns.get("attributes")
    if not isinstance(attributes, dict):
        attributes = {}

    # A null rotation would be stored as None and break later numeric use.
    angle = attributes.get("rotation")
    if angle is None:
        angle = 0.0

    anns["bbox"] = [x + w / 2, y + h / 2, w, h, angle]
    logger.debug(f"Converted bbox to OBB: {anns['bbox']} for annotation id {anns.get('id')}")

In [None]:
def replace_obb(coco: Dict[str, Any], batch_dir: Path) -> None:
    """
    Convert every annotation bbox in ``coco`` to OBB form and save the result.

    Each entry of ``coco["annotations"]`` is passed through
    ``create_obb_tuple`` (mutating ``coco`` in place), then the whole object is
    written to ``<batch_dir>/annotations/instances_updated.json``.
    """
    logger.info("Replacing OBBs in annotations")
    annotations = coco.get('annotations', [])
    for annotation in annotations:
        create_obb_tuple(annotation)

    destination = batch_dir / "annotations" / "instances_updated.json"
    logger.info(f"Writing updated annotations to {destination}")
    # ensure_ascii=False keeps non-ASCII characters readable in the UTF-8 output.
    with open(destination, 'w', encoding='utf-8') as sink:
        json.dump(coco, sink, ensure_ascii=False)


def convert_all_batches() -> None:
    """
    Run the bbox -> OBB conversion for every batch under ``BATCHES_DIR``.

    Only entries whose name contains "rotation" are considered. A batch
    without ``annotations/instances_default.json`` is reported with a warning
    and skipped.
    """
    for batch_dir in BATCHES_DIR.iterdir():
        if "rotation" not in batch_dir.name:
            continue

        source_json = batch_dir / "annotations" / "instances_default.json"
        if not source_json.exists():
            logger.warning(f"Missing JSON at {source_json}")
            continue

        logger.info(f"Processing batch: {batch_dir.name}")
        replace_obb(load_coco(source_json), batch_dir)


def crop_oriented_bbox(img: np.ndarray, cx: float, cy: float, w: float, h: float, theta: float) -> np.ndarray:
    """
    Cut an oriented box out of ``img``.

    The image is rotated about the box centre ``(cx, cy)`` by ``theta`` (the
    angle is passed straight to ``cv2.getRotationMatrix2D``) on a canvas of the
    original size; then the ``w`` x ``h`` window centred on ``(cx, cy)`` is
    sliced out. Window edges are truncated to integers and clamped to the
    image bounds, so the result can be smaller than ``w`` x ``h`` (or empty).
    """
    img_h, img_w = img.shape[0], img.shape[1]

    rotation = cv2.getRotationMatrix2D((cx, cy), theta, 1.0)
    upright = cv2.warpAffine(img, rotation, (img_w, img_h))

    # Integer window around the centre, clamped to the image extent.
    left = max(0, int(cx - w/2))
    top = max(0, int(cy - h/2))
    right = min(img_w, int(cx + w/2))
    bottom = min(img_h, int(cy + h/2))

    return upright[top:bottom, left:right]


In [None]:






def crop_all_boxes() -> None:
    """
    Crop every oriented bounding box of every batch into its own PNG.

    For each entry of ``BATCHES_DIR`` whose name contains "rotation", this
    reads ``annotations/instances_updated.json`` and writes one crop per
    annotation to ``images/boxes/<image stem>_<annotation id>.png``.

    Annotations are grouped by image first, so each source image is read from
    disk once and only one image is held in memory at a time.

    Problems are logged and skipped instead of aborting the whole run:
    missing image metadata or file name, unreadable image, a bbox that is not
    in 5-element OBB form, an empty crop, and a failed write.
    """
    for p in BATCHES_DIR.iterdir():
        if "rotation" not in p.name:
            continue

        ann_path = p / "annotations" / "instances_updated.json"
        img_dir = p / "images" / "default"
        if not ann_path.exists() or not img_dir.exists():
            continue

        logger.info(f"Cropping boxes for batch {p.name}")
        coco = load_coco(ann_path)

        # Pre-build image metadata lookup for O(1) access
        image_lookup = {img["id"]: img for img in coco.get("images", [])}

        # Group annotations per image so every image is decoded exactly once
        # and can be released before the next one is loaded.
        anns_by_image: Dict[Any, List[Dict[str, Any]]] = {}
        for anns in coco.get("annotations", []):
            anns_by_image.setdefault(anns.get("image_id"), []).append(anns)

        dest_dir = p / "images" / "boxes"
        dest_dir.mkdir(parents=True, exist_ok=True)

        for image_id, image_anns in anns_by_image.items():
            img_meta = image_lookup.get(image_id)
            if not img_meta:
                logger.warning(f"No image metadata for id {image_id}")
                continue

            file_name = img_meta.get("file_name")
            if not file_name:
                logger.warning(f"No file_name in image metadata for id {image_id}")
                continue

            path = img_dir / file_name
            img_arr = cv2.imread(str(path))
            if img_arr is None:
                # Reported once per image rather than once per annotation.
                logger.error(f"Failed to load image {path}")
                continue

            for anns in image_anns:
                bbox = anns.get("bbox")
                # create_obb_tuple leaves malformed bboxes unconverted, so the
                # 5-element OBB form cannot be taken for granted here.
                if not isinstance(bbox, (list, tuple)) or len(bbox) != 5:
                    logger.warning(f"Skipping annotation id {anns.get('id')}: bbox is not an OBB: {bbox}")
                    continue
                cx, cy, w, h, theta = bbox

                rotated_box = crop_oriented_bbox(img_arr, cx, cy, w, h, theta)
                if rotated_box.size == 0:
                    # Degenerate or fully out-of-image box: nothing to encode.
                    logger.warning(f"Skipping annotation id {anns.get('id')}: empty crop for bbox {bbox}")
                    continue

                out_file = dest_dir / f"{Path(file_name).stem}_{anns.get('id')}.png"
                if not cv2.imwrite(str(out_file), rotated_box):
                    logger.error(f"Failed to write cropped box to {out_file}")
                    continue
                logger.debug(f"Wrote cropped box to {out_file}")


def rotate_patch(patch: np.ndarray, angle: int) -> np.ndarray:
    """
    Rotate a patch counter-clockwise by ``angle`` degrees on an enlarged canvas.

    Multiples of 90 degrees are handled as exact pixel permutations
    (``np.rot90``): no interpolation, no border pixels, no lost rows/columns.
    Other angles go through ``cv2.warpAffine`` with linear interpolation; the
    uncovered canvas is filled with transparent black for 4-channel patches
    and white otherwise.

    Args:
        patch: Image array of shape (H, W) or (H, W, C).
        angle: Rotation in degrees; positive is counter-clockwise.

    Returns:
        A new array holding the rotated patch (the input is never returned
        itself, even for a rotation of 0).
    """
    # Rotating about (w/2, h/2) through warpAffine shifts right-angle results
    # by one pixel (a border strip appears on one side, a source strip is lost
    # on the other), which would also leak the rotation label into the image.
    # Quarter turns are therefore done losslessly.
    if angle % 90 == 0:
        quarter_turns = int(angle // 90) % 4
        # .copy() yields a fresh C-contiguous array instead of a strided view.
        return np.rot90(patch, k=quarter_turns).copy()

    h, w = patch.shape[:2]
    # Pixel centres sit at integer coordinates, so the geometric centre of the
    # image is ((w - 1) / 2, (h - 1) / 2), not (w / 2, h / 2).
    M = cv2.getRotationMatrix2D(((w - 1) / 2, (h - 1) / 2), angle, 1.0)
    cos, sin = abs(M[0, 0]), abs(M[0, 1])
    new_w = int(h * sin + w * cos)
    new_h = int(h * cos + w * sin)
    # Re-centre the rotated content on the enlarged canvas.
    M[0, 2] += (new_w - w) / 2
    M[1, 2] += (new_h - h) / 2

    # Choose border value based on alpha channel presence; 2-D (grayscale)
    # patches have no channel axis and get the opaque white fill.
    has_alpha = patch.ndim == 3 and patch.shape[2] == 4
    border = (0, 0, 0, 0) if has_alpha else (255, 255, 255)
    return cv2.warpAffine(
        patch,
        M,
        (new_w, new_h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=border
    )


In [None]:


def _is_rotated_output(box_path: Path) -> bool:
    """
    Tell whether ``box_path`` looks like a file written by ``rotate_all_batches``.

    A file counts as a rotation output when its stem ends in ``_<angle>`` for
    one of ``ANGLES`` and the file it would have been derived from
    (``<stem without suffix>.png``) exists next to it. The second condition
    keeps a plain crop whose annotation id happens to equal an angle (e.g.
    ``img_90.png``) from being mistaken for an output.
    """
    base, sep, suffix = box_path.stem.rpartition("_")
    if not sep or not base or suffix not in {str(a) for a in ANGLES}:
        return False
    return box_path.with_name(f"{base}.png").exists()


def rotate_all_batches() -> None:
    """
    Rotate each cropped box by predefined angles, skipping existing outputs to save I/O.

    For every batch directory whose name contains "rotation", each file in
    ``images/boxes`` is rotated by every angle in ``ANGLES`` and saved next to
    it as ``<stem>_<angle>.png``.

    Because outputs share the directory with their inputs, files recognised as
    earlier outputs are not rotated again; without that, every re-run would
    multiply the number of files by ``len(ANGLES)``. A source image is only
    read when at least one of its outputs is still missing.
    """
    for p in BATCHES_DIR.iterdir():
        if "rotation" not in p.name:
            continue

        boxes_dir = p / "images" / "boxes"
        if not boxes_dir.exists():
            continue

        logger.info(f"Rotating all boxes in {p.name}")
        # sorted() snapshots the listing, so files written below are not
        # picked up by this same loop.
        for box_path in sorted(boxes_dir.iterdir()):
            if _is_rotated_output(box_path):
                logger.debug(f"Skipping previously rotated output {box_path}")
                continue

            stem = box_path.stem
            pending = []
            for angle in ANGLES:
                out_path = boxes_dir / f"{stem}_{angle}.png"
                # Skip if already exists
                if out_path.exists():
                    logger.debug(f"Skipping existing file {out_path}")
                    continue
                pending.append((angle, out_path))

            if not pending:
                # Nothing to write, so do not pay for decoding the image.
                continue

            img_arr = cv2.imread(str(box_path), cv2.IMREAD_UNCHANGED)
            if img_arr is None:
                logger.error(f"Failed to load box image {box_path}")
                continue

            for angle, out_path in pending:
                rotated = rotate_patch(img_arr, angle)
                if not cv2.imwrite(str(out_path), rotated):
                    logger.error(f"Failed to write rotated box {out_path}")
                    continue
                logger.debug(f"Wrote rotated box {out_path}")


def organize_into_classes(dataset_path: str, out_base_path: str, train_ratio: float=0.8,
                          seed: Optional[int] = None) -> None:
    """
    Organize images into train/test splits and class folders by angle using regex matching.

    Every ``*.png`` directly inside ``dataset_path`` whose name ends in
    ``_<angle>.png`` with ``<angle>`` in ``ANGLES`` is copied to
    ``<out_base_path>/<split>/<angle>/``. Files with any other numeric suffix
    (for example un-rotated crops named ``<stem>_<annotation id>.png``) are
    skipped; previously they were sent to a class folder that does not exist.

    Args:
        dataset_path: Directory containing the rotated PNG patches.
        out_base_path: Root of the output tree; ``train``/``test`` and the
            per-angle sub-folders are created as needed.
        train_ratio: Probability that a file is assigned to ``train``.
        seed: When given, the split is drawn from a private generator seeded
            with this value, so the same inputs give the same split. When
            ``None`` the module-level ``random`` state is used, as before.

    NOTE(review): a plain crop whose annotation id equals one of ``ANGLES``
    (e.g. ``img_90.png``) is indistinguishable by name from a rotated patch
    and will still be classified by that number.
    """
    dataset_path = Path(dataset_path)
    out_base_path = Path(out_base_path)

    # Prepare output dirs
    for split in ("train", "test"):
        for angle in ANGLES:
            (out_base_path / split / str(angle)).mkdir(parents=True, exist_ok=True)

    # Regex to extract angle suffix
    pattern = re.compile(r".*_(\d+)\.png$")

    # A private generator keeps a seeded split independent of other users of
    # the global random state.
    rng = random.Random(seed) if seed is not None else random

    # Sorted so that a seeded run visits files in a stable order.
    for img_file in sorted(dataset_path.glob("*.png")):
        match = pattern.match(img_file.name)
        if not match:
            continue
        angle = int(match.group(1))
        if angle not in ANGLES:
            logger.debug(f"Skipping {img_file}: suffix {angle} is not a known angle")
            continue
        split = "train" if rng.random() < train_ratio else "test"
        target = out_base_path / split / str(angle) / img_file.name
        shutil.copy(img_file, target)
        logger.debug(f"Copied {img_file} to {target}")


if __name__ == "__main__":
    # Pipeline stages in dependency order: cropping reads the JSON written by
    # the conversion step, and rotation reads the crops.
    for stage in (convert_all_batches, crop_all_boxes, rotate_all_batches):
        stage()


INFO:__main__:Processing batch: rotation_20250715_02
INFO:__main__:Loading COCO JSON from ../data/rotation/batches/rotation_20250715_02/annotations/instances_default.json
INFO:__main__:Replacing OBBs in annotations
INFO:__main__:Writing updated annotations to ../data/rotation/batches/rotation_20250715_02/annotations/instances_updated.json
INFO:__main__:Processing batch: rotation_20250715_01
INFO:__main__:Loading COCO JSON from ../data/rotation/batches/rotation_20250715_01/annotations/instances_default.json
INFO:__main__:Replacing OBBs in annotations
INFO:__main__:Writing updated annotations to ../data/rotation/batches/rotation_20250715_01/annotations/instances_updated.json
INFO:__main__:Cropping boxes for batch rotation_20250715_02
INFO:__main__:Loading COCO JSON from ../data/rotation/batches/rotation_20250715_02/annotations/instances_updated.json
INFO:__main__:Cropping boxes for batch rotation_20250715_01
INFO:__main__:Loading COCO JSON from ../data/rotation/batches/rotation_20250715_