
## README / Quick‑Start / Rotation Pipeline

**Directory layout expected**

```
rotation/
└── batches/
    ├── rotation_2025MMDD_01/          # <- renamed input folder
    │   ├── images/
    |   |   |__ boxes/(all crops)
    │   │   └── default/*.png
    │   └── annotations/
    │       └── instances_default.json
            |__ instances_updaated.json
    └── ...
```

> ⚠️ If your raw data are still in `rotation/batches/images/default`  
> run section **1 – Rename batches** first.



In [9]:
import cv2, os, math, sys
from pathlib import Path
from typing import List, Tuple
import numpy as np
from tqdm import tqdm
from pathlib import Path
from typing import List, Dict, Any
import os, json, shutil, random, math, datetime as dt
import pandas as pd

from multiprocessing import Pool



In [10]:

# Where am I?
print("Working dir :", Path.cwd())

# Show the absolute target
BATCHES_DIR = Path("../data/rotation/batches")               # or Path('/absolute/path/to/project')
print("Batch dir   :", BATCHES_DIR)

# Does it exist?
print("Exists?     :", BATCHES_DIR.exists())
print("Contents    :", list(BATCHES_DIR.iterdir())[:5])  # peek first 5 entries


Working dir : /Users/gerhardkarbeutz/cerpro/ocr-rec-lab/pipeline
Batch dir   : ../data/rotation/batches
Exists?     : True
Contents    : [PosixPath('../data/rotation/batches/rotation_20250708_01'), PosixPath('../data/rotation/batches/rotation_20250708_02')]


In [11]:

def rename_batches(batches_dir: Path, prefix: str = 'rotation', date_fmt: str = '%Y%m%d') -> None:
    today = dt.datetime.today().strftime(date_fmt)
    index = 1
    
    unnamed = []

    for p in batches_dir.iterdir():
        pname = p.name
        
        if (p.is_dir() and p.name not in ('images','annotations') and pname.find(prefix) == -1):
            unnamed.append(p)
    
    
    # also handle loose images/annotations sitting directly
    if (batches_dir/'images').exists() and (batches_dir/'annotations').exists():
        unnamed.append(batches_dir)
    if not unnamed:
        print('Nothing to rename – folders already structured ✔️')
        return
    for src in unnamed:
        target = batches_dir/f"{prefix}_{today}_{index:02d}"
        index += 1
        target.mkdir(exist_ok=True)
        for sub in ('images', 'annotations'):
            sub_path = src/sub
            if sub_path.exists():
                shutil.move(str(sub_path), target/ sub)
        # remove empty src folder if it wasn't batches_dir
        if src != batches_dir:
            try:
                src.rmdir()
            except OSError:
                pass
        print(f"Moved {src} -> {target}")


In [None]:
# Uncomment to execute
#rename_batches(BATCHES_DIR)

In [12]:


def load_coco(json_path: Path) -> Dict[str, Any]:
    with open(json_path, 'r', encoding='utf-8') as f:
        coco = json.load(f)
    return coco


def create_obb_tuple(anns):
    bbox = anns.get("bbox")
    if (
        isinstance(bbox, (list, tuple)) and
        len(bbox) == 4 and
        all(isinstance(v, (int, float)) for v in bbox)
    ):
        x, y, w, h = bbox
        cx = x + w / 2
        cy = y + h / 2
        angle = anns.get("attributes", {}).get("rotation", 0.0)
        anns["bbox"] = [cx, cy, w, h, angle]


def process_annotation(anns):
    create_obb_tuple(anns)
    return anns


def replace_obb(coco, batchdir):
    with Pool() as pool:
        coco['annotations'] = pool.map(process_annotation, coco['annotations'])

    output_path = batchdir / "annotations" / "instances_updated.json"
    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(coco, file, ensure_ascii=False)


def convert_all_batches():
    for p in BATCHES_DIR.iterdir():
        if "rotation" in p.name:
            json_path = p / "annotations" / "instances_default.json"
            replace_obb(load_coco(json_path), p)


if __name__ == "__main__":
    BATCHES_DIR = Path("../data/rotation/batches")  # Adjust if needed
    convert_all_batches()


Process SpawnPoolWorker-181:
Process SpawnPoolWorker-182:
Process SpawnPoolWorker-183:
Traceback (most recent call last):
  File "/Users/gerhardkarbeutz/.pyenv/versions/3.10.0/lib/python3.10/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/gerhardkarbeutz/.pyenv/versions/3.10.0/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/gerhardkarbeutz/.pyenv/versions/3.10.0/lib/python3.10/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/gerhardkarbeutz/.pyenv/versions/3.10.0/lib/python3.10/multiprocessing/queues.py", line 368, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'process_annotation' on <module '__main__' (built-in)>
Traceback (most recent call last):
  File "/Users/gerhardkarbeutz/.pyenv/versions/3.10.0/lib/python3.10/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/gerhardkarbeutz/.p

KeyboardInterrupt: 

In [None]:
def crop_oriented_bbox(img, cx, cy, w, h, theta):
    # Step 1: Rotate the entire image around the bbox center
    M = cv2.getRotationMatrix2D((cx, cy), theta, 1.0)
    rotated = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
    
    # Step 2: Crop the now-aligned rectangle
    x1 = int(cx - w/2)
    y1 = int(cy - h/2)
    x2 = int(cx + w/2)
    y2 = int(cy + h/2)
    
    # Ensure bounds are within image
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(img.shape[1], x2), min(img.shape[0], y2)
    
    cropped = rotated[y1:y2, x1:x2]
    return cropped

In [None]:
def crop_all_boxes():
    for p in BATCHES_DIR.iterdir():
        if "rotation" in p.name and (p/ "annotations" / "instances_updated.json").exists() and (p/ "images").exists():
            
            image_path = p/ "images" / "default"  
            
            coco = load_coco(p/ "annotations" / "instances_updated.json")
            
            
            DEST_IMG_DIR = Path(f'../data/rotation/batches/{p.name}/images/boxes')
            DEST_IMG_DIR.mkdir(parents=True, exist_ok=True)

            for anns in coco["annotations"]:
                    cx, cy, w, h, theta = anns["bbox"]
                    image_id = anns["image_id"]

                    img_meta = next((img for img in coco["images"] if img["id"] == image_id), None)
                    file_name = img_meta.get('file_name')
                    
                    file_number = file_name.replace('.png', "")
                    
                    img_arr = cv2.imread(image_path / file_name)
                                        
                    
                    rotated_box = crop_oriented_bbox(img_arr, cx, cy, w, h, theta)
                    
                    cv2.imwrite(f'{p}/images/boxes/{file_number}_{anns["id"]}.png', rotated_box)


        
            
crop_all_boxes()

In [None]:
import cv2
import numpy as np

def rotate_patch(patch, angle):
    h, w = patch.shape[:2]
    M = cv2.getRotationMatrix2D((w/2, h/2), angle, 1.0)
    cos, sin = abs(M[0,0]), abs(M[0,1])
    new_w, new_h = int(h*sin + w*cos), int(h*cos + w*sin)
    M[0,2] += new_w/2 - w/2
    M[1,2] += new_h/2 - h/2

        # Kein Alphakanal → normal mit weißem Hintergrund
    rotated = cv2.warpAffine(
        patch,
        M,
        (new_w, new_h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255))
        

    return rotated


In [None]:
img_arr = cv2.imread("../data/rotation/batches/rotation_20250708_01/images/boxes/10000_1.png")


ANGLES: List[int]   = [0, 90, 180, 270,]


def rotate_all_batches():
    for p in BATCHES_DIR.iterdir():
        if 'rotation' in p.name:
            
            boxes_file = p / 'images' / 'boxes'
            for box_path in sorted(boxes_file.iterdir()):
                img_arr = cv2.imread(str(box_path), cv2.IMREAD_UNCHANGED)
                if img_arr is None:
                    print(f"Could not load {box_path}")
                    continue

                box_nr = box_path.stem
                for angle in ANGLES:
                    rotated_box = rotate_patch(img_arr, angle)
                    out_path = boxes_file / f"{box_nr}_{angle}.png"
                    cv2.imwrite(str(out_path), rotated_box)

        
        
rotate_all_batches()

## Alle Klassen in train/test umschreiben 

In [None]:
import random
from typing import List


def organize_into_classes(dataset_path, out_base_path, train_ratio=0.8):
    dataset_path = Path(dataset_path)
    out_base_path = Path(out_base_path)

    for split in ['train', 'test']:
        for angle in ANGLES:
            (out_base_path / split / str(angle)).mkdir(parents=True, exist_ok=True)

    for img_file in dataset_path.glob("*.png"):
        for angle in ANGLES:
            if f"_{angle}.png" in img_file.name:
                split = "train" if random.random() < train_ratio else "test"
                target_dir = out_base_path / split / str(angle)
                shutil.copy(img_file, target_dir / img_file.name)


In [None]:
organize_into_classes(
    dataset_path='../data/rotation/batches/rotation_20250708_01/images/boxes/',
    out_base_path=Path("../data/rotation/classification"),
    train_ratio=0.8
)