
## README / Quick‑Start / Rotation Pipeline

**Directory layout expected**

```
rotation/
└── batches/
    ├── batch_20250115_01/          # <- renamed input folder
    │   ├── images/
    │   │   └── default/*.png
    │   └── annotations/
    │       └── instances_default.json
    └── ...
```

> ⚠️ If your raw data are still in `rotation/batches/images/default`  
> run section **1 – Rename batches** first.



In [2]:

import os, json, shutil, random, math, datetime as dt
from pathlib import Path
from typing import List, Dict, Any
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, Polygon

import json, copy
import math, collections


plt.rcParams['figure.dpi'] = 140  # sharper inline figs


In [3]:

# Where am I?
print("Working dir :", Path.cwd())

# Show the absolute target
BATCHES_DIR = Path("../data/rotation/batches")               # or Path('/absolute/path/to/project')
print("Batch dir   :", BATCHES_DIR)

# Does it exist?
print("Exists?     :", BATCHES_DIR.exists())
print("Contents    :", list(BATCHES_DIR.iterdir())[:5])  # peek first 5 entries


Working dir : /Users/gerhardkarbeutz/cerpro/ocr-rec-lab/pipeline
Batch dir   : ../data/rotation/batches
Exists?     : True
Contents    : [PosixPath('../data/rotation/batches/.DS_Store'), PosixPath('../data/rotation/batches/rotation_20250708_01'), PosixPath('../data/rotation/batches/rotation_20250708_02')]


In [62]:

def rename_batches(batches_dir: Path, prefix: str = 'rotation', date_fmt: str = '%Y%m%d') -> None:
   
    today = dt.datetime.today().strftime(date_fmt)
    index = 1
    
    print(batches_dir.iterdir())
    print([p for p in batches_dir.iterdir()])
    
    unnamed = []

    
    for p in batches_dir.iterdir():
        print(f'P Name: {p.name}')
        print(f'P Type: {type(p.name)}')
        print(f'Prefix Type: {type(prefix)}')

        pname = p.name
        
        if (p.is_dir() and p.name not in ('images','annotations') and pname.find(prefix) == -1):
            unnamed.append(p)


    #unnamed = [p for p in batches_dir.iterdir() if p.is_dir() and p.name not in ('images','annotations') ]
    
    print(f'unnamed: {unnamed}')

    
    # also handle loose images/annotations sitting directly
    if (batches_dir/'images').exists() and (batches_dir/'annotations').exists():
        unnamed.append(batches_dir)
    if not unnamed:
        print('Nothing to rename – folders already structured ✔️')
        return
    for src in unnamed:
        target = batches_dir/f"{prefix}_{today}_{index:02d}"
        index += 1
        target.mkdir(exist_ok=True)
        for sub in ('images', 'annotations'):
            sub_path = src/sub
            if sub_path.exists():
                shutil.move(str(sub_path), target/ sub)
        # remove empty src folder if it wasn't batches_dir
        if src != batches_dir:
            try:
                src.rmdir()
            except OSError:
                pass
        print(f"Moved {src} -> {target}")


In [61]:


# -- Preview rename without executing (dry run) -------------------------------
# Uncomment to execute
rename_batches(BATCHES_DIR)


<generator object Path.iterdir at 0x1204e8cf0>
[PosixPath('../data/rotation/batches/.DS_Store'), PosixPath('../data/rotation/batches/rotation_20250708_01'), PosixPath('../data/rotation/batches/rotation_20250708_02')]
P Name: .DS_Store
P Type: <class 'str'>
Prefix Type: <class 'str'>
P Name: rotation_20250708_01
P Type: <class 'str'>
Prefix Type: <class 'str'>
P Name: rotation_20250708_02
P Type: <class 'str'>
Prefix Type: <class 'str'>
unnamed: []
Nothing to rename – folders already structured ✔️


## 2 – Explore a COCO JSON file

In [24]:

def load_coco(json_path: Path) -> Dict[str, Any]:
    with open(json_path, 'r', encoding='utf-8') as f:
        coco = json.load(f)
    return coco



def coco_summary(coco: Dict[str, Any]) -> None:
    print(f"Images      : {len(coco['images']):>5}")
    print(f"Annotations : {len(coco['annotations']):>5}")
    print(f"Categories  : {len(coco['categories']):>5}\n")
    cat_map = {c['id']: c['name'] for c in coco['categories']}
    counts = {}
    for ann in coco['annotations']:
        counts[cat_map[ann['category_id']]] = counts.get(cat_map[ann['category_id']], 0) + 1
    print('Top classes:')
    for k, v in sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:10]:
        print(f"  {k:<25} {v}")

def coco_to_df(coco: Dict[str, Any]) -> pd.DataFrame:
    img_lookup = {img['id']: img for img in coco['images']}
    rows = []
    for ann in coco['annotations']:
        img = img_lookup[ann['image_id']]
        row = {
            'image_id': ann['image_id'],
            'file_name': img['file_name'],
            'width': img['width'],
            'height': img['height'],
            'category_id': ann['category_id'],
            'bbox': ann['bbox'],
            'area': ann.get('area', None),
            'rotation': ann.get('attributes', {}).get('rotation', 0.0),
            'iscrowd': ann.get('iscrowd', 0)
        }
        rows.append(row)
    return pd.DataFrame(rows)


In [25]:
# ────────────────────────────────────────────────────────────────
# 3.  Pick (and sanity-check) a batch folder
# ────────────────────────────────────────────────────────────────
subdirs = sorted(p for p in BATCHES_DIR.iterdir() if p.is_dir())
if not subdirs:
    raise FileNotFoundError(f"No batch folders found in {BATCHES_DIR.resolve()}")

first_batch = subdirs[0]
print("Using batch :", first_batch.name)

# ────────────────────────────────────────────────────────────────
# 4.  Load, summarise, flatten
# ────────────────────────────────────────────────────────────────
json_path = first_batch / "annotations" / "instances_default.json"
coco      = load_coco(json_path)

coco_summary(coco)

df = coco_to_df(coco)
display(df.head())     

Using batch : batch_20250703_01
Images      :    97
Annotations :  4510
Categories  :    13

Top classes:
  text                      4510


Unnamed: 0,image_id,file_name,width,height,category_id,bbox,area,rotation,iscrowd
0,1,10000.png,3055,2160,7,"[2434.51, 533.13, 35.37, 32.47]",1148.4639,314.6,0
1,1,10000.png,3055,2160,7,"[1165.9, 1001.1, 49.4, 18.9]",933.66,0.0,0
2,1,10000.png,3055,2160,7,"[1165.7, 1021.8, 49.74, 19.2]",955.008,0.0,0
3,1,10000.png,3055,2160,7,"[2234.5, 1021.9, 34.1, 19.3]",658.13,0.0,0
4,1,10000.png,3055,2160,7,"[2175.8, 1006.5, 47.5, 37.8]",1795.5,0.0,0


In [26]:
# ────────────────────────────────────────────────────────────────
#  Convert "axis-aligned + angle" COCO → 5-tuple rotated COCO
#  (cx, cy, w, h, θ)  where θ is CLOCKWISE in DEGREES
# ────────────────────────────────────────────────────────────────


def save_coco(coco: Dict[str, Any], path: Path) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as f:
        json.dump(coco, f, ensure_ascii=False, indent=2)
    print(f"✅  wrote {path.relative_to(BATCHES_DIR)}")

def wrap_angle(angle: float) -> float:
    """Force angle into the [-180, 180) range (optional but tidy)."""
    while angle >= 180.0:
        angle -= 360.0
    while angle < -180.0:
        angle += 360.0
    return angle

def coco_to_obb(coco: Dict[str, Any],
                angle_key: str = "rotation",
                drop_original_angle: bool = False) -> Dict[str, Any]:
    """Return a *new* COCO dict with bbox → (cx, cy, w, h, θ)."""
    obb_coco = copy.deepcopy(coco)  # keep the original dict untouched

    for ann in obb_coco["annotations"]:
        # 1) read axis-aligned box
        x, y, w, h = ann["bbox"]
        # 2) convert to centre-based
        cx = x + w / 2.0
        cy = y + h / 2.0
        # 3) fetch angle  (default = 0 if missing)
        theta = ann.get("attributes", {}).get(angle_key, 0.0)
        theta = wrap_angle(theta)
        # 4) replace bbox
        ann["bbox"] = [cx, cy, w, h, theta]

        # (optional) tidy up the attributes
        if drop_original_angle and "attributes" in ann:
            ann["attributes"].pop(angle_key, None)

    return obb_coco

### !Alle files direkt in die funktion packen und erste chekcen ob die file schon exestiert


In [28]:



# ────────────────────────────────────────────────────────────────
#  USE IT
# ────────────────────────────────────────────────────────────────
# path to *your* original JSON (edit if needed)

in_json  = Path("../data/rotation/batches/batch_20250703_02/annotations/instances_default.json")
out_json = in_json.with_name("instances_updated.json")

coco_ax  = load_coco(in_json)
coco_obb = coco_to_obb(coco_ax,
                       angle_key="rotation",
                       drop_original_angle=False)   # keep the angle for traceability
save_coco(coco_obb, out_json)


✅  wrote batch_20250703_02/annotations/instances_updated.json
