In [None]:
import os
import json
import math
import cv2
import numpy as np
from pathlib import Path
from tqdm import tqdm

# Dataset root directory. Defaults to the shared network mount, but can be
# redirected without editing the notebook by setting DIGITAL_PRODUCTION_ROOT
# in the environment before the kernel starts.
ROOT = os.environ.get(
    "DIGITAL_PRODUCTION_ROOT",
    "/mnt/Data-Work-RE/26_Agricultural_Engineering-RE/263_DP/00_Darwin/digital-production",
)
# Fixed size (in pixels) of every exported tile; smaller edge tiles are
# padded up to this size before being written.
TARGET_WIDTH = 1024
TARGET_HEIGHT = 678

In [None]:

def pad_image(image, target_w, target_h):
    """Grow ``image`` to the target size by adding black pixels.

    Padding is only ever appended on the bottom and right edges, so the
    original pixels keep their coordinates. A dimension that already meets
    or exceeds its target gets no padding; the image is never cropped.
    """
    rows, cols = image.shape[:2]
    # A negative shortfall means the image is already large enough.
    extra_rows = max(target_h - rows, 0)
    extra_cols = max(target_w - cols, 0)
    black = (0, 0, 0)
    return cv2.copyMakeBorder(
        image, 0, extra_rows, 0, extra_cols, cv2.BORDER_CONSTANT, value=black
    )

# Tiles narrower or shorter than this many pixels are discarded.
_MIN_TILE_SIDE = 50


def _rumex_annotations_in_tile(annotations, x0, y0, x1, y1):
    """Return copies of the ``rumex_plant`` annotations anchored in one tile.

    An annotation belongs to the tile when the top-left corner of its
    bounding box lies inside ``[x0, x1) x [y0, y1)``. The returned copies
    carry a bounding box whose origin is shifted into tile coordinates.

    NOTE(review): the box width/height are copied unchanged, so a box that
    starts near the right/bottom edge extends past the tile, and any other
    geometry stored on the annotation is not shifted. Confirm whether the
    downstream consumer expects clipped boxes.
    """
    selected = []
    for anno in annotations:
        if anno.get("name") != "rumex_plant":
            continue  # skip non-rumex classes
        bb = anno.get("bounding_box")
        if not bb:
            continue  # nothing to place without a bounding box
        bb_x, bb_y = bb["x"], bb["y"]
        if not (x0 <= bb_x < x1 and y0 <= bb_y < y1):
            continue
        # Shallow copy is enough: only the bounding box is replaced.
        new_anno = anno.copy()
        new_anno["bounding_box"] = {
            "x": bb_x - x0,
            "y": bb_y - y0,
            "w": bb["w"],
            "h": bb["h"],
        }
        selected.append(new_anno)
    return selected


def tile_and_save(image_path, annotation_path, out_image_dir, out_ann_dir):
    """Cut one image into fixed-size tiles and write per-tile annotations.

    The image is split on a ``TARGET_WIDTH`` x ``TARGET_HEIGHT`` grid. Only
    tiles that contain at least one ``rumex_plant`` annotation are written;
    each is padded to the full target size and saved as
    ``<stem>_<col>_<row>.png`` with a matching ``.json`` annotation file.

    Args:
        image_path: Path of the source image.
        annotation_path: Path of the JSON annotation file for that image.
        out_image_dir: Existing directory that receives the tile images.
        out_ann_dir: Existing directory that receives the tile annotations.

    Raises:
        OSError: If a tile image cannot be written.

    An image that OpenCV cannot decode is skipped with a printed message
    instead of crashing the whole run.
    """
    with open(annotation_path, encoding="utf-8") as f:
        ann_data = json.load(f)

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable/corrupt file by returning None.
        print(f"Skipping unreadable image: {image_path}")
        return

    h, w = img.shape[:2]
    base_name = Path(image_path).stem
    annotations = ann_data.get("annotations", [])

    num_cols = math.ceil(w / TARGET_WIDTH)
    num_rows = math.ceil(h / TARGET_HEIGHT)

    for row in range(num_rows):
        y0 = row * TARGET_HEIGHT
        y1 = min(y0 + TARGET_HEIGHT, h)
        if y1 - y0 < _MIN_TILE_SIDE:
            continue  # the whole row is too short to be useful

        for col in range(num_cols):
            x0 = col * TARGET_WIDTH
            x1 = min(x0 + TARGET_WIDTH, w)
            if x1 - x0 < _MIN_TILE_SIDE:
                continue

            new_annotations = _rumex_annotations_in_tile(annotations, x0, y0, x1, y1)
            # Skip if no rumex_plant annotations are in this tile
            if not new_annotations:
                continue

            # Save padded tile
            tile = pad_image(img[y0:y1, x0:x1], TARGET_WIDTH, TARGET_HEIGHT)
            tile_stem = f"{base_name}_{col}_{row}"
            tile_name = f"{tile_stem}.png"
            tile_path = os.path.join(out_image_dir, tile_name)
            if not cv2.imwrite(tile_path, tile):
                # Never leave an annotation file pointing at a missing image.
                raise OSError(f"Failed to write tile image: {tile_path}")

            # Build the tile metadata from fresh dicts so the source
            # annotation data is not mutated from one tile to the next.
            new_item = dict(ann_data.get("item") or {})
            new_item["name"] = tile_name
            slots = new_item.get("slots")
            if slots:
                first_slot = dict(slots[0])
                first_slot["width"] = TARGET_WIDTH
                first_slot["height"] = TARGET_HEIGHT
                new_item["slots"] = [first_slot, *slots[1:]]

            new_ann_data = dict(ann_data)
            new_ann_data["item"] = new_item
            new_ann_data["annotations"] = new_annotations

            tile_ann_path = os.path.join(out_ann_dir, f"{tile_stem}.json")
            with open(tile_ann_path, "w", encoding="utf-8") as f:
                json.dump(new_ann_data, f, indent=2)

def process_dataset(dataset_name):
    """Tile every annotated image of one dataset located under ``ROOT``.

    Annotations are read from ``releases/1/annotations`` and images from
    ``images``. Tiles go to ``images_splitted`` and their annotations to
    ``releases/1/annotations_splitted``; both output paths are printed and
    created if missing. Entries that are not ``.json`` files, and
    annotations whose referenced image does not exist, are skipped.
    """
    dataset_dir = os.path.join(ROOT, dataset_name)
    release_dir = os.path.join(dataset_dir, "releases", "1")
    source_images = os.path.join(dataset_dir, "images")
    source_annotations = os.path.join(release_dir, "annotations")

    tiles_dir = os.path.join(dataset_dir, "images_splitted")
    print(tiles_dir)
    tile_annotations_dir = os.path.join(release_dir, "annotations_splitted")
    print(tile_annotations_dir)

    for directory in (tiles_dir, tile_annotations_dir):
        os.makedirs(directory, exist_ok=True)

    for entry in tqdm(os.listdir(source_annotations)):
        if not entry.endswith(".json"):
            continue

        json_path = os.path.join(source_annotations, entry)
        # The annotation file names the image it belongs to.
        with open(json_path) as handle:
            source_image_name = json.load(handle)["item"]["name"]

        candidate = os.path.join(source_images, source_image_name)
        if not os.path.exists(candidate):
            continue  # annotation without a matching image on disk

        tile_and_save(candidate, json_path, tiles_dir, tile_annotations_dir)


In [None]:
# Tile the "lightly" dataset. This writes tile images and annotation files
# under ROOT/lightly; the two output directories are printed when it starts.
process_dataset("lightly")

In [None]:
# Tile the "haldennord10" dataset (output is written under ROOT/haldennord10).
process_dataset("haldennord10")

In [None]:
# Tile the "bildacher" dataset (output is written under ROOT/bildacher).
process_dataset("bildacher")