- From the YOLO detection results, the centroid (in pixel coordinates) and prediction_id of each object are extracted.
- For each object, a 3×3 tile patch (covering a 3072×3072 area) is constructed, centered around the tile containing the object.
- A 320×320 image is then cropped from this larger canvas, centered on the object’s centroid.
- Each cropped image is saved as a .png file.
- These cropped images will be used as inputs for the U-Net prediction model.

**PNG format only**

In [2]:
import os
import pandas as pd
import numpy as np
import rasterio
from tqdm import tqdm
from PIL import Image 

# Edge length, in pixels, of one square source tile. The stitched canvas
# built per detection is 3 * TILE_SIZE on each side.
TILE_SIZE = 1024
# Edge length, in pixels, of the square crop saved for each detection.
CROP_SIZE = 320

# Folder holding the tiles, named "<base>_tile_<x>_<y>.tif".
TILE_FOLDER = "/shared/data/climateplus2025/Prediction_for_poster_July21/CapeTown_Image_2023_tiles_1024_for_prediction"
# CSV of detections; the columns read below are "prediction_id",
# "image_name" and "pixel_centroid".
CSV_PATH = "/shared/data/climateplus2025/Prediction_for_poster_July21/processed_centroids_and_bbox.csv"
# Destination folder; one "<prediction_id>.png" is written per detection.
OUTPUT_FOLDER = "/shared/data/climateplus2025/Prediction_for_poster_July21/Cropped_Images_320_centered_from_YOLO_for_unet_prediction"

# Create the destination up front so saving a crop never fails on a missing folder.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)


# One row per detection.
df = pd.read_csv(CSV_PATH)


def parse_centroid(centroid_str):
    """Parse a centroid such as ``"(512.3, 100.7)"`` into two floats.

    Surrounding whitespace and enclosing parentheses or square brackets are
    ignored, so ``" (1, 2) "`` and ``"[1, 2]"`` are accepted as well.
    Non-string values are converted with ``str()`` first.

    Args:
        centroid_str: Text holding exactly two comma-separated numbers.

    Returns:
        The ``(x, y)`` pair as floats.

    Raises:
        ValueError: If the value does not hold exactly two numbers.
    """
    # Whitespace must go first: strip("()[]") only removes characters at the
    # very ends, so a trailing space or newline would leave a bracket behind.
    text = str(centroid_str).strip().strip("()[]")
    parts = text.split(",")
    if len(parts) != 2:
        raise ValueError(f"Expected a centroid like '(x, y)', got {centroid_str!r}")
    x, y = (float(part) for part in parts)
    return x, y


def get_tile_neighbors(base_name, tile_x, tile_y, tile_size=None):
    """Return the file names of a tile and its eight surrounding tiles.

    Tile names follow ``"<base_name>_tile_<x>_<y>.tif"``; a neighbour's name
    is obtained by shifting ``x`` and/or ``y`` by one tile stride. Names are
    only built here, not checked: a neighbour outside the mosaic (for example
    one with a negative offset) simply names a file that does not exist.

    Args:
        base_name: Part of the tile file name before ``"_tile_"``.
        tile_x: X offset encoded in the center tile's file name.
        tile_y: Y offset encoded in the center tile's file name.
        tile_size: Stride between adjacent tiles. Defaults to the
            module-level ``TILE_SIZE`` when omitted.

    Returns:
        A dict mapping ``'topleft'``, ``'top'``, ``'topright'``, ``'left'``,
        ``'center'``, ``'right'``, ``'bottomleft'``, ``'bottom'`` and
        ``'bottomright'`` to the corresponding file name.
    """
    if tile_size is None:
        tile_size = TILE_SIZE

    # (dx, dy) in whole tiles relative to the center tile.
    offsets = {
        'topleft': (-1, -1), 'top': (0, -1), 'topright': (1, -1),
        'left': (-1, 0),     'center': (0, 0), 'right': (1, 0),
        'bottomleft': (-1, 1), 'bottom': (0, 1), 'bottomright': (1, 1)
    }
    return {
        key: f"{base_name}_tile_{tile_x + dx * tile_size}_{tile_y + dy * tile_size}.tif"
        for key, (dx, dy) in offsets.items()
    }


def load_tile_or_black(tile_path):
    """Read the tile at *tile_path*, or return a black tile if it is absent.

    Args:
        tile_path: Path of the tile file to read.

    Returns:
        Whatever ``read()`` yields for an existing file, otherwise a
        ``(3, TILE_SIZE, TILE_SIZE)`` ``uint8`` array of zeros.
    """
    if not os.path.exists(tile_path):
        # Missing neighbours are rendered as black pixels.
        return np.zeros((3, TILE_SIZE, TILE_SIZE), dtype=np.uint8)

    with rasterio.open(tile_path) as dataset:
        return dataset.read()


# (row, col) slot of each neighbour inside the 3×3 stitched canvas.
# The names are listed in row-major order, so divmod(index, 3) gives the slot.
position_map = {
    name: divmod(slot, 3)
    for slot, name in enumerate((
        "topleft", "top", "topright",
        "left", "center", "right",
        "bottomleft", "bottom", "bottomright",
    ))
}


# For every detection: stitch the 3×3 tile neighbourhood around its tile,
# cut a CROP_SIZE × CROP_SIZE window centred on the centroid and save it as
# "<prediction_id>.png". Rows that fail are reported and skipped.

half_crop = CROP_SIZE // 2  # loop-invariant, computed once
failed_ids = []  # prediction_ids whose crop could not be produced

for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing crops"):
    pred_id = row["prediction_id"]
    tile_name = row["image_name"]

    try:
        # Parsed inside the try so a malformed centroid skips only this row
        # instead of aborting the whole run.
        cx, cy = parse_centroid(row["pixel_centroid"])

        base_name, tile_pos = tile_name.split("_tile_")
        tile_x, tile_y = map(int, tile_pos.replace(".tif", "").split("_"))

        neighbors = get_tile_neighbors(base_name, tile_x, tile_y)

        # Only neighbours may be black-filled; the tile that holds the
        # detection itself must exist, otherwise the row is skipped.
        center_tile_path = os.path.join(TILE_FOLDER, neighbors["center"])
        if not os.path.exists(center_tile_path):
            raise FileNotFoundError(f"Center tile not found: {center_tile_path}")

        # Stitch the nine tiles into one (3, 3*TILE_SIZE, 3*TILE_SIZE) canvas.
        canvas = np.zeros((3, TILE_SIZE * 3, TILE_SIZE * 3), dtype=np.uint8)
        for key, (i, j) in position_map.items():
            tile_path = os.path.join(TILE_FOLDER, neighbors[key])
            tile = load_tile_or_black(tile_path)
            canvas[:, i * TILE_SIZE:(i + 1) * TILE_SIZE, j * TILE_SIZE:(j + 1) * TILE_SIZE] = tile

        # The centroid is relative to the center tile, which sits at grid
        # slot (1, 1), i.e. one tile in from the canvas origin.
        stitched_cx = TILE_SIZE + cx
        stitched_cy = TILE_SIZE + cy

        x0 = int(stitched_cx - half_crop)
        y0 = int(stitched_cy - half_crop)
        x1 = x0 + CROP_SIZE
        y1 = y0 + CROP_SIZE

        # Padding-aware crop: any part of the window outside the canvas
        # stays black.
        crop = np.zeros((3, CROP_SIZE, CROP_SIZE), dtype=np.uint8)

        src_x0 = max(0, x0)
        src_y0 = max(0, y0)
        src_x1 = min(canvas.shape[2], x1)
        src_y1 = min(canvas.shape[1], y1)

        dst_x0 = src_x0 - x0
        dst_y0 = src_y0 - y0
        dst_x1 = dst_x0 + (src_x1 - src_x0)
        dst_y1 = dst_y0 + (src_y1 - src_y0)

        crop[:, dst_y0:dst_y1, dst_x0:dst_x1] = canvas[:, src_y0:src_y1, src_x0:src_x1]

        crop_rgb = np.transpose(crop, (1, 2, 0))  # (C, H, W) → (H, W, C)
        save_path = os.path.join(OUTPUT_FOLDER, f"{pred_id}.png")
        Image.fromarray(crop_rgb).save(save_path)

    except Exception as e:
        # Best-effort batch: report the failure and move on to the next row.
        print(f"Error in {pred_id}: {e}")
        failed_ids.append(pred_id)

if failed_ids:
    # Do not claim full success when some rows were skipped.
    print(
        f"\n {len(df) - len(failed_ids)} of {len(df)} crops saved as PNG to: "
        f"{OUTPUT_FOLDER} ({len(failed_ids)} failed)"
    )
else:
    print(f"\n All crops saved as PNG to: {OUTPUT_FOLDER}")


Processing crops: 100%|██████████| 810/810 [02:59<00:00,  4.52it/s]


 All crops saved as PNG to: /shared/data/climateplus2025/Prediction_for_poster_July21/Cropped_Images_320_centered_from_YOLO_for_unet_prediction





**(Optional) GeoTIFF output (.tif with CRS and transform, instead of PNG)**

In [None]:
import os
import pandas as pd
import numpy as np
import rasterio
from rasterio.transform import Affine
from tqdm import tqdm

# Configuration
# Edge length, in pixels, of one square source tile. The stitched canvas
# built per detection is 3 * TILE_SIZE on each side.
TILE_SIZE = 1024
# Edge length, in pixels, of the square crop saved for each detection.
CROP_SIZE = 320

# Folder holding the tiles, named "<base>_tile_<x>_<y>.tif".
TILE_FOLDER = "/shared/data/climateplus2025/CapeTown_Image_2023_tiles_1024_for_prediction"
# CSV of detections; the columns read below are "prediction_id",
# "image_name" and "pixel_centroid".
CSV_PATH = "/shared/data/climateplus2025/Prediction_for_poster/processed_centroids_and_bbox.csv"
# Destination folder; one "<prediction_id>.tif" is written per detection.
OUTPUT_FOLDER = "/shared/data/climateplus2025/Cropped_Images_320_centered_from_YOLO_for_unet_prediction"

# Create the destination up front so saving a crop never fails on a missing folder.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Load centroid CSV (one row per detection)
df = pd.read_csv(CSV_PATH)

def parse_centroid(centroid_str):
    """Convert a centroid like ``"(512.3, 100.7)"`` to an ``(x, y)`` float pair.

    Leading/trailing whitespace and enclosing ``()`` or ``[]`` are tolerated.
    Values that are not strings are passed through ``str()`` before parsing.

    Args:
        centroid_str: Text containing exactly two comma-separated numbers.

    Returns:
        Tuple ``(x, y)`` of floats.

    Raises:
        ValueError: If the value does not contain exactly two numbers.
    """
    # Strip whitespace before brackets; otherwise a trailing space or newline
    # keeps strip("()[]") from reaching the closing bracket.
    cleaned = str(centroid_str).strip().strip("()[]")
    fields = cleaned.split(",")
    if len(fields) != 2:
        raise ValueError(f"Expected a centroid like '(x, y)', got {centroid_str!r}")
    return float(fields[0]), float(fields[1])

def get_tile_neighbors(base_name, tile_x, tile_y, tile_size=None):
    """Build the file names of the 3×3 tile neighbourhood around one tile.

    Names follow ``"<base_name>_tile_<x>_<y>.tif"``, with ``x``/``y`` shifted
    by one tile stride per step. The function only formats names; it does not
    check that the files exist, so off-mosaic neighbours name missing files.

    Args:
        base_name: Part of the tile file name before ``"_tile_"``.
        tile_x: X offset encoded in the center tile's file name.
        tile_y: Y offset encoded in the center tile's file name.
        tile_size: Stride between adjacent tiles. Defaults to the
            module-level ``TILE_SIZE`` when omitted.

    Returns:
        Dict keyed by ``'topleft'``, ``'top'``, ``'topright'``, ``'left'``,
        ``'center'``, ``'right'``, ``'bottomleft'``, ``'bottom'``,
        ``'bottomright'`` with the matching file name as value.
    """
    stride = TILE_SIZE if tile_size is None else tile_size

    # (dx, dy) in whole tiles relative to the center tile.
    offsets = {
        'topleft': (-1, -1), 'top': (0, -1), 'topright': (1, -1),
        'left': (-1, 0),     'center': (0, 0), 'right': (1, 0),
        'bottomleft': (-1, 1), 'bottom': (0, 1), 'bottomright': (1, 1)
    }
    return {
        key: f"{base_name}_tile_{tile_x + dx * stride}_{tile_y + dy * stride}.tif"
        for key, (dx, dy) in offsets.items()
    }

def load_tile_or_black(tile_path):
    """Return the contents of the tile file, or zeros when it does not exist.

    Args:
        tile_path: Path of the tile file to read.

    Returns:
        The array produced by ``read()`` for an existing file; otherwise a
        zero-filled ``uint8`` array of shape ``(3, TILE_SIZE, TILE_SIZE)``.
    """
    tile_missing = not os.path.exists(tile_path)
    if tile_missing:
        # Absent neighbours are filled with black.
        black_shape = (3, TILE_SIZE, TILE_SIZE)
        return np.zeros(black_shape, dtype=np.uint8)

    with rasterio.open(tile_path) as reader:
        return reader.read()

# (row, col) slot of each neighbour inside the 3×3 stitched canvas, paired
# in row-major order with the neighbour names.
position_map = dict(zip(
    (
        "topleft", "top", "topright",
        "left", "center", "right",
        "bottomleft", "bottom", "bottomright",
    ),
    ((grid_row, grid_col) for grid_row in range(3) for grid_col in range(3)),
))

# For every detection: stitch the 3×3 tile neighbourhood around its tile,
# cut a CROP_SIZE × CROP_SIZE window centred on the centroid and save it as
# "<prediction_id>.tif", carrying over the center tile's CRS and a transform
# shifted to the crop's origin. Rows that fail are reported and skipped.

half_crop = CROP_SIZE // 2  # loop-invariant, computed once
failed_ids = []  # prediction_ids whose crop could not be produced

for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing crops"):
    pred_id = row["prediction_id"]
    tile_name = row["image_name"]

    try:
        # Parsed inside the try so a malformed centroid skips only this row
        # instead of aborting the whole run.
        cx, cy = parse_centroid(row["pixel_centroid"])

        base_name, tile_pos = tile_name.split("_tile_")
        tile_x, tile_y = map(int, tile_pos.replace(".tif", "").split("_"))

        neighbors = get_tile_neighbors(base_name, tile_x, tile_y)

        canvas = np.zeros((3, TILE_SIZE * 3, TILE_SIZE * 3), dtype=np.uint8)

        # Use center tile's CRS and transform for later geo info. Opening it
        # here also makes a missing center tile fail (and skip) this row.
        center_tile_path = os.path.join(TILE_FOLDER, neighbors["center"])
        with rasterio.open(center_tile_path) as src:
            base_transform = src.transform
            base_crs = src.crs

        # Stitch the nine tiles; missing neighbours come back as black.
        for key, (i, j) in position_map.items():
            tile_path = os.path.join(TILE_FOLDER, neighbors[key])
            tile = load_tile_or_black(tile_path)
            canvas[:, i * TILE_SIZE:(i + 1) * TILE_SIZE, j * TILE_SIZE:(j + 1) * TILE_SIZE] = tile

        # The centroid is relative to the center tile, which sits at grid
        # slot (1, 1), i.e. one tile in from the canvas origin.
        stitched_cx = TILE_SIZE + cx
        stitched_cy = TILE_SIZE + cy

        x0 = int(stitched_cx - half_crop)
        y0 = int(stitched_cy - half_crop)
        x1 = x0 + CROP_SIZE
        y1 = y0 + CROP_SIZE

        # Padding-aware crop: any part of the window outside the canvas
        # stays black.
        crop = np.zeros((3, CROP_SIZE, CROP_SIZE), dtype=np.uint8)

        src_x0 = max(0, x0)
        src_y0 = max(0, y0)
        src_x1 = min(canvas.shape[2], x1)
        src_y1 = min(canvas.shape[1], y1)

        dst_x0 = src_x0 - x0
        dst_y0 = src_y0 - y0
        dst_x1 = dst_x0 + (src_x1 - src_x0)
        dst_y1 = dst_y0 + (src_y1 - src_y0)

        crop[:, dst_y0:dst_y1, dst_x0:dst_x1] = canvas[:, src_y0:src_y1, src_x0:src_x1]

        # Compute the transform for the cropped area: the crop origin is
        # expressed in center-tile pixel coordinates (may be negative) and
        # appended to the center tile's transform.
        offset_x = x0 - TILE_SIZE  # because center tile is in (1,1)
        offset_y = y0 - TILE_SIZE
        crop_transform = base_transform * Affine.translation(offset_x, offset_y)

        # Save the padded crop with geo metadata
        save_path = os.path.join(OUTPUT_FOLDER, f"{pred_id}.tif")
        profile = {
            "driver": "GTiff",
            "height": CROP_SIZE,
            "width": CROP_SIZE,
            "count": 3,
            "dtype": crop.dtype,
            "crs": base_crs,
            "transform": crop_transform,
            "compress": "lzw"
        }

        with rasterio.open(save_path, "w", **profile) as dst:
            dst.write(crop)

    except Exception as e:
        # Best-effort batch: report the failure and move on to the next row.
        print(f"Error in {pred_id}: {e}")
        failed_ids.append(pred_id)

if failed_ids:
    # Do not claim full success when some rows were skipped.
    print(
        f"\n{len(df) - len(failed_ids)} of {len(df)} crops saved to: {OUTPUT_FOLDER} "
        f"(GeoTIFF with padding and metadata; {len(failed_ids)} failed)"
    )
else:
    print(f"\nAll crops saved to: {OUTPUT_FOLDER} (GeoTIFF with padding and metadata)")
