In [1]:
import os
import math
import csv
import numpy as np
import rasterio
from rasterio.windows import Window
from rasterio.transform import rowcol
from rasterio import features
import geopandas as gpd
from shapely.geometry import box

In [None]:
# Use your own path!!
BIG_TIF = "/Users/hexinyi/Desktop/temporary/S2_RGB8_2024-02-01_2024-10-31_RGB8.tif"                 
POLY_VEC = "/Users/hexinyi/Desktop/temporary/filterlot.gpkg"                 
ID_FIELD = "OBJECTID"                     

OUT_DIR = "/Users/hexinyi/Downloads/RGBtest4"             
PATCH_SMALL = 256                        
PATCH_LARGE = 512                    
MARGIN_PX = 20                           
TILE_OVERLAP = 256                        

PAD_TO_FULL = False                     

os.makedirs(OUT_DIR, exist_ok=True)

# open big tif
src = rasterio.open(BIG_TIF)
r_crs = src.crs
r_transform = src.transform
r_height, r_width = src.height, src.width
profile_base = src.profile


gdf = gpd.read_file(POLY_VEC)
if gdf.crs is None:
    raise ValueError("No CRS")
if gdf.crs != r_crs:
    gdf = gdf.to_crs(r_crs)


def geom_bbox_px(geom):
    minx, miny, maxx, maxy = geom.bounds  
    r0, c0 = rowcol(r_transform, minx, maxy) 
    r1, c1 = rowcol(r_transform, maxx, miny)
    ymin, ymax = min(r0, r1), max(r0, r1)
    xmin, xmax = min(c0, c1), max(c0, c1)

    xmin -= MARGIN_PX
    ymin -= MARGIN_PX
    xmax += MARGIN_PX
    ymax += MARGIN_PX

    xmin = max(0, xmin)
    ymin = max(0, ymin)
    xmax = min(r_width,  xmax)
    ymax = min(r_height, ymax)

    w = max(0, xmax - xmin)
    h = max(0, ymax - ymin)
    return int(xmin), int(ymin), int(w), int(h)

def write_patch(window, out_path):
    win_w = int(window.width)
    win_h = int(window.height)
    data = src.read(window=window)  # (bands, h, w)

    if PAD_TO_FULL:
        target_w, target_h = window.width, window.height
        if isinstance(target_w, float): target_w = int(target_w)
        if isinstance(target_h, float): target_h = int(target_h)
        pad_w = target_w
        pad_h = target_h
        if data.shape[1] != pad_h or data.shape[2] != pad_w:
            canvas = np.zeros((data.shape[0], pad_h, pad_w), dtype=data.dtype)
            canvas[:, :data.shape[1], :data.shape[2]] = data
            data = canvas
            win_w, win_h = pad_w, pad_h

    transform = rasterio.windows.transform(window, r_transform)
    profile = profile_base.copy()
    profile.update({
        "height": win_h,
        "width":  win_w,
        "transform": transform
    })
    with rasterio.open(out_path, "w", **profile) as dst:
        dst.write(data)

manifest_path = os.path.join(OUT_DIR, "manifest.csv")
with open(manifest_path, "w", newline="") as fcsv:
    writer = csv.writer(fcsv)
    writer.writerow(["patch_path","poly_id","x_off","y_off","width","height",
                     "patch_size","tile_idx"])

    for idx, feat in gdf.iterrows():
        geom = feat.geometry
        poly_id = str(feat.get(ID_FIELD, f"poly_{idx}"))

        xmin, ymin, bw, bh = geom_bbox_px(geom)
        if bw == 0 or bh == 0:
            continue  

        max_dim = max(bw, bh)

        if max_dim <= PATCH_SMALL:
            psize = PATCH_SMALL
            x_off = max(0, min(xmin + bw//2 - psize//2, r_width  - psize))
            y_off = max(0, min(ymin + bh//2 - psize//2, r_height - psize))
            win_w = min(psize, r_width  - x_off)
            win_h = min(psize, r_height - y_off)
            window = Window(x_off, y_off, win_w, win_h)
            out_name = os.path.join(OUT_DIR, 
                                    f"patch_{poly_id}_S_{x_off}_{y_off}.tif")
            write_patch(window, out_name)
            writer.writerow([out_name, poly_id, x_off, y_off, 
                             win_w, win_h, psize, 0])

        elif max_dim <= PATCH_LARGE:
            psize = PATCH_LARGE
            x_off = max(0, min(xmin + bw//2 - psize//2, r_width  - psize))
            y_off = max(0, min(ymin + bh//2 - psize//2, r_height - psize))
            win_w = min(psize, r_width  - x_off)
            win_h = min(psize, r_height - y_off)
            window = Window(x_off, y_off, win_w, win_h)
            out_name = os.path.join(OUT_DIR, 
                                    f"patch_{poly_id}_L_{x_off}_{y_off}.tif")
            write_patch(window, out_name)
            writer.writerow([out_name, poly_id, x_off, y_off, 
                             win_w, win_h, psize, 0])

        else:
            psize = PATCH_LARGE
            stride = psize - TILE_OVERLAP
            tiles = []
            for y_off in range(ymin, ymin + bh, stride):
                for x_off in range(xmin, xmin + bw, stride):
                    x_off_clamp = max(0, min(x_off, r_width  - psize))
                    y_off_clamp = max(0, min(y_off, r_height - psize))
                    win_w = min(psize, r_width  - x_off_clamp)
                    win_h = min(psize, r_height - y_off_clamp)
                    tiles.append((x_off_clamp, y_off_clamp, win_w, win_h))

            seen = set()
            unique_tiles = []
            for t in tiles:
                key = (t[0], t[1], t[2], t[3])
                if key not in seen:
                    seen.add(key)
                    unique_tiles.append(t)

            for ti, (x_off, y_off, win_w, win_h) in enumerate(unique_tiles):
                window = Window(x_off, y_off, win_w, win_h)
                out_name = os.path.join(OUT_DIR, 
                            f"patch_{poly_id}_T{ti:02d}_{x_off}_{y_off}.tif")
                write_patch(window, out_name)
                writer.writerow([out_name, poly_id, x_off, y_off, 
                                 win_w, win_h, psize, ti])

src.close()
print(f"✅ Complete。ouput path: {OUT_DIR}\n📄 manifest: {manifest_path}")


✅ Complete。ouput path: /Users/hexinyi/Downloads/RGBtest3
📄 manifest: /Users/hexinyi/Downloads/RGBtest3/manifest.csv


In [None]:
# add ID
import json
import os
from glob import glob

def add_polygon_ids_to_folder(folder_path, save_folder=None):
    if save_folder is None:
        save_folder = os.path.join(folder_path, "withID")
    os.makedirs(save_folder, exist_ok=True)

    json_files = glob(os.path.join(folder_path, "*.json"))
    print(f"find {len(json_files)} .JSON ")

    for json_path in json_files:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        for idx, shape in enumerate(data.get("shapes", [])):
            shape["polygon_id"] = idx

        filename = os.path.basename(json_path)
        save_path = os.path.join(save_folder, filename)

        with open(save_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"complete ✅ {filename} → {save_path}")

    print("Complete all ✅")

# Use your own path
add_polygon_ids_to_folder("/Users/hexinyi/Desktop/9.17")


find 89 .JSON 
complete ✅ patch_1402030_S_1525_1997.json → /Users/hexinyi/Desktop/9.17/withID/patch_1402030_S_1525_1997.json
complete ✅ patch_1402002_S_1927_3040.json → /Users/hexinyi/Desktop/9.17/withID/patch_1402002_S_1927_3040.json
complete ✅ patch_1402059_S_1131_1230.json → /Users/hexinyi/Desktop/9.17/withID/patch_1402059_S_1131_1230.json
complete ✅ patch_1402216_S_2018_1739.json → /Users/hexinyi/Desktop/9.17/withID/patch_1402216_S_2018_1739.json
complete ✅ patch_1402024_S_1231_2377.json → /Users/hexinyi/Desktop/9.17/withID/patch_1402024_S_1231_2377.json
complete ✅ patch_1402087_S_2022_1940.json → /Users/hexinyi/Desktop/9.17/withID/patch_1402087_S_2022_1940.json
complete ✅ patch_1401983_S_769_2151.json → /Users/hexinyi/Desktop/9.17/withID/patch_1401983_S_769_2151.json
complete ✅ patch_1402364_S_2550_1600.json → /Users/hexinyi/Desktop/9.17/withID/patch_1402364_S_2550_1600.json
complete ✅ patch_1402034_S_988_2142.json → /Users/hexinyi/Desktop/9.17/withID/patch_1402034_S_988_2142.json