### Cell 1 — Imports & Paths

In [None]:
import json, os, random
from dataclasses import dataclass
from typing import List, Dict, Any, Optional, Tuple

import numpy as np
import rasterio
import cv2

# Directory containing the raw input files.
RAW_DIR = "data/raw"
# Subplot annotation JSON (this is the path handed to load_subplots).
JSON_PATH = os.path.join(RAW_DIR, "TeamName_Subplot.json")
# RGB raster used as the background of the subplot overlay preview.
RGB_PATH = os.path.join(RAW_DIR, "0626_RGB.tif")

# Destination for debug artifacts; created up front (no error if it already
# exists) so later writes do not fail on a missing directory.
OUT_DIR = "data/processed/debug"
os.makedirs(OUT_DIR, exist_ok=True)

### Subplot record + JSON parser (bbox or polygon)

In [None]:
@dataclass
class SubplotRecord:
    """One subplot annotation: an identifier plus optional geometry.

    Both geometry fields default to ``None``; a record may carry a bounding
    box, a polygon, or neither.
    """
    # Identifier of the subplot, kept as a string.
    subplot_id: str
    # Integer box corners; presumably pixel coordinates of the full-resolution
    # raster -- TODO confirm against the annotation source.
    bbox_xyxy: Optional[Tuple[int,int,int,int]] = None   # (x1,y1,x2,y2)
    # Outline vertices as float pairs, in the order they appear in the JSON.
    polygon_xy: Optional[List[Tuple[float,float]]] = None # [(x,y),...]

def _to_int_bbox(b):
    x1,y1,x2,y2 = b
    x1,y1 = int(round(x1)), int(round(y1))
    x2,y2 = int(round(x2)), int(round(y2))
    return (min(x1,x2), min(y1,y2), max(x1,x2), max(y1,y2))

def load_subplots(json_path: str) -> List[SubplotRecord]:
    """Parse subplot annotations from a JSON file into ``SubplotRecord`` objects.

    The document may be either an object holding the entries under a
    ``"subplots"`` key, or a bare top-level list of entries.

    For each entry the identifier is taken from ``"subplot_id"``, falling back
    to ``"id"`` and then ``"tile_id"``; entries whose identifier is empty are
    skipped. Geometry is read from the first key present among ``"bbox"``,
    ``"bbox_xyxy"``, ``"polygon"`` and ``"polygon_xy"``. An entry with an
    identifier but none of these keys is still returned, with both geometry
    fields left as ``None``.

    Args:
        json_path: Path to the annotation JSON file.

    Returns:
        The parsed records, in document order.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # ---- ADAPT THESE KEYS TO YOUR JSON ----
    # Expect something like: {"subplots":[{"id": "...", "bbox":[...]}...]}
    # A bare list is accepted too. Branch on the type *before* calling .get():
    # a list has no .get(), so a single data.get(...) expression would raise
    # AttributeError for list-shaped documents.
    if isinstance(data, list):
        items = data
    else:
        items = data.get("subplots", [])

    records: List[SubplotRecord] = []
    for it in items:
        sid = str(it.get("subplot_id", it.get("id", it.get("tile_id", ""))))
        if not sid:
            continue

        bbox = None
        poly = None

        # Only the first matching geometry key is used.
        if "bbox" in it:
            bbox = _to_int_bbox(it["bbox"])
        elif "bbox_xyxy" in it:
            bbox = _to_int_bbox(it["bbox_xyxy"])
        elif "polygon" in it:
            # polygon is list of [x,y]
            poly = [(float(p[0]), float(p[1])) for p in it["polygon"]]
        elif "polygon_xy" in it:
            poly = [(float(p[0]), float(p[1])) for p in it["polygon_xy"]]

        records.append(SubplotRecord(subplot_id=sid, bbox_xyxy=bbox, polygon_xy=poly))
    return records

# Parse every subplot record from the annotation file.
subplots = load_subplots(JSON_PATH)
# Show the record count and the first record. The guard keeps this from
# raising IndexError when nothing was parsed (e.g. the JSON keys did not match).
len(subplots), (subplots[0] if subplots else None)

### Validate + stats

In [None]:
# Identifier sanity check: the two printed numbers differ when ids repeat.
ids = [rec.subplot_id for rec in subplots]
unique_ids = set(ids)
print("n_subplots:", len(ids), "unique:", len(unique_ids))

# Count how many records carry each kind of geometry (booleans sum as 0/1).
bbox_count = sum(rec.bbox_xyxy is not None for rec in subplots)
poly_count = sum(rec.polygon_xy is not None for rec in subplots)
print("bbox:", bbox_count, "poly:", poly_count)

### Overlay preview on downsampled RGB

In [None]:
def read_rgb_preview(rgb_path: str, scale: float = 0.1):
    """Read a downsampled RGB preview of a raster.

    The first three bands are read directly at preview resolution (a
    decimated read), so the full-resolution raster is never loaded into
    memory.

    Args:
        rgb_path: Path to a raster readable by rasterio, with at least 3 bands.
        scale: Positive factor applied to the raster's width and height.

    Returns:
        A tuple ``(img, (H, W), scale)`` where ``img`` is a contiguous
        ``(out_h, out_w, 3)`` array, ``(H, W)`` is the full-resolution raster
        size and ``scale`` is the requested factor. Output dimensions are
        truncated to whole pixels (minimum 1), so the effective ratio can
        differ slightly from ``scale``.

    Raises:
        ValueError: If ``scale`` is not a positive number.
    """
    # `not scale > 0` also rejects NaN, which would otherwise slip through.
    if not scale > 0:
        raise ValueError(f"scale must be positive, got {scale!r}")

    # Imported here because the file only imports the top-level rasterio package.
    from rasterio.enums import Resampling

    with rasterio.open(rgb_path) as ds:
        H, W = ds.height, ds.width
        # Clamp to one pixel so a tiny scale cannot request an empty image.
        out_w = max(1, int(W * scale))
        out_h = max(1, int(H * scale))
        # Ask rasterio for the bands already resampled to the preview size
        # instead of reading the whole raster and shrinking it afterwards.
        rgb = ds.read(
            [1, 2, 3],
            out_shape=(3, out_h, out_w),
            resampling=Resampling.average,
        )  # (3, out_h, out_w)
    # Band-first -> band-last; made contiguous because the transpose is only a
    # strided view and OpenCV drawing functions need a contiguous buffer.
    img = np.ascontiguousarray(np.transpose(rgb, (1, 2, 0)))
    return img, (H, W), scale

# Build a downsampled RGB preview and draw on a copy so `img` stays clean.
img, (H,W), scale = read_rgb_preview(RGB_PATH, scale=0.08)
vis = img.copy()

# Overlay at most 80 randomly chosen subplots to keep the preview readable.
sample = random.sample(subplots, k=min(80, len(subplots)))
for s in sample:
    if s.bbox_xyxy is not None:
        # Map full-resolution box corners into preview coordinates.
        x1,y1,x2,y2 = s.bbox_xyxy
        x1 = int(x1*scale); y1=int(y1*scale); x2=int(x2*scale); y2=int(y2*scale)
        cv2.rectangle(vis, (x1,y1), (x2,y2), (0,255,0), 1)
    if s.polygon_xy:
        # Polygon-only records are drawn too; polylines expects int32 points
        # shaped (n_points, 1, 2).
        pts = np.array(
            [(int(px*scale), int(py*scale)) for px, py in s.polygon_xy],
            dtype=np.int32,
        ).reshape(-1, 1, 2)
        cv2.polylines(vis, [pts], True, (0,255,0), 1)

out_path = os.path.join(OUT_DIR, "preview_subplots.png")
# imwrite signals failure by returning False rather than raising, so check it;
# OpenCV writes BGR, hence the channel swap.
if not cv2.imwrite(out_path, cv2.cvtColor(vis, cv2.COLOR_RGB2BGR)):
    raise OSError(f"could not write preview image to {out_path}")
out_path