In [76]:
# ==============================================================
# 1) Setup & imports — Vectorization of monthly GEE mosaics
#    (MEM fix for GDAL/Rasterio included)
# ==============================================================

# If needed in your ArcGIS Pro conda kernel:
# !pip install -q rasterio geopandas shapely pyproj numpy pandas tqdm

from pathlib import Path
import re
from typing import Dict, List, Tuple, Optional

import os
import numpy as np
import pandas as pd
from tqdm import tqdm

import rasterio
from rasterio.mask import mask as rio_mask
from rasterio.features import shapes as rio_shapes
from rasterio.env import Env

import geopandas as gpd
from shapely.geometry import shape as shp_shape, mapping
from shapely.ops import unary_union, transform as shp_transform
from shapely import geometry as sgeom

from pyproj import CRS, Transformer, Geod
from contextlib import contextmanager

# --- Enable GDAL MEM 'DATAPOINTER' open (required by rasterio.mask/shapes on newer GDAL)
os.environ["GDAL_MEM_ENABLE_OPEN"] = "YES"

@contextmanager
def mem_ok():
    """
    Context manager that runs the wrapped block inside a rasterio ``Env``
    created with ``GDAL_MEM_ENABLE_OPEN='YES'``.
    """
    gdal_env = Env(GDAL_MEM_ENABLE_OPEN='YES')
    with gdal_env:
        yield


In [88]:
# ==============================================================
# 2) User settings — paths, options, outputs
# ==============================================================

# --- INPUTS ---
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
# MOSAIC_DIR is globbed by the driver cell for files named sw_YYYY_MM.tif.
MOSAIC_DIR = Path(r"C:\Users\ibana\Desktop\JRC_Tanganica\GIS_Intermediate\Intermediate_files\SurfaceWater\GEE_SW\mosaics")
AOI_DIR    = Path(r"C:\Users\ibana\Desktop\JRC_Tanganica\GIS_Intermediate\Intermediate_files\SurfaceWater\aoi")

# HydroLAKES AOI shapefiles, keyed by lake name (the key is also used in output
# folder and file names).
AOI_FILES = {
    "Tanganyika": AOI_DIR / "HydroLAKES_polys_v10_Tanganyika.shp",
    "Kivu":       AOI_DIR / "HydroLAKES_polys_v10_Kivu.shp",
}

# --- OUTPUTS ---
VECTORS_ROOT = Path(r"C:\Users\ibana\Desktop\JRC_Tanganica\GIS_Intermediate\Intermediate_files\SurfaceWater\vectors")
OUT_SUBDIR   = VECTORS_ROOT / "GEE_SW"   # DEM_SW reserved for later steps
# Side effect at configuration time: create one output folder per lake.
(OUT_SUBDIR / "Tanganyika").mkdir(parents=True, exist_ok=True)
(OUT_SUBDIR / "Kivu").mkdir(parents=True, exist_ok=True)

# --- PROCESSING SWITCHES ---
MODE = "BY_LAKE"            # "BY_LAKE" clips each mosaic to the buffered AOIs; any other value processes the entire raster
BUFFER_KM = 5.0             # buffer around AOIs (km); values <= 0 disable buffering
OVERWRITE = False           # if False, a mosaic/lake whose output file already exists is skipped
OUTPUT_FMT = "SHP"         # "SHP" writes ESRI Shapefiles; any other value writes GeoPackage (.gpkg)
DISSOLVE_TO_SINGLE = False   # dissolve all parts into a single (multi)polygon
MIN_POLY_AREA_KM2 = 2000     # drop polygons smaller than this geodesic area (km²), applied per polygon before any dissolve; 0 keeps all
CONNECTIVITY = 8            # 4 or 8 connectivity for polygonization
ALL_TOUCHED = False         # passed to the AOI clip (rasterio.mask): if True, any pixel touched by the AOI counts as inside
DIAG_VERBOSE = True         # print reshape/polygonization diagnostics

# Single-run testing: set e.g. "2024_07" to process only that month; or None to process all
RUN_ONLY_YYYY_MM = None     # e.g., "2024_07" or None

# Output filename template (extension set by driver); placeholders: YYYY, MM, LAKE
OUT_NAME_TMPL = "sw_{YYYY}_{MM}_{LAKE}"

# Geodesic calculator on WGS84 (used for polygon areas)
GEOD = Geod(ellps="WGS84")


In [89]:
# ==============================================================
# 3) Helpers — parse dates, local equal-area buffering, geodesic area
# ==============================================================

def parse_yyyymm_from_name(path: Path) -> Tuple[Optional[int], Optional[int]]:
    """
    Extract (year, month) from a mosaic file name of the form sw_YYYY_MM.tif.

    Matching is case-insensitive and only looks at ``path.name``.
    Returns (None, None) when the name does not follow the pattern.
    """
    hit = re.match(r"sw_(\d{4})_(\d{2})\.tif$", path.name, flags=re.IGNORECASE)
    if hit is None:
        return None, None
    year_txt, month_txt = hit.groups()
    return int(year_txt), int(month_txt)

def local_laea_crs(geom: sgeom.base.BaseGeometry) -> CRS:
    """
    Build a Lambert Azimuthal Equal-Area CRS (units: meters, WGS84 datum)
    whose origin is the centroid of `geom`.
    """
    center = geom.centroid
    lat0, lon0 = center.y, center.x
    proj4_def = f"+proj=laea +lat_0={lat0} +lon_0={lon0} +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"
    return CRS.from_proj4(proj4_def)

def buffer_km_geodesic(aoi_wgs84: sgeom.base.BaseGeometry, buffer_km: float) -> sgeom.base.BaseGeometry:
    """
    Expand an EPSG:4326 geometry by `buffer_km` kilometres.

    The geometry is projected to a local LAEA CRS centred on its centroid,
    buffered there in meters, and projected back to EPSG:4326.
    A non-positive `buffer_km` returns the input geometry unchanged.
    """
    if buffer_km <= 0:
        return aoi_wgs84

    wgs84 = "EPSG:4326"
    local_crs = local_laea_crs(aoi_wgs84)
    forward = Transformer.from_crs(wgs84, local_crs, always_xy=True)
    inverse = Transformer.from_crs(local_crs, wgs84, always_xy=True)

    radius_m = buffer_km * 1000.0
    buffered_local = shp_transform(forward.transform, aoi_wgs84).buffer(radius_m)
    return shp_transform(inverse.transform, buffered_local)

def geodesic_area_km2(geom_wgs84: sgeom.base.BaseGeometry) -> float:
    """
    Area of a lon/lat geometry in km², computed with the module-level GEOD.

    Polygon: exterior ring area minus the area of each interior ring (floored at 0).
    MultiPolygon: sum over member polygons.
    Anything else: the result of ``buffer(0)`` is measured if it is polygonal,
    otherwise 0.0. Empty geometries give 0.0.
    """
    if geom_wgs84.is_empty:
        return 0.0

    def _ring_km2(ring) -> float:
        # Unsigned ring area: ring orientation must not affect the result.
        xs, ys = zip(*list(ring.coords))
        signed_m2, _perimeter = GEOD.polygon_area_perimeter(xs, ys)
        return abs(signed_m2) / 1e6  # m² → km²

    if isinstance(geom_wgs84, sgeom.MultiPolygon):
        return sum(map(geodesic_area_km2, geom_wgs84.geoms))

    if isinstance(geom_wgs84, sgeom.Polygon):
        remaining = _ring_km2(geom_wgs84.exterior)
        for inner in geom_wgs84.interiors:
            remaining -= _ring_km2(inner)
        return max(0.0, remaining)

    # Non-polygonal input: keep only what buffer(0) turns into polygons.
    polygonal = geom_wgs84.buffer(0)
    is_polygonal = isinstance(polygonal, (sgeom.Polygon, sgeom.MultiPolygon))
    return geodesic_area_km2(polygonal) if is_polygonal else 0.0



In [90]:
# ==============================================================
# 4) AOIs — load HydroLAKES, union, buffer (+5 km), WGS84
# ==============================================================

def load_aoi_union(aoi_path: Path) -> sgeom.base.BaseGeometry:
    """
    Read a vector file, reproject it to EPSG:4326 and merge all of its
    features into one geometry.

    The merged geometry is round-tripped through its GeoJSON-like mapping
    and passed through ``buffer(0)`` before being returned.
    """
    features_wgs84 = gpd.read_file(aoi_path).to_crs("EPSG:4326")
    merged = unary_union(features_wgs84.geometry)
    # normalize & fix minor invalidities
    normalized = shp_shape(mapping(merged))
    return normalized.buffer(0)

# Unbuffered AOI per lake (EPSG:4326), keyed by the lake names in AOI_FILES.
AOIs_WGS84: Dict[str, sgeom.base.BaseGeometry] = {}
# Same AOIs expanded by BUFFER_KM; these are what the driver clips mosaics with.
AOIs_BUFFERED: Dict[str, sgeom.base.BaseGeometry] = {}
for lake, p in AOI_FILES.items():
    base = load_aoi_union(p)
    AOIs_WGS84[lake] = base
    AOIs_BUFFERED[lake] = buffer_km_geodesic(base, BUFFER_KM)

# Sanity check: geodesic area of each buffered AOI, formatted to one decimal.
print("Prepared buffered AOIs (km²):",
      {k: f"{geodesic_area_km2(v):.1f}" for k, v in AOIs_BUFFERED.items()})


Prepared buffered AOIs (km²): {'Tanganyika': '41200.9', 'Kivu': '4634.2'}


In [91]:
# ==============================================================
# 5) Core polygonization helpers — binarize & polygonize (MEM-safe, 2D-safe)
# ==============================================================

def raster_to_binary_water(arr_masked: np.ma.MaskedArray) -> np.ndarray:
    """
    Turn a masked raster into a uint8 water mask.

    A cell is water (1) when it is not masked and its value is non-zero;
    every other cell is 0.
    """
    unmasked = np.logical_not(arr_masked.mask)
    nonzero = arr_masked.filled(0) != 0
    return np.logical_and(unmasked, nonzero).astype(np.uint8)

def _to_2d_uint8(arr: np.ndarray, ctx: str = "") -> Optional[np.ndarray]:
    """
    Coerce any array to a contiguous 2-D uint8 array (0/1).
    Returns None if array is empty or cannot be made 2-D.
    """
    a = np.asarray(arr)
    if a.ndim == 3 and a.shape[0] == 1:
        a = a[0]
    elif a.ndim != 2:
        a = np.squeeze(a)
    if a.ndim != 2:
        if DIAG_VERBOSE:
            print(f"[polygonize] Non-2D array in {ctx}: shape={getattr(a,'shape',None)} → skipping (empty output).")
        return None
    if a.size == 0 or a.shape[0] == 0 or a.shape[1] == 0:
        if DIAG_VERBOSE:
            print(f"[polygonize] Empty array in {ctx}: shape={a.shape} → skipping (empty output).")
        return None
    # Ensure 0/1 and contiguous uint8
    a = (a != 0).astype(np.uint8, copy=False)
    return np.ascontiguousarray(a)

def polygonize_water(mask_arr: np.ndarray, transform, connectivity=8, drop_small_km2=0.0,
                     ctx: str = "") -> List[sgeom.base.BaseGeometry]:
    """
    Convert a binary water mask into polygons in the raster's coordinates.

    Parameters
    ----------
    mask_arr : array whose non-zero cells are water; coerced to strict 2-D uint8.
    transform : affine transform mapping pixel indices to map coordinates.
    connectivity : 4 or 8; pixel connectivity handed to rasterio's polygonizer.
    drop_small_km2 : polygons whose geodesic area is below this value (km²)
        are discarded; 0 keeps everything.
    ctx : label used in diagnostic messages.

    Returns
    -------
    List of shapely polygons for the water (value 1) regions; empty when the
    input cannot be coerced to a non-empty 2-D array.
    """
    arr2d = _to_2d_uint8(mask_arr, ctx=ctx)
    if arr2d is None:
        return []

    geoms = []
    # rasterio.features.shapes uses in-memory datasets; ensure MEM open is allowed
    with mem_ok():
        # Honor the caller's `connectivity` argument (previously the global
        # CONNECTIVITY was used and the parameter was silently ignored).
        for geom, value in rio_shapes(arr2d, transform=transform, connectivity=connectivity):
            # The image is 0/1; background (0) regions are polygonized too and skipped here.
            if value != 1:
                continue
            poly = shp_shape(geom)
            if drop_small_km2 > 0.0 and geodesic_area_km2(poly) < drop_small_km2:
                continue
            geoms.append(poly)
    return geoms


In [92]:
# ==============================================================
# 6) Per-mosaic processor — clip (optional), polygonize, dissolve, save
#    (MEM-safe, robust masked-array handling, 2D coercion)
# ==============================================================

def _write_empty_vector(out_path: Path, out_dir: Path, yyyy: int, mm: int, lake_label: str) -> Path:
    """Write a one-row placeholder layer with an empty geometry; return the path written.

    The placeholder is always a GeoPackage: when `out_path` is a shapefile the
    sibling ``.gpkg`` path is used instead of the ``.shp`` path.
    """
    gdf_empty = gpd.GeoDataFrame(
        {"year": [yyyy], "month": [mm], "lake": [lake_label], "area_km2": [0.0]},
        geometry=[sgeom.MultiPolygon([])], crs="EPSG:4326"
    )
    out_dir.mkdir(parents=True, exist_ok=True)
    empty_path = out_path.with_suffix(".gpkg")
    gdf_empty.to_file(empty_path, driver="GPKG")
    return empty_path


def process_mosaic_for_target(mosaic_path: Path,
                              lake_name: Optional[str],
                              aoi_geom_wgs84: Optional[sgeom.base.BaseGeometry],
                              out_dir: Path,
                              overwrite=False,
                              dissolve=True,
                              drop_small_km2=0.0) -> Optional[Path]:
    """
    Vectorize water polygons for either a lake AOI (clipped) or the full raster.

    Parameters
    ----------
    mosaic_path : raster named sw_YYYY_MM.tif; band 1 is read.
    lake_name : label used in the output name and the "lake" attribute
        ("full" when None).
    aoi_geom_wgs84 : geometry used to clip the raster, or None for no clipping.
    out_dir : folder the output is written to (created if missing).
    overwrite : when False, an existing output for this mosaic/lake is kept
        and its path returned without reprocessing.
    dissolve : merge all polygons into a single geometry before writing.
    drop_small_km2 : minimum geodesic polygon area (km²) kept by polygonization.

    Returns
    -------
    Path of the file that exists on disk for this mosaic/lake (the regular
    output, or the ``.gpkg`` placeholder written for an empty result), or None
    when the mosaic name is not recognized.

    Raises
    ------
    ValueError : re-raised from the clip step for any reason other than the
        AOI not overlapping the raster.
    """
    yyyy, mm = parse_yyyymm_from_name(mosaic_path)
    if yyyy is None:
        print(f"[skip] Unrecognized mosaic name: {mosaic_path.name}")
        return None

    lake_label = lake_name or "full"

    # Output path & driver
    name_bits = dict(YYYY=f"{yyyy:04d}", MM=f"{mm:02d}", LAKE=lake_label)
    out_stem = OUT_NAME_TMPL.format(**name_bits)
    if OUTPUT_FMT.upper() == "SHP":
        out_path = out_dir / f"{out_stem}.shp"
        drv = "ESRI Shapefile"
    else:
        out_path = out_dir / f"{out_stem}.gpkg"
        drv = "GPKG"

    # An earlier run may have produced either the regular output or the .gpkg
    # placeholder used for empty results; both count as "already done".
    if not overwrite:
        for existing in (out_path, out_path.with_suffix(".gpkg")):
            if existing.exists():
                return existing

    # NOTE(review): the AOI is supplied in EPSG:4326 and outputs are tagged
    # EPSG:4326 without checking ds.crs — assumes the mosaics are in EPSG:4326; confirm.
    with rasterio.open(mosaic_path) as ds:
        if aoi_geom_wgs84 is not None:
            # Clip to AOI for speed; IMPORTANT: use indexes=[1] (list) to force 3D shape (1,H,W)
            try:
                with mem_ok():
                    clipped, clipped_transform = rio_mask(
                        ds, [mapping(aoi_geom_wgs84)],
                        crop=True, filled=False, indexes=[1], all_touched=ALL_TOUCHED
                    )
            except ValueError as e:
                # Only "AOI does not overlap the raster" means an empty result;
                # any other ValueError is a real failure and must surface.
                if "do not overlap" not in str(e).lower():
                    raise
                return _write_empty_vector(out_path, out_dir, yyyy, mm, lake_label)

            # clipped is a MaskedArray with shape (1, H, W) because indexes=[1]
            arr_m = clipped[0]  # masked 2-D array (H, W)
            H, W = arr_m.shape

            # If dataset encodes nodata with a value, merge it into the mask
            if ds.nodata is not None and not np.isnan(ds.nodata):
                nodata_mask = (arr_m.data == ds.nodata)
                arr_m = np.ma.masked_array(arr_m.data, mask=(arr_m.mask | nodata_mask))

            bin_arr = raster_to_binary_water(arr_m)
            transform = clipped_transform
            ctx = f"{mosaic_path.name} / {lake_name}"
        else:
            # Full raster path as a MaskedArray
            arr_m = ds.read(1, masked=True)  # 2-D masked array (H x W)
            H, W = arr_m.shape
            if ds.nodata is not None and not np.isnan(ds.nodata):
                arr_m = np.ma.masked_equal(arr_m, ds.nodata)
            bin_arr = raster_to_binary_water(arr_m)
            transform = ds.transform
            ctx = f"{mosaic_path.name} / full"

        # --- Enforce strict 2-D shape (guard against unexpected flattening) ---
        bin_arr = np.asarray(bin_arr)
        if bin_arr.ndim == 1 and bin_arr.size == H * W:
            if DIAG_VERBOSE:
                print(f"[reshape] Coercing 1-D → 2-D for {ctx}: ({bin_arr.size},) → ({H},{W})")
            bin_arr = bin_arr.reshape((H, W))
        elif bin_arr.ndim != 2:
            if DIAG_VERBOSE:
                print(f"[polygonize] Non-2D array persists in {ctx}: shape={getattr(bin_arr,'shape',None)} → empty output.")
            return _write_empty_vector(out_path, out_dir, yyyy, mm, lake_label)
        # ---------------------------------------------------------------------

        # Optional quick diagnostic: count water pixels
        if DIAG_VERBOSE:
            wpx = int((bin_arr != 0).sum())
            print(f"[diag] {ctx}: water_px={wpx}")

        # Polygonize (MEM-safe & 2D-safe)
        polys = polygonize_water(bin_arr, transform,
                                 connectivity=CONNECTIVITY, drop_small_km2=drop_small_km2,
                                 ctx=ctx)

        if polys and dissolve:
            geom_diss = unary_union(polys)
            polys = [geom_diss] if not geom_diss.is_empty else []

        # Nothing survived polygonization/filtering/dissolve → placeholder layer
        if not polys:
            return _write_empty_vector(out_path, out_dir, yyyy, mm, lake_label)

        records = [{"year": yyyy, "month": mm, "lake": lake_label,
                    "area_km2": float(geodesic_area_km2(g))} for g in polys]

        gdf = gpd.GeoDataFrame(records, geometry=polys, crs="EPSG:4326")
        out_dir.mkdir(parents=True, exist_ok=True)
        gdf.to_file(out_path, driver=drv)

    return out_path


In [93]:
# ==============================================================
# 7) Driver — iterate mosaics and generate vectors (single-run supported)
# ==============================================================

# Select mosaics (supports single-file test via RUN_ONLY_YYYY_MM)
if RUN_ONLY_YYYY_MM is None:
    mosaics = sorted(MOSAIC_DIR.glob("sw_????_??.tif"))
    print(f"Found {len(mosaics)} monthly mosaics in {MOSAIC_DIR}")
else:
    # Validate without binding the match to `m`, which the loop below uses for paths.
    if not re.match(r"^\d{4}_\d{2}$", RUN_ONLY_YYYY_MM):
        raise ValueError("RUN_ONLY_YYYY_MM must be like '2024_07' or None.")
    target = MOSAIC_DIR / f"sw_{RUN_ONLY_YYYY_MM}.tif"
    if not target.exists():
        raise FileNotFoundError(f"Requested single-run file not found: {target}")
    mosaics = [target]
    print(f"[single-run] Processing only: {target.name}")

# Paths returned by the processor: newly written files and, when OVERWRITE is
# False, outputs that already existed.
written: List[Path] = []

if MODE.upper() == "BY_LAKE":
    # One target per configured lake: (name, buffered AOI, output folder).
    # Derived from AOIs_BUFFERED so every lake declared in AOI_FILES is processed.
    targets = [
        (name, buffered_aoi, OUT_SUBDIR / name)
        for name, buffered_aoi in AOIs_BUFFERED.items()
    ]
else:
    targets = [(None, None, OUT_SUBDIR)]  # Full raster, no clipping

for m in tqdm(mosaics, desc="Vectorizing mosaics", unit="month"):
    for lake_name, aoi_geom, odir in targets:
        p = process_mosaic_for_target(
            m, lake_name, aoi_geom, odir,
            overwrite=OVERWRITE,
            dissolve=DISSOLVE_TO_SINGLE,
            drop_small_km2=MIN_POLY_AREA_KM2
        )
        if p is not None:
            written.append(p)

print(f"Completed: wrote {len(written)} vector file(s).")


Found 43 monthly mosaics in C:\Users\ibana\Desktop\JRC_Tanganica\GIS_Intermediate\Intermediate_files\SurfaceWater\GEE_SW\mosaics


Vectorizing mosaics:   0%|          | 0/43 [00:00<?, ?month/s]

[diag] sw_2022_01.tif / Tanganyika: water_px=331888004
[diag] sw_2022_01.tif / Kivu: water_px=24288104


Vectorizing mosaics:   2%|▏         | 1/43 [00:44<31:11, 44.56s/month]

[diag] sw_2022_02.tif / Tanganyika: water_px=331961259
[diag] sw_2022_02.tif / Kivu: water_px=24275370


Vectorizing mosaics:   5%|▍         | 2/43 [01:27<29:46, 43.56s/month]

[diag] sw_2022_03.tif / Tanganyika: water_px=332004139
[diag] sw_2022_03.tif / Kivu: water_px=24286316


Vectorizing mosaics:   7%|▋         | 3/43 [02:09<28:39, 42.99s/month]

[diag] sw_2022_04.tif / Tanganyika: water_px=331986971
[diag] sw_2022_04.tif / Kivu: water_px=24282305


Vectorizing mosaics:   9%|▉         | 4/43 [02:51<27:44, 42.67s/month]

[diag] sw_2022_05.tif / Tanganyika: water_px=332025728
[diag] sw_2022_05.tif / Kivu: water_px=24285914


Vectorizing mosaics:  12%|█▏        | 5/43 [03:36<27:23, 43.26s/month]

[diag] sw_2022_06.tif / Tanganyika: water_px=332051337
[diag] sw_2022_06.tif / Kivu: water_px=24289845


Vectorizing mosaics:  14%|█▍        | 6/43 [04:20<26:50, 43.53s/month]

[diag] sw_2022_07.tif / Tanganyika: water_px=332053668
[diag] sw_2022_07.tif / Kivu: water_px=24291115


Vectorizing mosaics:  16%|█▋        | 7/43 [05:05<26:26, 44.06s/month]

[diag] sw_2022_08.tif / Tanganyika: water_px=332038520
[diag] sw_2022_08.tif / Kivu: water_px=24290907


Vectorizing mosaics:  19%|█▊        | 8/43 [05:49<25:37, 43.92s/month]

[diag] sw_2022_09.tif / Tanganyika: water_px=332018252
[diag] sw_2022_09.tif / Kivu: water_px=24272664


Vectorizing mosaics:  21%|██        | 9/43 [06:34<25:11, 44.45s/month]

[diag] sw_2022_10.tif / Tanganyika: water_px=331982815
[diag] sw_2022_10.tif / Kivu: water_px=24289430


Vectorizing mosaics:  23%|██▎       | 10/43 [07:19<24:31, 44.58s/month]

[diag] sw_2022_11.tif / Tanganyika: water_px=331822231
[diag] sw_2022_11.tif / Kivu: water_px=24273389


Vectorizing mosaics:  26%|██▌       | 11/43 [08:05<23:55, 44.86s/month]

[diag] sw_2022_12.tif / Tanganyika: water_px=331992700
[diag] sw_2022_12.tif / Kivu: water_px=24266672


Vectorizing mosaics:  28%|██▊       | 12/43 [08:49<23:09, 44.84s/month]

[diag] sw_2023_01.tif / Tanganyika: water_px=332042663
[diag] sw_2023_01.tif / Kivu: water_px=24285769


Vectorizing mosaics:  30%|███       | 13/43 [09:35<22:30, 45.03s/month]

[diag] sw_2023_02.tif / Tanganyika: water_px=332010367
[diag] sw_2023_02.tif / Kivu: water_px=24283020


Vectorizing mosaics:  33%|███▎      | 14/43 [10:23<22:09, 45.85s/month]

[diag] sw_2023_03.tif / Tanganyika: water_px=332046259
[diag] sw_2023_03.tif / Kivu: water_px=24275687


Vectorizing mosaics:  35%|███▍      | 15/43 [11:11<21:43, 46.57s/month]

[diag] sw_2023_04.tif / Tanganyika: water_px=332069638
[diag] sw_2023_04.tif / Kivu: water_px=24285713


Vectorizing mosaics:  37%|███▋      | 16/43 [11:54<20:30, 45.58s/month]

[diag] sw_2023_05.tif / Tanganyika: water_px=332079100
[diag] sw_2023_05.tif / Kivu: water_px=24289019


Vectorizing mosaics:  40%|███▉      | 17/43 [12:36<19:17, 44.53s/month]

[diag] sw_2023_06.tif / Tanganyika: water_px=332110841
[diag] sw_2023_06.tif / Kivu: water_px=24287048


Vectorizing mosaics:  42%|████▏     | 18/43 [13:18<18:13, 43.76s/month]

[diag] sw_2023_07.tif / Tanganyika: water_px=332124739
[diag] sw_2023_07.tif / Kivu: water_px=24293545


Vectorizing mosaics:  44%|████▍     | 19/43 [14:00<17:16, 43.20s/month]

[diag] sw_2023_08.tif / Tanganyika: water_px=332024084
[diag] sw_2023_08.tif / Kivu: water_px=24277288


Vectorizing mosaics:  47%|████▋     | 20/43 [14:44<16:36, 43.34s/month]

[diag] sw_2023_09.tif / Tanganyika: water_px=332030653
[diag] sw_2023_09.tif / Kivu: water_px=24256055


Vectorizing mosaics:  49%|████▉     | 21/43 [15:26<15:47, 43.09s/month]

[diag] sw_2023_10.tif / Tanganyika: water_px=332046393
[diag] sw_2023_10.tif / Kivu: water_px=24286324


Vectorizing mosaics:  51%|█████     | 22/43 [16:08<14:55, 42.66s/month]

[diag] sw_2023_11.tif / Tanganyika: water_px=331960742
[diag] sw_2023_11.tif / Kivu: water_px=24280337


Vectorizing mosaics:  53%|█████▎    | 23/43 [16:50<14:10, 42.51s/month]

[diag] sw_2023_12.tif / Tanganyika: water_px=332039887
[diag] sw_2023_12.tif / Kivu: water_px=24285003


Vectorizing mosaics:  56%|█████▌    | 24/43 [17:32<13:27, 42.48s/month]

[diag] sw_2024_01.tif / Tanganyika: water_px=332060545
[diag] sw_2024_01.tif / Kivu: water_px=24285216


Vectorizing mosaics:  58%|█████▊    | 25/43 [18:15<12:43, 42.42s/month]

[diag] sw_2024_02.tif / Tanganyika: water_px=332109763
[diag] sw_2024_02.tif / Kivu: water_px=24266428


Vectorizing mosaics:  60%|██████    | 26/43 [18:59<12:08, 42.86s/month]

[diag] sw_2024_03.tif / Tanganyika: water_px=332149676
[diag] sw_2024_03.tif / Kivu: water_px=24290014


Vectorizing mosaics:  63%|██████▎   | 27/43 [19:43<11:33, 43.37s/month]

[diag] sw_2024_04.tif / Tanganyika: water_px=332153100
[diag] sw_2024_04.tif / Kivu: water_px=24276521


Vectorizing mosaics:  65%|██████▌   | 28/43 [20:31<11:09, 44.61s/month]

[diag] sw_2024_05.tif / Tanganyika: water_px=332170393
[diag] sw_2024_05.tif / Kivu: water_px=24287734


Vectorizing mosaics:  67%|██████▋   | 29/43 [21:15<10:25, 44.68s/month]

[diag] sw_2024_06.tif / Tanganyika: water_px=332358604
[diag] sw_2024_06.tif / Kivu: water_px=24295297


Vectorizing mosaics:  70%|██████▉   | 30/43 [22:01<09:45, 45.04s/month]

[diag] sw_2024_07.tif / Tanganyika: water_px=332295740
[diag] sw_2024_07.tif / Kivu: water_px=24290897


Vectorizing mosaics:  72%|███████▏  | 31/43 [22:46<08:58, 44.89s/month]

[diag] sw_2024_08.tif / Tanganyika: water_px=332273946
[diag] sw_2024_08.tif / Kivu: water_px=24293047


Vectorizing mosaics:  74%|███████▍  | 32/43 [23:31<08:15, 45.01s/month]

[diag] sw_2024_09.tif / Tanganyika: water_px=332227666
[diag] sw_2024_09.tif / Kivu: water_px=24268215


Vectorizing mosaics:  77%|███████▋  | 33/43 [24:16<07:30, 45.03s/month]

[diag] sw_2024_10.tif / Tanganyika: water_px=332022255
[diag] sw_2024_10.tif / Kivu: water_px=24268593


Vectorizing mosaics:  79%|███████▉  | 34/43 [25:02<06:47, 45.30s/month]

[diag] sw_2024_11.tif / Tanganyika: water_px=332075987
[diag] sw_2024_11.tif / Kivu: water_px=24253144


Vectorizing mosaics:  81%|████████▏ | 35/43 [25:49<06:06, 45.75s/month]

[diag] sw_2024_12.tif / Tanganyika: water_px=332123090
[diag] sw_2024_12.tif / Kivu: water_px=24267287


Vectorizing mosaics:  84%|████████▎ | 36/43 [26:35<05:20, 45.73s/month]

[diag] sw_2025_01.tif / Tanganyika: water_px=332161599
[diag] sw_2025_01.tif / Kivu: water_px=24276342


Vectorizing mosaics:  86%|████████▌ | 37/43 [27:20<04:33, 45.52s/month]

[diag] sw_2025_02.tif / Tanganyika: water_px=332174731
[diag] sw_2025_02.tif / Kivu: water_px=24280676


Vectorizing mosaics:  88%|████████▊ | 38/43 [28:04<03:45, 45.16s/month]

[diag] sw_2025_03.tif / Tanganyika: water_px=332181449
[diag] sw_2025_03.tif / Kivu: water_px=24281504


Vectorizing mosaics:  91%|█████████ | 39/43 [28:48<02:59, 44.93s/month]

[diag] sw_2025_04.tif / Tanganyika: water_px=332135994
[diag] sw_2025_04.tif / Kivu: water_px=24273488


Vectorizing mosaics:  93%|█████████▎| 40/43 [29:35<02:16, 45.35s/month]

[diag] sw_2025_05.tif / Tanganyika: water_px=332202462
[diag] sw_2025_05.tif / Kivu: water_px=24278137


Vectorizing mosaics:  95%|█████████▌| 41/43 [30:19<01:30, 45.02s/month]

[diag] sw_2025_06.tif / Tanganyika: water_px=332230861
[diag] sw_2025_06.tif / Kivu: water_px=24288600


Vectorizing mosaics:  98%|█████████▊| 42/43 [31:05<00:45, 45.31s/month]

[diag] sw_2025_07.tif / Tanganyika: water_px=332205461
[diag] sw_2025_07.tif / Kivu: water_px=24251190


Vectorizing mosaics: 100%|██████████| 43/43 [31:53<00:00, 44.50s/month]﻿


Completed: wrote 86 vector file(s).


In [95]:
# ==============================================================
# 8) Inventory — quick QA table of written vectors (area & parts)
# ==============================================================

# Column order of the inventory table; fixed up-front so the table has the
# expected columns even when no vector file was produced.
INVENTORY_COLUMNS = ["file", "year", "month", "lake", "parts", "area_km2_sum"]

rows = []
for p in written:
    # File names follow sw_YYYY_MM_<lake>.<ext>; the lake token is whatever
    # precedes the extension, so newly configured lakes are recognized too.
    m = re.search(r"sw_(\d{4})_(\d{2})_([^.]+)\.(?:gpkg|shp)$",
                  p.name, re.IGNORECASE)
    year  = int(m.group(1)) if m else None
    month = int(m.group(2)) if m else None
    lake  = m.group(3) if m else None

    try:
        g = gpd.read_file(p)
        total_area = float(g["area_km2"].sum()) if "area_km2" in g.columns else np.nan
        n_parts    = len(g)
    except Exception as exc:
        # Keep the row (with NaNs) so the gap is visible, but say why it failed.
        print(f"[inventory] Could not read {p}: {exc}")
        total_area = np.nan
        n_parts    = np.nan

    rows.append({
        "file": str(p),
        "year": year,
        "month": month,
        "lake": lake,
        "parts": n_parts,
        "area_km2_sum": total_area
    })

# Explicit columns: with an empty `rows` list the frame would otherwise have
# no columns and sort_values would raise KeyError.
inv_df = (
    pd.DataFrame(rows, columns=INVENTORY_COLUMNS)
    .sort_values(["lake", "year", "month"])
    .reset_index(drop=True)
)
print(inv_df.head(12).to_string(index=False))

# Save inventory next to GEE_SW vectors
inv_csv = OUT_SUBDIR / "gee_sw_vectors_inventory.csv"
inv_df.to_csv(inv_csv, index=False, encoding="utf-8-sig")
print("Inventory saved →", inv_csv)


                                                                                                                         file  year  month lake  parts  area_km2_sum
C:\Users\ibana\Desktop\JRC_Tanganica\GIS_Intermediate\Intermediate_files\SurfaceWater\vectors\GEE_SW\Kivu\sw_2022_01_Kivu.shp  2022      1 Kivu      1   2409.620018
C:\Users\ibana\Desktop\JRC_Tanganica\GIS_Intermediate\Intermediate_files\SurfaceWater\vectors\GEE_SW\Kivu\sw_2022_02_Kivu.shp  2022      2 Kivu      1   2408.351160
C:\Users\ibana\Desktop\JRC_Tanganica\GIS_Intermediate\Intermediate_files\SurfaceWater\vectors\GEE_SW\Kivu\sw_2022_03_Kivu.shp  2022      3 Kivu      1   2409.429187
C:\Users\ibana\Desktop\JRC_Tanganica\GIS_Intermediate\Intermediate_files\SurfaceWater\vectors\GEE_SW\Kivu\sw_2022_04_Kivu.shp  2022      4 Kivu      1   2409.020410
C:\Users\ibana\Desktop\JRC_Tanganica\GIS_Intermediate\Intermediate_files\SurfaceWater\vectors\GEE_SW\Kivu\sw_2022_05_Kivu.shp  2022      5 Kivu      1   2409.385666
C:\Users\i