# Compute per-hex depth quantile from NWS model bathymetry

Replaces the mean/median depth in `meta.json` with a low quantile (10th percentile)
of bathymetry sampled within each hex polygon.

**Why:** mean/median depth misclassifies hexes near steep coasts (e.g. Norway) as
non-habitable even when a substantial fraction of their area is shallower than the
85 m habitability threshold.

**Source:** `cmems_mod_nws_phy_anfc_1.5km_static` — static bathymetry (`deptho`) from
the same NW shelf model used for the Lagrangian simulations, accessed via the Copernicus
Marine Toolbox. Requires Copernicus Marine credentials: run `copernicusmarine login` once,
or set the `COPERNICUSMARINE_SERVICE_USERNAME` / `COPERNICUSMARINE_SERVICE_PASSWORD`
environment variables.

**Output:** updated `database/data/meta.json` with `depth` replaced by the 10th
percentile of ocean depth within each hex (`null` for hexes that contain no ocean
grid cells).

In [None]:
import json
import numpy as np
import xarray as xr
import geopandas as gpd
import shapely
import copernicusmarine
from pathlib import Path
from tqdm.auto import tqdm

In [None]:
# --- parameters ---
# Fraction (0-1) of the in-hex ocean depth distribution stored per hex.
DEPTH_QUANTILE = 0.10  # 10th percentile

# Copernicus Marine product and the variable that holds the bathymetry.
DATASET_ID = "cmems_mod_nws_phy_anfc_1.5km_static"
DEPTH_VAR = "deptho"  # sea floor depth below geoid (m, positive)

# Study region bounding box (NW European shelf, with margin)
LON_MIN = -20.0
LON_MAX = 13.0
LAT_MIN = 46.0
LAT_MAX = 63.0

# Directory holding hexes.geojson / meta.json, and the metadata file itself.
OUT_DIR = Path("../../database/data")
META_PATH = OUT_DIR.joinpath("meta.json")

In [None]:
# Open the static bathymetry product through the Copernicus Marine Toolbox,
# subset to the study bounding box and to the depth variable only.
# Credentials: run `copernicusmarine login` once, or set
# COPERNICUSMARINE_SERVICE_USERNAME / _PASSWORD env vars
subset_request = {
    "dataset_id": DATASET_ID,
    "minimum_longitude": LON_MIN,
    "maximum_longitude": LON_MAX,
    "minimum_latitude": LAT_MIN,
    "maximum_latitude": LAT_MAX,
    "variables": [DEPTH_VAR],
}
ds = copernicusmarine.open_dataset(**subset_request)
print(ds)

# Pull the depth field into memory once; every per-hex lookup slices this array.
bathy = ds[DEPTH_VAR].load()
print(f"\nLoaded: {bathy.shape}, {bathy.nbytes / 1e6:.0f} MB")
print(f"Depth range: {float(bathy.min()):.0f} – {float(bathy.max()):.0f} m")

In [None]:
# Explicit land mask: a cell counts as ocean only when deptho > 0.
# NaN compares False, so both deptho == 0 and NaN cells end up as land/dry
# (NOTE(review): "land = 0 or NaN" is the assumed NEMO convention — confirm).
ocean_mask = bathy > 0
# Same grid as `bathy`, but NaN wherever the mask is False.
bathy_ocean = bathy.where(ocean_mask)

n_ocean = int(ocean_mask.sum())
n_dry = int((~ocean_mask).sum())
shallowest = float(bathy_ocean.min())
deepest = float(bathy_ocean.max())

print(f"Ocean cells:  {n_ocean}")
print(f"Land/dry cells: {n_dry}")
print(f"Depth range (ocean only): {shallowest:.0f} – {deepest:.0f} m")

In [None]:
# Read the hex polygons that the depth quantile is computed for.
hexes_path = OUT_DIR / "hexes.geojson"
hexes = gpd.read_file(hexes_path)
print(f"Hexes: {len(hexes)}")

In [None]:
def hex_depth_quantile(geom, q=DEPTH_QUANTILE):
    """Return the q-quantile of ocean depth (m) over grid points inside a hex.

    Depths come from the module-level ``bathy_ocean`` array, in which land
    cells are already NaN, so only wet grid points enter the quantile.

    Parameters
    ----------
    geom
        Hex polygon; must expose ``.bounds`` and be accepted by
        ``shapely.contains_xy``.
    q
        Quantile as a fraction in [0, 1]; defaults to ``DEPTH_QUANTILE``.

    Returns
    -------
    float
        The requested depth quantile, or NaN when the hex's bounding box
        selects no grid cells or no wet grid point lies inside the polygon.
    """
    pad = 0.01  # widen the bounding box slightly so edge grid points are not cut off
    west, south, east, north = geom.bounds

    window = bathy_ocean.sel(
        latitude=slice(south - pad, north + pad),
        longitude=slice(west - pad, east + pad),
    )
    if window.size == 0:
        return np.nan

    # Coordinates of every grid point in the window, laid out (lat, lon) so the
    # flattened order matches ``window.values.ravel()``.
    lon_grid, lat_grid = np.meshgrid(window.longitude.values, window.latitude.values)
    in_hex = shapely.contains_xy(geom, lon_grid.ravel(), lat_grid.ravel())

    # Keep points inside the polygon, then discard the NaN-masked land cells.
    depths = window.values.ravel()[in_hex]
    wet = depths[np.isfinite(depths)]
    if wet.size == 0:
        return np.nan

    return float(np.percentile(wet, q * 100))

# Smoke test: evaluate the quantile for the first hex only.
first_hex = hexes.geometry.iloc[0]
print(hex_depth_quantile(first_hex))

In [None]:
# Evaluate the depth quantile for every hex, in row order (NaN where a hex has
# no wet grid points).
depth_q = np.array(
    [
        hex_depth_quantile(geom)
        for geom in tqdm(hexes.geometry, desc="hex depth q10")
    ],
    dtype=float,
)

hexes["depth_q10"] = depth_q
n_missing = np.isnan(depth_q).sum()
print(f"Done. NaN count: {n_missing} / {len(depth_q)}")
print(f"Depth q10 range: {np.nanmin(depth_q):.0f} – {np.nanmax(depth_q):.0f} m")

In [None]:
import matplotlib.pyplot as plt

# Depth currently stored in meta.json, aligned with the row order of `hexes`.
meta = json.loads(META_PATH.read_text(encoding="utf-8"))
stored_depth = meta["depth"]
# Keys are formatted with str(int(id)) — the same form used when meta["depth"]
# is written back — so an id read as 12.0 still matches the "12" entry.
raw_old = (stored_depth.get(str(int(hex_id))) for hex_id in hexes["id"])
# A missing key and a JSON null both mean "no depth": map them to NaN and force
# a float dtype. Without this a single null turns the array into object dtype,
# which breaks the colour mapping below and any later `old_depth > 85` test.
old_depth = np.array([np.nan if d is None else d for d in raw_old], dtype=float)

# Centroids are identical for both panels, so compute them once.
centroids = hexes.geometry.centroid
centroid_x, centroid_y = centroids.x, centroids.y

# Side-by-side maps of the stored depth and the new quantile depth, sharing
# one colour scale (0-200 m) so the panels are directly comparable.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, vals, title in zip(
    axes,
    [old_depth, depth_q],
    ["Old: depth_median (from connectivity NetCDF)", f"New: depth_q{int(DEPTH_QUANTILE*100)} (NWS model bathy)"],
):
    sc = ax.scatter(
        centroid_x, centroid_y,
        c=vals, cmap="Blues", vmin=0, vmax=200, s=2,
    )
    ax.set_title(title)
    plt.colorbar(sc, ax=ax, label="depth (m)")
plt.tight_layout()
plt.savefig(OUT_DIR / "depth_comparison.png", dpi=150)
plt.show()

In [None]:
# Compare the habitable classification (depth <= 85 m) before and after.
# NaN depths compare False against the threshold, so they are never counted
# as non-habitable in either mask.
was_nonhabitable = np.greater(old_depth, 85)
now_nonhabitable = np.greater(depth_q, 85)
# Hexes that were excluded by the old depth but pass with the new one.
newly_habitable = np.logical_and(was_nonhabitable, np.logical_not(now_nonhabitable))

print(f"Previously non-habitable (depth_median > 85 m): {was_nonhabitable.sum()}")
print(f"Now non-habitable (depth_q10 > 85 m):           {now_nonhabitable.sum()}")
print(f"Newly classified as habitable:                   {newly_habitable.sum()}")

In [None]:
# Replace the per-hex depth table in meta.json with the new quantile depths.
# Keys are the integer hex ids as strings; NaN (no wet cells) is stored as
# None so it serialises to JSON null rather than a bare NaN token.
depth_by_hex = {}
for hex_id, q10 in zip(hexes["id"], hexes["depth_q10"]):
    depth_by_hex[str(int(hex_id))] = None if np.isnan(q10) else q10
meta["depth"] = depth_by_hex

payload = json.dumps(meta)
META_PATH.write_text(payload)
print(f"Written: {META_PATH}")