# Compute per-hex depth quantile from GEBCO

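Replaces the mean/median depth in `meta.json` with a low quantile (10th percentile)
of GEBCO 2022 ocean depth sampled within each hex polygon. Depth is measured positive
downward, so the 10th percentile is the depth below which only the shallowest 10 % of
a hex's ocean cells lie.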

**Why:** mean/median depth misclassifies hexes near steep coasts (e.g. Norway) as
non-habitable even when a substantial fraction of their area is shallower than 85 m.

**Source:** GEBCO_2022 sub-ice topography/bathymetry via CEDA OPeNDAP (~450 m resolution).
CEDA login may be required — see https://services.ceda.ac.uk/cedasite/register/info/

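**Output:** updated `database/data/meta.json` with `depth` replaced by the 10th percentile
of ocean depth within each hex. Only grid cells with negative elevation count as ocean;
cells at or above sea level are excluded, and a hex with no ocean cells is written as `null`.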

In [None]:
import json
import numpy as np
import xarray as xr
import geopandas as gpd
import shapely
from pathlib import Path
from tqdm.auto import tqdm

In [None]:
# --- parameters ---
# Quantile of the per-hex ocean-depth distribution that is stored as the hex depth.
# Depth is taken as -elevation (positive down), so a low quantile picks the shallow end.
DEPTH_QUANTILE = 0.10   # 10th percentile

# GEBCO 2022 via CEDA OPeNDAP
# Note: CEDA requires free registration; set CEDA credentials in ~/.dodsrc if needed
GEBCO_URL = (
    "https://dap.ceda.ac.uk/bodc/gebco/global/gebco_2022"
    "/sub_ice_topography_bathymetry/netcdf/GEBCO_2022_sub_ice_topo.nc"
)

# Study region bounding box (NW European shelf, with margin)
# Used to subset the GEBCO grid by its lat/lon coordinates before loading it.
# NOTE(review): a hex lying partly outside this box is sampled only where the
# loaded grid covers it, and a hex wholly outside gets NaN — confirm the box
# covers every hex in hexes.geojson.
LON_MIN, LON_MAX = -30.0, 15.0
LAT_MIN, LAT_MAX =  45.0, 65.0

# Data directory (relative path): hexes.geojson and meta.json are read from here,
# and meta.json plus the comparison figure are written back to it.
OUT_DIR = Path("../../database/data")
META_PATH = OUT_DIR / "meta.json"

In [None]:
# Load GEBCO for the study region via OPeNDAP (downloads once, ~100 MB subset)
print("Opening GEBCO via OPeNDAP ...")
# The context manager closes the remote dataset handle once the subset is in
# memory; only the loaded `elev` array is needed afterwards.
with xr.open_dataset(GEBCO_URL, engine="pydap") as ds:
    print(ds)

    print("\nSubsetting to study region ...")
    elev = (
        ds["elevation"]
        .sel(lat=slice(LAT_MIN, LAT_MAX), lon=slice(LON_MIN, LON_MAX))
        .load()   # pull into memory once
    )

# An empty subset (e.g. a coordinate axis stored in descending order, or a
# bounding box outside the grid) would silently turn every hex into NaN later.
if elev.size == 0:
    raise ValueError(
        "GEBCO subset is empty: check the bounding box and the ordering of the "
        "lat/lon coordinates"
    )
print(f"Loaded: {elev.shape} (lat × lon), {elev.nbytes / 1e6:.0f} MB")

In [None]:
# Quick sanity check: ocean cells carry negative elevation, so the share of
# negative cells and the elevation extremes should look plausible for the region
n_ocean_cells = int((elev < 0).sum())
ocean_frac = n_ocean_cells / elev.size
lowest_m = float(elev.min())
highest_m = float(elev.max())
print(f"Ocean fraction in study region: {ocean_frac:.1%}")
print(f"Elevation range: {lowest_m:.0f} m to {highest_m:.0f} m")

In [None]:
# Read the hex polygons from the data directory and preview the first two rows
hexes_path = OUT_DIR / "hexes.geojson"
hexes = gpd.read_file(hexes_path)
print(f"Hexes: {len(hexes)}")
hexes.head(2)

In [None]:
# Keep the grid's coordinate axes as plain NumPy arrays for fast bbox slicing
lats = elev["lat"].values
lons = elev["lon"].values

def hex_depth_quantile(geom, q=DEPTH_QUANTILE):
    """Return the q-quantile of ocean depth (m, positive down) within a hex polygon.

    Grid cells of the module-level ``elev`` array whose coordinates fall inside
    ``geom`` are collected; cells with negative elevation are treated as ocean
    and their depth is ``-elevation``.

    Parameters
    ----------
    geom : shapely geometry or None
        Hex polygon, expressed in the same lon/lat coordinates as ``elev``.
    q : float
        Quantile as a fraction in [0, 1] (0.10 = 10th percentile).

    Returns
    -------
    float
        The requested depth quantile, or NaN when the geometry is missing or
        empty, when no grid cell falls in its bounding box, or when none of
        the cells inside the polygon is ocean.
    """
    # A null or empty geometry has no usable bounds; report "no data" instead
    # of aborting the whole per-hex loop.
    if geom is None or geom.is_empty:
        return np.nan

    minx, miny, maxx, maxy = geom.bounds

    # Slice GEBCO to hex bounding box (+ tiny margin)
    local = elev.sel(
        lat=slice(miny - 0.01, maxy + 0.01),
        lon=slice(minx - 0.01, maxx + 0.01),
    )
    if local.size == 0:
        return np.nan

    # meshgrid yields (lat, lon)-shaped arrays; force the data into the same
    # dimension order so the flattened mask lines up with the flattened values.
    lon_grid, lat_grid = np.meshgrid(local.lon.values, local.lat.values)
    values = local.transpose("lat", "lon").values.ravel()

    # Mask to polygon using shapely 2.x vectorised contains
    inside = shapely.contains_xy(geom, lon_grid.ravel(), lat_grid.ravel())
    elev_vals = values[inside]

    # Ocean only: GEBCO elevation < 0 → depth = -elevation.
    # Cast to float first so negation and interpolation never run in a narrow
    # integer dtype.
    ocean_depths = -elev_vals[elev_vals < 0].astype(np.float64)
    if ocean_depths.size == 0:
        return np.nan

    return float(np.quantile(ocean_depths, q))

# Smoke test: evaluate the function on the first hex polygon
print("Function defined. Test on first hex:")
first_hex = hexes.geometry.iloc[0]
print(hex_depth_quantile(first_hex))

In [None]:
# Evaluate the depth quantile for every hex, in GeoDataFrame row order
depth_q = np.array(
    [
        hex_depth_quantile(hex_geom)
        for hex_geom in tqdm(hexes.geometry, desc="hex depth q10")
    ],
    dtype=float,
)

# Keep the result alongside the polygons for the later cells
hexes["depth_q10"] = depth_q
n_missing = np.isnan(depth_q).sum()
print(f"Done. NaN count: {n_missing} / {len(depth_q)}")
print(f"Depth q10 range: {np.nanmin(depth_q):.0f} – {np.nanmax(depth_q):.0f} m")

In [None]:
# Compare the depth currently stored in meta.json with the new q10 depth,
# plotted side by side for all hexes (one point per hex centroid)
import matplotlib.pyplot as plt

meta = json.loads(META_PATH.read_text())

# Look the stored depth up with the same key format that is used when the
# depths are written back (str(int(id))). Missing ids and JSON nulls both
# become NaN so the array is always float and can be compared and plotted.
old_raw = [meta["depth"].get(str(int(hex_id))) for hex_id in hexes["id"]]
old_depth = np.array(
    [np.nan if depth is None else depth for depth in old_raw],
    dtype=float,
)

# Centroids are the same for both panels: compute them once
centroids = hexes.geometry.centroid
cx, cy = centroids.x, centroids.y

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, vals, title in zip(
    axes,
    [old_depth, depth_q],
    ["Old: depth_median (from NetCDF)", f"New: depth_q{int(DEPTH_QUANTILE*100)} (GEBCO)"],
):
    # Shared colour scale (0–200 m) keeps the two panels directly comparable
    sc = ax.scatter(cx, cy, c=vals, cmap="Blues", vmin=0, vmax=200, s=2)
    ax.set_title(title)
    plt.colorbar(sc, ax=ax, label="depth (m)")
plt.tight_layout()
plt.savefig(OUT_DIR / "depth_comparison.png", dpi=150)
plt.show()

In [None]:
# Show how many hexes change habitable status (depth > 85 m threshold)
# Coerce to float so that missing depths (None / JSON null) become NaN instead
# of making the comparisons fail on an object array.
old_depth_m = np.asarray(old_depth, dtype=float)
new_depth_m = np.asarray(depth_q, dtype=float)

was_nonhabitable = old_depth_m > 85
now_nonhabitable = new_depth_m > 85
# NaN compares False with both `>` and `<=`, so a hex without a new depth
# estimate is neither "non-habitable" nor counted as newly habitable.
# (Negating `now_nonhabitable` would wrongly count those hexes as habitable.)
newly_habitable = was_nonhabitable & (new_depth_m <= 85)
print(f"Previously non-habitable (depth_median > 85 m): {was_nonhabitable.sum()}")
print(f"Now non-habitable (depth_q10 > 85 m):           {now_nonhabitable.sum()}")
print(f"Newly classified as habitable:                   {newly_habitable.sum()}")

In [None]:
# Write updated meta.json: replace 'depth' with depth_q10
# Reload the file here so this cell does not depend on the optional plotting
# cell having been run first.
meta = json.loads(META_PATH.read_text())

# NaN → None so JSON serialises cleanly (None is written as null).
# .tolist() yields plain Python numbers, which json can serialise directly.
hex_ids = hexes["id"].tolist()
q10_depths = hexes["depth_q10"].tolist()
new_depth = {
    str(int(hex_id)): (None if np.isnan(depth) else depth)
    for hex_id, depth in zip(hex_ids, q10_depths)
}
meta["depth"] = new_depth

META_PATH.write_text(json.dumps(meta))
print(f"Written: {META_PATH}")