# Build hex metadata

Reads the per-hex variables from all 9 files via OPeNDAP (these arrays are small, so remote access is feasible).
Produces:
- `database/data/hex_label_to_id.json` — `hex_label → int_id` mapping (union across all files, escape hex excluded)
- `database/data/hexes.geojson` — hex polygons
- `database/data/meta.json` — per-hex metadata

In [1]:
import json
import numpy as np
import pandas as pd
import xarray as xr
from pathlib import Path

# Root of the OPeNDAP endpoint; every dataset URL is built underneath it.
BASE = (
    "https://data.geomar.de/thredds/dodsC/20.500.12085/"
    "11cc2d8f-4039-49d3-aaab-04ce0fb23190/submission"
)

# All (depth, time-window) combinations, ordered depth-major.
FILES = [
    (depth, window)
    for depth in ("05m", "10m", "15m")
    for window in ("00-07days", "07-14days", "07-28days")
]

# Label of the escape hex; it is left out of the collected metadata.
ESCAPE_HEX = b"(0, 0, 0)"

# Directory (relative to this notebook) that receives the generated files.
OUT_DIR = Path("../../database/data")


def url(depth, time):
    """Return the OPeNDAP URL of the connectivity file for `depth` and `time`.

    Both arguments are the string tags used in `FILES`, e.g. "05m" and
    "00-07days".
    """
    folder = f"040_connectivity_analysis_{depth}"
    return f"{BASE}/{folder}/{folder}_{time}.nc"

# Scalar per-hex variables. In the datasets each one is stored under the
# name "{var}_{dim}", where dim is "hex0" or "hex1".
HEX_VARS = [
    "water_fraction",
    # depth_* variables
    "depth_mean",
    "depth_median",
    "depth_std",
    # *_count variables
    "aqc_count",
    "rst_count",
    "pop_count",
    "dss_count",
    "hly_count",
    "his_count",
    # scalar lon/lat per hex (the corner arrays are handled separately)
    "lon",
    "lat",
]
# Corner variables follow a different pattern:
# lon_{dim}_corners / lat_{dim}_corners

In [2]:
# Collect per-hex data from all 9 files, both hex0 and hex1 dimensions.
# Key: hex label (bytes), Value: dict of metadata.
# A hex keeps the record from the first file/dimension in which it appears;
# later occurrences of the same label are skipped.
hex_data = {}

for depth, time in FILES:
    print(f"Reading {depth} {time} ...", end=" ")
    # The context manager closes the remote dataset even if a read fails.
    with xr.open_dataset(url(depth, time), engine="netcdf4") as ds:
        for dim in ["hex0", "hex1"]:
            labels = ds[dim].values  # byte strings

            # Do not touch any data variable when this dimension contributes
            # no new hex (keeps already-covered files cheap over OPeNDAP).
            if not any(
                label != ESCAPE_HEX and label not in hex_data
                for label in labels
            ):
                continue

            # Read every per-hex array once per dimension instead of once per
            # hex; dict order follows HEX_VARS so record key order is stable.
            scalar_arrays = {
                v: ds[f"{v}_{dim}"].values
                for v in HEX_VARS
                if f"{v}_{dim}" in ds
            }
            # Corners are named lon_{dim}_corners / lat_{dim}_corners and are
            # indexed as [:, i] below, i.e. the hex index is the second axis.
            corner_arrays = {
                f"{v}_corners": ds[f"{v}_{dim}_corners"].values
                for v in ["lon", "lat"]
                if f"{v}_{dim}_corners" in ds
            }

            for i, label in enumerate(labels):
                if label == ESCAPE_HEX:
                    continue
                if label in hex_data:
                    continue  # already seen (earlier file, dim, or index)
                rec = {v: float(values[i]) for v, values in scalar_arrays.items()}
                for name, values in corner_arrays.items():
                    rec[name] = values[:, i].tolist()
                hex_data[label] = rec

    print(f"total unique hexes so far: {len(hex_data)}")

print(f"\nTotal unique hexes: {len(hex_data)}")

Reading 05m 00-07days ... 

total unique hexes so far: 8397
Reading 05m 07-14days ... total unique hexes so far: 8397
Reading 05m 07-28days ... 

total unique hexes so far: 8397
Reading 10m 00-07days ... 

total unique hexes so far: 8413
Reading 10m 07-14days ... total unique hexes so far: 8413
Reading 10m 07-28days ... 

total unique hexes so far: 8413
Reading 15m 00-07days ... 

total unique hexes so far: 8425
Reading 15m 07-14days ... total unique hexes so far: 8425
Reading 15m 07-28days ... 

total unique hexes so far: 8425

Total unique hexes: 8425


In [3]:
# Assign integer IDs by position in the sorted list of hex labels.
# Labels are byte strings, so the order is plain byte-wise ordering.
sorted_labels = list(hex_data)
sorted_labels.sort()
label_to_id = dict(zip(sorted_labels, range(len(sorted_labels))))

max_id = len(label_to_id) - 1
print(f"ID range: 0 – {max_id}")
print(f"First 3: {sorted_labels[:3]}")
print(f"Last 3:  {sorted_labels[-3:]}")

ID range: 0 – 8424
First 3: [np.bytes_(b'(-1, -19, 20)'), np.bytes_(b'(-1, -2, 3)'), np.bytes_(b'(-1, -20, 21)')]
Last 3:  [np.bytes_(b'(9, 7, -16)'), np.bytes_(b'(9, 8, -17)'), np.bytes_(b'(9, 9, -18)')]


In [4]:
# Build hexes.geojson: one Polygon feature per hex, carrying its integer ID.

# This is the first cell that writes into OUT_DIR; create it so the notebook
# also runs on a fresh checkout where the directory does not exist yet.
OUT_DIR.mkdir(parents=True, exist_ok=True)

features = []
for label in sorted_labels:
    hex_id = label_to_id[label]
    rec = hex_data[label]
    lons = rec["lon_corners"]
    lats = rec["lat_corners"]
    # GeoJSON polygon: list of [lon, lat], closed ring (first == last)
    coords = [[lon, lat] for lon, lat in zip(lons, lats)]
    if coords[0] != coords[-1]:
        # Close the ring by repeating the first corner (appended as a copy so
        # the first and last positions are independent lists).
        coords.append(list(coords[0]))
    features.append({
        "type": "Feature",
        "properties": {"id": hex_id},
        "geometry": {"type": "Polygon", "coordinates": [coords]},
    })

geojson = {"type": "FeatureCollection", "features": features}
out_path = OUT_DIR / "hexes.geojson"
with open(out_path, "w") as f:
    json.dump(geojson, f)
print(f"Written: {out_path} ({len(features)} features)")

Written: ../../database/data/hexes.geojson (8425 features)


In [5]:
# Build meta.json: column-oriented metadata, {column: {str(hex_id): value}}.
meta_cols = ["id", "lon", "lat", "depth", "water_fraction",
             "disease", "rest", "aqc", "pop", "his", "hly"]

# How each output column is derived from a hex label and its record.
# Count columns fall back to 0.0 when the variable was absent in the dataset.
col_map = {
    "id": lambda label, rec: label_to_id[label],
    "lon": lambda label, rec: rec.get("lon"),
    "lat": lambda label, rec: rec.get("lat"),
    "depth": lambda label, rec: rec.get("depth_median"),
    "water_fraction": lambda label, rec: rec.get("water_fraction"),
    "disease": lambda label, rec: rec.get("dss_count", 0.0),
    "rest": lambda label, rec: rec.get("rst_count", 0.0),
    "aqc": lambda label, rec: rec.get("aqc_count", 0.0),
    "pop": lambda label, rec: rec.get("pop_count", 0.0),
    "his": lambda label, rec: rec.get("his_count", 0.0),
    "hly": lambda label, rec: rec.get("hly_count", 0.0),
}


def _json_safe(value):
    """Return `value`, or None when it is a non-finite float.

    json.dump would otherwise emit the bare tokens NaN / Infinity, which are
    not valid JSON and are rejected by strict parsers.
    """
    if isinstance(value, float) and not np.isfinite(value):
        return None
    return value


rows = {col: {} for col in meta_cols}
for label in sorted_labels:
    rec = hex_data[label]
    hex_id = label_to_id[label]
    # Iterate meta_cols (not col_map) so `rows` and the column list cannot drift.
    for col in meta_cols:
        rows[col][str(hex_id)] = _json_safe(col_map[col](label, rec))

out_path = OUT_DIR / "meta.json"
with open(out_path, "w") as f:
    # allow_nan=False documents (and enforces) that the file is strict JSON;
    # every value has already been passed through _json_safe.
    json.dump(rows, f, allow_nan=False)
print(f"Written: {out_path}")
print(f"Columns: {list(rows.keys())}")
if sorted_labels:  # there is no id 0 to show when no hexes were collected
    print(f"Sample id=0: { {k: rows[k]['0'] for k in meta_cols} }")

Written: ../../database/data/meta.json
Columns: ['id', 'lon', 'lat', 'depth', 'water_fraction', 'disease', 'rest', 'aqc', 'pop', 'his', 'hly']
Sample id=0: {'id': 0, 'lon': -3.215781689279032, 'lat': 51.46450408760938, 'depth': 5.0, 'water_fraction': 0.22619047619047622, 'disease': 0.0, 'rest': 0.0, 'aqc': 0.0, 'pop': 1.0, 'his': 0.0, 'hly': 0.0}


In [6]:
# Persist the label → id mapping so that notebook 03 can reuse it.
# JSON object keys must be text, so each byte-string label is decoded to str.
mapping_path = OUT_DIR / "hex_label_to_id.json"

decoded_mapping = {}
for raw_label, hex_id in label_to_id.items():
    decoded_mapping[raw_label.decode()] = hex_id

with open(mapping_path, "w") as f:
    json.dump(decoded_mapping, f)

n_entries = len(label_to_id)
print(f"Written: {mapping_path} ({n_entries} entries)")

Written: ../../database/data/hex_label_to_id.json (8425 entries)
