# Visualize Lambda Production Results

This notebook reads the output Zarr store from the Lambda production run and visualizes them using xdggs/xarray.

**Input:** Zarr store at `s3://{bucket}/{prefix}`

**Columns in each parquet file:**
- `child_morton`: Morton index at order 12
- `child_healpix`: HEALPix cell ID at order 12
- `count`: Number of observations
- `mean`: Weighted mean elevation
- `sigma`: Uncertainty in mean
- `min`, `max`: Elevation range
- `variance`, `q25`, `q50`, `q75`: Distribution stats

## 1. Imports and Configuration

In [1]:
import warnings

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import xdggs
from obstore.store import S3Store
from zarr import config, open_group
from zarr.storage import ObjectStore

warnings.filterwarnings("ignore")

print("Imports complete")

Imports complete


In [2]:
# Configuration
S3_BUCKET = "astera-englacial"
S3_PREFIX = "atl06/production.zarr"
CHILD_ORDER = 12

print(f"Reading from: s3://{S3_BUCKET}/{S3_PREFIX}/")

Reading from: s3://astera-englacial/atl06/production.zarr/


## 2. Load Zarr

In [3]:
config.set({"async.concurrency": 10})

s3_store = S3Store(
    S3_BUCKET,
    prefix=S3_PREFIX,
    region="us-west-2",
    skip_signature=True,
)
store = ObjectStore(store=s3_store, read_only=True)

In [4]:
store

ObjectStore(object_store://S3Store(bucket="astera-englacial", prefix="atl06/production.zarr"))

In [5]:
group = open_group(store, zarr_format=3, use_consolidated=True, mode="r")

In [6]:
%%time
ds = xr.open_dataset(
    store,
    engine="zarr",
    consolidated=True,
    zarr_format=3,
    group=str(CHILD_ORDER),
    chunked_array_type="cubed",
    chunks="auto",
)
ds

CPU times: user 39.2 s, sys: 18.8 s, total: 58 s
Wall time: 2min 38s


KeyboardInterrupt: 

In [None]:
ds

In [None]:
%%time
ds = ds.set_coords("cell_ids")
ds_decoded = ds.dggs.decode(convention="zarr")

In [9]:
# Decode with xdggs to enable DGGS operations
ds = xdggs.decode(ds, index_options={"index_kind": "moc"})

# Add lat/lon coordinates
ds = ds.dggs.assign_latlon_coords()

print("Decoded with xdggs and added lat/lon coords:")
print(ds)

KeyError: 'grid_name'

## 4. Summary Statistics

In [None]:
print("=" * 60)
print("SUMMARY STATISTICS")
print("=" * 60)

print(f"\nTotal cells with data: {len(ds['cell_ids']):,}")
print(f"Total observations: {ds['count'].sum().values:,}")

print("\nElevation (h_mean):")
print(f"  Min:  {ds['h_mean'].min().values:.2f} m")
print(f"  Max:  {ds['h_mean'].max().values:.2f} m")
print(f"  Mean: {ds['h_mean'].mean().values:.2f} m")
print(f"  Std:  {ds['h_mean'].std().values:.2f} m")

print("\nUncertainty (h_sigma):")
print(f"  Min:  {ds['h_sigma'].min().values:.4f} m")
print(f"  Max:  {ds['h_sigma'].max().values:.2f} m")
print(f"  Mean: {ds['h_sigma'].mean().values:.4f} m")

print("\nObservation counts per cell:")
print(f"  Min:  {ds['count'].min().values}")
print(f"  Max:  {ds['count'].max().values:,}")
print(f"  Mean: {ds['count'].mean().values:.1f}")

print("\nCoverage:")
print(f"  Lat: {ds['latitude'].min().values:.2f} to {ds['latitude'].max().values:.2f}")
print(f"  Lon: {ds['longitude'].min().values:.2f} to {ds['longitude'].max().values:.2f}")

## 5. Visualization - Antarctic Overview

In [None]:
# Antarctic Polar Stereographic projection
proj = ccrs.SouthPolarStereo()
data_crs = ccrs.PlateCarree()

fig, axes = plt.subplots(2, 2, figsize=(18, 16), subplot_kw={"projection": proj})

# Add Antarctic coastline to all subplots
for ax in axes.flat:
    ax.coastlines(resolution="50m", linewidth=0.5)
    ax.add_feature(cfeature.LAND, facecolor="lightgray", alpha=0.3)
    ax.gridlines(draw_labels=False, alpha=0.3)
    ax.set_extent([-180, 180, -90, -60], crs=data_crs)

# 1. Mean elevation
ax = axes[0, 0]
valid = ~np.isnan(ds["h_mean"].values)
scatter = ax.scatter(
    ds["longitude"].values[valid],
    ds["latitude"].values[valid],
    c=ds["h_mean"].values[valid],
    s=0.5,
    cmap="terrain",
    alpha=0.8,
    vmin=0,
    vmax=4000,
    transform=data_crs,
)
ax.set_title(f"Mean Elevation ({np.sum(valid):,} cells)", fontsize=14, weight="bold")
plt.colorbar(scatter, ax=ax, label="Elevation (m)", shrink=0.7)

# 2. Observation count
ax = axes[0, 1]
valid = ds["count"].values > 0
scatter = ax.scatter(
    ds["longitude"].values[valid],
    ds["latitude"].values[valid],
    c=ds["count"].values[valid],
    s=0.5,
    cmap="viridis",
    alpha=0.8,
    norm=plt.matplotlib.colors.LogNorm(vmin=1),
    transform=data_crs,
)
ax.set_title(f"Observation Count ({np.sum(valid):,} cells)", fontsize=14, weight="bold")
plt.colorbar(scatter, ax=ax, label="Count (log scale)", shrink=0.7)

# 3. Uncertainty (sigma)
ax = axes[1, 0]
valid = ~np.isnan(ds["h_sigma"].values)
scatter = ax.scatter(
    ds["longitude"].values[valid],
    ds["latitude"].values[valid],
    c=ds["h_sigma"].values[valid],
    s=0.5,
    cmap="plasma",
    alpha=0.8,
    vmax=1.0,
    transform=data_crs,
)
ax.set_title(f"Uncertainty (h_sigma) ({np.sum(valid):,} cells)", fontsize=14, weight="bold")
plt.colorbar(scatter, ax=ax, label="Uncertainty (m)", shrink=0.7)

# 4. Elevation range (max - min)
ax = axes[1, 1]
h_range = ds["h_max"].values - ds["h_min"].values
valid = ~np.isnan(h_range)
scatter = ax.scatter(
    ds["longitude"].values[valid],
    ds["latitude"].values[valid],
    c=h_range[valid],
    s=0.5,
    cmap="hot",
    alpha=0.8,
    vmax=100,
    transform=data_crs,
)
ax.set_title(f"Elevation Range (max-min) ({np.sum(valid):,} cells)", fontsize=14, weight="bold")
plt.colorbar(scatter, ax=ax, label="Range (m)", shrink=0.7)

plt.suptitle("ATL06 Lambda Production Results - Cycle 22", fontsize=16, weight="bold", y=1.02)
plt.tight_layout()
plt.show()

## 6. Histograms

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Elevation distribution
ax = axes[0, 0]
valid = ~np.isnan(ds["h_mean"].values)
ax.hist(ds["h_mean"].values[valid], bins=100, edgecolor="none", alpha=0.7)
ax.set_xlabel("Elevation (m)")
ax.set_ylabel("Count")
ax.set_title("Mean Elevation Distribution")
ax.axvline(
    ds["h_mean"].mean().values,
    color="red",
    linestyle="--",
    label=f"Mean: {ds['h_mean'].mean().values:.0f}m",
)
ax.legend()

# Observation count distribution (log scale)
ax = axes[0, 1]
valid = ds["count"].values > 0
ax.hist(np.log10(ds["count"].values[valid]), bins=50, edgecolor="none", alpha=0.7)
ax.set_xlabel("log10(Observation Count)")
ax.set_ylabel("Number of Cells")
ax.set_title("Observation Count Distribution")

# Uncertainty distribution
ax = axes[1, 0]
valid = ~np.isnan(ds["h_sigma"].values) & (ds["h_sigma"].values < 10)
ax.hist(ds["h_sigma"].values[valid], bins=100, edgecolor="none", alpha=0.7)
ax.set_xlabel("Uncertainty (m)")
ax.set_ylabel("Count")
ax.set_title("Uncertainty Distribution (h_sigma < 10m)")

# Latitude distribution
ax = axes[1, 1]
ax.hist(ds["latitude"].values, bins=50, edgecolor="none", alpha=0.7)
ax.set_xlabel("Latitude")
ax.set_ylabel("Count")
ax.set_title("Latitude Distribution")

plt.tight_layout()
plt.show()

## 7. Regional Zoom - West Antarctica

In [None]:
# Zoom into West Antarctica (Thwaites/Pine Island region)
proj = ccrs.SouthPolarStereo()
data_crs = ccrs.PlateCarree()

fig, ax = plt.subplots(figsize=(12, 10), subplot_kw={"projection": proj})

ax.coastlines(resolution="10m", linewidth=0.5)
ax.add_feature(cfeature.LAND, facecolor="lightgray", alpha=0.3)
ax.gridlines(draw_labels=True, alpha=0.3)

# West Antarctica extent
ax.set_extent([-140, -70, -85, -70], crs=data_crs)

valid = ~np.isnan(ds["h_mean"].values)
scatter = ax.scatter(
    ds["longitude"].values[valid],
    ds["latitude"].values[valid],
    c=ds["h_mean"].values[valid],
    s=2,
    cmap="terrain",
    alpha=0.9,
    vmin=0,
    vmax=2500,
    transform=data_crs,
)

plt.colorbar(scatter, ax=ax, label="Elevation (m)", shrink=0.7)
ax.set_title(
    "West Antarctica - Mean Elevation (Thwaites/Pine Island Region)", fontsize=14, weight="bold"
)
plt.show()

In [None]:
# Optionally save combined dataset to zarr
# output_path = f"s3://{S3_BUCKET}/{S3_PREFIX}/combined.zarr"
# ds.to_zarr(output_path, mode='w')
# print(f"Saved to: {output_path}")

# Or save locally
# ds.to_zarr("production_results_combined.zarr", mode='w')
# print("Saved to: production_results_combined.zarr")

print("To save the combined dataset, uncomment the code above.")