# Sentinel-2 RGB to GeoZarr with embedded STAC

This example demonstrates how to:

1. Query Earth-Search for a low-cloud Sentinel-2 L2A scene over Vienna.
2. Stream the 10 m red/green/blue COGs into xarray/rioxarray.
3. Write a consolidated, 512 × 512-chunk GeoZarr dataset.
4. Embed a minimal STAC Item (`proj:code`, `bbox`, `geometry`, `gsd`, etc.) into `root/.zattrs`.
5. Re-consolidate metadata so that `xr.open_zarr(consolidated=True)` exposes the `stac` block.
6. Coarsen the RGB by 4 × 4 and display a quick preview.

In [None]:
"""Create a STAC-aware GeoZarr RGB tile of a Sentinel-2 scene over Vienna."""

import shutil
from datetime import date
from pathlib import Path

import jsonschema
import matplotlib.pyplot as plt
import pystac_client
import rioxarray as rxr
import xarray as xr
import zarr

# 1. Query Earth-Search for a low-cloud Sentinel-2 L2A scene from the past year
API = "https://earth-search.aws.element84.com/v1"
coll = "sentinel-2-l2a"
bbox = [16.20, 48.10, 16.45, 48.30]  # Vienna
today = date.today()
try:
    last_year = today.replace(year=today.year - 1)
except ValueError:
    # 29 February has no counterpart in the previous (non-leap) year,
    # so fall back to 28 February instead of crashing.
    last_year = today.replace(year=today.year - 1, day=28)
daterange = f"{last_year:%Y-%m-%d}/{today:%Y-%m-%d}"

# Take the first item the API returns for the filter; None if nothing matches.
item = next(
    pystac_client.Client.open(API)
    .search(collections=[coll], bbox=bbox, datetime=daterange,
            query={"eo:cloud_cover": {"lt": 5}}, limit=1)
    .items(),
    None,
)
# Raise explicitly: an `assert` would be stripped when Python runs with -O.
if item is None:
    raise RuntimeError("No Sentinel-2 scene found")
print("Scene:", item.id, "cloud", item.properties["eo:cloud_cover"])

# 2. Build a lazy stack of the red, green and blue assets along "band"
bands = ["red", "green", "blue"]


def _open_band(name):
    """Open one asset of the scene as a chunked, masked array labelled *name*."""
    layer = rxr.open_rasterio(
        item.assets[name].href,
        chunks={"band": 1, "x": 2048, "y": 2048},
        masked=True,
    )
    # Replace the band coordinate with the asset key.
    return layer.assign_coords(band=[name])


rgb = xr.concat([_open_band(name) for name in bands], dim="band")
rgb.name = "radiance"
rgb = rgb.rio.write_crs(item.properties["proj:code"])
# Keep the affine transform as a plain list in the array attributes.
rgb.attrs["transform"] = list(rgb.rio.transform())

# 3. Persist the stack as a consolidated Zarr store (512 x 512 spatial chunks)
store = Path("_".join([coll, *bands, item.id]) + ".zarr")
if store.exists():
    # Start from a clean directory so no stale chunks survive a re-run.
    shutil.rmtree(store)

# Drop the x/y coordinate variables before converting to a Dataset.
radiance_ds = rgb.drop_vars(["x", "y"]).to_dataset()
rechunked = radiance_ds.chunk({"y": 512, "x": 512})
rechunked.to_zarr(store, mode="w", consolidated=True)

# 4. Embed a minimal STAC Item in the root attributes, then re-consolidate metadata
# Finest ground sample distance among the selected assets (10 if an asset omits it).
gsd = min(item.assets[b].to_dict().get("gsd", 10) for b in bands)
# `proj:bbox` must be expressed in the CRS named by `proj:code`, not in the
# lon/lat of `item.bbox`, so take the bounds from the raster itself.
proj_bbox = [float(v) for v in rgb.rio.bounds()]
mini = {
    "type": "Item",
    "stac_version": "1.0.0",
    "id": item.id,
    "bbox": item.bbox,
    "geometry": item.geometry,
    "properties": {
        "datetime": item.properties["datetime"],
        "proj:code": item.properties["proj:code"],
        "proj:bbox": proj_bbox,
        "platform": item.properties["platform"],
        "instruments": item.properties["instruments"],
        "eo:cloud_cover": item.properties["eo:cloud_cover"],
        "gsd": gsd,
    },
    "assets": {
        "data": {"href": store.name,
                 "type": "application/x-zarr",
                 "roles": ["data"]}
    },
    # NOTE(review): STAC defines `license` as a property / collection field;
    # it is kept at the top level here so existing readers keep working.
    "license": item.properties.get("license", "proprietary"),
}
# Lightweight sanity check only; this is not full STAC Item schema validation.
jsonschema.validate(mini, {"type": "object", "required": ["type", "id", "assets"]})
root = zarr.open_group(store, mode="a")
root.attrs["stac"] = mini
# Use the top-level function rather than the `zarr.convenience` module path.
zarr.consolidate_metadata(store)  # refresh the consolidated metadata

# 5. Read the store back via consolidated metadata and print the embedded STAC Item
ds = xr.open_zarr(store, consolidated=True)
embedded_stac = ds.attrs["stac"]
print("Embedded STAC:", embedded_stac)

# 6. Quick preview: 4×4 coarsened RGB
preview = ds.radiance.coarsen(y=4, x=4, boundary="trim").mean()
# Casting the raw values straight to uint8 wraps anything above 255 and
# scrambles the picture. Scale to [0, 1] floats instead, which imshow accepts.
# NOTE(review): 3000 assumes digital numbers of reflectance x 10000 (so roughly
# 0.3 reflectance maps to white) -- adjust if the assets are scaled differently.
stretch_max = 3000
preview_rgb = (
    (preview.transpose("y", "x", "band") / stretch_max)
    .clip(0, 1)
    .fillna(0)  # masked (nodata) pixels render as black
)
plt.imshow(preview_rgb.values)
plt.axis("off")
plt.title("Coarsened Sentinel-2 RGB")
plt.show()