# Convert Sigmet files into Zarr

## Imports

In [None]:
import fsspec
import xarray as xr
import numpy as np

from raw2zarr.builder.executor import append_sequential  # Note: iris engine not yet supported in append_parallel
from raw2zarr.builder.builder_utils import get_icechunk_repo

Now let's convert some sigmet files hosted in the [IDEAM](https://registry.opendata.aws/ideam-radares/) AWS bucket.

**Note for CI Testing**: This notebook processes only the first `NOTEBOOK_TEST_FILES` files; GitHub Actions sets the `NOTEBOOK_TEST_FILES=2` environment variable, and the conversion cell also defaults to 2 files when the variable is unset. For full processing, set a larger number or modify the conversion cell below.

**Note on Processing**: This notebook uses `append_sequential` because the iris engine (for Sigmet files) is not yet supported in `append_parallel`. The parallel version currently only supports NEXRAD files.

In [None]:
import os

# Conversion settings shared by the cells below.
radar = "Guaviare"
append_dim = "vcp_time"  # passed to append_sequential as the append dimension
engine = "iris"  # engine used for Sigmet files
zarr_format = 3
consolidated = zarr_format == 2  # True only for Zarr format 2
zarr_store = f"../zarr/{radar}2.zarr"
# Key prefix: date / radar name / first three letters of the radar name, upper-cased
query = f"2022/06/05/{radar}/{radar[:3].upper()}"
str_bucket = "s3://s3-radaresideam/l2_data"

# CI Mode Detection: NOTEBOOK_TEST_FILES set to anything other than '0'
# (or an empty value) marks a CI run. The variable is read once so the
# check and the message cannot disagree.
test_files_env = os.environ.get('NOTEBOOK_TEST_FILES', '0').strip()
ci_mode = test_files_env not in ('', '0')
if ci_mode:
    print(f"🤖 CI Mode: Processing {test_files_env} files for testing")
else:
    # The conversion cell picks its own default file count when the variable
    # is unset, so do not claim that the full dataset is processed.
    print("👤 Manual Mode: NOTEBOOK_TEST_FILES not set - using the conversion cell's default file selection")

In [42]:
# Show the local store path built in the configuration cell
zarr_store

'../zarr/Guaviare2.zarr'

In [43]:
# Preview the glob pattern used to list the radar files; the "/" between the
# bucket prefix and the query is required for the pattern to match any key.
f"{str_bucket}/{query}*"

's3://s3-radaresideam/l2_data/2022/06/05/Guaviare/GUA*'

In [44]:
# anon=True: access the bucket without credentials
fs = fsspec.filesystem("s3", anon=True)
# Sort the matched keys and put the s3:// scheme in front of each one
radar_files = [
    f"s3://{object_key}"
    for object_key in sorted(fs.glob(f"{str_bucket}/{query}*"))
]

In [45]:
# Number of files matched by the listing
len(radar_files)

1141

In [6]:
# ?append_sequential  # uncomment to show the function's help

In [None]:
import os

# Number of files converted when NOTEBOOK_TEST_FILES is unset, empty, or not
# positive. Kept small so the demonstration (and the CI run) finishes quickly.
# For full processing: set NOTEBOOK_TEST_FILES to a larger number, or replace
# the slice below with e.g. radar_files[550:600].
default_num_files = 2

# A non-numeric value still raises ValueError, which surfaces a misconfigured
# environment instead of silently guessing.
raw_num_files = os.environ.get('NOTEBOOK_TEST_FILES', '').strip()
num_files = int(raw_num_files) if raw_num_files else default_num_files
if num_files <= 0:
    # The configuration cell treats '0' as manual mode; without this guard it
    # would select no files at all, and a negative value would silently select
    # almost the whole listing.
    num_files = default_num_files
test_files = radar_files[:num_files]

print(f"Processing {len(test_files)} files for demonstration")

# Initialize icechunk repository
repo = get_icechunk_repo(zarr_store)

# Note: Using append_sequential for iris engine (Sigmet files)
# append_parallel currently only supports nexradlevel2 engine
append_sequential(
    radar_files=test_files,
    append_dim=append_dim,
    repo=repo,
    zarr_format=zarr_format,
    engine=engine,
)

## Read the radar datatree store in Zarr format

In [19]:
# List the top-level entries of the local store directory
!ls ../zarr/Guaviare2.zarr

PRECA  PRECB  PRECC  SURVP  zarr.json


In [None]:
# Only try to read the store if it exists and has content (skip in CI mode with limited files)
import os
try:
    if os.path.exists(zarr_store) and len(os.listdir(zarr_store)) > 1:  # More than just zarr.json
        dt_radar = xr.open_datatree(
            zarr_store, 
            engine="zarr", 
            consolidated=False, 
            zarr_format=3, 
            chunks={}
        )
        print("✅ Zarr store loaded successfully")
    else:
        print("⚠️  Zarr store empty or minimal (expected in CI mode) - skipping read operations")
        dt_radar = None
except Exception as e:
    print(f"⚠️  Could not read zarr store (expected in CI mode): {e}")
    dt_radar = None

In [None]:
if dt_radar is not None:
    # A bare expression nested in an `if` block is not echoed by the notebook,
    # so render the variable explicitly with display().
    display(dt_radar["/SURVP/sweep_0/DBZH"])
else:
    print("📝 Zarr reading skipped - this is normal in CI testing mode")

In [None]:
if dt_radar is not None:
    # A bare expression nested in an `if` block is not echoed by the notebook,
    # so render the list of child group names explicitly with display().
    display(list(dt_radar.children))
else:
    print("📝 Zarr reading skipped - this is normal in CI testing mode")

We can now access each sweep by its key, as in a dictionary. Let's check the lowest elevation angle.

In [None]:
# Pull the dataset of the first SURVP sweep out of the tree and render it
if dt_radar is None:
    print("📝 Zarr reading skipped - this is normal in CI testing mode")
else:
    ds_05 = dt_radar["SURVP/sweep_0"].ds
    display(ds_05)

Before creating a radar plot, we need to georeference the dataset. This can be done using the `xradar.georeference` module.

Now we can create a radial plot

In [None]:
# Plot DBZH for the first vcp_time step of the sweep dataset (ds_05).
# NOTE(review): the "x"/"y" coordinates and the "ChaseSpectral" colormap are not
# created or registered in the cells shown here - presumably they come from
# xradar georeferencing and a colormap package; confirm before running.
if dt_radar is None or "SURVP/sweep_0" not in dt_radar:
    print("📝 Plotting skipped - this is normal in CI testing mode")
else:
    first_scan = ds_05.isel(vcp_time=0)
    first_scan.DBZH.plot(x="x", y="y", cmap="ChaseSpectral", vmin=-10, vmax=50)