In [None]:
import numpy as np
import xarray as xr
from affine import Affine

import rioxarray
from rioxarray import Convention

# Create sample data on a global 100 x 100 grid.
# Seed the generator so that "Restart & Run All" reproduces the same values.
rng = np.random.default_rng(42)
width, height = 100, 100
data = rng.random((height, width))

# North-up affine transform: the upper-left corner of the grid sits at
# (-180, 90); each pixel is 3.6 degrees wide and 1.8 degrees tall.
x_res, y_res = 360.0 / width, 180.0 / height
transform = Affine(x_res, 0.0, -180.0, 0.0, -y_res, 90.0)

# Derive pixel-centre coordinates from the same numbers as the transform so
# that the coordinates and the transform describe the same grid: x increases
# eastwards from -180, y decreases southwards from 90 (matching the negative
# y step of the transform).
da = xr.DataArray(
    data,
    dims=["y", "x"],
    coords={
        "x": -180.0 + x_res * (np.arange(width) + 0.5),
        "y": 90.0 - y_res * (np.arange(height) + 0.5),
    },
)

print("Sample data created")

## CF Convention

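The CF convention stores geospatial metadata (the CRS and the transform) as attributes of a grid-mapping coordinate variable (`spatial_ref` in the example below) rather than on the data variable itself.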

In [None]:
# Write CRS and transform using CF convention
da_cf = da.rio.write_crs("EPSG:4326", convention=Convention.CF)
da_cf = da_cf.rio.write_transform(transform, convention=Convention.CF)

# rioxarray's write_grid_mapping records the grid-mapping name in `.encoding`
# (not `.attrs`), so look there first and fall back to attrs for data that
# still carries it as a plain attribute.
grid_mapping_name = da_cf.encoding.get("grid_mapping", da_cf.attrs.get("grid_mapping"))

# The CRS and GeoTransform live on the grid-mapping coordinate variable.
spatial_ref_attrs = da_cf.coords['spatial_ref'].attrs

print("CF Convention attributes:")
print(f"Grid mapping: {grid_mapping_name}")
print(f"Grid mapping coordinate: {list(da_cf.coords.keys())}")
# list(...) prints the attribute names plainly instead of a keys-view repr,
# consistent with how the coordinate names are printed on the line above.
print(f"Grid mapping attrs: {list(spatial_ref_attrs.keys())}")
print(f"GeoTransform: {spatial_ref_attrs.get('GeoTransform')}")

## Zarr Conventions

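The Zarr conventions store geospatial metadata as attributes directly on the data array (for example `proj:code` and `spatial:transform`) and list the conventions in use under the `zarr_conventions` attribute.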

In [None]:
# Attach the CRS and the affine transform as Zarr-convention attributes.
da_zarr = da.rio.write_crs(
    "EPSG:4326", convention=Convention.Zarr
).rio.write_transform(transform, convention=Convention.Zarr)

# With the Zarr conventions everything is stored on the array's own attrs.
zarr_attrs = da_zarr.attrs

print("Zarr Convention attributes:")
print(f"proj:code: {zarr_attrs.get('proj:code')}")
print(f"spatial:transform: {zarr_attrs.get('spatial:transform')}")

# Each entry of `zarr_conventions` is a mapping; show only the names.
declared_conventions = [entry['name'] for entry in zarr_attrs.get('zarr_conventions', [])]
print(f"zarr_conventions: {declared_conventions}")

## Zarr-Specific Methods

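rioxarray provides helper functions for working with the Zarr conventions in the `rioxarray._convention.zarr` module; the cells below use them to write additional CRS formats and spatial metadata. The leading underscore marks this module as private, so treat these helpers as internal API that may change.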

In [None]:
# Store the CRS in every format the Zarr convention helpers support.
# NOTE(review): `_convention` is a private module (leading underscore) —
# confirm whether a public entry point exists for this.
from rioxarray._convention import zarr as zarr_conv

# First attach the CRS the regular way, then ask the helper to re-write it
# with format="all" so the alternative encodings are added as well.
da_with_crs = da.rio.write_crs("EPSG:4326", convention=Convention.Zarr)
da_zarr_full = zarr_conv.write_crs(da_with_crs, da_with_crs.rio.crs, format="all")

full_attrs = da_zarr_full.attrs

print("Multiple CRS formats:")
print(f"proj:code: {full_attrs.get('proj:code')}")

# WKT2 strings are long; show only the first 50 characters.
wkt2_head = full_attrs.get('proj:wkt2')[:50]
print(f"proj:wkt2: {wkt2_head}...")

projjson_type = type(full_attrs.get('proj:projjson'))
print(f"proj:projjson type: {projjson_type}")

In [None]:
# Add the full set of spatial:* attributes with the convention helper.
# The transform is written first through the accessor, then the helper is
# given the y/x dimension names and the same transform.
da_spatial = zarr_conv.write_spatial_metadata(
    da.rio.write_transform(transform, convention=Convention.Zarr),
    "y",
    "x",
    transform=transform,
)

print("Complete spatial metadata:")
# Report each attribute as "<key>: <value>" (None when it is absent).
for spatial_key in (
    'spatial:dimensions',
    'spatial:shape',
    'spatial:bbox',
    'spatial:registration',
):
    print(f"{spatial_key}: {da_spatial.attrs.get(spatial_key)}")

In [None]:
# Write CRS and transform together
da_complete = da.rio.write_crs("EPSG:4326", convention=Convention.Zarr)
da_complete = da_complete.rio.write_transform(transform, convention=Convention.Zarr)

# The Zarr conventions can carry the CRS under any of these keys (code, WKT2
# or PROJJSON). Checking a single key would report "no CRS" whenever a
# different format was written, so accept any of them.
crs_keys = ("proj:code", "proj:wkt2", "proj:projjson")
has_crs = any(key in da_complete.attrs for key in crs_keys)

print("Complete Zarr conventions:")
print(f"Has CRS: {has_crs}")
print(f"Has transform: {'spatial:transform' in da_complete.attrs}")
print(f"Has dimensions: {'spatial:dimensions' in da_complete.attrs}")
print(f"Number of attributes: {len(da_complete.attrs)}")

## Global Convention Setting

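You can set the default convention with `rioxarray.set_options(convention=...)` to avoid specifying it for each method call. When used as a context manager, as in the cell below, the setting applies only inside the `with` block.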

In [None]:
# Set Zarr as the default convention for everything inside the `with` block
with rioxarray.set_options(convention=Convention.Zarr):
    da_global = da.rio.write_crs("EPSG:4326")  # Uses Zarr convention
    da_global = da_global.rio.write_transform(transform)  # Uses Zarr convention

    # Look the WKT2 string up once. Only a real string gets truncated and an
    # ellipsis appended; a missing value is reported as plain "Not found".
    wkt2 = da_global.attrs.get('proj:wkt2')
    wkt2_preview = f"{wkt2[:50]}..." if wkt2 else "Not found"

    # rioxarray's CF writer records `grid_mapping` in `.encoding`, so check
    # both places to show that no CF metadata was written.
    has_grid_mapping = (
        'grid_mapping' in da_global.attrs or 'grid_mapping' in da_global.encoding
    )

    print("Using global Zarr convention:")
    print(f"proj:wkt2: {wkt2_preview}")
    print(f"spatial:transform: {da_global.attrs.get('spatial:transform')}")
    print(f"Has grid_mapping: {has_grid_mapping}")

## Reading with Different Conventions

Reading follows this priority order: the CF convention is tried first (the default), and the Zarr conventions are used as a fallback when explicitly declared.

In [None]:
# Create data with both conventions
da_both = da.rio.write_crs("EPSG:4326", convention=Convention.CF)
da_both = da_both.rio.write_crs("EPSG:4326", convention=Convention.Zarr)

# rioxarray's CF writer records `grid_mapping` in `.encoding`, so check both
# encoding and attrs when testing for CF metadata.
has_cf_grid_mapping = (
    'grid_mapping' in da_both.encoding or 'grid_mapping' in da_both.attrs
)

# The Zarr CRS may be stored as a code, WKT2 or PROJJSON; accept any of them
# so the printed label and the check agree.
zarr_crs_keys = ("proj:code", "proj:wkt2", "proj:projjson")
has_zarr_crs = any(key in da_both.attrs for key in zarr_crs_keys)

print("Data with both conventions:")
print(f"Has CF grid_mapping: {has_cf_grid_mapping}")
print(f"Has Zarr CRS (proj:*): {has_zarr_crs}")

# Default reading (CF first, Zarr fallback)
crs_default = da_both.rio.crs
print(f"\nDefault reading (CF first): {crs_default}")

# Read using CF convention exclusively
with rioxarray.set_options(convention=Convention.CF):
    crs_cf = da_both.rio.crs
    print(f"CF convention only: {crs_cf}")

# Read using Zarr convention exclusively
with rioxarray.set_options(convention=Convention.Zarr):
    crs_zarr = da_both.rio.crs
    print(f"Zarr convention only: {crs_zarr}")

## Performance Comparison

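Zarr conventions can be faster for reading metadata because they use direct attribute access instead of coordinate variable lookups. The cell below times both approaches; the absolute numbers depend on your machine and storage, so treat them as indicative only.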

In [None]:
import time
import tempfile
import shutil
from pathlib import Path


def _dir_size_mb(store_path):
    """Return the total size in MiB of all regular files under ``store_path``."""
    return sum(f.stat().st_size for f in store_path.rglob('*') if f.is_file()) / 1024**2


def _speedup(baseline_seconds, candidate_seconds):
    """Return ``baseline / candidate``, or NaN when the candidate time is zero."""
    if candidate_seconds > 0:
        return baseline_seconds / candidate_seconds
    return float("nan")


def _time_open_and_access(store_path, convention, decode_coords, n_open=20, n_access=100):
    """Time opening a Zarr store and reading CRS/transform from its first variable.

    Parameters
    ----------
    store_path : path to the Zarr store on disk.
    convention : value passed to ``rioxarray.set_options(convention=...)``.
    decode_coords : forwarded to ``xr.open_dataset``.
    n_open : number of open/close cycles to time.
    n_access : number of CRS + transform reads to time.

    Returns
    -------
    (open_seconds, access_seconds) : tuple of float
    """
    with rioxarray.set_options(convention=convention):
        # Time opening from disk. The context manager closes every dataset,
        # even if selecting the variable raises.
        start = time.perf_counter()
        for _ in range(n_open):
            with xr.open_dataset(store_path, engine="zarr", decode_coords=decode_coords) as ds:
                ds[next(iter(ds.data_vars))]
        open_seconds = time.perf_counter() - start

        # Time metadata access on a single, freshly opened dataset.
        # NOTE(review): rioxarray may cache the CRS on the accessor after the
        # first read, so repeated reads may mostly measure the cached path —
        # confirm this is what the benchmark intends to show.
        with xr.open_dataset(store_path, engine="zarr", decode_coords=decode_coords) as ds:
            array = ds[next(iter(ds.data_vars))]
            start = time.perf_counter()
            for _ in range(n_access):
                array.rio.crs
                array.rio.transform()
            access_seconds = time.perf_counter() - start

    return open_seconds, access_seconds


# Create a temporary directory for test files
temp_dir = Path(tempfile.mkdtemp())

try:
    # Create larger test data on a global 2000 x 2000 grid.
    n_rows, n_cols = 2000, 2000

    # Pixel size must match the grid: 360 degrees over the columns and 180
    # degrees over the rows. A dedicated name is used so the notebook-wide
    # `transform` defined for the small sample array is not overwritten.
    large_x_res, large_y_res = 360.0 / n_cols, 180.0 / n_rows
    large_transform = Affine(large_x_res, 0.0, -180.0, 0.0, -large_y_res, 90.0)

    # Pixel-centre coordinates derived from the same numbers as the transform
    # (y runs north to south), with a seeded generator for reproducibility.
    large_data = xr.DataArray(
        np.random.default_rng(0).random((n_rows, n_cols)),
        dims=["y", "x"],
        coords={
            "x": -180.0 + large_x_res * (np.arange(n_cols) + 0.5),
            "y": 90.0 - large_y_res * (np.arange(n_rows) + 0.5),
        },
    )

    # Create CF data and write to disk
    cf_data = large_data.rio.write_crs("EPSG:4326", convention=Convention.CF)
    cf_data = cf_data.rio.write_transform(large_transform, convention=Convention.CF)
    cf_path = temp_dir / "cf_data.zarr"
    cf_data.to_zarr(cf_path)

    # Create Zarr data and write to disk
    zarr_data = large_data.rio.write_crs("EPSG:4326", convention=Convention.Zarr)
    zarr_data = zarr_data.rio.write_transform(large_transform, convention=Convention.Zarr)
    zarr_path = temp_dir / "zarr_data.zarr"
    zarr_data.to_zarr(zarr_path)

    print("Dataset info:")
    print(f"Data shape: {large_data.shape}")
    print(f"CF file size: {_dir_size_mb(cf_path):.1f} MB")
    print(f"Zarr file size: {_dir_size_mb(zarr_path):.1f} MB")

    # Time opening and metadata access for each convention. The CF store is
    # opened with decode_coords="all" and the Zarr store with
    # decode_coords=True, as in the original benchmark.
    cf_open_time, cf_access_time = _time_open_and_access(
        cf_path, Convention.CF, decode_coords="all"
    )
    zarr_open_time, zarr_access_time = _time_open_and_access(
        zarr_path, Convention.Zarr, decode_coords=True
    )

    cf_total_time = cf_open_time + cf_access_time
    zarr_total_time = zarr_open_time + zarr_access_time

    print("\nPerformance Comparison:")
    print("Dataset Opening (20 iterations each):")
    print(f"  CF convention: {cf_open_time:.4f} seconds")
    print(f"  Zarr convention: {zarr_open_time:.4f} seconds")
    print(f"  Opening speedup: {_speedup(cf_open_time, zarr_open_time):.2f}x")

    print("\nMetadata Access (100 iterations each):")
    print(f"  CF convention: {cf_access_time:.4f} seconds")
    print(f"  Zarr convention: {zarr_access_time:.4f} seconds")
    print(f"  Access speedup: {_speedup(cf_access_time, zarr_access_time):.2f}x")

    print("\nTotal Time (opening + access):")
    print(f"  CF total: {cf_total_time:.4f} seconds")
    print(f"  Zarr total: {zarr_total_time:.4f} seconds")
    print(f"  Overall speedup: {_speedup(cf_total_time, zarr_total_time):.2f}x")

finally:
    # Clean up temporary files
    shutil.rmtree(temp_dir, ignore_errors=True)