In [None]:
# Script used to compute multiscale pyramids from the comparison files (AE) and save them as Zarr files

In [None]:
from ndpyramid import pyramid_reproject
import xarray as xr
import dask
import numpy as np

In [None]:
# Repeat the cells below for each input file, updating the hard-coded input (.nc) and output (.zarr) paths each time

In [None]:
# Open the AE comparison file for this date
dsn = xr.open_dataset("Comparison Metrics/20240701/20240701_AE_marsfc.nc")

# Variables that are combined into one DataArray along a new "band" dimension
var_names = ["u10", "v10", "t2m", "msl", "q"] # tp - put tp back in

# Concatenate the per-variable arrays along "band" and label each band
# with the name of the variable it came from
per_variable = [dsn[name] for name in var_names]
stacked = xr.concat(per_variable, dim="band").assign_coords(band=("band", var_names))
stacked.name = "climate"

# Start from the dataset without the individual variables (they now live in
# `stacked`) and add the stacked array back with a fixed dimension order.
# NOTE(review): any data variable not listed in var_names (e.g. tp, if the
# file contains it) stays in new_dsn as a separate variable - confirm intended.
new_dsn = dsn.drop_vars(var_names)
new_dsn["climate"] = stacked.transpose("band", "time", "latitude", "longitude")

In [None]:
# Keep only the bands of interest, e.g. only 'q' and 't2m'
bands_to_keep = ['q', 't2m']
new_dsn = new_dsn.sel(band=bands_to_keep)

# Store the band labels as fixed-width unicode ('<U4').
# NOTE: 'U4' silently truncates labels longer than four characters, so widen
# it if a longer variable name is ever added to bands_to_keep.
fixed_band = np.array(new_dsn.band.values, dtype='U4')
new_dsn = new_dsn.assign_coords(band=fixed_band)

# Replace the time values with simple float indices (0.0, 1.0, ...)
new_dsn = new_dsn.assign_coords(time=np.arange(len(new_dsn.time), dtype="float64"))

# Drop unwanted coordinate variables. errors="ignore" skips any name that is
# not present in this particular file, so no manual existence check is needed.
vars_to_drop = ['number', 'expver', 'step', 'meanSea', 'surface']
new_dsn = new_dsn.drop_vars(vars_to_drop, errors="ignore")

# Rename the spatial dimensions to y / x
new_dsn = new_dsn.rename({"latitude": "y", "longitude": "x"})

# Write "EPSG:4326" as the CRS if the dataset does not carry one yet.
# NOTE(review): the .rio accessor comes from rioxarray, which this notebook
# never imports explicitly - confirm it is registered (e.g. via ndpyramid)
# before this cell runs on a fresh kernel.
if not new_dsn.rio.crs:
    new_dsn = new_dsn.rio.write_crs("EPSG:4326")

In [None]:
# Create pyramids

In [None]:
# Number of pyramid levels; passed as `levels=` to pyramid_reproject below
LEVELS: int = 6
# NOTE(review): VERSION is not referenced in the visible cells - confirm it is still needed
VERSION: int = 2

In [None]:
%%time
dt = pyramid_reproject(
    new_dsn, levels=LEVELS, extra_dim="band", other_chunks={"band": 2, "time": 41}, clear_attrs=True
)

In [None]:
# Save the pyramid as a Zarr store with consolidated metadata;
# mode="w" overwrites any store already at this path
dt.to_zarr(
    "Pyramids/20240701/20240701_AE_marsfc.zarr",
    consolidated=True,
    mode="w",
)