# Prepare versions of NWM Reanalysis dataset in Zarr format

* Save a subset of NWM reanalysis covering a single HUC2 for testing purposes. The size is supposed to be not so small that it is trivial, but not so big that things take a really long time to run. 
* Rechunk this subset so we have versions with rectangular chunks, row chunks, and column chunks.|

## Setup

In [3]:
import json
from os.path import basename, join
import os 
import shutil
from os import makedirs

from dask.distributed import Client
from dask_gateway import Gateway
import xarray as xr
import fsspec
from rechunker import rechunk
import s3fs
import fsspec

%matplotlib inline

def get_json(uri):
    with fsspec.open(uri) as fd:
        return json.load(fd)

In [4]:
# Connect to existing cluster using cluster.name

# This constant needs to be set!
cluster_name = ''
gateway = Gateway()
cluster = gateway.connect(cluster_name)
client = cluster.get_client()

KeyError: 'start_time'

In [40]:
# Set various URIs.

# The CHRTOUT data from the NWM Retrospective Zarr 2.1 dataset
# This has "Streamflow values at points associated with flow lines" 
# See https://registry.opendata.aws/nwm-archive/
nwm_uri = 's3://noaa-nwm-retrospective-2-1-zarr-pds/chrtout.zarr'

# URI of JSON file with COMIDS to use for creating subset of NWM. 
# This was produced by save_huc2_comids.ipynb.
comid_uri = 's3://azavea-noaa-hydro-data/noaa/huc2-comids.json'

# This root URI should be updated for each run of this notebook.
out_root_uri = 's3://azavea-noaa-hydro-data/esip-experiments/datasets/reanalysis-chrtout/zarr/lf/07-08-2022a'

In [None]:
nwm_subset_uri = join(out_root_uri, 'nwm-subset.zarr')
rechunk_tmp_uri = join(out_root_uri, 'rechunk-tmp.zarr')
trans_chunk_uri = join(out_root_uri, 'trans-chunk.zarr')

## Save subet of NWM

In [41]:
ds = xr.open_zarr(fsspec.get_mapper(nwm_uri, anon=True, requester_pays=True))

comids = get_json(comid_uri)['comids']

avail_comids = list(set(ds.feature_id.values).intersection(set(comids)))
# Need the following line to sort the comids or will get the following warning.
# PerformanceWarning: Slicing with an out-of-order index is generating 736 times more chunks
avail_comids.sort()
print(
    f'There are {len(comids)} reaches in the HUC and {len(avail_comids)} of those are in NWM.')

There are 137288 reaches in the HUC and 122256 of those are in NWM.


In [42]:
# time_range = slice('01-01-1990', '01-01-2000')
time_range = slice('01-01-1990', '01-01-2000')

In [43]:
ds.streamflow

Unnamed: 0,Array,Chunk
Bytes,7.42 TiB,153.81 MiB
Shape,"(367439, 2776738)","(672, 30000)"
Count,50872 Tasks,50871 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 7.42 TiB 153.81 MiB Shape (367439, 2776738) (672, 30000) Count 50872 Tasks 50871 Chunks Type float64 numpy.ndarray",2776738  367439,

Unnamed: 0,Array,Chunk
Bytes,7.42 TiB,153.81 MiB
Shape,"(367439, 2776738)","(672, 30000)"
Count,50872 Tasks,50871 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,39.72 MiB,39.72 MiB
Shape,"(2776738,)","(2776738,)"
Count,2 Tasks,1 Chunks
Type,|S15,numpy.ndarray
"Array Chunk Bytes 39.72 MiB 39.72 MiB Shape (2776738,) (2776738,) Count 2 Tasks 1 Chunks Type |S15 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,39.72 MiB,39.72 MiB
Shape,"(2776738,)","(2776738,)"
Count,2 Tasks,1 Chunks
Type,|S15,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Count,2 Tasks,1 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Count 2 Tasks 1 Chunks Type int32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Count,2 Tasks,1 Chunks
Type,int32,numpy.ndarray


In [44]:
sub_ds = ds.sel(time=time_range, feature_id=avail_comids)

# Fails without this block. See https://github.com/pydata/xarray/issues/5219 
def remove_chunks_encoding(ds):
    for var in list(ds.keys()) + list(ds.coords):
        if 'chunks' in ds[var].encoding:
            del ds[var].encoding['chunks']
    return ds

sub_ds = remove_chunks_encoding(sub_ds)
sub_ds

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 477.56 kiB Shape (122256,) (122256,) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.75 MiB,1.75 MiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,|S15,numpy.ndarray
"Array Chunk Bytes 1.75 MiB 1.75 MiB Shape (122256,) (122256,) Count 3 Tasks 1 Chunks Type |S15 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,1.75 MiB,1.75 MiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,|S15,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 477.56 kiB Shape (122256,) (122256,) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 477.56 kiB Shape (122256,) (122256,) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 477.56 kiB Shape (122256,) (122256,) Count 3 Tasks 1 Chunks Type int32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,int32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,79.86 GiB,123.78 MiB
Shape,"(87672, 122256)","(672, 24143)"
Count,66592 Tasks,3537 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 79.86 GiB 123.78 MiB Shape (87672, 122256) (672, 24143) Count 66592 Tasks 3537 Chunks Type float64 numpy.ndarray",122256  87672,

Unnamed: 0,Array,Chunk
Bytes,79.86 GiB,123.78 MiB
Shape,"(87672, 122256)","(672, 24143)"
Count,66592 Tasks,3537 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,79.86 GiB,123.78 MiB
Shape,"(87672, 122256)","(672, 24143)"
Count,66592 Tasks,3537 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 79.86 GiB 123.78 MiB Shape (87672, 122256) (672, 24143) Count 66592 Tasks 3537 Chunks Type float64 numpy.ndarray",122256  87672,

Unnamed: 0,Array,Chunk
Bytes,79.86 GiB,123.78 MiB
Shape,"(87672, 122256)","(672, 24143)"
Count,66592 Tasks,3537 Chunks
Type,float64,numpy.ndarray


In [45]:
sub_ds.streamflow

Unnamed: 0,Array,Chunk
Bytes,79.86 GiB,123.78 MiB
Shape,"(87672, 122256)","(672, 24143)"
Count,66592 Tasks,3537 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 79.86 GiB 123.78 MiB Shape (87672, 122256) (672, 24143) Count 66592 Tasks 3537 Chunks Type float64 numpy.ndarray",122256  87672,

Unnamed: 0,Array,Chunk
Bytes,79.86 GiB,123.78 MiB
Shape,"(87672, 122256)","(672, 24143)"
Count,66592 Tasks,3537 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 477.56 kiB Shape (122256,) (122256,) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.75 MiB,1.75 MiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,|S15,numpy.ndarray
"Array Chunk Bytes 1.75 MiB 1.75 MiB Shape (122256,) (122256,) Count 3 Tasks 1 Chunks Type |S15 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,1.75 MiB,1.75 MiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,|S15,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 477.56 kiB Shape (122256,) (122256,) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 477.56 kiB Shape (122256,) (122256,) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 477.56 kiB Shape (122256,) (122256,) Count 3 Tasks 1 Chunks Type int32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,477.56 kiB
Shape,"(122256,)","(122256,)"
Count,3 Tasks,1 Chunks
Type,int32,numpy.ndarray


In [47]:
# Fails with the following error if chunk(<int>) isn't called.
# ValueError: Zarr requires uniform chunk sizes except for final chunk. Variable named 'streamflow' has incompatible dask chunks: ((48,), (445, 5348, 802, 2192, 3915, 10509, 12946, 3414, 7504, 87, 13247, 6817, 24143, 2, 11164, 1156, 3632, 499, 1518, 1666, 1633, 8, 3, 4965, 3147, 723, 771)). Consider rechunking using `chunk()`.
sub_ds = remove_chunks_encoding(sub_ds.chunk({'time': 672, 'feature_id': 30000}))
# sub_ds = remove_chunks_encoding(sub_ds.chunk(1000))

In [50]:
sub_ds.streamflow

Unnamed: 0,Array,Chunk
Bytes,79.86 GiB,153.81 MiB
Shape,"(87672, 122256)","(672, 30000)"
Count,75315 Tasks,655 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 79.86 GiB 153.81 MiB Shape (87672, 122256) (672, 30000) Count 75315 Tasks 655 Chunks Type float64 numpy.ndarray",122256  87672,

Unnamed: 0,Array,Chunk
Bytes,79.86 GiB,153.81 MiB
Shape,"(87672, 122256)","(672, 30000)"
Count,75315 Tasks,655 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,13 Tasks,5 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 117.19 kiB Shape (122256,) (30000,) Count 13 Tasks 5 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,13 Tasks,5 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.75 MiB,439.45 kiB
Shape,"(122256,)","(30000,)"
Count,13 Tasks,5 Chunks
Type,|S15,numpy.ndarray
"Array Chunk Bytes 1.75 MiB 439.45 kiB Shape (122256,) (30000,) Count 13 Tasks 5 Chunks Type |S15 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,1.75 MiB,439.45 kiB
Shape,"(122256,)","(30000,)"
Count,13 Tasks,5 Chunks
Type,|S15,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,13 Tasks,5 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 117.19 kiB Shape (122256,) (30000,) Count 13 Tasks 5 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,13 Tasks,5 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,13 Tasks,5 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 117.19 kiB Shape (122256,) (30000,) Count 13 Tasks 5 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,13 Tasks,5 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,13 Tasks,5 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 117.19 kiB Shape (122256,) (30000,) Count 13 Tasks 5 Chunks Type int32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,13 Tasks,5 Chunks
Type,int32,numpy.ndarray


In [51]:
%%time 

sub_ds.to_zarr(nwm_subset_uri)

CPU times: user 9.63 s, sys: 409 ms, total: 10 s
Wall time: 9min 55s


<xarray.backends.zarr.ZarrStore at 0x7fe657e6bac0>

In [52]:
saved_ds = xr.open_zarr(nwm_subset_uri)
saved_ds.streamflow

Unnamed: 0,Array,Chunk
Bytes,79.86 GiB,153.81 MiB
Shape,"(87672, 122256)","(672, 30000)"
Count,656 Tasks,655 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 79.86 GiB 153.81 MiB Shape (87672, 122256) (672, 30000) Count 656 Tasks 655 Chunks Type float64 numpy.ndarray",122256  87672,

Unnamed: 0,Array,Chunk
Bytes,79.86 GiB,153.81 MiB
Shape,"(87672, 122256)","(672, 30000)"
Count,656 Tasks,655 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,6 Tasks,5 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 117.19 kiB Shape (122256,) (30000,) Count 6 Tasks 5 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,6 Tasks,5 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.75 MiB,439.45 kiB
Shape,"(122256,)","(30000,)"
Count,6 Tasks,5 Chunks
Type,|S15,numpy.ndarray
"Array Chunk Bytes 1.75 MiB 439.45 kiB Shape (122256,) (30000,) Count 6 Tasks 5 Chunks Type |S15 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,1.75 MiB,439.45 kiB
Shape,"(122256,)","(30000,)"
Count,6 Tasks,5 Chunks
Type,|S15,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,6 Tasks,5 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 117.19 kiB Shape (122256,) (30000,) Count 6 Tasks 5 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,6 Tasks,5 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,6 Tasks,5 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 117.19 kiB Shape (122256,) (30000,) Count 6 Tasks 5 Chunks Type float32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,6 Tasks,5 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,6 Tasks,5 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 477.56 kiB 117.19 kiB Shape (122256,) (30000,) Count 6 Tasks 5 Chunks Type int32 numpy.ndarray",122256  1,

Unnamed: 0,Array,Chunk
Bytes,477.56 kiB,117.19 kiB
Shape,"(122256,)","(30000,)"
Count,6 Tasks,5 Chunks
Type,int32,numpy.ndarray


In [14]:
sub_ds.chunks

Frozen({'feature_id': (1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 256), 'time': (48,)})

## Save rechunked versions of subset of NWM.

In [54]:
def _rechunk(target_chunks, output_uri):
    max_mem = '2GB'
    rm(rechunk_tmp_uri)
    ds = xr.open_zarr(nwm_subset_uri)

    # Note, if you get a ContainsArrayError, you probably need to delete temp_store and target_store first.
    # See https://github.com/pangeo-data/rechunker/issues/78

    targ_store = fsspec.get_mapper(output_uri)
    temp_store = fsspec.get_mapper(rechunk_tmp_uri)
    rechunk_plan = rechunk(ds, target_chunks, max_mem, targ_store, temp_store=temp_store)
    rechunk_plan.execute()

In [55]:
%%time

# Each chunk has dimension that is the transpose of the original chunk size.
trans_chunk_uri = join(out_root_uri, 'trans-chunk.zarr')
target_chunks = {'time': 30000, 'feature_id': 672} 
_rechunk(target_chunks, trans_chunk_uri)

CPU times: user 938 ms, sys: 57.7 ms, total: 995 ms
Wall time: 6min 45s


In [56]:
rechunked_ds = xr.open_zarr(square_chunk_uri)
rechunked_ds.chunks

1. Consolidating metadata in this existing store with zarr.consolidate_metadata().
2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or
3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata.
  rechunked_ds = xr.open_zarr(square_chunk_uri)


Frozen({'feature_id': (672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 672, 624), 'time': (30000, 30000, 27672)})