In [9]:
import coiled

# Options handed to the Coiled backend; only the region is pinned here.
backend_options = {"region_name": "us-east-1"}

# Request two workers up front, then let adaptive scaling size the cluster
# between 10 and 200 workers.
cluster = coiled.Cluster(backend_options=backend_options, n_workers=2)
cluster.adapt(maximum=200, minimum=10)

# Dask client bound to this cluster; later cells display it.
client = cluster.get_client()

Output()

Output()

2023-08-18 14:55:25,648 - distributed.deploy.adaptive - INFO - Adaptive scaling started: minimum=10 maximum=200


In [10]:
# Display the client summary (dashboard link, worker/thread counts, memory).
client

0,1
Connection method: Cluster object,Cluster type: coiled.Cluster
Dashboard: https://cluster-urcde.dask.host/kojzlHq1OfviOQsz/status,

0,1
Dashboard: https://cluster-urcde.dask.host/kojzlHq1OfviOQsz/status,Workers: 2
Total threads: 8,Total memory: 29.70 GiB

0,1
Comm: tls://10.0.87.243:8786,Workers: 2
Dashboard: http://10.0.87.243:8787/status,Total threads: 8
Started: Just now,Total memory: 29.70 GiB

0,1
Comm: tls://10.0.85.105:34805,Total threads: 4
Dashboard: http://10.0.85.105:8787/status,Memory: 14.86 GiB
Nanny: tls://10.0.85.105:46383,
Local directory: /scratch/dask-scratch-space/worker-q0yx7fgr,Local directory: /scratch/dask-scratch-space/worker-q0yx7fgr

0,1
Comm: tls://10.0.85.186:43161,Total threads: 4
Dashboard: http://10.0.85.186:8787/status,Memory: 14.84 GiB
Nanny: tls://10.0.85.186:39373,
Local directory: /scratch/dask-scratch-space/worker-pqioexda,Local directory: /scratch/dask-scratch-space/worker-pqioexda


## Setup

In [1]:
%load_ext watermark

# dask, distributed, numpy and zarr are not referenced by name in the cells
# shown here; importing them lets `%watermark -iv` below report their versions.
import dask
import distributed
import flox  # not referenced directly; imported to make sure it's available
import fsspec
import numpy as np
import xarray as xr
import zarr

# Display preferences for xarray reprs in this notebook: attrs and coords
# sections are not expanded, data sections are.
xr.set_options(
    display_expand_attrs=False,
    display_expand_coords=False,
    display_expand_data=True,
)

# Record the versions of the imported packages alongside the results.
%watermark -iv

zarr       : 2.15.0
fsspec     : 2023.6.0
flox       : 0.7.2
numpy      : 1.25.2
xarray     : 2023.6.0
distributed: 2023.8.0
dask       : 2023.8.0



## Load NWM data

In [6]:
# Key-value view of the chrtout Zarr store on S3, opened with anon=True.
chrtout_store = fsspec.get_mapper(
    "s3://noaa-nwm-retrospective-2-1-zarr-pds/chrtout.zarr", anon=True
)

# Open the store with the zarr engine using its consolidated metadata.
# chunks={} gives dask-backed variables (see the repr this cell displays).
ds = xr.open_dataset(
    chrtout_store,
    engine="zarr",
    chunks={},
    consolidated=True,
    inline_array=True,
)
ds

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Dask graph 1 chunks in 1 graph layer Data type float32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,39.72 MiB,39.72 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,|S15 numpy.ndarray,|S15 numpy.ndarray
"Array Chunk Bytes 39.72 MiB 39.72 MiB Shape (2776738,) (2776738,) Dask graph 1 chunks in 1 graph layer Data type |S15 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,39.72 MiB,39.72 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,|S15 numpy.ndarray,|S15 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Dask graph 1 chunks in 1 graph layer Data type float32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Dask graph 1 chunks in 1 graph layer Data type float32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Dask graph 1 chunks in 1 graph layer Data type int32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.42 TiB,153.81 MiB
Shape,"(367439, 2776738)","(672, 30000)"
Dask graph,50871 chunks in 1 graph layer,50871 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 7.42 TiB 153.81 MiB Shape (367439, 2776738) (672, 30000) Dask graph 50871 chunks in 1 graph layer Data type float64 numpy.ndarray",2776738  367439,

Unnamed: 0,Array,Chunk
Bytes,7.42 TiB,153.81 MiB
Shape,"(367439, 2776738)","(672, 30000)"
Dask graph,50871 chunks in 1 graph layer,50871 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.42 TiB,153.81 MiB
Shape,"(367439, 2776738)","(672, 30000)"
Dask graph,50871 chunks in 1 graph layer,50871 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 7.42 TiB 153.81 MiB Shape (367439, 2776738) (672, 30000) Dask graph 50871 chunks in 1 graph layer Data type float64 numpy.ndarray",2776738  367439,

Unnamed: 0,Array,Chunk
Bytes,7.42 TiB,153.81 MiB
Shape,"(367439, 2776738)","(672, 30000)"
Dask graph,50871 chunks in 1 graph layer,50871 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [7]:
# Bin the time axis into annual periods anchored at the end of September
# ("A-SEP"), labelling each bin by its right edge. The aggregations are
# applied to this object in a later cell.
resampler = ds.resample(
    time="A-SEP",
    skipna=True,
    label="right",
)

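## Rechunk to begin new chunks at group boundaries

**TODO:** no rechunking code is present in this section, so the resample below runs on the chunks as loaded. Add the rechunk step here or remove this heading.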

## Resample

In [8]:
%%time

# Get the maximum value for each data variable in the dataset
ds_WY_max = resampler.max()
ds_WY_max = ds_WY_max.rename(
    {"streamflow": "streamflow_max", "velocity": "velocity_max"}
)

# Get the minimum value for each data variable in the dataset
ds_WY_min = resampler.min()
ds_WY_min = ds_WY_min.rename(
    {"streamflow": "streamflow_min", "velocity": "velocity_min"}
)

ds_out = xr.merge([ds_WY_max, ds_WY_min], compat="override")
ds_out

CPU times: user 4.38 s, sys: 205 ms, total: 4.58 s
Wall time: 5.86 s


Unnamed: 0,Array,Chunk
Bytes,39.72 MiB,39.72 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,|S15 numpy.ndarray,|S15 numpy.ndarray
"Array Chunk Bytes 39.72 MiB 39.72 MiB Shape (2776738,) (2776738,) Dask graph 1 chunks in 1 graph layer Data type |S15 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,39.72 MiB,39.72 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,|S15 numpy.ndarray,|S15 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Dask graph 1 chunks in 1 graph layer Data type float32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Dask graph 1 chunks in 1 graph layer Data type float32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Dask graph 1 chunks in 1 graph layer Data type int32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.59 MiB 10.59 MiB Shape (2776738,) (2776738,) Dask graph 1 chunks in 1 graph layer Data type float32 numpy.ndarray",2776738  1,

Unnamed: 0,Array,Chunk
Bytes,10.59 MiB,10.59 MiB
Shape,"(2776738,)","(2776738,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,910.95 MiB,234.38 kiB
Shape,"(43, 2776738)","(1, 30000)"
Dask graph,3999 chunks in 178 graph layers,3999 chunks in 178 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 910.95 MiB 234.38 kiB Shape (43, 2776738) (1, 30000) Dask graph 3999 chunks in 178 graph layers Data type float64 numpy.ndarray",2776738  43,

Unnamed: 0,Array,Chunk
Bytes,910.95 MiB,234.38 kiB
Shape,"(43, 2776738)","(1, 30000)"
Dask graph,3999 chunks in 178 graph layers,3999 chunks in 178 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,910.95 MiB,234.38 kiB
Shape,"(43, 2776738)","(1, 30000)"
Dask graph,3999 chunks in 178 graph layers,3999 chunks in 178 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 910.95 MiB 234.38 kiB Shape (43, 2776738) (1, 30000) Dask graph 3999 chunks in 178 graph layers Data type float64 numpy.ndarray",2776738  43,

Unnamed: 0,Array,Chunk
Bytes,910.95 MiB,234.38 kiB
Shape,"(43, 2776738)","(1, 30000)"
Dask graph,3999 chunks in 178 graph layers,3999 chunks in 178 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,910.95 MiB,234.38 kiB
Shape,"(43, 2776738)","(1, 30000)"
Dask graph,3999 chunks in 178 graph layers,3999 chunks in 178 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 910.95 MiB 234.38 kiB Shape (43, 2776738) (1, 30000) Dask graph 3999 chunks in 178 graph layers Data type float64 numpy.ndarray",2776738  43,

Unnamed: 0,Array,Chunk
Bytes,910.95 MiB,234.38 kiB
Shape,"(43, 2776738)","(1, 30000)"
Dask graph,3999 chunks in 178 graph layers,3999 chunks in 178 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,910.95 MiB,234.38 kiB
Shape,"(43, 2776738)","(1, 30000)"
Dask graph,3999 chunks in 178 graph layers,3999 chunks in 178 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 910.95 MiB 234.38 kiB Shape (43, 2776738) (1, 30000) Dask graph 3999 chunks in 178 graph layers Data type float64 numpy.ndarray",2776738  43,

Unnamed: 0,Array,Chunk
Bytes,910.95 MiB,234.38 kiB
Shape,"(43, 2776738)","(1, 30000)"
Dask graph,3999 chunks in 178 graph layers,3999 chunks in 178 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [None]:
# Destination Zarr store for the per-year max/min dataset.
output_store = "s3://cherian-scratch/nwm-streamflow-water-year.zarr"

# mode="w" creates the store, overwriting one that already exists
# (per the xarray `to_zarr` documentation).
ds_out.to_zarr(output_store, mode="w")