# Inspection of Files


In [1]:
import sys
import fsspec
import xarray as xr
import hvplot.xarray
import zarr

ModuleNotFoundError: No module named 'xarray'

In [7]:
# Record the interpreter and library versions used for this run.
version_report = {
    "Python : ": sys.version,
    "fsspec : ": fsspec.__version__,
    "zarr   : ": zarr.__version__,
}
for label, version in version_report.items():
    print(label, version)

Python :  3.10.9 | packaged by conda-forge | (main, Feb  2 2023, 20:20:04) [GCC 11.3.0]
fsspec :  2023.3.0+9.g5920300
zarr   :  2.13.3


In [2]:
# Create an S3 filesystem object; anon=True requests anonymous access,
# so no AWS credentials are needed to browse this public bucket.
fs = fsspec.filesystem('s3', anon=True)

In [3]:
# List the top-level entries of the noaa-nwm-retrospective-2-1-pds bucket.
bucket_url = 's3://noaa-nwm-retrospective-2-1-pds/'
flist = fs.ls(bucket_url)
flist

['noaa-nwm-retrospective-2-1-pds/forcing',
 'noaa-nwm-retrospective-2-1-pds/index.html',
 'noaa-nwm-retrospective-2-1-pds/model_output']

In [4]:
# Glob everything directly under model_output and show the first and last
# entries to see the range covered.
flist = fs.glob('noaa-nwm-retrospective-2-1-pds/model_output/*')
print(flist[0], flist[-1], sep='\n')

noaa-nwm-retrospective-2-1-pds/model_output/1979
noaa-nwm-retrospective-2-1-pds/model_output/2020


In [5]:
# Find the LDAS files for 1979 and show the first one.
ldas_pattern_1979 = 'noaa-nwm-retrospective-2-1-pds/model_output/1979/*LDAS*'
flist = fs.glob(ldas_pattern_1979)
flist[0]

'noaa-nwm-retrospective-2-1-pds/model_output/1979/197902010300.LDASOUT_DOMAIN1.comp'

In [6]:
# Find the LDAS files for 2020 and show the last one.
ldas_pattern_2020 = 'noaa-nwm-retrospective-2-1-pds/model_output/2020/*LDAS*'
flist = fs.glob(ldas_pattern_2020)
flist[-1]

'noaa-nwm-retrospective-2-1-pds/model_output/2020/202012312100.LDASOUT_DOMAIN1.comp'

Okay, so at this point we've learned that we have 3-hourly output spanning roughly 40 years (February 1979 through December 2020).

In [7]:
# Globbing every year at once
# ('noaa-nwm-retrospective-2-1-pds/model_output/*/*LDAS*') is slow, so the
# total number of files is estimated from the output cadence instead.
APPROX_YEARS = 40           # roughly the span seen in the listings above
DAYS_PER_YEAR = 365         # leap days are ignored for this rough estimate
HOURS_PER_DAY = 24
OUTPUT_INTERVAL_HOURS = 3   # one file every 3 hours

n_files_estimate = APPROX_YEARS * DAYS_PER_YEAR * HOURS_PER_DAY / OUTPUT_INTERVAL_HOURS
n_files_estimate

116800.0

In [8]:
# flist holds whatever the most recent glob cell assigned (the 2020 LDAS
# listing in the recorded run); show its first entry.
flist[0]

'noaa-nwm-retrospective-2-1-pds/model_output/2020/202001010000.LDASOUT_DOMAIN1.comp'

So there are about 117,000 NetCDF files!

Let's check one out. Although it's not super efficient, we can open a NetCDF file on S3 as a virtual file object with `fs.open(s3_url_of_netcdf_file)`. Passing `chunks=` to `xr.open_dataset` tells xarray to use Dask, and `chunks={}` means use the backend's preferred chunking, which is the native chunking in the NetCDF file when the engine exposes it.

In [9]:
# Open the first listed file as a file-like object on S3 and hand it to
# xarray; chunks={} makes the variables Dask-backed.
remote_file = fs.open(flist[0])
ds = xr.open_dataset(remote_file, chunks={})

In [10]:
# Display the dataset's rich repr (variables, shapes, and Dask chunk layout).
ds

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,540.00 MiB,540.00 MiB
Shape,"(1, 3840, 4, 4608)","(1, 3840, 4, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 540.00 MiB 540.00 MiB Shape (1, 3840, 4, 4608) (1, 3840, 4, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",1  1  4608  4  3840,

Unnamed: 0,Array,Chunk
Bytes,540.00 MiB,540.00 MiB
Shape,"(1, 3840, 4, 4608)","(1, 3840, 4, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,540.00 MiB,540.00 MiB
Shape,"(1, 3840, 4, 4608)","(1, 3840, 4, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 540.00 MiB 540.00 MiB Shape (1, 3840, 4, 4608) (1, 3840, 4, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",1  1  4608  4  3840,

Unnamed: 0,Array,Chunk
Bytes,540.00 MiB,540.00 MiB
Shape,"(1, 3840, 4, 4608)","(1, 3840, 4, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,270.00 MiB,270.00 MiB
Shape,"(1, 3840, 2, 4608)","(1, 3840, 2, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 270.00 MiB 270.00 MiB Shape (1, 3840, 2, 4608) (1, 3840, 2, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",1  1  4608  2  3840,

Unnamed: 0,Array,Chunk
Bytes,270.00 MiB,270.00 MiB
Shape,"(1, 3840, 2, 4608)","(1, 3840, 2, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,270.00 MiB,270.00 MiB
Shape,"(1, 3840, 2, 4608)","(1, 3840, 2, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 270.00 MiB 270.00 MiB Shape (1, 3840, 2, 4608) (1, 3840, 2, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",1  1  4608  2  3840,

Unnamed: 0,Array,Chunk
Bytes,270.00 MiB,270.00 MiB
Shape,"(1, 3840, 2, 4608)","(1, 3840, 2, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [11]:
# List the data variables with their dimensions, dtypes, and chunk sizes.
ds.data_vars

Data variables:
    crs       |S1 ...
    COSZ      (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>
    FSA       (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>
    FIRA      (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>
    HFX       (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>
    LH        (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>
    EDIR      (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>
    ALBEDO    (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>
    UGDRNOFF  (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>
    TRAD      (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>
    SOIL_W    (time, y, soil_layers_stag, x) float64 dask.array<chunksize=(1, 3840, 4, 4608), meta=np.ndarray>
    SOIL_M    (time, y, soil_layers_stag,

In [12]:
# Narrow the dataset to three variables of interest.
selected_vars = ['ACCET', 'SNEQV', 'FSNO']
ds = ds[selected_vars]

In [13]:
# Confirm that only the selected variables remain after subsetting.
ds.data_vars

Data variables:
    ACCET    (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>
    SNEQV    (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>
    FSNO     (time, y, x) float64 dask.array<chunksize=(1, 3840, 4608), meta=np.ndarray>

In [14]:
# Dictionary-style access to a single variable.
ds['ACCET']

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [15]:
# Attribute-style access; shows the same variable as ds['ACCET'].
ds.ACCET

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 135.00 MiB 135.00 MiB Shape (1, 3840, 4608) (1, 3840, 4608) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4608  3840  1,

Unnamed: 0,Array,Chunk
Bytes,135.00 MiB,135.00 MiB
Shape,"(1, 3840, 4608)","(1, 3840, 4608)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


Each chunk covers the full spatial domain at a single time step, with a chunk size of about 135 MiB. This is actually great for visualizing maps at specific time steps or for calculations that involve the entire dataset. So kerchunking this data (building reference files that let the existing NetCDF files be read as one virtual Zarr dataset) would be a nice first step.

In [16]:
%%time
da = ds.ACCET.load()

CPU times: user 765 ms, sys: 190 ms, total: 955 ms
Wall time: 3.24 s


In [17]:
# Show the loaded DataArray (now held in memory rather than as a lazy Dask array).
da

In [18]:
# Rasterized map of the loaded field on its x/y grid, with equal data
# scaling on both axes (data_aspect=1).
da.hvplot(
    x='x',
    y='y',
    rasterize=True,
    cmap='turbo',
    data_aspect=1,
)