In [1]:
import s3fs
from glob import glob
import xarray as xr

## Specify the S3 bucket where the NPS assets are

In [2]:
bucket = 's3://npwbanalres'

## Create a connection to the S3 bucket and list all of the assets 

In [3]:
s3 = s3fs.S3FileSystem(anon=False)

In [4]:
s3.glob(f'{bucket}/*.nc4')

['npwbanalres/v_1_5_1980_gridmet_historical_accumswe.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_accumswe_monthly.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_aet.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_aet_monthly.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_deficit.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_deficit_monthly.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_pet.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_pet_monthly.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_rain.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_rain_monthly.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_runoff.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_runoff_monthly.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_soilwater.nc4',
 'npwbanalres/v_1_5_1980_gridmet_historical_soilwater_monthly.nc4',
 'npwbanalres/v_1_5_1981_gridmet_historical_accumswe.nc4',
 'npwbanalres/v_1_5_1981_gridmet_historical_accumswe_monthly.nc4',
 'npwbanalres/v_1_5_1981

In [28]:
years = list(set(int(v.split('_')[3].split('.')[0]) for v in s3.glob(f'{bucket}/*.nc4') if 'monthly' not in v))
print(f'Number of Years: {len(years)}')
print(f'Year Range: {min(years)} to {max(years)}')

Number of Years: 42
Year Range: 1980 to 2021


## List the variables for both daily and monthly assets

In [6]:
wb_variables = list(set(v.split('_')[-1].split('.')[0] for v in s3.glob(f'{bucket}/*.nc4') if 'monthly' not in v))
wb_variables

['soilwater', 'deficit', 'aet', 'accumswe', 'runoff', 'rain', 'pet']

In [7]:
wb_m_variables = list(set(f"{v.split('_')[-2]}_{v.split('_')[-1].split('.')[0]}" for v in s3.glob(f'{bucket}/*.nc4') if 'monthly' in v))
wb_m_variables

['aet_monthly',
 'rain_monthly',
 'accumswe_monthly',
 'runoff_monthly',
 'soilwater_monthly',
 'pet_monthly',
 'deficit_monthly']

## Connect to an NPS asset in S3

In [8]:
var = wb_m_variables[3]
var

'runoff_monthly'

In [9]:
urls = s3.glob(f'{bucket}/*{var}.nc4')
urls

### Read in a single asset

In [20]:
url = urls[0]
url

'npwbanalres/v_1_5_1980_gridmet_historical_runoff_monthly.nc4'

In [21]:
s3_file_obj = s3.open(url, mode='rb')

In [22]:
xr_ds = xr.open_dataset(s3_file_obj, chunks='auto', engine='h5netcdf')

In [23]:
xr_ds

Unnamed: 0,Array,Chunk
Bytes,59.17 MiB,59.17 MiB
Shape,"(3300, 4700)","(3300, 4700)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 59.17 MiB 59.17 MiB Shape (3300, 4700) (3300, 4700) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",4700  3300,

Unnamed: 0,Array,Chunk
Bytes,59.17 MiB,59.17 MiB
Shape,"(3300, 4700)","(3300, 4700)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,59.17 MiB,59.17 MiB
Shape,"(3300, 4700)","(3300, 4700)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 59.17 MiB 59.17 MiB Shape (3300, 4700) (3300, 4700) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",4700  3300,

Unnamed: 0,Array,Chunk
Bytes,59.17 MiB,59.17 MiB
Shape,"(3300, 4700)","(3300, 4700)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,192 B,192 B
Shape,"(12, 2)","(12, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 192 B 192 B Shape (12, 2) (12, 2) Dask graph 1 chunks in 2 graph layers Data type datetime64[ns] numpy.ndarray",2  12,

Unnamed: 0,Array,Chunk
Bytes,192 B,192 B
Shape,"(12, 2)","(12, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,24 B,24 B
Shape,"(12,)","(12,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int16 numpy.ndarray,int16 numpy.ndarray
"Array Chunk Bytes 24 B 24 B Shape (12,) (12,) Dask graph 1 chunks in 2 graph layers Data type int16 numpy.ndarray",12  1,

Unnamed: 0,Array,Chunk
Bytes,24 B,24 B
Shape,"(12,)","(12,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int16 numpy.ndarray,int16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,709.99 MiB,127.89 MiB
Shape,"(12, 3300, 4700)","(6, 1980, 2822)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 709.99 MiB 127.89 MiB Shape (12, 3300, 4700) (6, 1980, 2822) Dask graph 8 chunks in 2 graph layers Data type float32 numpy.ndarray",4700  3300  12,

Unnamed: 0,Array,Chunk
Bytes,709.99 MiB,127.89 MiB
Shape,"(12, 3300, 4700)","(6, 1980, 2822)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


### Read in multiple assets

In [18]:
ds_all = []
for url in urls:
    #s3_file_obj = s3.open(url, mode='rb')
    ds_all.append(xr.open_dataset(s3.open(url, mode='rb'), chunks='auto', engine='h5netcdf'))

In [19]:
ds_ts = xr.concat(ds_all, dim='time')
ds_ts

Unnamed: 0,Array,Chunk
Bytes,7.88 kiB,192 B
Shape,"(504, 2)","(12, 2)"
Dask graph,42 chunks in 85 graph layers,42 chunks in 85 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 7.88 kiB 192 B Shape (504, 2) (12, 2) Dask graph 42 chunks in 85 graph layers Data type datetime64[ns] numpy.ndarray",2  504,

Unnamed: 0,Array,Chunk
Bytes,7.88 kiB,192 B
Shape,"(504, 2)","(12, 2)"
Dask graph,42 chunks in 85 graph layers,42 chunks in 85 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.98 kiB,24 B
Shape,"(504,)","(12,)"
Dask graph,42 chunks in 85 graph layers,42 chunks in 85 graph layers
Data type,int16 numpy.ndarray,int16 numpy.ndarray
"Array Chunk Bytes 0.98 kiB 24 B Shape (504,) (12,) Dask graph 42 chunks in 85 graph layers Data type int16 numpy.ndarray",504  1,

Unnamed: 0,Array,Chunk
Bytes,0.98 kiB,24 B
Shape,"(504,)","(12,)"
Dask graph,42 chunks in 85 graph layers,42 chunks in 85 graph layers
Data type,int16 numpy.ndarray,int16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,29.12 GiB,127.89 MiB
Shape,"(504, 3300, 4700)","(6, 1980, 2822)"
Dask graph,336 chunks in 85 graph layers,336 chunks in 85 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 29.12 GiB 127.89 MiB Shape (504, 3300, 4700) (6, 1980, 2822) Dask graph 336 chunks in 85 graph layers Data type float32 numpy.ndarray",4700  3300  504,

Unnamed: 0,Array,Chunk
Bytes,29.12 GiB,127.89 MiB
Shape,"(504, 3300, 4700)","(6, 1980, 2822)"
Dask graph,336 chunks in 85 graph layers,336 chunks in 85 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


---

In [None]:
for v in wb_m_variables:
    print(v)
    urls = s3.glob(f'{bucket}/*{v}.nc4')
    #print(urls)
    ly_noly = [0,1,2,3,4,5,6,7,8,9,10]
    for y in ly_noly:
        #print(y)
        url = urls[y]
        print(url)
        try:
            s3_file_obj = s3.open(url, mode='rb')
            xr_ds = xr.open_dataset(s3_file_obj, chunks='auto', engine='h5netcdf')
            print(xr_ds.dims)
            print(xr_ds[v].attrs)
        except:
            print(f'FAILED: {url}')
    