In [1]:
import fsspec
import rioxarray
import xarray as xr
import hvplot.xarray as hvplot
import s3fs
import eodc_hub_role
s3fs.core.setup_logging("DEBUG")

In [2]:
credentials = eodc_hub_role.fetch_and_set_credentials()
bucket = 'nasa-eodc-data-store'

In [3]:
#parameters
temporal_resolution = "daily"
model = "GISS-E2-1-G"
variable = "tas"
anon=True

In [5]:
# Initiate fsspec filesystems for reading and writing
s3_path = f"s3://nex-gddp-cmip6/NEX-GDDP-CMIP6/{model}/historical/r1i1p1*/{variable}/*"
fs_read = fsspec.filesystem("s3", anon=anon, skip_instance_cache=False)
fs_write = fsspec.filesystem("")

In [6]:
# Retrieve list of available months
files_paths = fs_read.glob(s3_path)
print(f"{len(files_paths)} discovered from {s3_path}")

2023-07-29 15:48:02,272 - s3fs - DEBUG - _lsdir -- Get directory listing page for nex-gddp-cmip6/NEX-GDDP-CMIP6/GISS-E2-1-G/historical
2023-07-29 15:48:02,273 - s3fs - DEBUG - set_session -- Setting up s3fs instance
2023-07-29 15:48:02,280 - s3fs - DEBUG - set_session -- RC: caching enabled? False (explicit option is False)
2023-07-29 15:48:02,405 - s3fs - DEBUG - _lsdir -- Get directory listing page for nex-gddp-cmip6/NEX-GDDP-CMIP6/GISS-E2-1-G/historical/r1i1p1f2
2023-07-29 15:48:02,447 - s3fs - DEBUG - _lsdir -- Get directory listing page for nex-gddp-cmip6/NEX-GDDP-CMIP6/GISS-E2-1-G/historical/r1i1p1f2/hurs
2023-07-29 15:48:02,472 - s3fs - DEBUG - _lsdir -- Get directory listing page for nex-gddp-cmip6/NEX-GDDP-CMIP6/GISS-E2-1-G/historical/r1i1p1f2/huss
2023-07-29 15:48:02,503 - s3fs - DEBUG - _lsdir -- Get directory listing page for nex-gddp-cmip6/NEX-GDDP-CMIP6/GISS-E2-1-G/historical/r1i1p1f2/pr
2023-07-29 15:48:02,526 - s3fs - DEBUG - _lsdir -- Get directory listing page for nex

65 discovered from s3://nex-gddp-cmip6/NEX-GDDP-CMIP6/GISS-E2-1-G/historical/r1i1p1*/tas/*


In [7]:
fs = s3fs.S3FileSystem(
    key=credentials['AccessKeyId'],
    secret=credentials['SecretAccessKey'],
    token=credentials['SessionToken'], 
    anon=False
)
filepath = f's3://{files_paths[0]}'
f = fs.open(filepath, mode='rb')
ds = xr.open_dataset(f)
ds

2023-07-29 15:48:05,080 - s3fs - DEBUG - set_session -- Setting up s3fs instance
2023-07-29 15:48:05,088 - s3fs - DEBUG - set_session -- RC: caching enabled? False (explicit option is False)
2023-07-29 15:48:05,156 - s3fs - DEBUG - _call_s3 -- CALL: head_object - ({},) - {'Bucket': 'nex-gddp-cmip6', 'Key': 'NEX-GDDP-CMIP6/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950.nc'}
2023-07-29 15:48:07,074 - s3fs - DEBUG - _fetch_range -- Fetch: nex-gddp-cmip6/NEX-GDDP-CMIP6/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950.nc, 0-5242888
2023-07-29 15:48:07,075 - s3fs - DEBUG - _call_s3 -- CALL: get_object - () - {'Bucket': 'nex-gddp-cmip6', 'Key': 'NEX-GDDP-CMIP6/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950.nc', 'Range': 'bytes=0-5242887', 'IfMatch': '"e63fa68f613515cc48c381278d480cc0-30"'}
2023-07-29 15:48:07,219 - s3fs - DEBUG - _fetch_range -- Fetch: nex-gddp-cmip6/NEX-GDDP-CMIP6/GIS

In [9]:
chunk_sets = []
# Optimized for analysis
temporal_target_chunks = { 'lat': ds.lat.shape[0], 'lon': ds.lon.shape[0], 'time': 29 }
chunk_sets.append(temporal_target_chunks)

In [10]:
# Optimized for visualization at a single time step
global_target_chunks = { 'lat': ds.lat.shape[0], 'lon': ds.lon.shape[0], 'time': 1 }
global_target_chunks
chunk_sets.append(global_target_chunks)

In [24]:
# Optimized for time series
spatial_target_chunks = {'time': 365, 'lat': 262, 'lon': 262}
chunk_sets.append(spatial_target_chunks)

In [13]:
store = 's3://yuvipanda-test1/cmr/gpm3imergdl.zarr/'
ds = xr.open_dataset(store, engine="zarr", chunks={})

2023-07-29 15:52:19,509 - s3fs - DEBUG - set_session -- Setting up s3fs instance
2023-07-29 15:52:19,517 - s3fs - DEBUG - set_session -- RC: caching enabled? False (explicit option is False)
2023-07-29 15:52:19,613 - s3fs - DEBUG - _call_s3 -- CALL: get_object - () - {'Bucket': 'yuvipanda-test1', 'Key': 'cmr/gpm3imergdl.zarr/.zmetadata'}
2023-07-29 15:52:19,687 - s3fs - DEBUG - _call_s3 -- CALL: get_object - () - {'Bucket': 'yuvipanda-test1', 'Key': 'cmr/gpm3imergdl.zarr/time/0'}
2023-07-29 15:52:19,726 - s3fs - DEBUG - _call_s3 -- CALL: get_object - () - {'Bucket': 'yuvipanda-test1', 'Key': 'cmr/gpm3imergdl.zarr/time/0'}
2023-07-29 15:52:19,743 - s3fs - DEBUG - _call_s3 -- CALL: get_object - () - {'Bucket': 'yuvipanda-test1', 'Key': 'cmr/gpm3imergdl.zarr/time_bnds/0.0'}
2023-07-29 15:52:19,762 - s3fs - DEBUG - _call_s3 -- CALL: get_object - () - {'Bucket': 'yuvipanda-test1', 'Key': 'cmr/gpm3imergdl.zarr/time_bnds/814.0'}
2023-07-29 15:52:19,778 - s3fs - DEBUG - _call_s3 -- CALL: get_o

In [14]:
ds

Unnamed: 0,Array,Chunk
Bytes,196.72 GiB,247.19 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 196.72 GiB 247.19 MiB Shape (8149, 3600, 1800) (10, 3600, 1800) Dask graph 815 chunks in 2 graph layers Data type float32 numpy.ndarray",1800  3600  8149,

Unnamed: 0,Array,Chunk
Bytes,196.72 GiB,247.19 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,49.18 GiB,61.80 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray
"Array Chunk Bytes 49.18 GiB 61.80 MiB Shape (8149, 3600, 1800) (10, 3600, 1800) Dask graph 815 chunks in 2 graph layers Data type int8 numpy.ndarray",1800  3600  8149,

Unnamed: 0,Array,Chunk
Bytes,49.18 GiB,61.80 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,49.18 GiB,61.80 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray
"Array Chunk Bytes 49.18 GiB 61.80 MiB Shape (8149, 3600, 1800) (10, 3600, 1800) Dask graph 815 chunks in 2 graph layers Data type int8 numpy.ndarray",1800  3600  8149,

Unnamed: 0,Array,Chunk
Bytes,49.18 GiB,61.80 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,196.72 GiB,247.19 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 196.72 GiB 247.19 MiB Shape (8149, 3600, 1800) (10, 3600, 1800) Dask graph 815 chunks in 2 graph layers Data type float32 numpy.ndarray",1800  3600  8149,

Unnamed: 0,Array,Chunk
Bytes,196.72 GiB,247.19 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,49.18 GiB,61.80 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray
"Array Chunk Bytes 49.18 GiB 61.80 MiB Shape (8149, 3600, 1800) (10, 3600, 1800) Dask graph 815 chunks in 2 graph layers Data type int8 numpy.ndarray",1800  3600  8149,

Unnamed: 0,Array,Chunk
Bytes,49.18 GiB,61.80 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,49.18 GiB,61.80 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray
"Array Chunk Bytes 49.18 GiB 61.80 MiB Shape (8149, 3600, 1800) (10, 3600, 1800) Dask graph 815 chunks in 2 graph layers Data type int8 numpy.ndarray",1800  3600  8149,

Unnamed: 0,Array,Chunk
Bytes,49.18 GiB,61.80 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,196.72 GiB,247.19 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 196.72 GiB 247.19 MiB Shape (8149, 3600, 1800) (10, 3600, 1800) Dask graph 815 chunks in 2 graph layers Data type float32 numpy.ndarray",1800  3600  8149,

Unnamed: 0,Array,Chunk
Bytes,196.72 GiB,247.19 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,49.18 GiB,61.80 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray
"Array Chunk Bytes 49.18 GiB 61.80 MiB Shape (8149, 3600, 1800) (10, 3600, 1800) Dask graph 815 chunks in 2 graph layers Data type int8 numpy.ndarray",1800  3600  8149,

Unnamed: 0,Array,Chunk
Bytes,49.18 GiB,61.80 MiB
Shape,"(8149, 3600, 1800)","(10, 3600, 1800)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,int8 numpy.ndarray,int8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,127.33 kiB,160 B
Shape,"(8149, 2)","(10, 2)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 127.33 kiB 160 B Shape (8149, 2) (10, 2) Dask graph 815 chunks in 2 graph layers Data type object numpy.ndarray",2  8149,

Unnamed: 0,Array,Chunk
Bytes,127.33 kiB,160 B
Shape,"(8149, 2)","(10, 2)"
Dask graph,815 chunks in 2 graph layers,815 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray


In [12]:
for chunk_set in [chunk_sets[1]]:
    dir_path = str(("_").join(map(str, chunk_set.values())))
    store_name = f"{dir_path}/CMIP6_{temporal_resolution}_{model}_{variable}.zarr"
    store = s3fs.S3Map(root=f"{bucket}/{store_name}", s3=fs, check=True)
    ds = xr.open_dataset(store, engine="zarr", chunks={})
    print(ds)

2023-07-29 15:51:19,854 - s3fs - DEBUG - _call_s3 -- CALL: head_object - ({},) - {'Bucket': 'nasa-eodc-data-store', 'Key': '600_1440_1/CMIP6_daily_GISS-E2-1-G_tas.zarr'}
2023-07-29 15:51:19,894 - s3fs - DEBUG - _error_wrapper -- Client error (maybe retryable): An error occurred (404) when calling the HeadObject operation: Not Found
2023-07-29 15:51:19,895 - s3fs - DEBUG - _call_s3 -- CALL: list_objects_v2 - ({},) - {'Bucket': 'nasa-eodc-data-store', 'Prefix': '600_1440_1/CMIP6_daily_GISS-E2-1-G_tas.zarr/', 'Delimiter': '/', 'MaxKeys': 1}
2023-07-29 15:51:19,982 - s3fs - DEBUG - _call_s3 -- CALL: put_object - () - {'Bucket': 'nasa-eodc-data-store', 'Key': '600_1440_1/CMIP6_daily_GISS-E2-1-G_tas.zarr/a'}
2023-07-29 15:51:20,003 - s3fs - DEBUG - _call_s3 -- CALL: delete_objects - ({},) - {'Bucket': 'nasa-eodc-data-store', 'Delete': {'Objects': [{'Key': '600_1440_1/CMIP6_daily_GISS-E2-1-G_tas.zarr/a'}], 'Quiet': True}}
2023-07-29 15:51:20,026 - s3fs - DEBUG - _call_s3 -- CALL: head_object 

<xarray.Dataset>
Dimensions:  (lat: 600, lon: 1440, time: 730)
Coordinates:
  * lat      (lat) float64 -59.88 -59.62 -59.38 -59.12 ... 89.38 89.62 89.88
  * lon      (lon) float64 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9
  * time     (time) object 1950-01-01 12:00:00 ... 1950-12-31 12:00:00
Data variables:
    tas      (time, lat, lon) float32 dask.array<chunksize=(1, 600, 1440), meta=np.ndarray>
Attributes: (12/23)
    Conventions:           CF-1.7
    activity:              NEX-GDDP-CMIP6
    cmip6_institution_id:  NASA-GISS
    cmip6_license:         CC-BY-SA 4.0
    cmip6_source_id:       GISS-E2-1-G
    contact:               Dr. Rama Nemani: rama.nemani@nasa.gov, Dr. Bridget...
    ...                    ...
    scenario:              historical
    source:                BCSD
    title:                 GISS-E2-1-G, r1i1p1f2, historical, global downscal...
    tracking_id:           25d6baa3-0404-4eba-a3f1-afddbf69d4cc
    variant_label:         r1i1p1f2
    version:  

In [28]:
ds