In [17]:
import fsspec
import rioxarray
import xarray as xr
import hvplot.xarray as hvplot
import s3fs
import eodc_hub_role

In [12]:
# S3 bucket that holds the Zarr stores opened later in this notebook.
bucket = 'nasa-eodc-data-store'
# Fetch AWS credentials through the eodc_hub_role helper. The returned object is
# indexed further down by 'AccessKeyId', 'SecretAccessKey' and 'SessionToken'.
# NOTE(review): the helper's name suggests it also sets the credentials for the
# current process - confirm against the eodc_hub_role module.
credentials = eodc_hub_role.fetch_and_set_credentials()

In [13]:
# Parameters
# `temporal_resolution`, `model` and `variable` are interpolated into the Zarr store name;
# `model` and `variable` also select the source files in the S3 glob pattern.
temporal_resolution = "daily"
model = "GISS-E2-1-G"
variable = "tas"
# Passed as `anon` to the S3 filesystem used for reading the source files.
anon = True

In [18]:
# Set up the fsspec filesystems used for reading and writing.

# Write side. NOTE(review): an empty protocol string appears to fall back to
# fsspec's default (local) filesystem - confirm against the fsspec docs.
fs_write = fsspec.filesystem("")

# Read side: S3 filesystem; `anon` comes from the parameters cell.
fs_read = fsspec.filesystem(
    "s3",
    anon=anon,
    skip_instance_cache=False,
)

# Glob pattern for the source files of the selected model and variable.
s3_path = f"s3://nex-gddp-cmip6/NEX-GDDP-CMIP6/{model}/historical/r1i1p1*/{variable}/*"

In [19]:
# List every object matching the glob pattern and report how many were found.
# The result is indexed later on, where 's3://' is prepended to its first entry.
files_paths = fs_read.glob(s3_path)
print(f"{len(files_paths)} discovered from {s3_path}")

65 discovered from s3://nex-gddp-cmip6/NEX-GDDP-CMIP6/GISS-E2-1-G/historical/r1i1p1*/tas/*


In [26]:
# Authenticated S3 filesystem built from the credentials fetched earlier.
# It is also reused further down to open the Zarr stores.
fs = s3fs.S3FileSystem(
    anon=False,
    key=credentials['AccessKeyId'],
    secret=credentials['SecretAccessKey'],
    token=credentials['SessionToken'],
)

# Open the first discovered file as an xarray dataset; the chunk definitions
# below read the lat/lon sizes from it.
filepath = f's3://{files_paths[0]}'
# The handle is intentionally not closed here.
# NOTE(review): presumably xarray keeps reading from it lazily - confirm before
# wrapping this in a `with` block.
f = fs.open(filepath, mode='rb')
ds = xr.open_dataset(f)
ds

In [21]:
# Chunking scheme optimized for analysis: each chunk spans the full length of
# the lat and lon dimensions of `ds` and 29 steps along time.
temporal_target_chunks = {
    'lat': ds.lat.shape[0],
    'lon': ds.lon.shape[0],
    'time': 29,
}
# Start the list of chunking schemes; the following cells append to it.
chunk_sets = [temporal_target_chunks]

In [22]:
# Chunking scheme optimized for visualization at a single time step: one chunk
# spans the full length of the lat and lon dimensions of `ds` for one time step.
global_target_chunks = {'lat': ds.lat.shape[0], 'lon': ds.lon.shape[0], 'time': 1}
# Register the scheme only once, so re-running this cell does not add a
# duplicate entry (which would make the loading loop open the same store twice).
if global_target_chunks not in chunk_sets:
    chunk_sets.append(global_target_chunks)

In [24]:
# Chunking scheme optimized for time series.
# Key order matters: the loading loop joins the values in insertion order to
# build the store's directory name, so reordering the keys changes the path.
spatial_target_chunks = {'time': 365, 'lat': 262, 'lon': 262}
# Register the scheme only once, so re-running this cell does not add a
# duplicate entry (which would make the loading loop open the same store twice).
if spatial_target_chunks not in chunk_sets:
    chunk_sets.append(spatial_target_chunks)

In [27]:
# Open the Zarr store written for each chunking scheme and print its summary.
# `ds` is rebound on every iteration: after the loop it refers to the last store
# opened, replacing the dataset loaded from the source file earlier.
for chunk_set in chunk_sets:
    # Directory name = chunk sizes joined with "_" in the dict's insertion order.
    dir_path = "_".join(str(size) for size in chunk_set.values())
    store_name = f"{dir_path}/CMIP6_{temporal_resolution}_{model}_{variable}.zarr"
    store = s3fs.S3Map(root=f"{bucket}/{store_name}", s3=fs, check=True)
    ds = xr.open_zarr(store, consolidated=True)
    print(ds)

<xarray.Dataset>
Dimensions:  (lat: 600, lon: 1440, time: 730)
Coordinates:
  * lat      (lat) float64 -59.88 -59.62 -59.38 -59.12 ... 89.38 89.62 89.88
  * lon      (lon) float64 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9
  * time     (time) object 1950-01-01 12:00:00 ... 1950-12-31 12:00:00
Data variables:
    tas      (time, lat, lon) float32 dask.array<chunksize=(29, 600, 1440), meta=np.ndarray>
Attributes: (12/23)
    Conventions:           CF-1.7
    activity:              NEX-GDDP-CMIP6
    cmip6_institution_id:  NASA-GISS
    cmip6_license:         CC-BY-SA 4.0
    cmip6_source_id:       GISS-E2-1-G
    contact:               Dr. Rama Nemani: rama.nemani@nasa.gov, Dr. Bridget...
    ...                    ...
    scenario:              historical
    source:                BCSD
    title:                 GISS-E2-1-G, r1i1p1f2, historical, global downscal...
    tracking_id:           25d6baa3-0404-4eba-a3f1-afddbf69d4cc
    variant_label:         r1i1p1f2
    version: 

In [28]:
# Display whatever `ds` currently refers to; when the cells are run in order,
# that is the dataset from the last Zarr store opened in the loop above.
ds