In [7]:
import time

import xarray as xr
from multiformats import CID
from py_hamt import HAMT, KuboCAS, ZarrHAMTStore
from xarray import Dataset

# String form of the dataset's root CID. Despite the variable name, this value
# is decoded as a CID below; it is not resolved as an IPNS name.
ipns_hash = "bafyr4ieluxdrinwpso33odqjecozqg2t2vsgoodp3unccs6ebma5ovrb2i"

# Content-addressed store that talks to the local IPFS gateway
kubo_cas = KuboCAS(gateway_base_url="http://127.0.0.1:8080/")

# Parse the CID string into a CID object identifying the HAMT root node
root_node_id = CID.decode(ipns_hash)

# Read-only HAMT rooted at that CID, backed by the KuboCAS instance above
hamt = HAMT(
    cas=kubo_cas,
    values_are_bytes=True,
    root_node_id=root_node_id,
    read_only=True,
)

# Wrap the HAMT in a read-only Zarr store
zhs = ZarrHAMTStore(hamt, read_only=True)

# Open the Zarr v3 dataset with xarray, then order it along longitude and
# along latitude so both coordinates are ascending
zarr_ds: Dataset = (
    xr.open_zarr(store=zhs, zarr_format=3)
    .sortby("longitude")
    .sortby("latitude")
)

print(zarr_ds)

<xarray.Dataset> Size: 1GB
Dimensions:    (time: 1096, latitude: 360, longitude: 720)
Coordinates:
  * longitude  (longitude) float32 3kB -179.8 -179.2 -178.8 ... 179.2 179.8
  * latitude   (latitude) float32 1kB -89.75 -89.25 -88.75 ... 88.75 89.25 89.75
  * time       (time) datetime64[ns] 9kB 1979-01-01 1979-01-02 ... 1981-12-31
Data variables:
    tmax       (time, latitude, longitude) float32 1GB ...
Attributes:
    Conventions:    CF-1.0
    version:        V1.0
    history:        created 9/2016 by CAS NOAA/ESRL PSD
    title:          CPC GLOBAL TEMP V1.0
    dataset_title:  CPC GLOBAL TEMP
    Source:         ftp://ftp.cpc.ncep.noaa.gov/precip/wd52ws/global_temp/
    References:     https://www.psl.noaa.gov/data/gridded/data.cpc.globaltemp...


In [8]:
# Region and period of interest, each given as a (start, stop) pair.
# .sel() with slice objects selects by coordinate label, so the bounds are
# compared against coordinate values rather than integer positions.
lat_bounds = (0, 90)  # 90 is the largest valid latitude (was 92, out of range)
lon_bounds = (0, 90)
time_bounds = ("1979-01-01", "1980-12-31")

# Subset the dataset
sliced_ds = zarr_ds.sel(
    latitude=slice(*lat_bounds),
    longitude=slice(*lon_bounds),
    time=slice(*time_bounds),
)

print(sliced_ds)

<xarray.Dataset> Size: 95MB
Dimensions:    (time: 731, latitude: 180, longitude: 180)
Coordinates:
  * longitude  (longitude) float32 720B 0.25 0.75 1.25 ... 88.75 89.25 89.75
  * latitude   (latitude) float32 720B 0.25 0.75 1.25 1.75 ... 88.75 89.25 89.75
  * time       (time) datetime64[ns] 6kB 1979-01-01 1979-01-02 ... 1980-12-31
Data variables:
    tmax       (time, latitude, longitude) float32 95MB ...
Attributes:
    Conventions:    CF-1.0
    version:        V1.0
    history:        created 9/2016 by CAS NOAA/ESRL PSD
    title:          CPC GLOBAL TEMP V1.0
    dataset_title:  CPC GLOBAL TEMP
    Source:         ftp://ftp.cpc.ncep.noaa.gov/precip/wd52ws/global_temp/
    References:     https://www.psl.noaa.gov/data/gridded/data.cpc.globaltemp...


In [9]:
# Trigger computation / download and time it.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock: the measured interval cannot be distorted by system
# clock adjustments that happen while the (long) load is in progress.
start_time = time.perf_counter()
sliced_ds.load()
end_time = time.perf_counter()

download_time = end_time - start_time
# NOTE(review): nbytes is the size of the loaded arrays in memory, so the
# figure below is an effective rate, not necessarily the bytes sent over the
# network (stored chunks may be compressed) -- confirm if wire speed matters.
download_size_mb = sliced_ds.nbytes / (1024 * 1024)
# Guard against a zero-length interval to avoid ZeroDivisionError
download_speed_mbs = download_size_mb / download_time if download_time > 0 else 0.0

print(f"Download and subset completed in {download_time:.2f} seconds. Speed: {download_speed_mbs:.2f} MB/s")
print(sliced_ds)

Download and subset completed in 475.80 seconds. Speed: 0.19 MB/s
<xarray.Dataset> Size: 95MB
Dimensions:    (time: 731, latitude: 180, longitude: 180)
Coordinates:
  * longitude  (longitude) float32 720B 0.25 0.75 1.25 ... 88.75 89.25 89.75
  * latitude   (latitude) float32 720B 0.25 0.75 1.25 1.75 ... 88.75 89.25 89.75
  * time       (time) datetime64[ns] 6kB 1979-01-01 1979-01-02 ... 1980-12-31
Data variables:
    tmax       (time, latitude, longitude) float32 95MB nan nan nan ... nan nan
Attributes:
    Conventions:    CF-1.0
    version:        V1.0
    history:        created 9/2016 by CAS NOAA/ESRL PSD
    title:          CPC GLOBAL TEMP V1.0
    dataset_title:  CPC GLOBAL TEMP
    Source:         ftp://ftp.cpc.ncep.noaa.gov/precip/wd52ws/global_temp/
    References:     https://www.psl.noaa.gov/data/gridded/data.cpc.globaltemp...
