To execute this notebook:

1. Install dependencies
`pdm install`

2. Create a kernel
`pdm run python -m ipykernel install --user --name v3-env --display-name "Python (v3-env)"`

3. Set this notebook's kernel to the newly created `v3-env` kernel (listed as "Python (v3-env)")

4. Run all cells from top to bottom

In [1]:
import time

import xarray as xr
from multiformats import CID
from py_hamt import HAMT, IPFSStore, IPFSZarr3
from xarray import Dataset

# NOTE(review): despite the variable name, this string is decoded as a CID
# below (CID.decode) and used as the HAMT root node id.
ipns_hash = "bafyr4idlv44ryi65tm6kejtpze3niotafgt2p6jfedlld5yotqo3ekkn3u"

# Content store backed by an HTTP IPFS gateway; requests use a 120 s timeout.
ipfs_store = IPFSStore(
    gateway_uri_stem="https://ipfs-gateway.dclimate.net/",
    timeout_seconds=120,
)

# Turn the textual identifier into a CID object pointing at the dataset root.
root_cid = CID.decode(ipns_hash)

# The HAMT resolves keys to blocks in the store, starting from that root.
hamt = HAMT(store=ipfs_store, root_node_id=root_cid)

# Expose the HAMT as a read-only Zarr v3 store.
zarr_store = IPFSZarr3(hamt, read_only=True)

# Open the store with xarray, then order the grid by longitude and by latitude
# (presumably so that the ascending label slices used later select the
# expected range - confirm against the stored coordinate order).
zarr_ds: Dataset = (
    xr.open_zarr(store=zarr_store, zarr_format=3)
    .sortby("longitude")
    .sortby("latitude")
)

print(zarr_ds)

<xarray.Dataset> Size: 18GB
Dimensions:    (time: 16900, latitude: 360, longitude: 720)
Coordinates:
  * time       (time) datetime64[ns] 135kB 1979-01-01 1979-01-02 ... 2025-04-08
  * latitude   (latitude) float32 1kB -89.75 -89.25 -88.75 ... 88.75 89.25 89.75
  * longitude  (longitude) float32 3kB -179.8 -179.2 -178.8 ... 179.2 179.8
Data variables:
    tmax       (time, latitude, longitude) float32 18GB ...
Attributes:
    Conventions:    CF-1.0
    Source:         ftp://ftp.cpc.ncep.noaa.gov/precip/wd52ws/global_temp/
    References:     https://www.psl.noaa.gov/data/gridded/data.cpc.globaltemp...
    version:        V1.0
    title:          CPC GLOBAL TEMP V1.0
    dataset_title:  CPC GLOBAL TEMP
    history:        Updated 2025-04-09 15:37:46


In [2]:
# Region and period of interest, each given as a (start, stop) pair.
lat_bounds = (40, 52)
lon_bounds = (4, 10)
time_bounds = ("1979-01-01", "1979-05-31")

# Select along all three dimensions at once; each pair becomes a label slice.
sliced_ds = zarr_ds.sel(
    {
        "latitude": slice(*lat_bounds),
        "longitude": slice(*lon_bounds),
        "time": slice(*time_bounds),
    }
)

print(sliced_ds)

<xarray.Dataset> Size: 175kB
Dimensions:    (time: 151, latitude: 24, longitude: 12)
Coordinates:
  * time       (time) datetime64[ns] 1kB 1979-01-01 1979-01-02 ... 1979-05-31
  * latitude   (latitude) float32 96B 40.25 40.75 41.25 ... 50.75 51.25 51.75
  * longitude  (longitude) float32 48B 4.25 4.75 5.25 5.75 ... 8.75 9.25 9.75
Data variables:
    tmax       (time, latitude, longitude) float32 174kB ...
Attributes:
    Conventions:    CF-1.0
    Source:         ftp://ftp.cpc.ncep.noaa.gov/precip/wd52ws/global_temp/
    References:     https://www.psl.noaa.gov/data/gridded/data.cpc.globaltemp...
    version:        V1.0
    title:          CPC GLOBAL TEMP V1.0
    dataset_title:  CPC GLOBAL TEMP
    history:        Updated 2025-04-09 15:37:46


In [3]:
# Trigger computation / download and time it.
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() follows the wall clock and can jump (even
# backwards) if the system clock is adjusted while the download is running.
start_time = time.perf_counter()
sliced_ds.load()  # loads the subset's data into memory, in place
end_time = time.perf_counter()

download_time = end_time - start_time
# NOTE: nbytes is the in-memory size of the loaded subset, not the number of
# bytes fetched from the gateway, so the figure below is an effective rate.
download_size_mb = sliced_ds.nbytes / (1024 * 1024)
# Guard against a zero elapsed time to avoid a ZeroDivisionError.
download_speed_mbs = download_size_mb / download_time if download_time > 0 else 0.0

print(f"Download and subset completed in {download_time:.2f} seconds. Speed: {download_speed_mbs:.2f} MB/s")
print(sliced_ds)

Download and subset completed in 4.15 seconds. Speed: 0.04 MB/s
<xarray.Dataset> Size: 175kB
Dimensions:    (time: 151, latitude: 24, longitude: 12)
Coordinates:
  * time       (time) datetime64[ns] 1kB 1979-01-01 1979-01-02 ... 1979-05-31
  * latitude   (latitude) float32 96B 40.25 40.75 41.25 ... 50.75 51.25 51.75
  * longitude  (longitude) float32 48B 4.25 4.75 5.25 5.75 ... 8.75 9.25 9.75
Data variables:
    tmax       (time, latitude, longitude) float32 174kB 15.9 nan ... 29.11
Attributes:
    Conventions:    CF-1.0
    Source:         ftp://ftp.cpc.ncep.noaa.gov/precip/wd52ws/global_temp/
    References:     https://www.psl.noaa.gov/data/gridded/data.cpc.globaltemp...
    version:        V1.0
    title:          CPC GLOBAL TEMP V1.0
    dataset_title:  CPC GLOBAL TEMP
    history:        Updated 2025-04-09 15:37:46
