To execute this notebook:

1. Install dependencies
`pdm install`

2. Create a kernel
`pdm run python -m ipykernel install --user --name v2-env --display-name "Python (v2-env)"`

3. Set the kernel of this notebook to the newly created `v2-env` kernel

4. Execute

In [2]:
import time

import xarray as xr
from multiformats import CID
from py_hamt import HAMT, IPFSStore
from xarray import Dataset

# Identifier of the dataset root. NOTE: despite the variable name, this string
# is decoded as a CID below (it is not resolved through IPNS in this cell).
ipns_hash = "bafyr4iceke6zmxqvvil4okumfde4pm4exrcqqp4x4qlh2ddlwo4cdxclb4"
root_cid = CID.decode(ipns_hash)

# Block store that reads through a public IPFS gateway; the timeout is raised
# to 120 seconds for this remote gateway.
ipfs_store = IPFSStore(
    timeout_seconds=120,
    gateway_uri_stem="https://ipfs-gateway.dclimate.net/",
)

# HAMT rooted at the decoded CID; it is handed to xarray as the Zarr store.
hamt = HAMT(root_node_id=root_cid, store=ipfs_store)

# Open the Zarr dataset, then order it along longitude first and latitude second.
zarr_ds: Dataset = (
    xr.open_zarr(store=hamt)
    .sortby("longitude")
    .sortby("latitude")
)

print(zarr_ds)

<xarray.Dataset> Size: 758MB
Dimensions:    (latitude: 360, longitude: 720, time: 731)
Coordinates:
  * latitude   (latitude) float32 1kB -89.75 -89.25 -88.75 ... 88.75 89.25 89.75
  * longitude  (longitude) float32 3kB -179.8 -179.2 -178.8 ... 179.2 179.8
  * time       (time) datetime64[ns] 6kB 1979-01-01 1979-01-02 ... 1980-12-31
Data variables:
    tmax       (time, latitude, longitude) float32 758MB ...
Attributes:
    Conventions:    CF-1.0
    References:     https://www.psl.noaa.gov/data/gridded/data.cpc.globaltemp...
    Source:         ftp://ftp.cpc.ncep.noaa.gov/precip/wd52ws/global_temp/
    dataset_title:  CPC GLOBAL TEMP
    history:        created 9/2016 by CAS NOAA/ESRL PSD
    title:          CPC GLOBAL TEMP V1.0
    version:        V1.0


In [3]:
# Selection window as (lower, upper) pairs, one per dimension.
lat_bounds = (0, 10)
lon_bounds = (0, 10)
time_bounds = ("1979-01-01", "1980-12-31")

# Label-based subset: build one slice per dimension and pass them to `.sel`
# as a single indexer mapping.
sliced_ds = zarr_ds.sel(
    {
        dim: slice(lower, upper)
        for dim, (lower, upper) in (
            ("latitude", lat_bounds),
            ("longitude", lon_bounds),
            ("time", time_bounds),
        )
    }
)

print(sliced_ds)

<xarray.Dataset> Size: 1MB
Dimensions:    (latitude: 20, longitude: 20, time: 731)
Coordinates:
  * latitude   (latitude) float32 80B 0.25 0.75 1.25 1.75 ... 8.75 9.25 9.75
  * longitude  (longitude) float32 80B 0.25 0.75 1.25 1.75 ... 8.75 9.25 9.75
  * time       (time) datetime64[ns] 6kB 1979-01-01 1979-01-02 ... 1980-12-31
Data variables:
    tmax       (time, latitude, longitude) float32 1MB ...
Attributes:
    Conventions:    CF-1.0
    References:     https://www.psl.noaa.gov/data/gridded/data.cpc.globaltemp...
    Source:         ftp://ftp.cpc.ncep.noaa.gov/precip/wd52ws/global_temp/
    dataset_title:  CPC GLOBAL TEMP
    history:        created 9/2016 by CAS NOAA/ESRL PSD
    title:          CPC GLOBAL TEMP V1.0
    version:        V1.0


In [4]:
# Trigger computation / download and time it.
# `time.perf_counter()` is used instead of `time.time()` because it is a
# monotonic, high-resolution clock: the measured interval cannot be skewed
# (or go negative) if the system wall clock is adjusted during the download.
start_time = time.perf_counter()
sliced_ds.load()  # loads the subset's values into memory, in place
end_time = time.perf_counter()

download_time = end_time - start_time

# `nbytes` is the in-memory size of the loaded subset; dividing by 1024 * 1024
# yields mebibytes, which the message below labels as "MB".
download_size_mb = sliced_ds.nbytes / (1024 * 1024)

# Guard against a zero-length interval so the speed never divides by zero.
download_speed_mbs = download_size_mb / download_time if download_time > 0 else 0.0

print(f"Download and subset completed in {download_time:.2f} seconds. Speed: {download_speed_mbs:.2f} MB/s")
print(sliced_ds)

Download and subset completed in 12.77 seconds. Speed: 0.09 MB/s
<xarray.Dataset> Size: 1MB
Dimensions:    (latitude: 20, longitude: 20, time: 731)
Coordinates:
  * latitude   (latitude) float32 80B 0.25 0.75 1.25 1.75 ... 8.75 9.25 9.75
  * longitude  (longitude) float32 80B 0.25 0.75 1.25 1.75 ... 8.75 9.25 9.75
  * time       (time) datetime64[ns] 6kB 1979-01-01 1979-01-02 ... 1980-12-31
Data variables:
    tmax       (time, latitude, longitude) float32 1MB nan nan ... 28.03 30.67
Attributes:
    Conventions:    CF-1.0
    References:     https://www.psl.noaa.gov/data/gridded/data.cpc.globaltemp...
    Source:         ftp://ftp.cpc.ncep.noaa.gov/precip/wd52ws/global_temp/
    dataset_title:  CPC GLOBAL TEMP
    history:        created 9/2016 by CAS NOAA/ESRL PSD
    title:          CPC GLOBAL TEMP V1.0
    version:        V1.0
