In [1]:
import xarray as xr
import pandas as pd
import rioxarray
from ndpyramid import pyramid_reproject
from carbonplan_data.utils import set_zarr_encoding
from carbonplan_data.metadata import get_cf_global_attrs



In [7]:
# path to the input GeoTIFF
path = "../data/3B-MO.MS.MRG.3IMERG.20200501-S000000-E235959.05.V06B.HDF5.tif"

# read the raster and wrap it in a Dataset under the name "precipitation"
raster = rioxarray.open_rasterio(path)
ds = raster.to_dataset(name="precipitation")

# squeeze out length-1 dimensions, then drop the leftover `band` coordinate
ds = ds.squeeze()
ds = ds.reset_coords(["band"], drop=True)

In [8]:
# build the pyramid from the input dataset, one reprojected copy per level
n_levels = 6
dt = pyramid_reproject(ds, levels=n_levels)

In [23]:
# display the child nodes of the pyramid (one node per level)
dt.children

(DataNode(name='0', parent='root', children=[],
 data=<xarray.Dataset>
      Dimensions:        (x: 128, y: 128)
      Coordinates:
        * x              (x) float64 -1.987e+07 -1.956e+07 ... 1.956e+07 1.987e+07
        * y              (y) float64 1.989e+07 1.958e+07 ... -1.958e+07 -1.989e+07
          spatial_ref    int64 0
      Data variables:
          precipitation  (y, x) float32 0.2135 0.2135 0.2135 ... 0.2135 0.2135 0.2135
 ),
 DataNode(name='1', parent='root', children=[],
 data=<xarray.Dataset>
      Dimensions:        (x: 256, y: 256)
      Coordinates:
        * x              (x) float64 -1.995e+07 -1.979e+07 ... 1.979e+07 1.995e+07
        * y              (y) float64 1.997e+07 1.981e+07 ... -1.981e+07 -1.997e+07
          spatial_ref    int64 0
      Data variables:
          precipitation  (y, x) float32 0.2135 0.2135 0.2135 ... 0.2135 0.2135 0.2135
 ),
 DataNode(name='2', parent='root', children=[],
 data=<xarray.Dataset>
      Dimensions:        (x: 512, y: 512)
 

In [24]:
# prepare every pyramid level for writing: set the zarr encoding (zlib
# level 1, float32 floats), rechunk to 128 x 128, and clear the attributes
# on the precipitation variable
chunk_spec = {"x": 128, "y": 128}
for node in dt.children:
    node.ds = set_zarr_encoding(
        node.ds,
        codec_config={"id": "zlib", "level": 1},
        float_dtype="float32",
    )
    node.ds = node.ds.chunk(chunk_spec)
    node.ds["precipitation"].attrs.clear()

# replace the root attributes with the CF global attributes
dt.attrs = get_cf_global_attrs()

In [25]:
# persist the whole pyramid as a single consolidated zarr store
save_path = "../data"
store = f"{save_path}/gpmimerg.zarr"
dt.to_zarr(store, consolidated=True)