In [None]:
import itertools
import os

import numpy as np
import zarrita
from utils import calc_shard_size

In [None]:
# S3 prefix under which both the source stores and the converted stores live.
root_path = "s3://carbonplan-benchmarks/data"
# Dataset directory below root_path; the conversion loop appends each store's
# directory name to root_path/orig_path to form the full source and target URLs.
orig_path = "NEX-GDDP-CMIP6/ACCESS-CM2/historical/r1i1p1f1/tasmax/tasmax_day_ACCESS-CM2_historical_r1i1p1f1_gn"

In [None]:
# Fixed naming fields of the existing (v2) source stores. The conversion loop reads
# these values to build the source directory name and the unchanged parts of the
# target directory name.
attrs = {
    "variable": "tasmax",
    "model": "ACCESS-CM2",
    "data_type": "pyramids",
    "variant": "r1i1p1f1",
    "zarr_version": "v2",
    "shard_orientation": "0",
    "data_shard_size": "0",
    "data_dtype": "f4",
    "coordinate_chunks": "0",
    "coordinate_shards": "0",
    "compression": "gzipL1",
    "write_empty_chunks": True,
    "inflevel": "100",
}
# Parameter grids: the conversion loop iterates over the Cartesian product of these
# lists together with data_shard_size and shard_orientation below.
pixels_per_tile = [128, 256]
data_chunk_size = [1]  # , 5, 10, 25]
projections = ["3857", "4326"]

# Settings for the converted stores.
zarr_version = "v3"
# Each value is passed to convert_array_to_v3 as shard_mb; 0 is falsy there, so the
# array is written without a sharding codec.
data_shard_size = [0, 50, 100]
shard_orientation = ["both"]

# Child groups converted inside every store.
groups = ['0', '1', '2', '3']
# Arrays that receive the shard settings; the remaining entries of `arrays` are
# converted with convert_array_to_v3's unsharded defaults.
sharded_arrays = ['tasmax']
arrays = ['tasmax', 'time', 'x', 'y']

In [None]:
def convert_array_to_v3(
    *,
    group: str,
    array: str,
    source_store: "zarrita.RemoteStore",
    target_store: "zarrita.RemoteStore",
    shard_mb: "int | None" = None,
    orientation: str = 'full-space',
) -> None:
    """Copy one Zarr v2 array into a new Zarr v3 array, optionally sharded.

    The source array at ``source_store / group / array`` is read fully into memory
    and written to ``target_store / group / array`` with the same shape, dtype,
    fill value and attributes, compressed with gzip level 1.

    Parameters
    ----------
    group : str
        Name of the group that contains the array.
    array : str
        Name of the array inside ``group``.
    source_store, target_store
        Store objects that support ``/`` path joining (the notebook passes
        ``zarrita.RemoteStore`` instances); plain strings will not work.
    shard_mb : int, optional
        Forwarded to ``calc_shard_size`` as ``target_mb``. When falsy (``None`` or
        ``0``) no sharding codec is used and the source chunk shape is kept.
    orientation : str
        Forwarded to ``calc_shard_size``; ignored when ``shard_mb`` is falsy.
    """
    source_array = zarrita.ArrayV2.open(source_store / group / array)
    # The whole array is materialised in memory before it is rewritten.
    data = source_array[:]
    source_chunks = source_array.metadata.chunks

    if shard_mb:
        # The source chunk shape becomes the inner chunk shape of each shard; the
        # outer chunk shape comes from calc_shard_size (defined in utils; presumably
        # sized to roughly `shard_mb` megabytes -- confirm there).
        chunks = calc_shard_size(
            data, chunks=source_chunks, target_mb=shard_mb, orientation=orientation
        )
        print(f"outer_chunks: {chunks}; inner_chunks: {source_chunks}")
        codecs = [
            zarrita.codecs.sharding_codec(
                chunk_shape=source_chunks,
                codecs=[zarrita.codecs.gzip_codec(level=1)],
            )
        ]
    else:
        chunks = source_chunks
        print(f"chunks: {chunks}")
        codecs = [zarrita.codecs.gzip_codec(level=1)]

    target_array = zarrita.Array.create(
        target_store / group / array,
        shape=data.shape,
        dtype=data.dtype,
        chunk_shape=chunks,
        fill_value=source_array.metadata.fill_value,
        # Read directly from the source array so the notebook-level `attrs` dict is
        # not shadowed by a local of the same name.
        attributes=source_array.attributes,
        codecs=codecs,
    )
    target_array[:] = data

In [None]:
def convert_group_to_v3(
    *,
    group: str,
    source_store: "zarrita.RemoteStore",
    target_store: "zarrita.RemoteStore",
) -> None:
    """Create a Zarr v3 group that carries the attributes of a Zarr v2 group.

    Only the group node itself is created at ``target_store / group``; the arrays
    it contains are not copied here.

    Parameters
    ----------
    group : str
        Name of the group below the store root.
    source_store, target_store
        Store objects that support ``/`` path joining (the notebook passes
        ``zarrita.RemoteStore`` instances); plain strings will not work.
    """
    source_group = zarrita.GroupV2.open(source_store / group)
    zarrita.Group.create(target_store / group, attributes=source_group.attributes)

In [None]:
%%time
# Convert every source (v2) store in the parameter grid to a v3 store, once per
# requested shard size and orientation.
for pix, chunk, proj, shard, orient in itertools.product(
    pixels_per_tile, data_chunk_size, projections, data_shard_size, shard_orientation
):
    if shard == 0:
        # Without sharding the orientation has no effect: skip the "time" repeat and
        # record the orientation as 0 in the target name and attributes.
        if orient == "time":
            continue
        orient = 0

    # Directory name of the existing v2 store, built only from the fixed `attrs` values.
    # NOTE(review): data_shard_size appears twice here (14 fields) while target_path
    # below has 13 fields -- confirm both match the intended bucket layout.
    path = f"{attrs['data_type']}-{attrs['zarr_version']}-{proj}-{attrs['write_empty_chunks']}-{pix}-{chunk}-{attrs['shard_orientation']}-{attrs['data_shard_size']}-{attrs['data_dtype']}-{attrs['data_shard_size']}-{attrs['coordinate_chunks']}-{attrs['coordinate_shards']}-{attrs['compression']}-{attrs['inflevel']}"
    target_path = f"{attrs['data_type']}-{zarr_version}-{proj}-{attrs['write_empty_chunks']}-{pix}-{chunk}-{orient}-{shard}-{attrs['data_dtype']}-{attrs['coordinate_chunks']}-{attrs['coordinate_shards']}-{attrs['compression']}-{attrs['inflevel']}"
    # S3 URLs always use "/"; os.path.join would insert the platform separator
    # (a backslash on Windows), so the parts are joined explicitly.
    source = f"{root_path}/{orig_path}/{path}"
    target = f"{root_path}/{orig_path}/{target_path}"
    print(f"Source fp: {source}")
    print(f"Target fp: {target}")

    source_store = zarrita.RemoteStore(source)
    target_store = zarrita.RemoteStore(target)
    source_group = zarrita.GroupV2.open(source_store)
    # Work on a copy so the opened source group's attribute mapping is left untouched.
    source_attrs = dict(source_group.attributes)
    source_attrs.update(
        zarr_version=zarr_version,
        data_shard_size=shard,
        shard_orientation=orient,
    )
    zarrita.Group.create(target_store, attributes=source_attrs)

    for group in groups:
        convert_group_to_v3(group=group, source_store=source_store, target_store=target_store)
        for array in arrays:
            # Only arrays listed in sharded_arrays receive the shard settings; the
            # others use convert_array_to_v3's unsharded defaults.
            shard_kwargs = (
                {"shard_mb": shard, "orientation": orient} if array in sharded_arrays else {}
            )
            convert_array_to_v3(
                group=group,
                array=array,
                source_store=source_store,
                target_store=target_store,
                **shard_kwargs,
            )