In [1]:
import eodc_hub_role
import numpy as np
import os
import sys
import xarray as xr
import zarr
sys.path.append('..')
import zarr_helpers

# Generate fake data with no chunks

In [2]:
# Directory that receives the generated Zarr stores
fake_data_dir = 'fake_data_no_chunks'

# Starting grid: a single time step, with y used as the lat length
# and x as the lon length
time_steps = 1
y, x = 180, 360

# Growth schedule: `multiple` is the factor the dataset grows by on each
# iteration, `n_multiples` is the number of iterations to run
multiple, n_multiples = 2, 10

In [None]:
# Write one Zarr store per iteration, growing the lat/lon grid each time.
# The current grid is tracked in loop-local names (n_lat, n_lon) seeded from
# the configured y and x, so the configuration is never overwritten and
# re-running this cell regenerates the same stores instead of compounding
# the growth from wherever the previous run stopped.
n_lat, n_lon = y, x
for n_multiple in range(n_multiples):
    # The first store keeps the configured size; every later store is
    # `multiple` times the size of the store written just before it.
    growth = 1 if n_multiple == 0 else multiple
    size = n_lat * n_lon * growth

    # Keep lon roughly twice lat: lon = sqrt(2 * size), lat = lon // 2.
    # Rounding means n_lat * n_lon only approximates `size`.
    n_lon = int(round(np.sqrt(2 * size)))
    n_lat = n_lon // 2
    data = np.random.random(size=(time_steps, n_lat, n_lon))

    # Create the Xarray dataset with dimensions and coordinates
    ds = xr.Dataset({
        'data': (['time', 'lat', 'lon'], data),
    }, coords={
        'time': np.arange(time_steps),
        'lat': np.linspace(-90, 90, n_lat),
        'lon': np.linspace(-180, 180, n_lon)
    })

    # Chunk so that the full lat/lon grid of a time step is a single chunk,
    # then save the dataset as a Zarr store (mode='w' overwrites an existing one)
    chunks = {'time': 1, 'lat': n_lat, 'lon': n_lon}
    ds = ds.chunk(chunks)
    ds.to_zarr(
        os.path.join(fake_data_dir, f'store_lat_{n_lat}x_lon_{n_lon}.zarr'),
        mode='w',
    )

In [4]:
# Everything found directly inside the fake data directory
items = os.listdir(fake_data_dir)

# Report the chunk size of each Zarr store found there
for item in items:
    item_path = os.path.join(fake_data_dir, item)
    # Zarr stores are directories, so anything else is skipped
    if not os.path.isdir(item_path):
        continue
    try:
        # Open the store with xarray and print the chunk size of its 'data' variable
        ds = xr.open_zarr(item_path)
        print(f"Chunk size")
        print(zarr_helpers.get_chunk_size(ds['data']))
        print('-' * 80)  # separator between stores
    except Exception as e:
        # Best effort: report the failure and carry on with the next item
        print(f"Could not open {item} as a Zarr store. Error: {e}")


Chunk size
((1, 359, 719), dtype('float64'), 1.9693069458007812)
--------------------------------------------------------------------------------
Chunk size
((1, 718, 1437), dtype('float64'), 7.8717498779296875)
--------------------------------------------------------------------------------
Chunk size
((1, 1437, 2874), dtype('float64'), 31.508926391601562)
--------------------------------------------------------------------------------
Chunk size
((1, 254, 509), dtype('float64'), 0.9863739013671875)
--------------------------------------------------------------------------------
Chunk size
((1, 180, 360), dtype('float64'), 0.494384765625)
--------------------------------------------------------------------------------
Chunk size
((1, 1016, 2032), dtype('float64'), 15.7509765625)
--------------------------------------------------------------------------------
Chunk size
((1, 2873, 5747), dtype('float64'), 125.96993255615234)
-------------------------------------------------------------

# Create fake data with chunks

In [None]:
%%capture
!pip install git+https://github.com/jbusecke/pangeo-forge-recipes@dynamic_chunks_2

In [None]:
import fsspec
from pangeo_forge_recipes import aggregation, dynamic_target_chunks
import xarray as xr