In [5]:
import eodc_hub_role
import numpy as np
import os
import sys
import xarray as xr
import zarr
sys.path.append('..')
import zarr_helpers

In [28]:
# Output directory that receives the generated Zarr stores
fake_data_dir = 'fake_data_no_chunks'

# Shape of the first (smallest) dataset; y is the lat axis, x is the lon axis
time_steps = 1
y, x = 180, 360

# Growth schedule used by the generation loop
multiple = 2      # factor applied to the number of grid cells on each iteration
n_multiples = 10  # number of iterations, i.e. number of stores written

In [29]:
# Generate a series of progressively larger fake datasets and write each one
# to its own Zarr store under `fake_data_dir`.
#
# The loop iterates on local copies of the configured grid size so the config
# values `y` / `x` are never overwritten: re-running this cell always produces
# the same set of stores instead of continuing to grow from the last run.
lat_size, lon_size = y, x
rng = np.random.default_rng(0)  # fixed seed -> identical fake values on every run

for n_multiple in range(n_multiples):
    # The first store uses the configured grid as-is; every later store has
    # `multiple` times as many cells as the previous one.
    growth = 1 if n_multiple == 0 else multiple
    size = lat_size * lon_size * growth

    # Keep lon = 2 * lat:  lat * lon = size  =>  lon = sqrt(2 * size).
    # int(...) guarantees a plain Python int for the array shape and file name.
    lon_size = int(round(np.sqrt(2 * size)))
    lat_size = lon_size // 2
    data = rng.random(size=(time_steps, lat_size, lon_size))

    # Create the Xarray dataset with dimensions and coordinates
    ds = xr.Dataset(
        {'data': (['time', 'lat', 'lon'], data)},
        coords={
            'time': np.arange(time_steps),
            'lat': np.linspace(-90, 90, lat_size),
            'lon': np.linspace(-180, 180, lon_size),
        },
    )

    # Save as a Zarr store with one chunk spanning the full lat/lon grid per
    # time step (the "no chunks" layout this directory is named after).
    chunks = {'time': 1, 'lat': lat_size, 'lon': lon_size}
    ds = ds.chunk(chunks)
    ds.to_zarr(f'{fake_data_dir}/store_lat_{lat_size}x_lon_{lon_size}.zarr', mode='w')

In [30]:
# Report the chunk size of the 'data' variable for every Zarr store found in
# the fake-data directory.
# os.listdir returns entries in arbitrary order; sort them so the report is
# deterministic from run to run.
items = sorted(os.listdir(fake_data_dir))

for item in items:
    item_path = os.path.join(fake_data_dir, item)
    # Zarr stores are directories; skip anything else
    if not os.path.isdir(item_path):
        continue

    try:
        # Attempt to open the Zarr store using xarray
        ds = xr.open_zarr(item_path)
    except Exception as e:
        # Not a readable Zarr store: report it and move on to the next entry
        print(f"Could not open {item} as a Zarr store. Error: {e}")
        continue

    try:
        # Element [2] of the helper's result is what gets reported.
        # NOTE(review): the recorded output matches the uncompressed chunk size
        # in MiB (180 * 360 * 8 bytes = 0.494 MiB) - confirm in zarr_helpers.
        chunk_size = zarr_helpers.get_chunk_size(ds, 'data')[2]
    except Exception as e:
        # Keep this separate from the open failure so the message is accurate
        print(f"Could not determine chunk size for {item}. Error: {e}")
        continue

    print(f"Chunk size for {item}:")
    print(chunk_size)
    print('-' * 80)  # Print a separator line


Chunk sixe for store_lat_359x_lon_719.zarr:
1.9693069458007812
--------------------------------------------------------------------------------
Chunk sixe for store_lat_718x_lon_1437.zarr:
7.8717498779296875
--------------------------------------------------------------------------------
Chunk sixe for store_lat_1437x_lon_2874.zarr:
31.508926391601562
--------------------------------------------------------------------------------
Chunk sixe for store_lat_254x_lon_509.zarr:
0.9863739013671875
--------------------------------------------------------------------------------
Chunk sixe for store_lat_180x_lon_360.zarr:
0.494384765625
--------------------------------------------------------------------------------
Chunk sixe for store_lat_1016x_lon_2032.zarr:
15.7509765625
--------------------------------------------------------------------------------
Chunk sixe for store_lat_2873x_lon_5747.zarr:
125.96993255615234
---------------------------------------------------------------------------

# Create a data store with 10x the resolution of the largest data store, but a tenth of the size

In [None]:
# Extent, in square degrees, of one tenth of the full 180 x 360 degree grid
total_degrees = (1 / 10) * (180 * 360)

# Solve for a box that keeps the 1:2 lat:lon aspect ratio:
#   lat * lon = total_degrees  and  lon = 2 * lat
#   =>  2 * lat**2 = total_degrees
y = np.sqrt(0.5 * total_degrees)
x = 2 * y

# Show the resulting (lon, lat) extent in degrees
(x, y)

In [None]:
# Build the "one tenth of the planet" dataset and write it out twice: once
# as-is and once explicitly rechunked to a single chunk per time step.
# NOTE(review): `data_1000x`, `y1000` and `x1000` are not defined anywhere in
# the visible notebook, so this cell raises NameError on a fresh kernel unless
# they are created elsewhere - TODO confirm their origin and restore that cell.
onetenth_vars = {'data': (['time', 'lat', 'lon'], data_1000x)}
onetenth_coords = {
    'time': np.arange(time_steps),
    'lat': np.linspace(0, y, y1000),
    'lon': np.linspace(0, x, x1000),
}
ds_onetenth = xr.Dataset(onetenth_vars, coords=onetenth_coords)

# First copy: written without calling .chunk() beforehand
ds_onetenth.to_zarr(f'{fake_data_dir}/store_one_tenth_planet.zarr', mode='w')

# Second copy: one chunk covering the whole lat/lon grid for each time step
chunks = {'time': 1, 'lat': y1000, 'lon': x1000}
ds_onetenth_nochunks = ds_onetenth.chunk(chunks)
ds_onetenth_nochunks.to_zarr(f'{fake_data_dir}/store_one_tenth_planet_nochunks.zarr', mode='w')

In [None]:
# Re-open the single-chunk store and display it as the cell output
onetenth_nochunks_path = f'{fake_data_dir}/store_one_tenth_planet_nochunks.zarr/'
xr.open_zarr(onetenth_nochunks_path)

In [None]:
# Name of the destination S3 bucket; it is interpolated as {bucket} by the
# (currently commented-out) `aws s3 cp` command further down.
bucket = 'nasa-eodc-data-store'
# NOTE(review): judging by its name, this helper fetches credentials and
# installs them for the current session - confirm in eodc_hub_role. The return
# value is only assigned, never displayed, so nothing is echoed into the output.
credentials = eodc_hub_role.fetch_and_set_credentials()

In [None]:
%%capture
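# NOTE(review): the upload below is disabled. It copies from `fake_data/`, but
# this notebook writes its stores to `fake_data_no_chunks/` (see fake_data_dir);
# confirm the intended source path before re-enabling it.
# !aws s3 cp --recursive fake_data/ s3://{bucket}/fake_data/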