In [1]:
import eodc_hub_role
import numpy as np
import os
import xarray as xr
import zarr

# Directory (relative to the notebook) that receives every generated Zarr store
fake_data_dir = 'fake_data'

# Fixed seed so a fresh "Restart & Run All" regenerates identical fake data
SEED = 42
rng = np.random.default_rng(SEED)

# Base grid: a single time step on a 180 x 360 (lat x lon) grid
time_steps = 1
lat_dim = 180
lon_dim = 360


def _grid_shape(total_cells):
    """Return ``(n_lat, n_lon)`` for a roughly 2:1 grid with about ``total_cells`` cells.

    With ``n_lon = 2 * n_lat`` the cell count is ``n_lon ** 2 / 2``, hence
    ``n_lon = sqrt(2 * total_cells)``. The longitude size is rounded to the
    nearest integer and the latitude size is half of it, rounded down.
    """
    # int(...) guarantees a plain Python int regardless of what round() returns
    # for a NumPy scalar, so the result is always usable as an array dimension.
    n_lon = int(round(np.sqrt(2 * total_cells)))
    return n_lon // 2, n_lon


# Base-resolution array
data_180x360 = rng.random(size=(time_steps, lat_dim, lon_dim))

# Each subsequent array holds ~10x as many cells as the previous one
tenx_size = lat_dim * lon_dim * 10
y10, x10 = _grid_shape(tenx_size)
data_10x = rng.random(size=(time_steps, y10, x10))

onehundredx_size = x10 * y10 * 10
y100, x100 = _grid_shape(onehundredx_size)
data_100x = rng.random(size=(time_steps, y100, x100))

onethousandx_size = x100 * y100 * 10
y1000, x1000 = _grid_shape(onethousandx_size)
data_1000x = rng.random(size=(time_steps, y1000, x1000))

In [2]:
# Wrap the base-resolution array in a Dataset whose coordinates span the full globe
coords_180x360 = {
    'time': np.arange(time_steps),
    'lat': np.linspace(-90, 90, lat_dim),
    'lon': np.linspace(-180, 180, lon_dim),
}
ds_180x360 = xr.Dataset(
    {'data': (['time', 'lat', 'lon'], data_180x360)},
    coords=coords_180x360,
)

# First copy: written as-is, without requesting any explicit chunking
store_180x360 = f'{fake_data_dir}/store_180x360.zarr'
ds_180x360.to_zarr(store_180x360, mode='w')

# Second copy ("nochunks"): one chunk that covers the entire lat/lon grid
chunks = {'time': 1, 'lat': lat_dim, 'lon': lon_dim}
ds_180x360_nochunks = ds_180x360.chunk(chunks)
ds_180x360_nochunks.to_zarr(f'{fake_data_dir}/store_180x360_nochunks.zarr', mode='w')

<xarray.backends.zarr.ZarrStore at 0x1363977b0>

In [3]:
# Wrap the ~10x array in a Dataset whose coordinates span the full globe
coords_10x = {
    'time': np.arange(time_steps),
    'lat': np.linspace(-90, 90, y10),
    'lon': np.linspace(-180, 180, x10),
}
ds_10x = xr.Dataset({'data': (['time', 'lat', 'lon'], data_10x)}, coords=coords_10x)

# First copy: written as-is, without requesting any explicit chunking
store_10x = f'{fake_data_dir}/store_{y10}x{x10}.zarr'
ds_10x.to_zarr(store_10x, mode='w')

# Second copy ("nochunks"): one chunk that covers the entire lat/lon grid
chunks = {'time': 1, 'lat': y10, 'lon': x10}
ds_10x_nochunks = ds_10x.chunk(chunks)
ds_10x_nochunks.to_zarr(f'{fake_data_dir}/store_{y10}x{x10}_nochunks.zarr', mode='w')

<xarray.backends.zarr.ZarrStore at 0x136ad2cf0>

In [4]:
# Wrap the ~100x array in a Dataset whose coordinates span the full globe
coords_100x = {
    'time': np.arange(time_steps),
    'lat': np.linspace(-90, 90, y100),
    'lon': np.linspace(-180, 180, x100),
}
ds_100x = xr.Dataset({'data': (['time', 'lat', 'lon'], data_100x)}, coords=coords_100x)

# First copy: written as-is, without requesting any explicit chunking
store_100x = f'{fake_data_dir}/store_{y100}x{x100}.zarr'
ds_100x.to_zarr(store_100x, mode='w')

# Second copy ("nochunks"): one chunk that covers the entire lat/lon grid
chunks = {'time': 1, 'lat': y100, 'lon': x100}
ds_100x_nochunks = ds_100x.chunk(chunks)
ds_100x_nochunks.to_zarr(f'{fake_data_dir}/store_{y100}x{x100}_nochunks.zarr', mode='w')

<xarray.backends.zarr.ZarrStore at 0x136ad2f90>

In [5]:
# Wrap the ~1000x array in a Dataset whose coordinates span the full globe
coords_1000x = {
    'time': np.arange(time_steps),
    'lat': np.linspace(-90, 90, y1000),
    'lon': np.linspace(-180, 180, x1000),
}
ds_1000x = xr.Dataset({'data': (['time', 'lat', 'lon'], data_1000x)}, coords=coords_1000x)

# First copy: written as-is, without requesting any explicit chunking
store_1000x = f'{fake_data_dir}/store_{y1000}x{x1000}.zarr'
ds_1000x.to_zarr(store_1000x, mode='w')

# Second copy ("nochunks"): one chunk that covers the entire lat/lon grid
chunks = {'time': 1, 'lat': y1000, 'lon': x1000}
ds_1000x_nochunks = ds_1000x.chunk(chunks)
ds_1000x_nochunks.to_zarr(f'{fake_data_dir}/store_{y1000}x{x1000}_nochunks.zarr', mode='w')

<xarray.backends.zarr.ZarrStore at 0x136b04270>

In [6]:
# Reopen the single-chunk ~1000x store and display its summary as a sanity check
nochunks_1000x_path = f'{fake_data_dir}/store_{y1000}x{x1000}_nochunks.zarr'
xr.open_zarr(nochunks_1000x_path)

Unnamed: 0,Array,Chunk
Bytes,493.89 MiB,493.89 MiB
Shape,"(1, 5689, 11379)","(1, 5689, 11379)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 493.89 MiB 493.89 MiB Shape (1, 5689, 11379) (1, 5689, 11379) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",11379  5689  1,

Unnamed: 0,Array,Chunk
Bytes,493.89 MiB,493.89 MiB
Shape,"(1, 5689, 11379)","(1, 5689, 11379)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


# Create a data store with the same number of pixels as the largest data store, but covering only a tenth of the spatial extent (i.e. 10x the resolution)

In [7]:
# Area, in square degrees, of one tenth of the full 180 x 360 degree extent
planet_fraction = 1 / 10
planet_area = 180 * 360
total_degrees = planet_fraction * planet_area

# Solve for a 2:1 (lon:lat) rectangle with that area:
#   lat * lon = total_degrees  and  lon = 2 * lat
#   =>  2 * lat**2 = total_degrees
y = np.sqrt(total_degrees / 2)  # latitude extent in degrees
x = 2 * y                       # longitude extent in degrees
(x, y)

(113.84199576606166, 56.92099788303083)

In [8]:
# Reuse the largest random array, but place it on coordinates that only span
# 0..y degrees of latitude and 0..x degrees of longitude
coords_onetenth = {
    'time': np.arange(time_steps),
    'lat': np.linspace(0, y, y1000),
    'lon': np.linspace(0, x, x1000),
}
ds_onetenth = xr.Dataset({'data': (['time', 'lat', 'lon'], data_1000x)}, coords=coords_onetenth)

# First copy: written as-is, without requesting any explicit chunking
store_onetenth = f'{fake_data_dir}/store_one_tenth_planet.zarr'
ds_onetenth.to_zarr(store_onetenth, mode='w')

# Second copy ("nochunks"): one chunk that covers the entire lat/lon grid
chunks = {'time': 1, 'lat': y1000, 'lon': x1000}
ds_onetenth_nochunks = ds_onetenth.chunk(chunks)
ds_onetenth_nochunks.to_zarr(f'{fake_data_dir}/store_one_tenth_planet_nochunks.zarr', mode='w')

<xarray.backends.zarr.ZarrStore at 0x13b2f27b0>

In [9]:
# Reopen the single-chunk one-tenth-planet store and display its summary as a sanity check
nochunks_onetenth_path = f'{fake_data_dir}/store_one_tenth_planet_nochunks.zarr/'
xr.open_zarr(nochunks_onetenth_path)

Unnamed: 0,Array,Chunk
Bytes,493.89 MiB,493.89 MiB
Shape,"(1, 5689, 11379)","(1, 5689, 11379)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 493.89 MiB 493.89 MiB Shape (1, 5689, 11379) (1, 5689, 11379) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",11379  5689  1,

Unnamed: 0,Array,Chunk
Bytes,493.89 MiB,493.89 MiB
Shape,"(1, 5689, 11379)","(1, 5689, 11379)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [10]:
# Name of the S3 bucket referenced by the upload command at the end of the notebook
bucket = 'nasa-eodc-data-store'
# NOTE(review): presumably obtains AWS credentials and exports them for later
# CLI/SDK calls — confirm against the eodc_hub_role module. The return value is
# kept but not used elsewhere in the visible notebook.
credentials = eodc_hub_role.fetch_and_set_credentials()

In [12]:
%%capture
# Upload step, intentionally left disabled: uncommenting the line below would
# recursively copy the local fake_data/ directory to s3://{bucket}/fake_data/
# ({bucket} is interpolated from the Python variable `bucket` by IPython).
# Note the local path is hard-coded here rather than taken from fake_data_dir.
# !aws s3 cp --recursive fake_data/ s3://{bucket}/fake_data/