# Test 1: Different chunk size, no spatial chunking

In [3]:
%load_ext autoreload
%autoreload
import pandas as pd
import hvplot
import morecantile
import os
pd.options.plotting.backend = 'holoviews'
import sys
sys.path.append('..')
from profiler.main import Timer
import zarr_reader
import zarr_helpers
from titiler_xarray.titiler.xarray.reader import ZarrReader

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload




## Define tiles to test

In [4]:
# Build one (x, y, z) tile per zoom level; every tile contains the same point.
tms = morecantile.tms.get("WebMercatorQuad")
# For reference: zoom 10 is the level at which you can see large roads, 15 is buildings.
zooms = range(12)
# tms.tile is called as (57, 28, zoom) -- presumably (lng, lat, zoom); confirm against morecantile docs.
xyz_tiles = [
    (found.x, found.y, found.z)
    for found in (tms.tile(57, 28, level) for level in zooms)
]

xyz_tiles

[(0, 0, 0),
 (1, 0, 1),
 (2, 1, 2),
 (5, 3, 3),
 (10, 6, 4),
 (21, 13, 5),
 (42, 26, 6),
 (84, 53, 7),
 (168, 107, 8),
 (337, 214, 9),
 (674, 428, 10),
 (1348, 857, 11)]

## Define data location

The data is read from a local directory for now.

In [5]:
# Fake data directory
fake_data_dir = 'fake_data_no_chunks'

# List all items in the directory
items = os.listdir(fake_data_dir)

# Dataset specs keyed by store name (the bare directory entry name)
datastore_specs = {}
variable = 'data'
# Loop through each item and collect its specs if it can be opened as a Zarr store
for item in items:
    item_path = os.path.join(fake_data_dir, item)
    # Zarr stores are directories, so anything else is skipped
    if not os.path.isdir(item_path):
        continue
    try:
        ds = zarr_helpers.open_dataset(item_path)
        # os.listdir already yields the bare entry name. Splitting the joined
        # path on '/' only recovers it on platforms whose separator is '/',
        # so the entry name is used directly as the key.
        dataset_name = item
        ds_specs = zarr_helpers.get_dataset_specs(dataset_name, item_path, variable, ds)
        datastore_specs[dataset_name] = ds_specs
    except Exception as e:
        # Best effort: report the store that could not be processed and carry on
        print(f"Could not open {item} as a Zarr store. Error: {e}")

## Inspect the dataset specs

In [6]:
# Columns left out of the overview table below
hidden_columns = ['collection_name', 'variable', 'source', 'dtype', 'number_coord_chunks', 'compression']

# One row per store, smallest chunks first
df = pd.DataFrame.from_dict(datastore_specs, orient='index')
df.drop(columns=hidden_columns).sort_values(by='chunk_size_mb')

Unnamed: 0,shape,lat_resolution,lon_resolution,chunk_size_mb,chunks
store_lat_180x_lon_360.zarr,"{'time': 1, 'lat': 180, 'lon': 360}",1.005587,1.002786,0.494385,"{'time': 1, 'lat': 180, 'lon': 360}"
store_lat_254x_lon_509.zarr,"{'time': 1, 'lat': 254, 'lon': 509}",0.711462,0.708661,0.986374,"{'time': 1, 'lat': 254, 'lon': 509}"
store_lat_359x_lon_719.zarr,"{'time': 1, 'lat': 359, 'lon': 719}",0.502793,0.501393,1.969307,"{'time': 1, 'lat': 359, 'lon': 719}"
store_lat_508x_lon_1016.zarr,"{'time': 1, 'lat': 508, 'lon': 1016}",0.35503,0.35468,3.937744,"{'time': 1, 'lat': 508, 'lon': 1016}"
store_lat_718x_lon_1437.zarr,"{'time': 1, 'lat': 718, 'lon': 1437}",0.251046,0.250696,7.87175,"{'time': 1, 'lat': 718, 'lon': 1437}"
store_lat_1016x_lon_2032.zarr,"{'time': 1, 'lat': 1016, 'lon': 2032}",0.17734,0.177253,15.750977,"{'time': 1, 'lat': 1016, 'lon': 2032}"
store_lat_1437x_lon_2874.zarr,"{'time': 1, 'lat': 1437, 'lon': 2874}",0.125348,0.125305,31.508926,"{'time': 1, 'lat': 1437, 'lon': 2874}"
store_lat_2032x_lon_4064.zarr,"{'time': 1, 'lat': 2032, 'lon': 4064}",0.088626,0.088604,63.003906,"{'time': 1, 'lat': 2032, 'lon': 4064}"
store_lat_2873x_lon_5747.zarr,"{'time': 1, 'lat': 2873, 'lon': 5747}",0.062674,0.062652,125.969933,"{'time': 1, 'lat': 2873, 'lon': 5747}"
store_lat_4063x_lon_8127.zarr,"{'time': 1, 'lat': 4063, 'lon': 8127}",0.044313,0.044302,251.922615,"{'time': 1, 'lat': 4063, 'lon': 8127}"


In [7]:
## Define a function for timing tile generation

In [10]:
def time_tile_generation(tileset: tuple, source: str, variable: str):
    """Time the generation of a single 256-pixel tile from a store.

    Args:
        tileset: Three values unpacked as (x, y, z) and passed to the reader's
            ``tile`` method.
        source: Store location handed to ``ZarrReader``.
        variable: Variable name handed to ``ZarrReader``.

    Returns:
        ``Timer.elapsed`` multiplied by 1000 and rounded to two decimals
        (milliseconds, assuming ``elapsed`` is reported in seconds). The timed
        region covers both opening the reader and the ``tile`` call.
    """
    tile_x, tile_y, tile_z = tileset
    reader_options = {'variable': variable, 'reference': False}
    with Timer() as stopwatch, ZarrReader(source, **reader_options) as reader:
        # Only the elapsed time is of interest; the rendered tile is not used.
        _rendered = reader.tile(tile_x, tile_y, tile_z, tilesize=256)
    return round(stopwatch.elapsed * 1000, 2)

In [18]:
# Time every test tile against every store that has an entry in datastore_specs.
# The raw directory listing (`items`) may also contain plain files or stores
# that failed to open; indexing datastore_specs with those raises a KeyError.
for item in datastore_specs:
    store_path = f'{fake_data_dir}/{item}'
    # Timings keyed by zoom level (third element of each tile tuple)
    datastore_specs[item]['tile_times'] = {}
    for xyz_tile in xyz_tiles:
        datastore_specs[item]['tile_times'][xyz_tile[2]] = time_tile_generation(xyz_tile, store_path, variable)

In [19]:
# Keep only what is needed to relate chunk size to tile timings
timing_columns = ['chunk_size_mb', 'tile_times']
df = pd.DataFrame.from_dict(datastore_specs, orient='index', columns=timing_columns)
df = df.sort_values(by='chunk_size_mb')

df

Unnamed: 0,chunk_size_mb,tile_times
store_lat_180x_lon_360.zarr,0.494385,"{0: 24.6, 1: 25.21, 2: 24.8, 3: 24.41, 4: 24.2..."
store_lat_254x_lon_509.zarr,0.986374,"{0: 34.75, 1: 30.21, 2: 34.9, 3: 26.61, 4: 25...."
store_lat_359x_lon_719.zarr,1.969307,"{0: 45.2, 1: 37.63, 2: 37.43, 3: 33.94, 4: 37...."
store_lat_508x_lon_1016.zarr,3.937744,"{0: 39.84, 1: 31.94, 2: 30.12, 3: 27.95, 4: 27..."
store_lat_718x_lon_1437.zarr,7.87175,"{0: 44.68, 1: 35.37, 2: 34.56, 3: 34.95, 4: 34..."
store_lat_1016x_lon_2032.zarr,15.750977,"{0: 62.98, 1: 48.23, 2: 42.31, 3: 43.89, 4: 41..."
store_lat_1437x_lon_2874.zarr,31.508926,"{0: 114.78, 1: 64.46, 2: 56.62, 3: 51.93, 4: 5..."
store_lat_2032x_lon_4064.zarr,63.003906,"{0: 182.67, 1: 101.6, 2: 79.36, 3: 92.3, 4: 71..."
store_lat_2873x_lon_5747.zarr,125.969933,"{0: 331.35, 1: 177.58, 2: 112.15, 3: 122.12, 4..."
store_lat_4063x_lon_8127.zarr,251.922615,"{0: 1001.63, 1: 350.39, 2: 302.65, 3: 259.69, ..."


In [36]:
# Plot tile time as a function of chunk size, with one series per zoom level.

# Expand the per-zoom timing dicts into one column per zoom level.
for tile in xyz_tiles:
    zoom = tile[2]
    df[zoom] = df['tile_times'].apply(lambda times: times[zoom])

# Series names come from the tiles' actual zoom levels. Using
# range(len(xyz_tiles)) only matches the columns created above while the zoom
# levels start at 0 and are contiguous.
zooms = [str(tile[2]) for tile in xyz_tiles]
# The zoom columns have int labels while `zooms` holds strings, so the labels
# are stringified on the frame handed to the plot; `df` itself is left as is.
df.rename(columns=str).plot.scatter(x='chunk_size_mb', y=zooms, value_label='Time in ms', group_label='zoom')