In [None]:
# Switch the working directory to the zarr_writing code folder.
# NOTE(review): presumably needed so that `from utils import write_tools` resolves — confirm.
%cd /home/idies/workspace/Storage/ariel4/persistent/ncar-zarr-code/zarr_writing

In [1]:
import os
from utils import write_tools
import zarr
import dask
from dask.distributed import Client


# --- Sizes ---
array_cube_side = 2048
desired_cube_side = 512
chunk_size = 64

# --- Input location and output naming ---
raw_ncar_folder_path = '/home/idies/workspace/turb/data02_02/ncar-high-rate-fixed-dt'
dest_folder_name = "sabl2048b"  # the trailing "b" marks the high-rate data
write_type = "prod"  # use "back" for a backup write
timestep_nr = 3

# --- Parallelism ---
use_dask = True
n_dask_workers = 16  # worker count for Dask rechunking
dask_local_dir = '/home/idies/workspace/turb/data02_02'

# Note (Aug 2023): the kernel dies when run with SciServer "large jobs"
# resources; the suspected cause is running out of memory.
num_threads = 8  # thread count for writing to FileDB

In [None]:
# Switch the working directory to the raw NCAR high-rate data folder
# (the same path that raw_ncar_folder_path holds).
# NOTE(review): no cell visible here uses a relative path — confirm this is still needed.
%cd /home/idies/workspace/turb/data02_02/ncar-high-rate-fixed-dt

In [2]:
# Start from the folder list reported by write_tools.
folders = write_tools.list_fileDB_folders()

# Skip 7-2 and 9-2: both were too full as of May 2023.
for too_full in (
    "/home/idies/workspace/turb/data09_02/zarr/",
    "/home/idies/workspace/turb/data07_02/zarr/",
):
    folders.remove(too_full)

# Extend every remaining entry in place with "<dest_folder_name>_<NN>_<write_type>/",
# where NN is the entry's 1-based position in the filtered list, zero-padded to 2 digits.
for position, base_folder in enumerate(folders, start=1):
    suffix = dest_folder_name + "_" + str(position).zfill(2) + "_" + write_type + "/"
    folders[position - 1] = base_folder + suffix

In [3]:
smaller_size = 512

# Where chunks start and end. Needed for Mike's code to find correct chunks to access.
# One entry per sub-cube: three [start, end) pairs, with the first axis varying
# slowest and the last axis varying fastest (4 x 4 x 4 = 64 entries).
range_list = [
    [
        [first * smaller_size, (first + 1) * smaller_size],
        [second * smaller_size, (second + 1) * smaller_size],
        [third * smaller_size, (third + 1) * smaller_size],
    ]
    for first in range(4)
    for second in range(4)
    for third in range(4)
]

# 4 x 4 grid of empty lists, exactly as the nested loops used to build it.
# NOTE(review): no cell visible here reads outer_dim — confirm whether it is still needed.
outer_dim = [[[] for _ in range(4)] for _ in range(4)]

In [4]:
# Mapping that is searched by value further down to recover a chunk's name
# from its (min, max) Morton pair.
chunk_morton_mapping = write_tools.get_chunk_morton_mapping(range_list, dest_folder_name)

# Node assignment for a grid of 4, flattened into a single list
# (presumably one node number per chunk — confirm against write_tools).
node_layout = write_tools.node_assignment(4)
flattened_node_assgn = write_tools.flatten_3d_list(node_layout)

In [5]:
# Build one destination .zarr path per entry of range_list, in the same order.
dests = []

for chunk_ranges in range_list:
    # Inclusive corner coordinates: each pair's end is exclusive, hence the -1.
    lows = [bounds[0] for bounds in chunk_ranges]
    highs = [bounds[1] - 1 for bounds in chunk_ranges]

    # morton_pack receives the coordinates in reversed axis order (index 2, 1, 0).
    morton_span = (
        write_tools.morton_pack(array_cube_side, lows[2], lows[1], lows[0]),
        write_tools.morton_pack(array_cube_side, highs[2], highs[1], highs[0]),
    )

    chunk_label = write_tools.search_dict_by_value(chunk_morton_mapping, morton_span)

    # The last two characters of the chunk name carry its 1-based number.
    chunk_number = int(chunk_label[-2:].lstrip('0'))

    # Node numbers are 1-based as well; shift to index into `folders`.
    node_slot = flattened_node_assgn[chunk_number - 1] - 1

    zarr_name = dest_folder_name + str(chunk_number).zfill(2) + "_" + str(timestep_nr).zfill(3) + ".zarr"
    dests.append(os.path.join(folders[node_slot], zarr_name))

In [23]:
# Sanity check: show the first ten destination paths.
dests[:10]

['/home/idies/workspace/turb/data01_01/zarr/sabl2048b_01_prod/sabl2048b01_003.zarr',
 '/home/idies/workspace/turb/data02_01/zarr/sabl2048b_02_prod/sabl2048b02_003.zarr',
 '/home/idies/workspace/turb/data09_01/zarr/sabl2048b_09_prod/sabl2048b09_003.zarr',
 '/home/idies/workspace/turb/data10_01/zarr/sabl2048b_10_prod/sabl2048b10_003.zarr',
 '/home/idies/workspace/turb/data03_01/zarr/sabl2048b_03_prod/sabl2048b03_003.zarr',
 '/home/idies/workspace/turb/data04_01/zarr/sabl2048b_04_prod/sabl2048b04_003.zarr',
 '/home/idies/workspace/turb/data11_01/zarr/sabl2048b_11_prod/sabl2048b11_003.zarr',
 '/home/idies/workspace/turb/data12_01/zarr/sabl2048b_12_prod/sabl2048b12_003.zarr',
 '/home/idies/workspace/turb/data05_02/zarr/sabl2048b_17_prod/sabl2048b17_003.zarr',
 '/home/idies/workspace/turb/data06_02/zarr/sabl2048b_18_prod/sabl2048b18_003.zarr']

In [21]:
# Array names are read from the first destination store only; every name
# except 'velocity' is kept for expansion.
# NOTE: `vars` shadows the Python builtin, but expand_existing_dims reads this
# global by name, so the name is kept.
first_group = zarr.open_group(dests[0])
vars = [array_name for array_name in first_group.array_keys()]
vars.remove('velocity')


def expand_existing_dims(zarr_path, var_names=None):
    """Append a trailing length-1 axis to every 3-D array in a zarr group.

    Each selected array that is currently 3-D is read into memory, reshaped
    from (a, b, c) to (a, b, c, 1), deleted from the group, and re-created
    under the same name with chunks (64, 64, 64, 1) and no compressor.
    Arrays that are not 3-D are left untouched, so re-running is a no-op.

    Parameters
    ----------
    zarr_path : str
        Path of the zarr group to modify in place (opened with mode='a').
    var_names : iterable of str, optional
        Names of the arrays to process. Defaults to the notebook-level
        `vars` list, which is what the original one-argument call used.

    Notes
    -----
    The array is deleted before its replacement is written, so a failure
    between the two steps loses that array on disk; the reshaped data only
    exists in memory at that point.
    """
    zarr_group_512 = zarr.open_group(zarr_path, mode='a')

    if var_names is None:
        var_names = vars  # notebook-level list of array names

    for chosen_var in var_names:
        stored = zarr_group_512[chosen_var]

        # Check the shape before loading any data, so already-expanded
        # arrays are skipped without being read.
        if len(stored.shape) != 3:
            continue

        # The shape comes from the array itself; the group has no shape.
        reshaped_data = stored[...].reshape(stored.shape + (1,))

        # Delete and re-create on the group, not on the loaded data.
        del zarr_group_512[chosen_var]
        zarr_group_512.create_dataset(chosen_var, data=reshaped_data, chunks=(64, 64, 64, 1), compressor=None)
    

In [None]:
# Run expand_existing_dims once per destination store, in parallel across
# separate worker processes. The client is used as a context manager so that
# it is closed even if a task raises.
with Client(n_workers=n_dask_workers, local_directory=dask_local_dir, processes=True) as client:
    tasks = [dask.delayed(expand_existing_dims)(path) for path in dests]

    dask.compute(*tasks)