## Convert all NCAR netCDF timestep files to 512³ Zarr arrays, with grouped velocity components and (64, 64, 64) chunks, distributed round-robin across FileDB nodes (spatially, in Z-order)

<font color="red">Older Dask versions raise this error: https://github.com/dask/distributed/issues/3955</font>

In [3]:
# --- Input ------------------------------------------------------------------
# Folder holding the raw NCAR netCDF timestep files.
raw_ncar_folder_path = "/home/idies/workspace/turb/data02_02/ariel-6-timestep-ncar-netCDF"
# Ned's new High-rate fixed-dt has only 5 timesteps
timestep_range = range(0, 5)
# timestep_nr = 1

# --- Output layout ----------------------------------------------------------
# Side length of the cubes the full volume is split into.
desired_cube_side = 512
# Edge length of one Zarr chunk along each spatial axis.
chunk_size = 64
# Prefix used for both the per-node destination folders and the Zarr groups.
dest_folder_name = 'sabl2048a'

# --- Execution --------------------------------------------------------------
# Dask has issues on SciServer Compute, so writes are done eagerly by default.
use_dask = False

In [None]:
!pip install "dask[complete]"
!pip install "xarray[complete]"
!pip install morton-py

In [4]:
# Switch to the folder containing the Zarr-writing code.
# NOTE(review): presumably this is what makes the local `write_tools` module
# importable in the next cell — confirm before moving or removing this cell.
%cd /home/idies/workspace/Storage/ariel4/persistent/ncar-zarr-code/zarr_writing

/home/idies/workspace/Storage/ariel4/persistent/ncar-zarr-code/zarr_writing


In [5]:
import xarray as xr
import write_tools
import dask

### Get target Folder list

In [None]:
# Candidate destination folders, one per FileDB node (as reported by write_tools).
folders = write_tools.list_fileDB_folders()

# Avoiding 7-2 and 9-2 - they're too full as of May 2023.
# list.remove raises ValueError if a path is not in the list.
for full_node in (
    "/home/idies/workspace/turb/data09_02/zarr/",
    "/home/idies/workspace/turb/data07_02/zarr/",
):
    folders.remove(full_node)

# Extend every remaining folder, in place, with this dataset's production
# sub-directory "<dest_folder_name>_<NN>_prod/", where NN is the 1-based,
# zero-padded position of the folder in the list.
folders[:] = [
    node_root + dest_folder_name + "_" + str(position).zfill(2) + "_prod/"
    for position, node_root in enumerate(folders, start=1)
]


# Create top-level dirs

# for folder_path in folders:
#     os.makedirs(folder_path, exist_ok=False)

<font color="orange">Don't delete the CD cell!</font>

In [4]:
# Switch the working directory to the raw NCAR netCDF folder.
# NOTE(review): the conversion loop opens its inputs by absolute path, so it is
# not obvious from this notebook why the working directory matters — keep this
# cell unless that has been verified.
%cd /home/idies/workspace/turb/data02_02/ariel-6-timestep-ncar-netCDF

/home/idies/workspace/turb/data02_02/ariel-6-timestep-ncar-netCDF


In [9]:
# Scratch check: list.reverse() reverses the list in place and returns None,
# so the list itself has to be displayed afterwards.
a = list("abc")
a.reverse()
a

['c', 'b', 'a']

In [5]:
# Convert every timestep: open the netCDF file, group the velocity components,
# split the volume into cubes of side `desired_cube_side`, and write each cube
# as its own Zarr group, spread across the FileDB destination folders.

# Zarr chunking per variable, with compression disabled.  It does not depend
# on the timestep, so it is built once.  The last chunk dimension is 3 for the
# grouped velocity and 1 for each of the other variables.
spatial_chunks = (chunk_size, chunk_size, chunk_size)
encoding = {
    "velocity": dict(chunks=spatial_chunks + (3,), compressor=None),
    "pressure": dict(chunks=spatial_chunks + (1,), compressor=None),
    "temperature": dict(chunks=spatial_chunks + (1,), compressor=None),
    "energy": dict(chunks=spatial_chunks + (1,), compressor=None),
}

for timestep_nr in timestep_range:
    # NOTE(review): the "00" prefix is fixed, so only single-digit timesteps
    # yield a three-digit file number.
    source_path = f"{raw_ncar_folder_path}/jhd.00{timestep_nr}.nc"

    # The context manager closes the netCDF file once this timestep has been
    # fully written, instead of leaving one open handle per timestep.
    with xr.open_dataset(source_path) as data_xr:
        # Group 3 velocity components together
        # This fails with Dask bcs. of write permission error on SciServer Job,
        # hence use_dask=False here regardless of the notebook-level setting.
        merged_velocity = write_tools.merge_velocities(
            data_xr, chunk_size_base=chunk_size, use_dask=False
        )

        # Unabbreviate 'e', 'p', 't' variable names
        merged_velocity = merged_velocity.rename(
            {'e': 'energy', 't': 'temperature', 'p': 'pressure'}
        )

        # Split 2048^3 into smaller 512^3 arrays
        dims = list(data_xr.dims)
        dims.reverse()  # I want (nnz, nny, nnx)
        cubes = write_tools.split_zarr_group(merged_velocity, desired_cube_side, dims)

        # Morton (Z-order) index of every cube position.  The write loop below
        # indexes z_order[i][j][k], so it has to be defined here; it was left
        # commented out, which made this cell fail with a NameError.
        # TODO(review): the original note said this was to be replaced by
        # Ryan's node coloring technique, but that assignment was never
        # written — swap it in here once it exists.
        z_order = write_tools.morton_order_cube(cube_side=len(cubes))

        # Distribute the cubes across FileDB: round-robin over the destination
        # folders by Morton index.
        tasks = []
        for i, plane in enumerate(cubes):
            for j, row in enumerate(plane):
                for k, current_array in enumerate(row):
                    morton_index = z_order[i][j][k]
                    filedb_index = morton_index % len(folders)

                    # <folder><dest_folder_name><NN>_<TTT>.zarr, where NN is the
                    # 1-based Morton index and TTT the zero-padded timestep.
                    dest_groupname = (
                        folders[filedb_index]
                        + dest_folder_name
                        + str(morton_index + 1).zfill(2)
                        + "_"
                        + str(timestep_nr).zfill(3)
                        + ".zarr"
                    )
                    if use_dask:
                        tasks.append(
                            write_tools.write_to_disk_dask(dest_groupname, current_array, encoding)
                        )
                    else:
                        write_tools.write_to_disk(dest_groupname, current_array, encoding)

        # Run the deferred writes while the source dataset is still open.
        if use_dask:
            dask.compute(*tasks)
