# Insert the zarr dataset for each time chunk into the whole (combined) zarr store

In [None]:
# Open the first and last zarr files to determine the overall date range
ds0 = xr.open_dataset(zlist[0], engine='zarr', chunks={})
ds1 = xr.open_dataset(zlist[-1], engine='zarr', chunks={})

# TODO: the freq argument must reflect the time interval (e.g. hourly, daily)
dates = pd.date_range(start=ds0.time[0].values, end=ds1.time[-1].values, freq='1h')

# Have to drop the constant variables (i.e. variables having no time dimension)
# before any write that targets a 'time' region of the output store.
drop_vars = ['BF', 'BH', 'C1F', 'C1H', 'C2F', 'C2H', 'C3F', 'C3H', 'C4F', 'C4H',
             'CF1', 'CF2', 'CF3', 'CFN', 'CFN1', 'CLAT', 'COSALPHA', 'DN', 'DNW',
             'DZS', 'E', 'F', 'FNM', 'FNP', 'HGT', 'ISLTYP', 'IVGTYP', 'LAKEMASK',
             'LANDMASK', 'LU_INDEX', 'MAPFAC_M', 'MAPFAC_MX', 'MAPFAC_MY',
             'MAPFAC_U', 'MAPFAC_UX', 'MAPFAC_UY', 'MAPFAC_V', 'MAPFAC_VX', 'MAPFAC_VY',
             'MAX_MSTFX', 'MAX_MSTFY', 'MF_VX_INV', 'MUB', 'P00', 'PB', 'PHB',
             'P_STRAT', 'P_TOP', 'RDN', 'RDNW', 'RDX', 'RDY', 'SHDMAX', 'SHDMIN',
             'SINALPHA', 'SNOALB', 'T00', 'TISO', 'TLP', 'TLP_STRAT', 'VAR',
             'VAR_SSO', 'XLAND', 'lat', 'lat_u', 'lat_v', 'lon', 'lon_u', 'lon_v',
             'ZETATOP', 'ZNU', 'ZNW', 'ZS']

# Some of the listed names may be absent from the input; keep only those that
# exist so that selecting the constants below cannot raise a KeyError.
const_vars = [name for name in drop_vars if name in ds0.variables]

# Time-dependent part of the first chunk (missing names are silently ignored)
source_dataset = ds0.drop_vars(drop_vars, errors='ignore')

# Zero-filled template with the variables of the first chunk, expanded along
# 'time' to the full length of the date range and chunked by time_cnk.
template = (
    source_dataset.chunk()
    .pipe(xr.zeros_like)
    .isel(time=0, drop=True)
    .expand_dims(time=len(dates))
)
template['time'] = dates
template = template.chunk({'time': time_cnk})

# Writes no data (yet)
template.to_zarr(zarr_whole, compute=False, consolidated=True, mode='w')

# Writes the data of the first time chunk. Reuses source_dataset so that the
# same tolerant drop (errors='ignore') applies here as for the template.
source_dataset.to_zarr(zarr_whole, region={'time': slice(0, time_cnk)})

# Add the wrf constants
if const_vars:
    ds0[const_vars].to_zarr(zarr_whole, mode='a')
print(f'  Index {first_idx} (pre-create output): {time.time() - t1_proc:0.3f} s')

# Insert every remaining time chunk into its slot of the pre-created store
for i in range(first_idx, last_idx):
    if i == 0:
        # Chunk 0 was already written when the output store was pre-created
        continue
    t1 = time.time()

    # Chunk i occupies time indices [i * time_cnk, (i + 1) * time_cnk); the
    # upper bound is clipped to the length of the full time axis so that a
    # shorter final chunk still maps onto a valid region.
    start = i * time_cnk
    stop = min((i + 1) * time_cnk, len(dates))

    dsi = xr.open_dataset(zlist[i], engine='zarr', chunks={})
    # Same treatment as for chunk 0: variables without a time dimension must
    # not be part of a write that targets a 'time' region.
    dsi = dsi.drop_vars(drop_vars, errors='ignore')
    dsi.to_zarr(zarr_whole, region={'time': slice(start, stop)})
    print(f'  Index {i}: {time.time() - t1:0.3f} s')

# Release the client once all chunks have been written
client.close()

# When dask was configured to spill to /dev/shm, remove its worker scratch
# directory; a directory that is already gone is not an error.
if dask.config.get("temporary-directory") == '/dev/shm':
    try:
        fs.rm('/dev/shm/dask-worker-space', recursive=True)
    except FileNotFoundError:
        pass

print(f'Total time: {time.time() - t1_proc:0.3f} s')
