In [None]:
import json

from zarr.util import NumberEncoder

# Remove variables from Zarr metadata file
This notebook removes the entries for selected variables from the `.zmetadata` file of a Zarr store and writes the edited metadata to a separate file.
The underlying data is not deleted; the removed variables simply no longer show up in the dataset.

Use `diff --ignore-all-space --suppress-common-lines -s -y <file1> <file2>` to compare the new metadata file with the original.

In [None]:
# Root directories: where edited metadata files are written, and where the Zarr stores live
base_json_out_dir = '<some_path>/wbeep/conus404_work/00_zarr_json_backups'
base_zarr_dir = '<some_path>/scratch/conus404'

# Which store to edit; one of: hourly, daily, monthly
interval = 'monthly'

# Metadata file to read from the selected store
src_filename = '/'.join((base_zarr_dir, f'conus404_{interval}.zarr', '.zmetadata'))

# Destination for the edited copy of the metadata
dst_filename = '/'.join((
    base_json_out_dir,
    '20231120_zmetadata_backups',
    f'20231120_c404_{interval}_derived_vars_removal',
    'zmetadata.new',
))

# Names of the variables whose metadata entries will be removed
remove_vars = [
    'E2',
    'ES2',
    'RH2',
    'SH2',
]

## Load the `.zmetadata` json file

In [None]:
# Read the whole metadata file as text and parse it into a Python object
with open(src_filename, mode='r') as src_hdl:
    data = json.loads(src_hdl.read())

## Remove variables

In [None]:
# Drop the metadata entries of every variable listed in `remove_vars`.
#
# Only keys that are actually present are deleted, so this cell can be re-run
# in the same kernel without raising KeyError, and a variable that has just one
# of its two entries is still cleaned up instead of aborting half-way through.
metadata = data['metadata']

for vv in remove_vars:
    # Each variable is expected under these two keys of the 'metadata' mapping
    present = [key for key in (f'{vv}/.zarray', f'{vv}/.zattrs') if key in metadata]

    if not present:
        # Nothing to delete: already removed earlier, or the name is misspelled
        print(f'Skipping {vv}: no entries found in metadata')
        continue

    print(f'Removing {vv}')
    for key in present:
        del metadata[key]

## Write JSON to a file

In [None]:
# Serialization settings: 4-space indent, sorted keys, ASCII-only output and
# explicit separators; NumberEncoder (imported from zarr.util) is the encoder class.
dump_opts = dict(
    indent=4,
    sort_keys=True,
    ensure_ascii=True,
    separators=(',', ': '),
    cls=NumberEncoder,
)

with open(dst_filename, mode='w') as dst_hdl:
    json.dump(data, dst_hdl, **dump_opts)

print(f'Updated zmetadata file written to: {dst_filename}')

In [None]:
# Optional inspection aid: uncomment the line below to pretty-print the edited metadata as a JSON string
# print(json.dumps(data, indent = 4, sort_keys=False))