# Remove unused size normed frames from h5 files

This is useful for reducing h5 file sizes. It removes the size-normed frame datasets listed in `recon_keys` from each file, then copies the file so that the space freed by the deletion is actually reclaimed.

In [1]:
import time
from toolz import concat
from aging.organization.paths import FOLDERS

In [2]:
# Every `results_00.h5` found recursively under any of the configured folders, sorted.
depth_files = sorted(
    h5_path
    for folder in FOLDERS
    for h5_path in folder.glob('**/results_00.h5')
)

In [3]:
# Template for the per-file SLURM job. The `{file}` placeholder is filled in with
# `str.format`, so the template text must not contain any other curly braces.
script = '''#!/bin/env python
#SBATCH -c 1
#SBATCH -n 1
#SBATCH --mem=10G
#SBATCH -p short
#SBATCH -t 00:07:00
#SBATCH --output=/n/scratch/users/w/wg41/tmp/ontogeny/h5-compression-%j.out

# Delete the unused size-normed frame datasets from one h5 file, then replace the
# file with a fresh copy so that the freed space is actually reclaimed.

import h5py
from pathlib import Path
from aging.util import copy_h5_file, compare_h5_files

file = Path("{file}")
# Assigned before the try block so the name exists on every code path below.
new_file = file.with_suffix('.tmp')

recon_keys = [
    "win_size_norm_frames_v4",
    "win_size_norm_frames_v5",
    "win_size_norm_frames_v6",
]

try:
    with h5py.File(file, 'a') as h5f:
        for recon_key in recon_keys:
            if recon_key in h5f:
                del h5f[recon_key]
    # the h5 file needs to be copied because otherwise, deleting the key will not shrink the file.
    # see: https://stackoverflow.com/a/39451617
    copy_h5_file(file, new_file)
except OSError as err:
    # Do not leave a partial copy behind, and fail with a readable message
    # instead of silently continuing to the comparison step.
    if new_file.exists():
        new_file.unlink()
    raise SystemExit("Could not rewrite " + str(file) + ": " + str(err))

# Explicit check rather than an assert statement: asserts are stripped when Python
# runs with -O, and this check is what protects the original file from being
# replaced by a bad copy.
if not compare_h5_files(file, new_file):
    raise RuntimeError("Files are not the same")
new_file.rename(file)
'''

In [None]:
for file in depth_files[::-1]:
    new_script = script.format(file=file)
    with open('tmp.py', 'w') as f:
        f.write(new_script)
    !source activate ~/miniconda3/envs/aging && sbatch tmp.py
    time.sleep(0.25)

Submitted batch job 46268077
Submitted batch job 46268082
Submitted batch job 46268100
Submitted batch job 46268104
Submitted batch job 46268108
Submitted batch job 46268113
Submitted batch job 46268115
Submitted batch job 46268120
Submitted batch job 46268122
Submitted batch job 46268124
Submitted batch job 46268128
Submitted batch job 46268132
Submitted batch job 46268136
Submitted batch job 46268140
Submitted batch job 46268143
Submitted batch job 46268144
Submitted batch job 46268151
Submitted batch job 46268156
Submitted batch job 46268161
Submitted batch job 46268166
Submitted batch job 46268173
Submitted batch job 46268179
Submitted batch job 46268185
Submitted batch job 46268187
Submitted batch job 46268188
Submitted batch job 46268189
Submitted batch job 46268191
Submitted batch job 46268197
Submitted batch job 46268203
Submitted batch job 46268210
Submitted batch job 46268212
Submitted batch job 46268213
Submitted batch job 46268217
Submitted batch job 46268221
Submitted batc