In [1]:
import hdf5plugin
import h5py
import glob
import fitsio
import numpy as np

In [2]:
cut_files = glob.glob("mdet_data_v6cuts/*.fits")

In [3]:
d = fitsio.read(cut_files[0])
cols = d.dtype.names

In [7]:
from esutil.pbar import PBar

ofile_hdf5 = "metadetect_cutsv6.h5"

with h5py.File(ofile_hdf5, "w") as fp:
    mdet_grp = fp.create_group("mdet")
    for mdet_step in PBar(
        ["noshear", "1p", "1m", "2p", "2m"],
        desc="making HDF5 file",

    ):
        grp = mdet_grp.create_group(mdet_step)
        for col in d.dtype.names:
            if col == "mdet_step":
                continue
            dt = d[col].dtype
            if col.endswith("_nodered"):
                dt = np.dtype(">f4")
            elif col == "tilename":
                dt = np.dtype("<S12")
            elif col == "mdet_step":
                dt = np.dtype("<S7")

            grp.create_dataset(
                col,
                dtype=dt,
                shape=(160_000_000,),
                maxshape=(None,),
                # **hdf5plugin.LZ4(),
            )


making HDF5 file: |--------------------| 0/5   0% [elapsed: 00:00 left: ?]

making HDF5 file: |####################| 5/5 100% [elapsed: 00:00 left: 00:00]


In [8]:
def _process_file(fname, loc, fp):
    d = fitsio.read(fname)
    for mdet_step in ["noshear", "1p", "1m", "2p", "2m"]:
        msk = d["mdet_step"] == mdet_step
        d_mdet = d[msk]
        num = d_mdet.shape[0]
        for col in d.dtype.names:
            if col == "mdet_step":
                continue
            fp["mdet"][mdet_step][col][loc:loc + num] = d_mdet[col].astype(fp["mdet"][mdet_step][col].dtype)
    return loc + num

with h5py.File(ofile_hdf5, "a") as fp:
    num = 0
    for i, fname in PBar(
        enumerate(cut_files),
        desc="processing files",
        total=len(cut_files),
    ):
        num = _process_file(fname, num, fp)
        if i % 100 == 0:
            fp.flush()

with h5py.File(ofile_hdf5, "a") as fp:
    for grp in fp.values():
        for mdet_grp in grp.values():
            for dset in mdet_grp.values():
                dset.resize(num, axis=0)

processing files: |####################| 9928/9928 100% [elapsed: 1:11:14 left: 00:00]


In [9]:
print(num)

151630637


In [10]:
# blind things
import os
import io
import sys
from des_y6utils.shear_masking import generate_shear_masking_factor
from ngmix.shape import g1g2_to_eta1eta2, eta1eta2_to_g1g2
import contextlib

COLS_TO_KEEP = ["pgauss", "gauss"]

with open(os.path.expanduser("~/.test_des_blinding_v7"), "r") as fp:
        passphrase = fp.read().strip()

fac = generate_shear_masking_factor(passphrase)

bofile_hdf5 = ofile_hdf5.rsplit(".", maxsplit=1)[0] + "_blinded.h5"

os.system(f"rm -f {bofile_hdf5}")
os.system(f"cp {ofile_hdf5} {bofile_hdf5}")
buff = io.StringIO()
with contextlib.redirect_stderr(sys.stdout):
    with contextlib.redirect_stdout(buff):
        try:
            with h5py.File(bofile_hdf5, "a") as fp:
                for pre in COLS_TO_KEEP:
                    e1o, e2o = (
                        fp["mdet"]["noshear"][pre + "_g_1"][:].copy(),
                        fp["mdet"]["noshear"][pre + "_g_2"][:].copy(),
                    )
                    if pre not in ["gauss"]:
                        e1 = e1o * fac
                        e2 = e2o * fac
                    else:
                        # use eta due to bounds
                        eta1o, eta2o = g1g2_to_eta1eta2(e1o, e2o)
                        eta1 = eta1o * fac
                        eta2 = eta2o * fac
                        e1, e2 = eta1eta2_to_g1g2(eta1, eta2)

                    fp["mdet"]["noshear"][pre + "_g_1"][:] = e1
                    fp["mdet"]["noshear"][pre + "_g_2"][:] = e2

                    fp.flush()

                    assert not np.array_equal(fp["mdet"]["noshear"][pre + "_g_1"][:], e1o)
                    assert not np.array_equal(fp["mdet"]["noshear"][pre + "_g_2"][:], e2o)

        except Exception:
            failed = True
            print("blinding error")