In [1]:
import glob
import os
import shutil
import sys

import fitsio
import h5py
import numpy as np

In [2]:
# Path of the HDF5 file that the following cells create and then fill.
ofile_hdf5 = "metadetect_cutsv6.h5"


In [3]:
# Gather the FITS catalogs and put them in a deterministic (sorted) order.
cut_files = glob.glob("mdet_data_v6cuts/*.fits")
cut_files.sort()
print(len(cut_files))

10169


In [4]:
# Read the first catalog to discover the column layout. `d` is reused further
# down as the dtype template when the HDF5 datasets are created.
d = fitsio.read(cut_files[0])
cols = d.dtype.names
print(cols)

('uid', 'patch_num', 'tilename', 'slice_id', 'mdet_step', 'ra', 'dec', 'x', 'y', 'mfrac', 'mfrac_img', 'nepoch_g', 'nepoch_r', 'nepoch_i', 'nepoch_z', 'psfrec_g_1', 'psfrec_g_2', 'psfrec_T', 'gauss_s2n', 'gauss_g_1', 'gauss_g_2', 'gauss_g_cov_1_1', 'gauss_g_cov_1_2', 'gauss_g_cov_2_2', 'gauss_T_err', 'gauss_T_ratio', 'gauss_psf_T', 'pgauss_T_err', 'pgauss_T', 'pgauss_psf_T', 'pgauss_band_flux_g', 'pgauss_band_flux_r', 'pgauss_band_flux_i', 'pgauss_band_flux_z', 'pgauss_band_flux_err_g', 'pgauss_band_flux_err_r', 'pgauss_band_flux_err_i', 'pgauss_band_flux_err_z', 'pgauss_band_flux_g_nodered', 'pgauss_band_flux_r_nodered', 'pgauss_band_flux_i_nodered', 'pgauss_band_flux_z_nodered')


In [5]:
from esutil.pbar import PBar


# Start from a clean slate: drop any output file left over from an earlier run.
try:
    os.remove(ofile_hdf5)
except FileNotFoundError:
    pass

# Lay out one group per shear step under "mdet", holding one 1-d dataset per
# catalog column. "mdet_step" itself is not stored: it is implied by the group.
with h5py.File(ofile_hdf5, "w") as fp:
    mdet_grp = fp.create_group("mdet")
    for mdet_step in ["noshear", "1p", "1m", "2p", "2m"]:
        grp = mdet_grp.create_group(mdet_step)
        for col in d.dtype.names:
            if col == "mdet_step":
                continue

            if col.endswith("_nodered"):
                # stored as big-endian float32 regardless of the input dtype
                dt = np.dtype(">f4")
            elif col == "tilename":
                dt = np.dtype("<S12")
            else:
                dt = d[col].dtype

            # 160M rows are preallocated; maxshape=(None,) keeps the dataset
            # resizable so it can be trimmed to the real row count after the
            # load. No compression filter is applied (an hdf5plugin LZ4 option
            # was left commented out here).
            grp.create_dataset(
                col,
                dtype=dt,
                shape=(160_000_000,),
                maxshape=(None,),
            )

# Equivalent of `chmod go-rwx`: keep the owner (and special) bits, clear every
# group/other permission bit.
os.chmod(ofile_hdf5, os.stat(ofile_hdf5).st_mode & 0o7700)

In [6]:
# Values of the "mdet_step" column that are loaded; each one is written to its
# own group under "mdet" in the HDF5 file.
SHEARS = ["noshear", "1p", "1m", "2p", "2m"]

def _process_file(fname, num_per_shear, fp):
    """Append the rows of one FITS catalog to the per-shear HDF5 datasets.

    Parameters
    ----------
    fname : str
        Path of the FITS file; extension 1 is read with `fitsio.read`.
    num_per_shear : dict
        Maps each entry of `SHEARS` to the number of rows already written for
        that shear step. It is updated in place.
    fp : h5py.File
        Open, writable file holding the ``mdet/<shear>/<column>`` datasets.

    Returns
    -------
    dict
        The same `num_per_shear` object, with the counts advanced by the
        number of rows written for each shear step.
    """
    d = fitsio.read(fname, ext=1)

    if d.shape[0] == 0:
        return num_per_shear

    # "mdet_step" selects the destination group and is not stored itself.
    columns = [col for col in d.dtype.names if col != "mdet_step"]

    for mdet_step in SHEARS:
        d_mdet = d[d["mdet_step"] == mdet_step]
        _num = d_mdet.shape[0]
        if _num == 0:
            continue

        grp = fp["mdet"][mdet_step]
        loc = num_per_shear[mdet_step]
        end = loc + _num
        for col in columns:
            dset = grp[col]
            if end > dset.shape[0]:
                # Grow the dataset rather than failing when the preallocated
                # length turns out to be too small for the input.
                dset.resize(end, axis=0)
            dset[loc:end] = d_mdet[col].astype(dset.dtype)

        num_per_shear[mdet_step] = end

    return num_per_shear

# Stream every catalog file into the HDF5 datasets, tracking how many rows
# have been written for each shear step.
with h5py.File(ofile_hdf5, "a") as fp:
    num_per_shear = dict.fromkeys(SHEARS, 0)
    progress = PBar(
        enumerate(cut_files),
        desc="processing files",
        total=len(cut_files),
        file=sys.stdout,
    )
    for i, fname in progress:
        num_per_shear = _process_file(fname, num_per_shear, fp)
        if i % 100 != 0:
            continue

        # Every 100th file: extrapolate the final "noshear" count from the
        # files seen so far, then flush what has been written.
        num = num_per_shear["noshear"]
        print(
            f"\nexpecting ~{num/(i+1)*len(cut_files)/1e6:6.2f} "
            f"million objects ({num} so far)",
            flush=True,
        )
        fp.flush()

# Trim every dataset down to the number of rows actually written.
with h5py.File(ofile_hdf5, "a") as fp:
    for shear in SHEARS:
        sgrp = fp["mdet"][shear]
        for dset in sgrp.values():
            dset.resize(num_per_shear[shear], axis=0)

processing files: |--------------------| 0/10169   0% [elapsed: 00:00 left: ?]
expecting ~173.34 million objects (17046 so far)
processing files: |--------------------| 99/10169   0% [elapsed: 00:43 left: 1:14:15]
expecting ~159.33 million objects (1582503 so far)
processing files: |--------------------| 199/10169   1% [elapsed: 01:30 left: 1:15:25]
expecting ~163.27 million objects (3227199 so far)
processing files: |--------------------| 299/10169   2% [elapsed: 02:24 left: 1:19:13]
expecting ~165.54 million objects (4899909 so far)
processing files: |--------------------| 400/10169   3% [elapsed: 03:08 left: 1:16:43]
expecting ~164.87 million objects (6501535 so far)
processing files: |--------------------| 500/10169   4% [elapsed: 03:51 left: 1:14:42]
expecting ~163.78 million objects (8068930 so far)
processing files: |#-------------------| 600/10169   5% [elapsed: 04:55 left: 1:18:26]
expecting ~163.08 million objects (9637998 so far)
processing files: |#-------------------| 700/

In [7]:
# Final number of rows written for each shear step.
print(num_per_shear)

{'noshear': 151948577, '1p': 151957783, '1m': 151960056, '2p': 151963881, '2m': 151956386}


In [8]:
# blind things
import os
import io
import sys
from des_y6utils.shear_masking import generate_shear_masking_factor
from ngmix.shape import g1g2_to_eta1eta2, eta1eta2_to_g1g2
import contextlib

# Column-name prefixes whose "<prefix>_g_1" / "<prefix>_g_2" columns are
# rescaled by the blinding step below.
COLS_TO_KEEP = ["pgauss", "gauss"]

# The passphrase is read from a file in the user's home directory and handed
# to generate_shear_masking_factor to obtain the scaling factor `fac`.
with open(os.path.expanduser("~/.test_des_blinding_v7"), "r") as fp:
    passphrase = fp.read().strip()

fac = generate_shear_masking_factor(passphrase)

bofile_hdf5 = ofile_hdf5.rsplit(".", maxsplit=1)[0] + "_blinded.h5"

# Blind a fresh copy and leave the original file untouched. Unlike the shell
# `rm`/`cp` calls, these raise if the removal or the copy fails, so a missing
# or partial copy cannot go unnoticed. shutil.copy also carries over the
# permission bits of the source file.
try:
    os.remove(bofile_hdf5)
except FileNotFoundError:
    pass
shutil.copy(ofile_hdf5, bofile_hdf5)

# Default to success so that `failed` is defined whether or not the guarded
# block below hits an error.
failed = False

# sys.stderr is redirected to the stdout stream that is active on entry, and
# sys.stdout is then redirected into `buff`, so anything printed inside the
# guarded block lands in `buff` instead of the cell output.
# NOTE(review): presumably this keeps blinding details off screen -- confirm.
buff = io.StringIO()
with contextlib.redirect_stderr(sys.stdout):
    with contextlib.redirect_stdout(buff):
        try:
            with h5py.File(bofile_hdf5, "a") as fp:
                # Only the "noshear" group is rescaled.
                noshear_grp = fp["mdet"]["noshear"]
                num_blinded = 0
                for pre in COLS_TO_KEEP:
                    g1_col = pre + "_g_1"
                    g2_col = pre + "_g_2"
                    if g1_col not in noshear_grp or g2_col not in noshear_grp:
                        # This prefix has no shear columns in the file (the
                        # column listing printed earlier has no "pgauss_g_1").
                        # Skip it: a KeyError here would abort the loop before
                        # the remaining prefixes are blinded.
                        continue

                    e1o = noshear_grp[g1_col][:].copy()
                    e2o = noshear_grp[g2_col][:].copy()
                    if pre != "gauss":
                        e1 = e1o * fac
                        e2 = e2o * fac
                    else:
                        # use eta due to bounds
                        eta1o, eta2o = g1g2_to_eta1eta2(e1o, e2o)
                        eta1 = eta1o * fac
                        eta2 = eta2o * fac
                        e1, e2 = eta1eta2_to_g1g2(eta1, eta2)

                    noshear_grp[g1_col][:] = e1
                    noshear_grp[g2_col][:] = e2

                    fp.flush()

                    # Read back from the file to check the stored values
                    # really differ from the unblinded ones.
                    assert not np.array_equal(noshear_grp[g1_col][:], e1o)
                    assert not np.array_equal(noshear_grp[g2_col][:], e2o)
                    num_blinded += 1

                if num_blinded == 0:
                    # Never report success for a "blinded" file that is just
                    # an unmodified copy of the original.
                    raise RuntimeError("no shear columns were blinded")

        except Exception:
            failed = True
            print("blinding error")