In [1]:
import fitsio
import glob
from des_y6utils.mdet import make_mdet_cuts
from esutil.pbar import PBar
import numpy as np
import h5py
import joblib
import os
import sys
import gc

import fastparquet
import pandas as pd
from numpy.lib.recfunctions import repack_fields

In [10]:
# Column names handled by this notebook: the HDF5 export loop walks this tuple
# in chunks, and `_reformat_one` keeps only these fields (when present) while
# converting FITS files to parquet.
COLS = (
    'tilename',
    'uid',
    'patch_num',
    'mdet_step',
    'ra',
    'dec',
    'gauss_g_1',
    'gauss_g_2',
    'pgauss_band_flux_g',
    'pgauss_band_flux_r',
    'pgauss_band_flux_i',
    'pgauss_band_flux_z',
    'pgauss_band_flux_err_g',
    'pgauss_band_flux_err_r',
    'pgauss_band_flux_err_i',
    'pgauss_band_flux_err_z',
    'gauss_T_err',
    'gauss_T_ratio',
    'gauss_psf_T',
    'gauss_s2n',    
    'slice_id',
    'x',
    'y',
    'mfrac',
    'mfrac_img',
    'nepoch_g',
    'nepoch_r',
    'nepoch_i',
    'nepoch_z',
    'psfrec_g_1',
    'psfrec_g_2',
    'psfrec_T',
    'gauss_g_cov_1_1',
    'gauss_g_cov_1_2',
    'gauss_g_cov_2_2',
    'pgauss_T_err',
    'pgauss_T',
    'pgauss_psf_T',
)

In [11]:
# glob returns matches in arbitrary (filesystem-dependent) order; sort them so
# the row order of anything concatenated from these files is reproducible.
pf_fnames = sorted(glob.glob("cutsv3/patch-*.parq"))


def _get_col_one(fname, cols, mdet_step):
    """Load selected columns of one parquet file for a single mdet step.

    Parameters
    ----------
    fname : str
        Path of the parquet file to read.
    cols : list of str
        Column names to load; also used to index the resulting record array.
    mdet_step : str
        Only rows whose ``mdet_step`` column equals this value are returned.

    Returns
    -------
    numpy record array
        The filtered rows restricted to ``cols``. ``tilename`` and
        ``mdet_step`` are requested as fixed-width byte strings
        (``<S12`` / ``<S7``).
    """
    step_filter = [[("mdet_step", "==", mdet_step)]]
    string_dtypes = {"tilename": "<S12", "mdet_step": "<S7"}

    frame = fastparquet.ParquetFile(fname).to_pandas(
        columns=cols,
        filters=step_filter,
        row_filter=True,
    )
    records = frame.to_records(index=False, column_dtypes=string_dtypes)
    # Select the fields by name, in the order the caller listed them.
    return records[cols]


def _get_col(fnames, cols, mdet_step, n_jobs=8):
    """Read ``cols`` for one mdet step from many files and stack the rows.

    Parameters
    ----------
    fnames : iterable of str
        Parquet files to read; one `_get_col_one` task is queued per file.
    cols : list of str
        Column names passed through to `_get_col_one`.
    mdet_step : str
        mdet step passed through to `_get_col_one`.
    n_jobs : int, optional
        Number of joblib workers (default 8).

    Returns
    -------
    numpy array
        The per-file results concatenated along axis 0, in the order of
        ``fnames``.
    """
    tasks = []
    for path in fnames:
        tasks.append(joblib.delayed(_get_col_one)(path, cols, mdet_step))

    with joblib.Parallel(n_jobs=n_jobs, verbose=10) as pool:
        per_file = pool(tasks)
        return np.concatenate(per_file, axis=0)

In [12]:
# %%time

# d = _get_col(pf_fnames, ["ra", "dec"], "noshear")

In [13]:
# Export the parquet catalogs to one HDF5 file laid out as
# mdet/<mdet_step>/<column>, writing a few columns at a time so that only
# `chunk_size` full-length columns are held in memory at once.
H5_FNAME = "metadetect_desdmv4_cutsv3.h5"
MDET_STEPS = ["noshear", "1p", "1m", "2p", "2m"]
# Initial length of every dataset; each one is resized to the real row count
# when its data is written (maxshape is unlimited).
INIT_NROWS = 150_000_000

# Read one file only to learn the on-disk dtype of each column.
pf = fastparquet.ParquetFile(pf_fnames[0])
template = pf.to_pandas(
    filters=[[("mdet_step", "==", "noshear")]],
    row_filter=True,
).to_records(index=False, column_dtypes={"tilename": "<S12", "mdet_step": "<S7"})

# Fail before the long export starts if a requested column is unavailable.
missing = [col for col in COLS if col not in template.dtype.names]
if missing:
    raise KeyError(
        "columns missing from %s: %s" % (pf_fnames[0], ", ".join(missing))
    )

# Datasets are created for exactly the columns the write loop fills in.
col_dtypes = {col: template[col].dtype for col in COLS}

# The template rows are no longer needed; release them before the big reads.
del template
gc.collect()

chunk_size = 2
# Ceiling division: the last chunk may hold fewer than `chunk_size` columns.
nchunks = (len(COLS) + chunk_size - 1) // chunk_size

with h5py.File(H5_FNAME, "w") as fp:
    mdet_grp = fp.create_group("mdet")
    for mdet_step in PBar(MDET_STEPS, desc="mdet step"):
        grp = mdet_grp.create_group(mdet_step)
        for col, dt in col_dtypes.items():
            grp.create_dataset(
                col,
                dtype=dt,
                shape=(INIT_NROWS,),
                maxshape=(None,),
            )

for mdet_step in PBar(MDET_STEPS, desc="mdet step"):
    for chunk in PBar(range(nchunks), desc="%s columns" % mdet_step):
        loc = chunk * chunk_size
        # Slicing past the end of COLS is safe, so the last chunk needs no
        # special handling.
        cols = list(COLS[loc:loc + chunk_size])
        d = _get_col(pf_fnames, cols, mdet_step)

        # Open the output once per chunk; the context manager closes it.
        with h5py.File(H5_FNAME, "a") as fp:
            grp = fp["mdet/" + mdet_step]
            for col in cols:
                grp[col].resize(len(d[col]), axis=0)
                grp[col][:] = d[col]

        # Drop the chunk before loading the next one to cap peak memory.
        del d
        gc.collect()

mdet step: |####################| 5/5 100% [elapsed: 00:00 left: 00:00]
noshear columns: |--------------------| 0/19   0% [elapsed: 00:00 left: ?][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    2.9s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    4.7s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    6.2s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    8.4s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   10.4s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:   12.6s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:   15.8s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:   19.0s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   22.7s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   26.4s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   30.1s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   34.3s
[Parallel(n_jobs=8)]: Done 146 tasks      | ela

[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    1.5s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    2.2s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    3.4s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    4.8s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:    6.0s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:    7.3s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:    9.4s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   11.4s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   13.0s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   14.9s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   16.9s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:   19.2s
[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   21.8s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:   24.2s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   26.0s finished
noshear columns: |########------------| 8/19  42% [elapsed: 05:33 left: 07:3

[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:    5.1s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:    6.5s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:    8.2s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:    9.9s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   11.5s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   13.2s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   15.1s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:   17.0s
[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   19.5s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:   21.9s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   23.5s finished
noshear columns: |###############-----| 15/19  78% [elapsed: 09:00 left: 02:24][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    0.7s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    1.3s
[Parallel(n_jobs=8)]: Done  16 tasks      | 

[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   15.7s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   18.6s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   21.5s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   24.6s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:   27.7s
[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   31.6s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:   35.6s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   38.0s finished
1p columns: |###-----------------| 3/19  15% [elapsed: 02:36 left: 13:56][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    1.2s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    2.3s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    3.1s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    4.8s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    6.5s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapse

[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   26.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:   29.0s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   31.3s finished
1p columns: |##########----------| 10/19  52% [elapsed: 06:46 left: 06:05][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    0.8s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    1.7s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    2.3s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    3.7s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    4.8s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:    6.1s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:    7.9s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:    9.5s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   11.7s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   13.6s
[Parallel(n_jobs=8)]: Done 112 tasks      | elaps

1p columns: |#################---| 17/19  89% [elapsed: 10:20 left: 01:13][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    0.9s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    1.6s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    2.5s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    4.0s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    5.3s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:    6.6s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:    8.3s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:   10.5s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   12.6s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   14.6s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   17.0s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   19.5s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:   22.2s
[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   25.

[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    1.6s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    2.6s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    4.0s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    5.3s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:    6.9s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:    8.5s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:   10.8s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   12.7s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   15.2s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   16.9s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   19.7s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:   22.5s
[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   25.7s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:   28.8s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   31.0s finished
1m columns: |######--------------| 6/19  31% [elapsed: 04:32 left: 09:50][Pa

[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:    7.1s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:    8.8s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   10.6s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   12.4s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   14.2s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   16.2s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:   18.1s
[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   21.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:   23.3s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   25.1s finished
1m columns: |#############-------| 13/19  68% [elapsed: 08:20 left: 03:51][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    0.7s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    1.5s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    1.9s
[Parallel(n_jobs=8)]: Done  25 tasks      | elaps

[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   24.3s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   28.1s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   32.3s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:   36.3s
[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   41.7s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:   47.1s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   50.9s finished
2p columns: |#-------------------| 1/19   5% [elapsed: 00:58 left: 17:39][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    1.3s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    3.0s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    3.9s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    6.3s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    8.5s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:   10.5s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapse

[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:   25.9s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   27.9s finished
2p columns: |########------------| 8/19  42% [elapsed: 05:41 left: 07:49][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    0.8s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    1.9s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    2.4s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    3.9s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    5.2s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:    6.8s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:    8.4s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:   10.6s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   12.6s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   14.8s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   16.9s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapse

[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    0.7s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    1.6s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    2.3s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    3.5s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    4.6s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:    5.8s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:    7.5s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:    9.4s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   11.0s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   13.2s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   15.3s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   17.5s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:   19.8s
[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   22.4s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:   25.3s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   27.3s finished
2p columns: |##

2m columns: |###-----------------| 3/19  15% [elapsed: 02:38 left: 14:06][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    1.2s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    2.2s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    3.1s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    4.8s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    6.2s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:    8.0s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:   10.1s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:   12.7s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   15.3s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   17.8s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   20.5s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   23.5s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:   26.6s
[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   30.5

[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    2.3s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    3.7s
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    4.9s
[Parallel(n_jobs=8)]: Done  45 tasks      | elapsed:    6.1s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:    7.6s
[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:    9.6s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   11.4s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   13.3s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   15.1s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   17.7s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:   19.8s
[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   22.9s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:   25.3s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   27.1s finished
2m columns: |###########---------| 11/19  57% [elapsed: 07:13 left: 05:14][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurre

[Parallel(n_jobs=8)]: Done  69 tasks      | elapsed:   10.3s
[Parallel(n_jobs=8)]: Done  82 tasks      | elapsed:   12.2s
[Parallel(n_jobs=8)]: Done  97 tasks      | elapsed:   14.5s
[Parallel(n_jobs=8)]: Done 112 tasks      | elapsed:   16.4s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   18.6s
[Parallel(n_jobs=8)]: Done 146 tasks      | elapsed:   21.0s
[Parallel(n_jobs=8)]: Done 165 tasks      | elapsed:   24.5s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:   27.2s
[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   29.2s finished
2m columns: |##################--| 18/19  94% [elapsed: 10:53 left: 00:36][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:    0.9s
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:    1.8s
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    2.3s
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    3.6s
[Parallel(n_jobs=8)]: Done  34 tasks      | elaps

In [14]:
# Open explicitly read-only: this handle is only used to inspect the file, and
# the default mode differs between h5py versions.
f = h5py.File("metadetect_desdmv4_cutsv3.h5", "r")

In [22]:
# List the dataset names stored under the mdet/noshear group.
f["/mdet/noshear/"].keys()

<KeysViewHDF5 ['dec', 'gauss_T_err', 'gauss_T_ratio', 'gauss_g_1', 'gauss_g_2', 'gauss_g_cov_1_1', 'gauss_g_cov_1_2', 'gauss_g_cov_2_2', 'gauss_psf_T', 'gauss_s2n', 'mdet_step', 'mfrac', 'mfrac_img', 'nepoch_g', 'nepoch_i', 'nepoch_r', 'nepoch_z', 'patch_num', 'pgauss_T', 'pgauss_T_err', 'pgauss_band_flux_err_g', 'pgauss_band_flux_err_i', 'pgauss_band_flux_err_r', 'pgauss_band_flux_err_z', 'pgauss_band_flux_g', 'pgauss_band_flux_i', 'pgauss_band_flux_r', 'pgauss_band_flux_z', 'pgauss_psf_T', 'psfrec_T', 'psfrec_g_1', 'psfrec_g_2', 'ra', 'slice_id', 'tilename', 'uid', 'x', 'y']>

In [23]:
# Close the HDF5 handle `f` once inspection is done.
f.close()

## Code to reformat the FITS patch files to Parquet

In [None]:


def _reformat_one(fname, odir):
    """Convert one FITS catalog file to a SNAPPY-compressed parquet file.

    Only the fields named in ``COLS`` that exist in the input are kept. The
    output is written to ``odir`` under the input's base name with its
    extension replaced by ``.parq``.

    Parameters
    ----------
    fname : str
        Path of the FITS file to read.
    odir : str
        Directory in which the parquet file is written.
    """
    cat = fitsio.read(fname)
    # Selection cuts are currently disabled, so every row is kept; to apply
    # them, mask with `make_mdet_cuts(cat, "3")` before selecting columns.
    keep = [col for col in COLS if col in cat.dtype.names]
    # repack_fields drops the padding left behind by multi-field indexing.
    cat = repack_fields(cat[keep])

    # splitext removes whatever extension is present, instead of blindly
    # chopping the last five characters off the name.
    stem = os.path.splitext(os.path.basename(fname))[0]
    fastparquet.write(
        os.path.join(odir, stem + ".parq"),
        pd.DataFrame(cat),
        has_nulls=False,
        write_index=False,
        compression="SNAPPY",
    )

def _reformat(fnames, odir, n_jobs=2):
    """Run `_reformat_one` on every file in ``fnames`` using joblib workers.

    Parameters
    ----------
    fnames : iterable of str
        Input files; one conversion task is queued per file.
    odir : str
        Output directory, created if it does not already exist.
    n_jobs : int, optional
        Number of joblib workers (default 2).
    """
    os.makedirs(odir, exist_ok=True)

    convert = joblib.delayed(_reformat_one)
    tasks = [convert(path, odir) for path in fnames]

    with joblib.Parallel(n_jobs=n_jobs, verbose=100) as pool:
        pool(tasks)

# Collect every FITS patch file and convert each one to parquet under the
# "nocuts" directory.
patch_pattern = (
    "/gpfs02/astro/desdata/esheldon/lensing/"
    "des-lensing/y6patches/patches/*.fits"
)
fnames = glob.glob(patch_pattern)

_reformat(fnames, odir="nocuts")