In [1]:
import h5py
import numpy as np
import os

In [2]:
# Directory that holds the per-field HSC shear catalogs read in the next cell;
# the merged catalog is written into the same directory.
path = '/global/cfs/projectdirs/lsst/groups/LSS/HSC_reanalysis/data_javi/2023_reanalysis'

# Destination HDF5 file for the combined catalog.
output_file = os.path.join(
    path,
    'shear_sourcecatalog_hsc_ALL_nonmetacal_11_06.h5',
)

In [3]:
# Per-field HSC shear catalogs, merged into output_file in this order.
input_files = [os.path.join(path,'shear_sourcecatalog_hsc_GAMA09H_nonmetacal_05_22.h5'),
               os.path.join(path,'shear_sourcecatalog_hsc_GAMA15H_nonmetacal_05_22.h5'),
               os.path.join(path,'shear_sourcecatalog_hsc_VVDS_nonmetacal_05_22.h5'),
               os.path.join(path,'shear_sourcecatalog_hsc_WIDE12H_nonmetacal_05_22.h5'),
               os.path.join(path,'shear_sourcecatalog_hsc_XMM_nonmetacal_05_22.h5')
              ]

# Merge every group/dataset of the input files into a single output file by
# concatenating same-named datasets along their first (only) axis.
#
# NOTE: mode 'a' keeps whatever output_file already contains, so re-running
# this cell appends every input catalog a second time. Remove output_file
# first when a clean rebuild is wanted.
with h5py.File(output_file, 'a') as output_h5:
    for file_path in input_files:
        # Open each input H5 file in read mode
        with h5py.File(file_path, 'r') as input_h5:
            print('>>', file_path, '<<')
            # Iterate over the groups in the input file
            for group_name, input_group in input_h5.items():
                if group_name not in output_h5:
                    print('Creating group name: ', group_name, ' in output file')
                    output_group = output_h5.create_group(group_name)
                    # Inserting HSC catalog type metadata
                    output_group.attrs.update({'catalog_type': 'hsc'})
                else:
                    print('Appending to ', group_name, ' group')
                    # Always bind the handle for *this* group: relying on the
                    # handle left over from an earlier create_group call is
                    # undefined when the group pre-exists in the output file
                    # and points at the wrong group when there are several.
                    output_group = output_h5[group_name]

                # Iterate over the datasets in the group
                for dataset_name, input_dataset in input_group.items():
                    new_data = input_dataset[:]

                    if dataset_name in output_group:
                        print('Dataset name:', dataset_name, ' already in output file')
                        output_dataset = output_group[dataset_name]
                        # Grow the dataset and write only the new tail; the
                        # rows already stored do not need to be read back or
                        # rewritten.
                        n_existing = output_dataset.shape[0]
                        output_dataset.resize(n_existing + new_data.shape[0], axis=0)
                        output_dataset[n_existing:] = new_data
                    else:
                        print('Creating: ', dataset_name, ' in output file')
                        # maxshape=(None,) makes the dataset resizable along its
                        # single axis so later files can be appended; this
                        # requires the input datasets to be 1-D.
                        output_dataset = output_group.create_dataset(
                            dataset_name,
                            input_dataset.shape,
                            input_dataset.dtype,
                            maxshape=(None, ),
                            chunks=True,
                        )
                        # Copy data from the input file to the output file
                        output_dataset[:] = new_data

>> /global/cfs/projectdirs/lsst/groups/LSS/HSC_reanalysis/data_javi/2023_reanalysis/shear_sourcecatalog_hsc_GAMA09H_nonmetacal_05_22.h5 <<
Creating group name:  shear  in output file
Creating:  T  in output file
Creating:  c1  in output file
Creating:  c2  in output file
Creating:  dec  in output file
Creating:  flags  in output file
Creating:  g1  in output file
Creating:  g2  in output file
Creating:  lensfit_weight  in output file
Creating:  m  in output file
Creating:  mag_err_i  in output file
Creating:  mag_err_r  in output file
Creating:  mag_i  in output file
Creating:  mag_r  in output file
Creating:  mean_z  in output file
Creating:  objectId  in output file
Creating:  psf_T_mean  in output file
Creating:  psf_g1  in output file
Creating:  psf_g2  in output file
Creating:  ra  in output file
Creating:  redshift_true  in output file
Creating:  s2n  in output file
Creating:  sigma_e  in output file
Creating:  snr_i  in output file
Creating:  snr_r  in output file
Creating:  wei