In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import h5py
import glob
from scipy import stats
import numpy as np
import sys
sys.path.append('../src/scripts/')
import warnings
warnings.filterwarnings("ignore", "Wswiglal-redir-stdio")
import weighting
from utils import chi_effective_prior_from_isotropic_spins
from gwpopulation.models.spin import iid_spin_orientation_gaussian_isotropic
import pesummary
import re
import os
import gc
from pesummary.io import read

In [2]:
# Directories that are searched for posterior-sample files.
# NOTE(review): "GWTC-21" may be intended as GWTC-2.1 — the path is kept exactly as written; confirm.
folder1, folder2 = "../../GW_2025/GWTC-21", "../../GW_2025/GWTC-3"

# Every "*_nocosmo.h5" file, with folder1 matches listed before folder2 matches.
files = [
    path
    for folder in (folder1, folder2)
    for path in glob.glob(os.path.join(folder, "*_nocosmo.h5"))
]

In [3]:
# Names to keep, read from INCLUDE_LIST.txt: one entry per line, surrounding
# whitespace stripped, blank lines dropped. The initial empty list is rebound
# to a set once the file has been read.
INCLUDE_LIST=[]
with open("INCLUDE_LIST.txt", "r") as include_file:
    INCLUDE_LIST = {name for name in map(str.strip, include_file) if name}

In [4]:
# Keep only the files whose event name is listed in INCLUDE_LIST.
# The event name is rebuilt from the 4th and 5th tokens of the base file name
# after splitting on "_" or "-" (e.g. "GW190803" + "_" + "022701").
filtered_files = []
for f in files:
    filename = os.path.basename(f)
    parts = re.split("_|-", filename)
    # parts[3] and parts[4] are read below, so at least five tokens are
    # required; names with fewer tokens cannot carry an event name and are
    # skipped instead of raising IndexError.
    if len(parts) >= 5:
        event_name = parts[3] + "_" + parts[4]
        if event_name in INCLUDE_LIST:
            filtered_files.append(f)

print(f"Filtered to {len(filtered_files)} files.")

Filtered to 59 files.


In [5]:
def get_samples_from_event(file, desired_pop_weight=None, far_threshold=1, zmax = 1.9):
    """Load posterior samples for one event and return the columns used downstream.

    The HDF5 file is searched for the first of these top-level groups, in
    order: 'PublicationSamples', 'C01:Mixed', 'PrecessingSpinIMRHM'. The
    'posterior_samples' dataset of that group is read and only samples with
    ``redshift < zmax`` are kept.

    Parameters
    ----------
    file : path of the HDF5 file, opened read-only with h5py.
    desired_pop_weight, far_threshold : accepted but not used by this function.
    zmax : exclusive upper bound on the 'redshift' field.

    Returns
    -------
    (m1_det, qs, dLs, prior), or None when none of the groups is present
    (the available top-level keys are printed in that case).
    m1_det is the 'mass_1' field, qs the 'mass_ratio' field, dLs the
    'luminosity_distance' field divided by 1e3, and prior is dLs**2 * m1_det.
    """
    # Groups are tried in this priority order; the first one found wins.
    candidate_groups = (
        'PublicationSamples',   # O3a files
        'C01:Mixed',            # O3b files
        'PrecessingSpinIMRHM',  # TODO: confirm which waveform approximant this group corresponds to
    )

    with h5py.File(file, 'r') as f:
        top_level = f.keys()
        chosen = next((group for group in candidate_groups if group in top_level), None)
        if chosen is None:
            print(f"Available keys in file {file}: {list(f.keys())}")
            return None
        # Copy into memory so the data stays usable after the file is closed.
        samples = np.array(f[chosen + '/posterior_samples'])

    keep = samples['redshift'] < zmax
    m1_det = samples['mass_1'][keep]
    qs = samples['mass_ratio'][keep]
    # The caller stores this under a "_Gpc" column name, so the raw field is
    # presumably in Mpc — confirm against the data release.
    dLs = samples['luminosity_distance'][keep] / 1e3

    prior = m1_det * dLs**2 if False else dLs**2 * m1_det

    return m1_det, qs, dLs, prior

In [6]:
# Empty DataFrame placeholder.
# NOTE(review): no other cell visible in this notebook reads `event_df` — confirm whether it is still needed.
event_df = pd.DataFrame()


In [7]:
# Build one fixed-size table of posterior samples per event.
#
# Every event contributes exactly N_SAMPLES_PER_EVENT rows, drawn without
# replacement. The draw uses a seeded generator so that re-running the
# notebook from a fresh kernel reproduces the same output file.
N_SAMPLES_PER_EVENT = 3000
SAMPLING_SEED = 0
sampling_rng = np.random.RandomState(SAMPLING_SEED)

# Column names, in the order get_samples_from_event returns its arrays.
PE_COLUMNS = ["mass_1", "mass_ratio", "luminosity_distance_Gpc", "prior_m1d_q_dL"]

PE_dfs = []
for file in filtered_files:
    result = get_samples_from_event(file)
    if result is None:
        # None of the expected sample groups was found in this file.
        continue

    # Work out the event name first so every message below can identify it.
    filename = os.path.basename(file)
    parts = re.split("_|-", filename)
    event_here = parts[3] + "_" + parts[4]

    df_here = pd.DataFrame(dict(zip(PE_COLUMNS, result)))

    # Events with too few samples are skipped, as before, but the check is
    # explicit: the skipped event is named and unrelated errors are no longer
    # swallowed by a broad `except`.
    if len(df_here) < N_SAMPLES_PER_EVENT:
        print(
            f"Skipping {event_here}: only {len(df_here)} samples available, "
            f"need {N_SAMPLES_PER_EVENT}"
        )
        continue

    df_here = df_here.sample(
        N_SAMPLES_PER_EVENT, replace=False, random_state=sampling_rng
    )
    df_here['evt'] = event_here
    print(f"Done {event_here}")
    PE_dfs.append(df_here)

Done GW190803_022701
Done GW190725_174728
Done GW190925_232845
Done GW190708_232457
Cannot take a larger sample than population when 'replace=False'
Done GW190701_203306
Done GW190720_000836
Done GW190828_065509
Done GW190929_012149
Done GW190512_180714
Done GW190413_134308
Done GW190527_092055
Done GW190519_153544
Done GW190620_030421
Done GW190521_074359
Done GW190731_140936
Done GW190513_205428
Done GW190924_021846
Done GW190630_185205
Done GW190728_064510
Done GW190915_235702
Done GW190719_215514
Done GW190828_063405
Done GW190408_181802
Done GW190602_175927
Done GW190413_052954
Done GW190727_060333
Done GW190412_053044
Done GW190930_133541
Done GW190707_093326
Done GW190805_211137
Done GW190517_055101
Done GW190706_222641
Done GW190421_213856
Done GW190910_112807
Done GW190503_185404
Done GW200208_130117
Done GW200112_155838
Done GW200316_215756
Done GW200202_154313
Done GW200224_222234
Done GW191127_050227
Done GW191109_010717
Done GW191105_143521
Done GW191215_223052
Done GW2001

In [8]:
# Stack the per-event frames into a single table. ignore_index=True drops the
# row labels left over from sampling and renumbers the rows from 0.
final_df = pd.concat(PE_dfs, ignore_index=True)

In [9]:
# Save the combined samples to ./pe_samples.h5 under the key 'samples'.
# mode='w' creates a new file, so an existing file of that name is replaced.
final_df.to_hdf('./pe_samples.h5', key='samples', mode='w')
