# Reading GWTC localization data

In [None]:
import h5py
import pandas as pd
import numpy as np

In [None]:
# IPython shell escape: list the contents of the parent directory, which is
# where the path used below expects the localization file to be.
!ls ..

In [None]:
# Path (relative to this notebook's working directory) of the HDF5 file that
# is opened with h5py in the next cell.
path_to_localization_file = "../gwtc-localization-samples-best-localized-analysis=Mixed.hdf5"

In [None]:
# Open the file explicitly read-only: this notebook only inspects the data,
# and h5py releases before 3.0 did not default to read-only mode, so relying
# on the default could open the file writable (or create it if missing).
localization_file = h5py.File(path_to_localization_file, "r")
localization_file

In [None]:
# The top-level keys of the file are the event names (one group per event).
gravitational_wave_event_list = list(localization_file.keys())
# Build a sorted pandas Index from that list. The original referenced an
# undefined name (`gravitational_wave_events`), which raised NameError.
events = pd.Index(gravitational_wave_event_list).sort_values()
events

Each event is a group in the HDF5 file, containing the posterior samples for the luminosity distance $D_L$ (Mpc) as well as the sky location of the source: its right ascension $\alpha$ (rad) and declination $\delta$ (rad) in the geocentric frame. Let us use GW150914 as an example:

In [None]:
# Take the first entry of the sorted event index as the example event.
# NOTE(review): this assumes GW150914 sorts first — confirm against the
# event list displayed above.
gw150914 = events[0]
# Show the names of the members stored in this event's group.
localization_file[gw150914].keys()

In [None]:
def get_event_samples(file: h5py.File, event: str) -> dict[str, np.ndarray]:
    """Load every member of one event's group into memory.

    Parameters
    ----------
    file
        Open localization file, indexed by event name.
    event
        Name of the event (a top-level key of ``file``).

    Returns
    -------
    dict
        Maps each member name of ``file[event]`` to that member's contents,
        obtained with ``[()]`` indexing (reads the whole member at once).
    """
    event_group = file[event]
    samples = {}
    for name, dataset in event_group.items():
        # Indexing with an empty tuple pulls the full contents of the member.
        samples[name] = dataset[()]
    return samples

# Load all samples of the example event into a dict keyed by member name.
gw150914_samples = get_event_samples(localization_file, gw150914)
pd.DataFrame(gw150914_samples) # wrap in a DataFrame only to display it as a table

In addition, each event stores the 90% credible area of its sky localization, given in square degrees:

In [None]:
# Print every attribute attached to the example event's group, one
# "name: value" pair per line.
for k, v in localization_file[gw150914].attrs.items():
    print(f"{k}: {v}")

In [None]:
def get_sky_localization_area_per_event(file: h5py.File, sky_localization_area_label: str = "confidence_area_90") -> pd.Series:
    """Collect one attribute value per event into a pandas Series.

    Parameters
    ----------
    file
        Open localization file whose top-level keys are event names.
    sky_localization_area_label
        Name of the attribute to read from each event's ``attrs``.

    Returns
    -------
    pd.Series
        Indexed by event name, in the order the file yields its keys, holding
        the requested attribute value of each event.
    """
    event_names = pd.Index(file.keys())
    areas = []
    for name in event_names:
        areas.append(file[name].attrs[sky_localization_area_label])
    return pd.Series(areas, index=event_names)

# Gather the default "confidence_area_90" attribute of every event in the file.
localization_areas = get_sky_localization_area_per_event(localization_file)
# Display the areas sorted by value; sort_values returns a new Series, so
# localization_areas itself keeps its original order.
localization_areas.sort_values()