# Generate LVC-External GW Catalogs

## 1: Installations + data download

Install the pip packages listed in `requirements.txt` into the environment of the current Jupyter kernel.

In [None]:
import sys
# Run pip through the kernel's own interpreter (sys.executable) so the packages
# listed in requirements.txt are installed for the Python that runs this notebook.
!{sys.executable} -m pip install -r requirements.txt --quiet

Download the IAS and PyCBC posterior samples into the data directories created below, along with the LVC GWTC-2 catalog.

In [None]:
# Create the download directories for the PyCBC and IAS samples (-p: no error if they already exist).
! mkdir -p data/pycbc_search data/ias_search
# Pass the contents of data_files.txt to wget, at most three arguments per
# invocation (-n 3) and with up to two wget processes running in parallel (-P 2).
# NOTE(review): presumably each entry in data_files.txt is a three-token wget
# argument group (e.g. an output option plus a URL) — confirm against that file.
! cat data_files.txt | xargs -n 3 -P 2 wget -q --show-progress

## 2: Generate External Catalogs

We can generate catalogs with the posterior samples we have just downloaded. The code for the catalog generation from posterior samples is stored in the `catalog_generators` module.

In [None]:
# Catalog JSON files, one per search pipeline.
LVC_CATALOG = "./data/lvc_catalog.json"
IAS_CATALOG = "./data/ias_catalog.json"      # passed as out_catalog_fname to generate_ias_catalog
PYCBC_CATALOG = "./data/pycbc_catalog.json"  # passed as out_catalog_fname to generate_pycbc_catalog

# All catalog paths, in the order in which they are combined later in the notebook.
CATALOGS = [
    LVC_CATALOG,
    IAS_CATALOG,
    PYCBC_CATALOG,
]

In [None]:
from catalog_generators import generate_ias_catalog
# Generate the IAS catalog: input comes from the IAS data directory and the
# resulting catalog file name is IAS_CATALOG.
generate_ias_catalog(data_dir='./data/ias_search/', out_catalog_fname=IAS_CATALOG)

In [None]:
from catalog_generators import generate_pycbc_catalog
# Generate the PyCBC catalog: input comes from the PyCBC data directory and the
# resulting catalog file name is PYCBC_CATALOG.
generate_pycbc_catalog(data_dir='./data/pycbc_search/', out_catalog_fname=PYCBC_CATALOG)

## 3: Display plot of Catalog Event Masses

Define functions to parse the catalog json files.

In [None]:
import pandas as pd
import json

def read_json(json_path)->dict:
    """Load a JSON file and return its parsed content.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON document (annotated as a dict; the value is
        whatever ``json.load`` produces for the file's top-level element).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Pin the encoding: JSON text is UTF-8, and without an explicit encoding
    # open() falls back to the platform's locale default, which can mis-decode
    # non-ASCII characters on some systems.
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def catalog_to_dataframe(catalog_path):
    """Build a DataFrame with one row per entry of a catalog's 'events' mapping.

    Args:
        catalog_path: Path of the catalog JSON file.

    Returns:
        A pandas DataFrame constructed from the values of the file's
        top-level ``'events'`` mapping; the keys of that mapping are not kept.
    """
    event_records = list(read_json(catalog_path)['events'].values())
    return pd.DataFrame(event_records)

Combine the catalogs into one Pandas DataFrame.

In [None]:
# Stack the per-catalog frames into one table. ignore_index=True gives the
# combined frame a fresh 0..N-1 index; without it each catalog's own 0-based
# row labels are repeated, so label-based lookups would match several rows.
catalogs_df = pd.concat(
    [catalog_to_dataframe(c) for c in CATALOGS],
    ignore_index=True,
)
catalogs_df

Plot the source masses of the events in each catalog.

In [None]:
from matplotlib import pyplot as plt
from matplotlib import rcParams
# Global matplotlib styling applied to the figures drawn below.
# NOTE(review): "font.family" is "serif" while the font list is set under
# "font.sans-serif"; presumably that list is not consulted for a serif
# family — confirm which of the two was intended.
rcParams.update({
    "font.size": 20,
    "font.family": "serif",
    "font.sans-serif": ["Computer Modern Sans"],
    "text.usetex": False,
    'axes.labelsize': 30,
    'axes.titlesize': 30,
    'axes.labelpad': 20,
})


def plot_masses(catalog_df):
    """Scatter-plot source mass 2 against source mass 1, one series per catalog.

    Both axes use a logarithmic scale and every catalog gets its own legend
    entry, placed outside the axes on the upper right.

    Args:
        catalog_df: DataFrame providing the columns ``catalog.shortName``,
            ``mass_1_source`` and ``mass_2_source``.

    Returns:
        None. The figure is created through pyplot.
    """
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
    ax.set_xlabel("Source Mass 1", fontsize="x-large")
    ax.set_ylabel("Source Mass 2", fontsize="x-large")
    ax.set_yscale("log")
    ax.set_xscale("log")
    # Iterate the catalogs in sorted name order. Looping over a set of strings
    # gives a hash-dependent order, so the legend order and the colour assigned
    # to each catalog could change from one kernel session to the next.
    # groupby also leaves out rows whose catalog name is missing instead of
    # producing an empty "nan" series.
    for cat_name, data in catalog_df.groupby("catalog.shortName", sort=True):
        ax.scatter(data['mass_1_source'], data['mass_2_source'], label=cat_name)
    ax.legend(bbox_to_anchor=(1,1), loc="upper left", frameon=False)

# Draw the mass scatter plot for the combined catalogs DataFrame.
plot_masses(catalogs_df)