Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PyGRB Exclusion Distance Table Executable #4756

Merged
merged 18 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions bin/pygrb/pycbc_pygrb_efficiency
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import sys
import os
import logging
import h5py
import json
import matplotlib.pyplot as plt
from matplotlib import rc
import numpy as np
Expand Down Expand Up @@ -100,6 +101,8 @@ parser.add_argument("--background-output-file", default=None, required=True,
help="Detection efficiency output file.")
parser.add_argument("--onsource-output-file", default=None, required=True,
help="Exclusion distance output file.")
parser.add_argument("--exclusion-dist-output-file", default=None,
help="JSON file containing exclusion distances.")
pannarale marked this conversation as resolved.
Show resolved Hide resolved
parser.add_argument("-g", "--glitch-check-factor", action="store",
type=float, default=1.0, help="When deciding " +
"exclusion efficiencies this value is multiplied " +
Expand All @@ -111,10 +114,13 @@ parser.add_argument("--found-missed-file", action="store", type=str,
parser.add_argument("--injection-set-name", action="store", type=str,
default="", help="Name of the injection set to be " +
"used in the plot title.")
# Label of the trial analysed in this run; it is stored next to the
# exclusion distances in the JSON output file.
# Note the trailing space before the concatenation: without it the help
# text is rendered as "this run(i.e. ONSOURCE, OFFTRIAL)".
parser.add_argument("--trial-name", action="store", type=str,
                    help="Name of trial used for this run " +
                    "(i.e. ONSOURCE, OFFTRIAL)")
parser.add_argument("-C", "--cluster-window", action="store", type=float,
default=0.1, help="The cluster window used " +
"to cluster triggers in time.")
parser.add_argument("--bank-file", action="store", type=str,
parser.add_argument("--bank-file", action="store", type=str, required=True,
help="Location of the full template bank used.")
ppu.pygrb_add_injmc_opts(parser)
ppu.pygrb_add_bestnr_cut_opt(parser)
Expand Down Expand Up @@ -606,7 +612,17 @@ if do_injections:
excl_dist = 0
msg = "Efficiency below %d%% in first bin!" % (percentile)
logging.warning(msg)
# TODO: include percentile, excl_dist on output pages

# Write 50% and 90% exclusion distances to JSON file
# Also include injection set name and trial name
if opts.exclusion_dist_output_file:
excl_dist_dict = {}
excl_dist_dict['inj_set'] = inj_set_name
Copy link
Contributor

@pannarale pannarale May 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The user must provide opts.injection_set_name if opts.exclusion_dist_output_file and/or opts.found_missed_file are provided.
Looking at this made me wonder about something I had not caught in the past: does it even make sense to have opts.found_missed_file and opts.onsource_file as optional? I.e., if do_injections and/or onsource_file are False, does the code do something meaningful (and not crash)? Otherwise we can promote those options to required and remove the ifs on do_injections and onsource_file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The injections part of the code uses results from the onsource analysis, so I'd say it wouldn't hurt anything to make both files required, but there could be cases I'm not considering.

excl_dist_dict['trial_name'] = opts.trial_name
excl_dist_dict['50%'] = sens_dist
excl_dist_dict['90%'] = excl_dist
with open(opts.exclusion_dist_output_file, 'w') as excl_dist_file:
json.dump(excl_dist_dict, excl_dist_file)

# Plot efficiency using loudest foreground
fig = plt.figure()
Expand Down
109 changes: 109 additions & 0 deletions bin/pygrb/pycbc_pygrb_exclusion_dist_table
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/env python

# Copyright (C) 2024 Jacob Buchanan
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

"""Create table of exclusion distances."""

import sys
import argparse
import json
import pycbc.version
import pycbc.results


# Script metadata; the version string is also what --version reports.
__author__ = "Jacob Buchanan <jacob.buchanan@ligo.org>"
__version__ = pycbc.version.git_verbose_msg
__date__ = pycbc.version.date
__program__ = "pycbc_pygrb_exclusion_dist_table"

# Command line interface: one or more JSON inputs and a single HTML output.
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("--version", action="version", version=__version__)
parser.add_argument(
    "--input-files",
    nargs="+",
    required=True,
    help=(
        "List of JSON input files"
        " output by pycbc_pygrb_efficiency"
        " containing exclusion distances."
    ),
)
parser.add_argument(
    "--output-file",
    required=True,
    help=(
        "HTML output file containing table"
        " of exclusion distances."
    ),
)
opts = parser.parse_args()

# Parse every input file into a dictionary, keeping the command line order
file_contents = []
for json_path in opts.input_files:
    with open(json_path, "r") as json_handle:
        parsed = json.load(json_handle)
    file_contents.append(parsed)

# Get list of trials (i.e. OFFTRIAL_i, ONSOURCE, etc.)
# A file without a usable trial name (key absent, or null because
# pycbc_pygrb_efficiency was run without --trial-name) is rejected here
# with the name of the offending file: otherwise it would surface as a
# bare KeyError, as a TypeError when trial names are sorted, or as a
# "None" row label in the table.
trials = []
for file_name, fc in zip(opts.input_files, file_contents):
    trial_name = fc.get("trial_name")
    if trial_name is None:
        raise ValueError(f"No trial name found in input file {file_name}.")
    trials.append(trial_name)
# Check that at least one trial exists
if not trials:
    raise ValueError("No trials found in input files.")
Comment on lines +56 to +57
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we make --trial-name required in pycbc_pygrb_efficiency to prevent this? This way we have a check in place here for the standalone script, and a way to dodge the error in the workflow.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that I looked better, pycbc_pygrb_efficiency must check that opts.trial_name is not None when exclusion_dist_output_file is not None.

# Keep a single, sorted copy of each trial name
trials = sorted(set(trials))

# Collect the injection set name recorded in each input file
injection_sets = [contents["inj_set"] for contents in file_contents]
# Bail out if no injection set was found at all
if not injection_sets:
    raise ValueError("No injection sets found in input files.")
# Keep a single, sorted copy of each injection set name
injection_sets = sorted(set(injection_sets))
jakeb245 marked this conversation as resolved.
Show resolved Hide resolved

# Column titles: the first column labels the trial and percentage, the
# remaining columns hold one injection set each
headers = ["Trial Name (percent)"]
headers.extend(f"{inj_set_name} (Mpc)" for inj_set_name in injection_sets)

# Nest the exclusion distances as
# results[trial_name][injection_set][percent] = exclusion distance
results = {}
for contents in file_contents:
    # setdefault reuses the entry of a trial that was already seen
    per_trial = results.setdefault(contents["trial_name"], {})
    # A repeated trial + injection set pair replaces the earlier entry
    per_trial[contents["inj_set"]] = {
        level: contents[level] for level in ('50%', '90%')
    }


# Build the table rows: all 50% rows first, then all 90% rows. Each row is
# a label followed by one exclusion distance per injection set.
data = []
for level in ('50%', '90%'):
    for trial_name in trials:
        distances = [
            results[trial_name][inj_set_name][level]
            for inj_set_name in injection_sets
        ]
        data.append([f"{trial_name} ({level})"] + distances)

# Render the rows and headers as a static HTML table
table_html = str(pycbc.results.static_table(data, headers))

# Write as figure, recording the command line, title and caption
metadata = {
    "cmd": ' '.join(sys.argv),
    "title": "Exclusion Distances",
    "caption": ("Table of exclusion distances for each trial "
                "and injection set."),
}
pycbc.results.save_fig_with_metadata(table_html, opts.output_file,
                                     **metadata)
Loading