Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Background estimation #71

Merged
merged 8 commits into from
Sep 30, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 12 additions & 5 deletions docs/notebooks/comparison_with_EventDisplay.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -598,8 +598,9 @@
"# Data from EventDisplay\n",
"h = irf_eventdisplay[\"BGRate\"]\n",
"x = 0.5 * (10**h.edges[:-1] + 10**h.edges[1:])\n",
"xerr = np.diff(10**h.edges) / 2\n",
"y = h.values\n",
"width = np.diff(10**h.edges)\n",
"xerr = width / 2\n",
"y = h.values / width\n",
"yerr = np.sqrt(h.variances)\n",
"\n",
"# pyirf data\n",
Expand All @@ -620,15 +621,14 @@
"\n",
"# undo normalization\n",
"rate_bin *= cone_solid_angle(theta_cut)\n",
"rate_bin *= np.diff(reco_bins)\n",
"\n",
"\n",
"# Plot function\n",
"plt.errorbar(x, y, xerr=xerr, yerr=yerr, ls='', label=\"EventDisplay\")\n",
"\n",
"plt.errorbar(\n",
" 0.5 * (bg_rate['ENERG_LO'] + bg_rate['ENERG_HI']).to_value(u.TeV)[1:-1],\n",
" rate_bin.to_value(u.s**-1)[1:-1],\n",
" rate_bin.to_value(1 / (u.TeV * u.s))[1:-1],\n",
" xerr=np.diff(reco_bins).to_value(u.TeV)[1:-1] / 2,\n",
" ls='',\n",
" label='pyirf',\n",
Expand All @@ -637,13 +637,20 @@
"# Style settings\n",
"plt.xscale(\"log\")\n",
"plt.xlabel(r\"$E_\\mathrm{Reco} / \\mathrm{TeV}$\")\n",
"plt.ylabel(\"Background rate / s⁻¹\")\n",
"plt.ylabel(\"Background rate / (s⁻¹ TeV⁻¹) \")\n",
"plt.grid(which=\"both\")\n",
"plt.legend(loc=\"best\")\n",
"plt.yscale('log')\n",
"\n",
"None # to remove clutter by mpl objects"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
57 changes: 27 additions & 30 deletions examples/calculate_eventdisplay_irfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
create_histogram_table,
)
from pyirf.cuts import calculate_percentile_cut, evaluate_binned_cut
from pyirf.sensitivity import calculate_sensitivity
from pyirf.sensitivity import calculate_sensitivity, estimate_background
from pyirf.utils import calculate_theta, calculate_source_fov_offset
from pyirf.benchmarks import energy_bias_resolution, angular_resolution

Expand Down Expand Up @@ -56,7 +56,10 @@
# scaling between on and off region.
# Make off region 5 times larger than on region for better
# background statistics
ALPHA = 0.05
ALPHA = 0.2

# Radius to use for calculating bg rate
MAX_BG_RADIUS = 1 * u.deg

# gh cut used for first calculation of the binned theta cuts
INITIAL_GH_CUT = 0.0
Expand All @@ -77,13 +80,6 @@
}


def get_bg_cuts(cuts, alpha):
"""Rescale the cut values to enlarge the background region"""
cuts = cuts.copy()
cuts["cut"] /= np.sqrt(alpha)
return cuts


def main():
logging.basicConfig(level=logging.INFO)
logging.getLogger("pyirf").setLevel(logging.DEBUG)
Expand Down Expand Up @@ -113,6 +109,7 @@ def main():
background = table.vstack(
[particles["proton"]["events"], particles["electron"]["events"]]
)
background = background
maxnoe marked this conversation as resolved.
Show resolved Hide resolved

log.info(f"Using fixed G/H cut of {INITIAL_GH_CUT} to calculate theta cuts")

Expand All @@ -138,12 +135,6 @@ def main():
gammas["selected_theta"] = evaluate_binned_cut(
gammas["theta"], gammas["reco_energy"], theta_cuts, operator.le
)
# we make the background region larger by a factor of ALPHA,
# so the radius by sqrt(ALPHA) to get better statistics for the background
theta_cuts_bg = get_bg_cuts(theta_cuts, ALPHA)
background["selected_theta"] = evaluate_binned_cut(
background["theta"], background["reco_energy"], theta_cuts_bg, operator.le
)

# same bins as event display uses
sensitivity_bins = add_overflow_bins(
Expand All @@ -155,15 +146,18 @@ def main():
log.info("Optimizing G/H separation cut for best sensitivity")
sensitivity_step_2, gh_cuts = optimize_gh_cut(
gammas[gammas["selected_theta"]],
background[background["selected_theta"]],
bins=sensitivity_bins,
cut_values=np.arange(-1.0, 1.005, 0.05),
background,
reco_energy_bins=sensitivity_bins,
gh_cut_values=np.arange(-1.0, 1.005, 0.05),
theta_cuts=theta_cuts,
op=operator.ge,
alpha=ALPHA,
background_radius=MAX_BG_RADIUS,
)

# now that we have the optimized gh cuts, we recalculate the theta
# cut as 68 percent containment on the events surviving these cuts.
log.info('Recalculating theta cut for optimized GH Cuts')
for tab in (gammas, background):
tab["selected_gh"] = evaluate_binned_cut(
tab["gh_score"], tab["reco_energy"], gh_cuts, operator.ge
Expand All @@ -178,21 +172,21 @@ def main():
min_value=0.05 * u.deg,
)

theta_cuts_opt_bg = get_bg_cuts(theta_cuts_opt, ALPHA)

for tab, cuts in zip([gammas, background], [theta_cuts_opt, theta_cuts_opt_bg]):
tab["selected_theta"] = evaluate_binned_cut(
tab["theta"], tab["reco_energy"], cuts, operator.le
)
tab["selected"] = tab["selected_theta"] & tab["selected_gh"]
gammas["selected_theta"] = evaluate_binned_cut(
gammas["theta"], gammas["reco_energy"], theta_cuts_opt, operator.le
)
gammas["selected"] = gammas["selected_theta"] & gammas["selected_gh"]

signal_hist = create_histogram_table(
gammas[gammas["selected"]], bins=sensitivity_bins
)
background_hist = create_histogram_table(
background[background["selected"]], bins=sensitivity_bins
background_hist = estimate_background(
background[background["selected_gh"]],
reco_energy_bins=sensitivity_bins,
theta_cuts=theta_cuts_opt,
alpha=ALPHA,
background_radius=MAX_BG_RADIUS,
)

sensitivity = calculate_sensitivity(signal_hist, background_hist, alpha=ALPHA)

# scale relative sensitivity by Crab flux to get the flux sensitivity
Expand All @@ -201,7 +195,7 @@ def main():
s["reco_energy_center"]
)

# write OGADF output file
log.info('Calculating IRFs')
hdus = [
fits.PrimaryHDU(),
fits.BinTableHDU(sensitivity, name="SENSITIVITY"),
Expand Down Expand Up @@ -286,10 +280,13 @@ def main():
psf, true_energy_bins, source_offset_bins, fov_offset_bins,
))
hdus.append(create_rad_max_hdu(
theta_bins, fov_offset_bins, rad_max=theta_cuts_opt["cut"][:, np.newaxis]
theta_bins, fov_offset_bins,
rad_max=theta_cuts_opt["cut"][:, np.newaxis],
))
hdus.append(fits.BinTableHDU(ang_res, name="ANGULAR_RESOLUTION"))
hdus.append(fits.BinTableHDU(bias_resolution, name="ENERGY_BIAS_RESOLUTION"))

log.info('Writing outputfile')
fits.HDUList(hdus).writeto("pyirf_eventdisplay.fits.gz", overwrite=True)


Expand Down
4 changes: 4 additions & 0 deletions pyirf/binning.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
from astropy.table import QTable


def bin_center(edges):
return 0.5 * (edges[:-1] + edges[1:])


def add_overflow_bins(bins, positive=True):
"""
Add under and overflow bins to a bin array.
Expand Down
51 changes: 34 additions & 17 deletions pyirf/cut_optimization.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import numpy as np
from astropy.table import Table
from astropy.table import QTable
import astropy.units as u
from tqdm import tqdm

from .cuts import evaluate_binned_cut
from .sensitivity import calculate_sensitivity
from .sensitivity import calculate_sensitivity, estimate_background
from .binning import create_histogram_table


Expand All @@ -13,23 +13,33 @@
]


def optimize_gh_cut(signal, background, bins, cut_values, op, alpha=1.0, progress=True):
def optimize_gh_cut(
signal,
background,
reco_energy_bins,
gh_cut_values,
theta_cuts,
op,
background_radius=1 * u.deg,
alpha=1.0,
progress=True,
):
"""
Optimize the gh-score in every energy bin.
Theta Squared Cut should already be applied on the input tables.
"""

# we apply each cut for all bins globally, calculate the
# we apply each cut for all reco_energy_bins globally, calculate the
# sensitivity and then lookup the best sensitivity for each
# bin independently

sensitivities = []
for cut_value in tqdm(cut_values, disable=not progress):
for cut_value in tqdm(gh_cut_values, disable=not progress):

# create appropriate table for ``evaluate_binned_cut``
cut_table = Table()
cut_table["low"] = bins[0:-1]
cut_table["high"] = bins[1:]
cut_table = QTable()
cut_table["low"] = reco_energy_bins[0:-1]
cut_table["high"] = reco_energy_bins[1:]
cut_table["cut"] = cut_value

# apply the current cut
Expand All @@ -43,22 +53,29 @@ def optimize_gh_cut(signal, background, bins, cut_values, op, alpha=1.0, progres

# create the histograms
signal_hist = create_histogram_table(
signal[signal_selected], bins, "reco_energy"
signal[signal_selected], reco_energy_bins, "reco_energy"
)
background_hist = create_histogram_table(
background[background_selected], bins, "reco_energy"

background_hist = estimate_background(
events=background[background_selected],
reco_energy_bins=reco_energy_bins,
theta_cuts=theta_cuts,
alpha=alpha,
background_radius=background_radius
)

sensitivity = calculate_sensitivity(signal_hist, background_hist, alpha=alpha,)
sensitivity = calculate_sensitivity(
signal_hist, background_hist, alpha=alpha,
)
sensitivities.append(sensitivity)

best_cut_table = Table()
best_cut_table["low"] = bins[0:-1]
best_cut_table["high"] = bins[1:]
best_cut_table = QTable()
best_cut_table["low"] = reco_energy_bins[0:-1]
best_cut_table["high"] = reco_energy_bins[1:]
best_cut_table["cut"] = np.nan

best_sensitivity = sensitivities[0].copy()
for bin_id in range(len(bins) - 1):
for bin_id in range(len(reco_energy_bins) - 1):
sensitivities_bin = [s["relative_sensitivity"][bin_id] for s in sensitivities]

if not np.all(np.isnan(sensitivities_bin)):
Expand All @@ -69,6 +86,6 @@ def optimize_gh_cut(signal, background, bins, cut_values, op, alpha=1.0, progres
best = 0

best_sensitivity[bin_id] = sensitivities[best][bin_id]
best_cut_table["cut"][bin_id] = cut_values[best]
best_cut_table["cut"][bin_id] = gh_cut_values[best]

return best_sensitivity, best_cut_table
16 changes: 10 additions & 6 deletions pyirf/cuts.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
from astropy.table import Table
from astropy.table import Table, QTable

from .binning import calculate_bin_indices
from .binning import calculate_bin_indices, bin_center


def calculate_percentile_cut(
Expand Down Expand Up @@ -34,9 +34,10 @@ def calculate_percentile_cut(

table["bin_index"] = calculate_bin_indices(table["bin_values"].quantity, bins)

cut_table = Table()
cut_table = QTable()
cut_table["low"] = bins[:-1]
cut_table["high"] = bins[1:]
cut_table["center"] = bin_center(bins)
cut_table["cut"] = fill_value

# use groupby operations to calculate the percentile in each bin
Expand All @@ -46,7 +47,7 @@ def calculate_percentile_cut(
cut_table["cut"][by_bin.groups.keys["bin_index"]] = (
by_bin["values"]
.groups.aggregate(lambda g: np.percentile(g, percentile))
.quantity.to_value(cut_table["cut"].unit)
.quantity.to(cut_table["cut"].unit)
)

if min_value is not None:
Expand Down Expand Up @@ -81,6 +82,9 @@ def evaluate_binned_cut(values, bin_values, cut_table, op):
A function taking two arguments, comparing element-wise and
returning an array of booleans.
"""
bins = np.append(cut_table["low"].quantity, cut_table["high"].quantity[-1])
if not isinstance(cut_table, QTable):
raise ValueError('cut_table needs to be an astropy.table.QTable')

bins = np.append(cut_table["low"], cut_table["high"][-1])
bin_index = calculate_bin_indices(bin_values, bins)
return op(values, cut_table["cut"][bin_index].quantity)
return op(values, cut_table["cut"][bin_index])