Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First implementation of general cut optimizations #123

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 193 additions & 0 deletions pyirf/cut_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import astropy.units as u
from tqdm import tqdm
import operator
from itertools import product

from .cuts import evaluate_binned_cut, calculate_percentile_cut
from .sensitivity import calculate_sensitivity, estimate_background
Expand Down Expand Up @@ -149,3 +150,195 @@ def optimize_gh_cut(
best_cut_table["cut"][bin_id] = gh_cuts[best]["cut"][bin_id]

return best_sensitivity, best_cut_table





def optimize_cuts(
    signal,
    background,
    reco_energy_bins,
    gh_efficiencies,
    theta_efficiencies,
    multiplicities,
    gh_op=operator.ge,
    background_radius=1 * u.deg,
    alpha=1.0,
    progress=True,
    n_jobs=1,
    **kwargs
):
    """
    Optimize gamma/hadron, theta and multiplicity cuts in every bin of
    reconstructed energy for best sensitivity.

    For each combination of (multiplicity, gh efficiency, theta efficiency),
    energy-dependent percentile cuts are computed from the signal events,
    the sensitivity is calculated globally, and finally the best combination
    is looked up independently for each energy bin.

    Parameters
    ----------
    signal: astropy.table.QTable
        event list of simulated signal events.
        Required columns are `theta`, `reco_energy`, 'weight', `gh_score`,
        `multiplicity`.
        No directional (theta) or gamma/hadron cut should already be applied.
    background: astropy.table.QTable
        event list of simulated background events.
        Required columns are `reco_source_fov_offset`, `reco_energy`,
        'weight', `gh_score`, `multiplicity`.
        No directional (theta) or gamma/hadron cut should already be applied.
    reco_energy_bins: astropy.units.Quantity[energy]
        Bins in reconstructed energy to use for sensitivity computation
    gh_efficiencies: np.ndarray[float, ndim=1]
        The gamma/hadron cut efficiencies to scan for best sensitivity.
    theta_efficiencies: np.ndarray[float, ndim=1]
        The theta cut efficiencies to scan for best sensitivity.
    multiplicities: np.ndarray[int, ndim=1]
        The minimum multiplicities to scan for best sensitivity.
    gh_op: comparison function with signature f(a, b) -> bool
        The comparison function to use for the gamma hadron score.
        Returning true means an event passes the cut, so is not discarded.
        E.g. for gammaness-like score, use `operator.ge` (>=) and for a
        hadroness-like score use `operator.le` (<=).
    background_radius: astropy.units.Quantity[angle]
        Radius around the field of view center used for background rate
        estimation.
    alpha: float
        Size ratio of off region / on region. Will be used to
        scale the background rate.
    progress: bool
        If True, show a progress bar during cut optimization
    n_jobs: int
        Currently unused, kept for forward compatibility with a
        parallelized implementation.
    **kwargs are passed to ``calculate_sensitivity``

    Returns
    -------
    best_sensitivity: astropy.table.QTable
        Sensitivity of the best cut combination per energy bin.
    theta_cut: astropy.table.QTable
        Best theta cut per energy bin.
    gh_cut: astropy.table.QTable
        Best gamma/hadron cut per energy bin.
    multiplicity_cut: astropy.table.QTable
        Best minimum multiplicity per energy bin.
    """

    # we apply each cut for all reco_energy_bins globally, calculate the
    # sensitivity and then lookup the best sensitivity for each
    # bin independently

    def calculate_cuts(events, gh_efficiency, theta_efficiency, reco_energy_bins, gh_op=operator.ge):
        # energy-dependent theta cut containing the requested fraction
        # of events, clipped to at most 0.32 deg
        theta_cut = calculate_percentile_cut(
            events['theta'], events['reco_energy'],
            bins=reco_energy_bins,
            fill_value=0.0 * u.deg,
            max_value=0.32 * u.deg,
            percentile=100 * theta_efficiency
        )

        # calculate necessary percentile needed for
        # ``calculate_percentile_cut`` with the correct efficiency.
        # Depends on the operator, since we need to invert the
        # efficiency if we compare using >=, since percentile is
        # defined as <=.
        if gh_op(-1, 1):  # if operator behaves like "<=", "<" etc:
            percentile = 100 * gh_efficiency
            fill_value = events['gh_score'].min()
        else:  # operator behaves like ">=", ">"
            percentile = 100 * (1 - gh_efficiency)
            fill_value = events['gh_score'].max()

        gh_cut = calculate_percentile_cut(
            events['gh_score'], events['reco_energy'],
            bins=reco_energy_bins,
            fill_value=fill_value, percentile=percentile,
        )

        return theta_cut, gh_cut

    def evaluate_cuts(events, multiplicity, theta_cut, gh_cut=None, gh_op=operator.ge):
        """Apply multiplicity, theta and (optionally) gh cuts, returning a boolean mask."""
        selected = events['multiplicity'] >= multiplicity

        # only evaluate the binned cuts on events that already passed,
        # the fancy-indexed assignment narrows the mask in place
        selected[selected] &= evaluate_binned_cut(
            events["theta"][selected],
            events["reco_energy"][selected], theta_cut, operator.le
        )

        if gh_cut is not None:
            selected[selected] &= evaluate_binned_cut(
                events["gh_score"][selected],
                events["reco_energy"][selected], gh_cut, gh_op
            )

        return selected

    results = []

    it = product(multiplicities, gh_efficiencies, theta_efficiencies)
    total = len(multiplicities) * len(gh_efficiencies) * len(theta_efficiencies)
    for multiplicity, gh_efficiency, theta_efficiency in tqdm(it, disable=not progress, total=total):

        theta_cut, gh_cut = calculate_cuts(
            signal, gh_efficiency, theta_efficiency, reco_energy_bins, gh_op
        )

        # apply the current cuts
        signal_selected = evaluate_cuts(signal, multiplicity, theta_cut, gh_cut, gh_op)

        # background only gets gh cut applied and scaled to theta cut size in estimate_background
        background_selected = evaluate_cuts(background, multiplicity, theta_cut, gh_cut=None)

        # create the histograms
        signal_hist = create_histogram_table(
            signal[signal_selected], reco_energy_bins, "reco_energy"
        )

        background_hist = estimate_background(
            events=background[background_selected],
            reco_energy_bins=reco_energy_bins,
            theta_cuts=theta_cut,
            alpha=alpha,
            background_radius=background_radius
        )

        sensitivity = calculate_sensitivity(
            signal_hist, background_hist, alpha=alpha,
            **kwargs,
        )
        results.append({
            'sensitivity': sensitivity,
            'multiplicity': multiplicity,
            'gh_cut': gh_cut,
            'theta_cut': theta_cut,
        })

    # initialize cut tables with the energy binning
    gh_cut = QTable()
    gh_cut["low"] = reco_energy_bins[0:-1]
    gh_cut["center"] = bin_center(reco_energy_bins)
    gh_cut["high"] = reco_energy_bins[1:]

    theta_cut = gh_cut.copy()
    multiplicity_cut = gh_cut.copy()

    for t in (gh_cut, theta_cut, multiplicity_cut):
        t["cut"] = np.nan

    theta_cut["cut"].unit = u.deg

    best_sensitivity = results[0]['sensitivity'].copy()

    for bin_id in range(len(reco_energy_bins) - 1):
        sensitivities_bin = [
            r['sensitivity']["relative_sensitivity"][bin_id]
            for r in results
        ]

        if not np.all(np.isnan(sensitivities_bin)):
            # nanargmin won't return the index of nan entries
            best = np.nanargmin(sensitivities_bin)
        else:
            # if all are invalid, just use the first one
            best = 0

        best_result = results[best]
        best_sensitivity[bin_id] = best_result['sensitivity'][bin_id]
        gh_cut["cut"][bin_id] = best_result["gh_cut"]["cut"][bin_id]
        theta_cut["cut"][bin_id] = best_result["theta_cut"]["cut"][bin_id]
        # `multiplicity` is stored as a plain scalar in the results dict,
        # so it is assigned directly (indexing it with ["cut"] would raise)
        multiplicity_cut["cut"][bin_id] = best_result["multiplicity"]

    return best_sensitivity, theta_cut, gh_cut, multiplicity_cut