In [18]:
DL2_DIR = r"../dl2_gamma"
PATTERNS = ["gamma_*.h5", "proton_*.h5"]  

import os, glob, h5py, textwrap, inspect
from pathlib import Path

import numpy as np
import pandas as pd

import astropy.units as u
from astropy.table import Table

from ctapipe.io import read_table

# Optional: matplotlib quick plots
import matplotlib.pyplot as plt


In [19]:
files = []
for pat in PATTERNS:
    files.extend(sorted(glob.glob(str(Path(DL2_DIR) / pat))))
print(f"Found {len(files)} files")
for f in files[:5]:
    print(" -", Path(f).name)


Found 12 files
 - gamma_200_300E3GeV_20_20deg_testing_dl1_dl2.h5
 - gamma_200_300E3GeV_30_30deg_testing_dl1_dl2.h5
 - gamma_200_300E3GeV_40_40deg_testing_dl1_dl2.h5
 - gamma_200_300E3GeV_60_60deg_testing_dl1_dl2.h5
 - gamma_point_50_300E3GeV_20_20deg_testing_dl1_dl2.h5


In [27]:
# List available DL2 parameter tables in the FIRST matching DL2 file, using your existing DL2_DIR and PATTERNS variables

import glob, h5py
from pathlib import Path

# Use existing variables; fall back gracefully if PATTERNS not defined
try:
    patterns = PATTERNS
except NameError:
    patterns = ["*.h5"]

files = []
for pat in patterns:
    files.extend(sorted(glob.glob(str(Path(DL2_DIR) / pat))))

assert files, f"No DL2 files found in {DL2_DIR} with patterns {patterns}"

first_file = files[0]
print("Inspecting file:", Path(first_file).name)

def list_parameter_tables(h5file):
    out = []
    with h5py.File(h5file, "r") as h5:
        base = "/dl2/event/telescope/parameters"
        if base not in h5:
            print("Not found:", base)
            return out
        for name in h5[base].keys():
            out.append(f"{base}/{name}")
    return out

candidates = list_parameter_tables(first_file)
print("Found parameter tables:")
for p in candidates:
    print(" -", p)


Inspecting file: gamma_200_300E3GeV_20_20deg_testing_dl1_dl2.h5
Found parameter tables:
 - /dl2/event/telescope/parameters/stereo


In [28]:
from ctapipe.io import read_table
import numpy as np
import astropy.units as u

table_path = "/dl2/event/telescope/parameters/stereo"
tab = read_table(first_file, table_path)

print(f"Rows: {len(tab)} | Columns: {len(tab.colnames)}")
print("First 30 columns:", tab.colnames[:30])

expected = ["true_energy","reco_energy","gammaness","true_alt","true_az","reco_alt","reco_az"]
missing = [c for c in expected if c not in tab.colnames]
print("Missing expected columns:", missing)

def to_value_tev(col):
    try:
        return np.asarray(col.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    try:
        return np.asarray(col.quantity.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    return np.asarray(col, dtype=float)

# try common fallbacks for energy names if needed
true_name = "true_energy" if "true_energy" in tab.colnames else None
reco_name = "reco_energy" if "reco_energy" in tab.colnames else None
for alt in ["mc_energy","sim_energy_true"]:
    if true_name is None and alt in tab.colnames:
        true_name = alt
for alt in ["energy","reco_energy_mean"]:
    if reco_name is None and alt in tab.colnames:
        reco_name = alt

if true_name and reco_name:
    te = to_value_tev(tab[true_name])
    re = to_value_tev(tab[reco_name])
    print(f"Energy columns used: true='{true_name}', reco='{reco_name}'")
    print("true_energy range [TeV]:", float(np.nanmin(te)), "→", float(np.nanmax(te)))
    print("reco_energy range [TeV]:", float(np.nanmin(re)), "→", float(np.nanmax(re)))
else:
    print("Could not resolve energy column names automatically.")


Rows: 393483 | Columns: 35
First 30 columns: ['obs_id', 'event_id', 'true_az_tel', 'true_alt_tel', 'HillasReconstructor_core_x', 'HillasReconstructor_core_y', 'HillasReconstructor_h_max', 'true_az', 'true_alt', 'true_energy', 'log_true_energy', 'true_core_x', 'true_core_y', 'true_h_first_int', 'true_x_max', 'true_shower_primary_id', 'true_camera_x', 'true_camera_y', 'min_true_energy_cut', 'log_reco_energy', 'reco_energy', 'gammaness', 'camera_frame_hillas_intensity_tel1', 'camera_frame_hillas_width_tel1', 'camera_frame_hillas_length_tel1', 'leakage_intensity_width_2_tel1', 'camera_frame_hillas_intensity_tel2', 'camera_frame_hillas_width_tel2', 'camera_frame_hillas_length_tel2', 'leakage_intensity_width_2_tel2']
Missing expected columns: []
Energy columns used: true='true_energy', reco='reco_energy'
true_energy range [TeV]: 0.3180175721645355 → 299.97344970703125
reco_energy range [TeV]: 0.6018515435939218 → 279.7540775007091


In [29]:
import numpy as np
import astropy.units as u

# confirm columns exist
needed = ["true_alt","true_az","reco_alt","reco_az","gammaness"]
present = {c: (c in tab.colnames) for c in needed}
print("Column presence:", present)

def to_value_rad(col):
    try:
        return np.asarray(col.to_value(u.rad))
    except Exception:
        pass
    try:
        return np.asarray(col.quantity.to_value(u.rad))
    except Exception:
        pass
    return np.asarray(col)

def theta_from_altaz(true_alt, true_az, reco_alt, reco_az):
    talt = to_value_rad(true_alt); taz = to_value_rad(true_az)
    ralt = to_value_rad(reco_alt); raz = to_value_rad(reco_az)
    cos_th = (np.sin(talt)*np.sin(ralt) + np.cos(talt)*np.cos(ralt)*np.cos(taz-raz))
    cos_th = np.clip(cos_th, -1.0, 1.0)
    return np.arccos(cos_th)

if all(present[c] for c in ["true_alt","true_az","reco_alt","reco_az"]):
    theta_deg = np.rad2deg(theta_from_altaz(tab["true_alt"], tab["true_az"], tab["reco_alt"], tab["reco_az"]))
    p50, p90 = np.nanpercentile(theta_deg, [50, 90])
    print(f"theta_deg p50={p50:.3f}, p90={p90:.3f}")
else:
    print("Theta cannot be computed because one or more angle columns are missing.")

if present["gammaness"]:
    gh = np.asarray(tab["gammaness"], dtype=float)
    percs = {p: float(np.nanpercentile(gh, p)) for p in (50, 80, 90, 95, 99)}
    print("gammaness percentiles:", percs)
else:
    print("No 'gammaness' column to summarize.")


Column presence: {'true_alt': True, 'true_az': True, 'reco_alt': True, 'reco_az': True, 'gammaness': True}
theta_deg p50=9.079, p90=35.321
gammaness percentiles: {50: 0.7627079732898246, 80: 0.8948992435384067, 90: 0.9347678921632108, 95: 0.9565131945032468, 99: 0.9802342503879063}


In [30]:
from ctapipe.io import read_table
import numpy as np

def safe_read(path):
    try:
        return read_table(first_file, path)
    except Exception:
        return None

print("Reading metadata from:", Path(first_file).name)

sd = safe_read("/simulation/service/shower_distribution")
print("Has /simulation/service/shower_distribution:", sd is not None)

if sd is not None:
    print("Columns:", sd.colnames)
    if "n_entries" in sd.colnames:
        n_showers = int(np.nansum(np.asarray(sd["n_entries"])))
        print("sum(n_entries) =", n_showers)
    else:
        n_showers = None
        print("No 'n_entries' column found.")
    if "bins_energy" in sd.colnames:
        eb = sd["bins_energy"][0]
        print("energy bins TeV min/max ≈", float(np.min(eb)), float(np.max(eb)))
    if "bins_core_dist" in sd.colnames:
        cb = sd["bins_core_dist"][0]
        print("max core dist (m) ≈", float(np.max(cb)))
    if "viewcone" in sd.colnames:
        try:
            print("viewcone (deg) ≈", float(sd["viewcone"][0]))
        except Exception:
            pass

for alt in ("/simulation/run_config", "/simulation/config"):
    t = safe_read(alt)
    print(f"{alt} present:", t is not None)
    if t is not None:
        print(f"{alt} columns:", t.colnames[:20])
        try:
            print(t[:1])
        except Exception:
            pass


Reading metadata from: gamma_200_300E3GeV_20_20deg_testing_dl1_dl2.h5
Has /simulation/service/shower_distribution: True
Columns: ['obs_id', 'hist_id', 'n_entries', 'bins_energy', 'bins_core_dist', 'histogram']
sum(n_entries) = 349860000
energy bins TeV min/max ≈ 0.001 1000.0
max core dist (m) ≈ 1760.0
/simulation/run_config present: False
/simulation/config present: False


In [34]:
import numpy as np
import astropy.units as u
from astropy.table import Table
import inspect

from pyirf.binning import create_bins_per_decade
from pyirf.irf import effective_area_per_energy
from pyirf.simulations import SimulatedEventsInfo

# ---- build SimulatedEventsInfo, handling both API variants ----
sig = inspect.signature(SimulatedEventsInfo)
params = set(sig.parameters.keys())

kwargs = dict(
    n_showers=349_860_000,       # from sum(n_entries)
    energy_min=0.001 * u.TeV,
    energy_max=1000.0 * u.TeV,
    max_impact=1760.0 * u.m,
    spectral_index=-2.0,
)

if {"viewcone_min", "viewcone_max"} <= params:
    kwargs.update(viewcone_min=0.0 * u.deg, viewcone_max=0.0 * u.deg)
elif "viewcone" in params:
    kwargs.update(viewcone=0.0 * u.deg)
else:
    print("Warning: SimulatedEventsInfo signature unexpected:", params)

sim_info = SimulatedEventsInfo(**kwargs)

# ---- build bins from the actual data range to avoid empty bins ----
def to_value_tev(col):
    try:
        return np.asarray(col.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    try:
        return np.asarray(col.quantity.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    return np.asarray(col, dtype=float)

true_e = to_value_tev(tab["true_energy"])
emin = max(0.05, float(np.nanpercentile(true_e, 1)))
emax = min(40.0, float(np.nanpercentile(true_e, 99.5)))
true_bins = create_bins_per_decade((emin * u.TeV), (emax * u.TeV), bins_per_decade=10)

tmini = Table()
tmini["true_energy"] = (true_e * u.TeV)

aeff = effective_area_per_energy(
    selected_events=tmini,
    simulation_info=sim_info,
    true_energy_bins=true_bins
)

vals = aeff.to_value(u.m**2)
print("Aeff shape:", vals.shape)
print("Nonzero bins:", int(np.count_nonzero(vals)), "/", len(vals))
print("First 10 Aeff(m^2):", np.round(vals[:10], 2))
print("Bin edges (TeV):", np.round(true_bins.to_value(u.TeV)[:6], 3), "…")


Aeff shape: (16,)
Nonzero bins: 16 / 16
First 10 Aeff(m^2): [ 1061580.25  2241499.92  3665457.77  5176082.75  6911585.7   9043213.99
 11365037.89 14043049.95 17575390.66 21799004.04]
Bin edges (TeV): [0.989 1.245 1.567 1.973 2.483 3.126] …


In [37]:
import numpy as np
import inspect
import astropy.units as u
from pyirf.cuts import calculate_percentile_cut, evaluate_binned_cut

# use existing variables: tab, true_bins
e_true_edges_TeV = true_bins.to_value(u.TeV)

# choose efficiencies (keep these many events within each true-E bin)
gh_eff = 0.70   # keep top 70% in gammaness (upper tail)
th_eff = 0.70   # keep best 70% in theta (lower tail)

def to_value_rad(col):
    try:
        return np.asarray(col.to_value(u.rad))
    except Exception:
        pass
    try:
        return np.asarray(col.quantity.to_value(u.rad))
    except Exception:
        pass
    return np.asarray(col)

def to_value_tev(col):
    try:
        return np.asarray(col.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    try:
        return np.asarray(col.quantity.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    return np.asarray(col, dtype=float)

def theta_from_altaz(true_alt, true_az, reco_alt, reco_az):
    talt = to_value_rad(true_alt); taz = to_value_rad(true_az)
    ralt = to_value_rad(reco_alt); raz = to_value_rad(reco_az)
    cos_th = (np.sin(talt)*np.sin(ralt) + np.cos(talt)*np.cos(ralt)*np.cos(taz-raz))
    cos_th = np.clip(cos_th, -1.0, 1.0)
    return np.arccos(cos_th)

e_true = to_value_tev(tab["true_energy"])
gh = np.asarray(tab["gammaness"], dtype=float)
theta_deg = np.rad2deg(theta_from_altaz(tab["true_alt"], tab["true_az"], tab["reco_alt"], tab["reco_az"]))

def safe_percentile_cut(values, bin_values, bins, eff_keep, keep_upper_tail=False):
    # calculate_percentile_cut expects either 'efficiency' (0..1) or 'percentile' (0..100) of the *kept* side.
    # For upper-tail keep (gammaness), we pass (1-eff) because pyirf computes '<= cut' by default.
    sig = inspect.signature(calculate_percentile_cut)
    kwargs = dict(values=np.asarray(values, float), bins=np.asarray(bins, float), bin_values=np.asarray(bin_values, float))
    target = (1.0 - eff_keep) if keep_upper_tail else eff_keep
    try:
        if "efficiency" in sig.parameters:
            return calculate_percentile_cut(**kwargs, efficiency=target)
        else:
            return calculate_percentile_cut(**kwargs, percentile=target * 100.0)
    except Exception as e:
        # manual per-bin percentile fallback
        edges = np.asarray(bins, float)
        cut = np.full(len(edges)-1, np.nan)
        for i in range(len(edges)-1):
            m = (kwargs["bin_values"] >= edges[i]) & (kwargs["bin_values"] < edges[i+1])
            if np.any(m):
                pct = (1.0 - eff_keep) * 100.0 if keep_upper_tail else eff_keep * 100.0
                cut[i] = np.percentile(kwargs["values"][m], pct)
        return cut

gh_cut = safe_percentile_cut(gh, e_true, e_true_edges_TeV, eff_keep=gh_eff, keep_upper_tail=True)
th_cut = safe_percentile_cut(theta_deg, e_true, e_true_edges_TeV, eff_keep=th_eff, keep_upper_tail=False)

# evaluate selection per bin using pyirf (falls back to manual if needed)
try:
    sel_gh = evaluate_binned_cut(values=gh,    bins=e_true_edges_TeV, bin_values=e_true, cut=gh_cut, operator=">=")
    sel_th = evaluate_binned_cut(values=theta_deg, bins=e_true_edges_TeV, bin_values=e_true, cut=th_cut, operator="<=")
except Exception:
    idx = np.digitize(e_true, e_true_edges_TeV) - 1
    sel_gh = np.zeros_like(gh, dtype=bool); sel_th = np.zeros_like(gh, dtype=bool)
    good = (idx >= 0) & (idx < len(gh_cut))
    sel_gh[good] = gh[good] >= gh_cut[idx[good]]
    sel_th[good] = theta_deg[good] <= th_cut[idx[good]]

sel = sel_gh & sel_th

# print compact summary
med_gh_cut = float(np.nanmedian(gh_cut))
med_th_cut = float(np.nanmedian(th_cut))
kept = int(np.count_nonzero(sel))
print(f"Per-bin cuts computed: gh_eff={gh_eff:.2f} (upper-tail) → median cut={med_gh_cut:.3f}; "
      f"th_eff={th_eff:.2f} → median cut={med_th_cut:.3f} deg")
print(f"Selected events: {kept} / {len(gh)}  ({kept/len(gh)*100:.1f}%)")

# show a few bin-wise stats
edges = e_true_edges_TeV
bin_ids = np.digitize(e_true, edges) - 1
counts = np.array([np.count_nonzero(bin_ids==i) for i in range(len(edges)-1)])
kept_counts = np.array([np.count_nonzero(sel & (bin_ids==i)) for i in range(len(edges)-1)])
nonempty = counts > 0
print("Bins with data:", int(np.count_nonzero(nonempty)), "/", len(counts))
print("First 8 bins (Etrue_min→max TeV):")
for i in range(min(8, len(counts))):
    print(f"  [{edges[i]:.3f}, {edges[i+1]:.3f})  N={counts[i]:6d}  kept={kept_counts[i]:6d}  gh_cut={gh_cut[i]:.3f}  th_cut={th_cut[i]:.3f}")


Per-bin cuts computed: gh_eff=0.70 (upper-tail) → median cut=0.698; th_eff=0.70 → median cut=13.270 deg
Selected events: 156866 / 393483  (39.9%)
Bins with data: 16 / 16
First 8 bins (Etrue_min→max TeV):
  [0.989, 1.245)  N=  7940  kept=  4131  gh_cut=0.624  th_cut=19.266
  [1.245, 1.567)  N= 13317  kept=  6944  gh_cut=0.664  th_cut=17.059
  [1.567, 1.973)  N= 17298  kept=  9040  gh_cut=0.696  th_cut=15.188
  [1.973, 2.483)  N= 19403  kept= 10144  gh_cut=0.717  th_cut=13.905
  [2.483, 3.126)  N= 20580  kept= 10756  gh_cut=0.730  th_cut=13.008
  [3.126, 3.936)  N= 21389  kept= 11316  gh_cut=0.733  th_cut=12.445
  [3.936, 4.955)  N= 21352  kept= 11402  gh_cut=0.735  th_cut=11.947
  [4.955, 6.238)  N= 20957  kept= 11303  gh_cut=0.731  th_cut=11.938


In [39]:
import numpy as np
import astropy.units as u
from astropy.table import Table
from pyirf.irf import effective_area_per_energy
from pyirf.simulations import SimulatedEventsInfo
import inspect

# Rebuild SimulatedEventsInfo (handles both pyirf signatures)
sig = inspect.signature(SimulatedEventsInfo)
params = set(sig.parameters.keys())

kwargs = dict(
    n_showers=349_860_000,       # from metadata
    energy_min=0.001 * u.TeV,
    energy_max=1000.0 * u.TeV,
    max_impact=1760.0 * u.m,
    spectral_index=-2.0,
)
if {"viewcone_min", "viewcone_max"} <= params:
    kwargs.update(viewcone_min=0.0 * u.deg, viewcone_max=0.0 * u.deg)
elif "viewcone" in params:
    kwargs.update(viewcone=0.0 * u.deg)

sim_info = SimulatedEventsInfo(**kwargs)

# Build selected events table (pyirf needs Quantity 'true_energy')
def to_value_tev(col):
    try:
        return np.asarray(col.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    try:
        return np.asarray(col.quantity.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    return np.asarray(col, dtype=float)

true_e = to_value_tev(tab["true_energy"])

# 'sel' and 'true_bins' are from the previous cell
t_selected = Table()
t_selected["true_energy"] = (true_e[sel] * u.TeV)

aeff_sel = effective_area_per_energy(
    selected_events=t_selected,
    simulation_info=sim_info,
    true_energy_bins=true_bins
).to_value(u.m**2)

print("Aeff(after cuts) shape:", aeff_sel.shape)
print("Nonzero bins:", int(np.count_nonzero(aeff_sel)), "/", len(aeff_sel))
print("First 10 Aeff(m^2) after cuts:", np.round(aeff_sel[:10], 2))


Aeff(after cuts) shape: (16,)
Nonzero bins: 16 / 16
First 10 Aeff(m^2) after cuts: [  552315.87  1168804.94  1915582.05  2706085.83  3612294.26  4784375.59
  6068947.27  7574013.15  9568017.7  11877621.61]


In [40]:
import numpy as np
from scipy.stats import moyal, skewnorm
import astropy.units as u

# uses: tab, sel, true_bins from earlier cells

def to_value_tev(col):
    try:
        return np.asarray(col.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    try:
        return np.asarray(col.quantity.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    return np.asarray(col, dtype=float)

true_e_all = to_value_tev(tab["true_energy"])
reco_e_all = to_value_tev(tab["reco_energy"])

true_edges = true_bins.to_value(u.TeV)

mu_loc = np.full(len(true_edges)-1, np.nan)
mu_scale = np.full(len(true_edges)-1, np.nan)
mu_a = np.full(len(true_edges)-1, np.nan)
model = np.array(["log10_moyal"]*(len(true_edges)-1), dtype=object)
n_used = np.zeros(len(true_edges)-1, dtype=int)

for i in range(len(true_edges)-1):
    m = sel & (true_e_all >= true_edges[i]) & (true_e_all < true_edges[i+1])
    n_used[i] = int(np.count_nonzero(m))
    if n_used[i] < 20:
        continue
    rel = np.log10(np.clip(reco_e_all[m], 1e-20, None) / np.clip(true_e_all[m], 1e-20, None))
    med = np.nanmedian(rel)
    mad = np.nanmedian(np.abs(rel - med))
    sigma = max(1.4826 * mad, 1e-3)
    clean = rel[np.abs(rel - med) < 3.0 * sigma]
    if clean.size < 20:
        clean = rel

    # try moyal
    try:
        loc_m, scale_m = moyal.fit(clean, loc=med, scale=sigma)
    except Exception:
        loc_m, scale_m = med, sigma
    ok_m = np.isfinite(loc_m) and np.isfinite(scale_m) and (abs(loc_m) < 2.0) and (0.005 < scale_m < 3.0)

    # try skewnorm
    try:
        a_s, loc_s, scale_s = skewnorm.fit(clean, loc=med, scale=sigma)
        ok_s = (np.isfinite(a_s) and np.isfinite(loc_s) and np.isfinite(scale_s)
                and abs(a_s) < 30 and abs(loc_s) < 2.0 and 0.005 < scale_s < 3.0)
    except Exception:
        ok_s = False
        a_s = loc_s = scale_s = np.nan

    if ok_s and (abs(a_s) > 0.5):
        mu_loc[i], mu_scale[i], mu_a[i] = float(loc_s), float(scale_s), float(a_s)
        model[i] = "log10_skewnorm"
    elif ok_m:
        mu_loc[i], mu_scale[i] = float(loc_m), float(scale_m)
        mu_a[i] = np.nan
        model[i] = "log10_moyal"
    else:
        mu_loc[i], mu_scale[i] = float(med), float(sigma)
        mu_a[i] = np.nan
        model[i] = "log10_moyal"

# print compact table for first 10 bins
print("Bin  Etrue_min–Etrue_max [TeV]  Nsel    mu_loc    mu_scale     mu_a    model")
for i in range(min(10, len(mu_loc))):
    print(f"{i:02d}   {true_edges[i]:.3f}–{true_edges[i+1]:.3f}    {n_used[i]:5d}   "
          f"{mu_loc[i]:8.4f}   {mu_scale[i]:9.4f}   {mu_a[i]:7.3f}   {model[i]}")


Bin  Etrue_min–Etrue_max [TeV]  Nsel    mu_loc    mu_scale     mu_a    model
00   0.989–1.245     4131     0.0323      0.1006     1.986   log10_skewnorm
01   1.245–1.567     6944     0.0017      0.0903     1.540   log10_skewnorm
02   1.567–1.973     9040    -0.0073      0.0407       nan   log10_moyal
03   1.973–2.483    10144    -0.0238      0.0407       nan   log10_moyal
04   2.483–3.126    10756    -0.0408      0.0822     1.333   log10_skewnorm
05   3.126–3.936    11316    -0.0351      0.0407       nan   log10_moyal
06   3.936–4.955    11402    -0.0366      0.0403       nan   log10_moyal
07   4.955–6.238    11303    -0.0392      0.0402       nan   log10_moyal
08   6.238–7.853    11342    -0.0394      0.0402       nan   log10_moyal
09   7.853–9.886    11184    -0.0410      0.0395       nan   log10_moyal


In [41]:
import re, glob, h5py
from pathlib import Path
import numpy as np
import astropy.units as u
from ctapipe.io import read_table

# 1) derive the zenith token from the gamma file name (e.g. "_20_20deg_")
name = Path(first_file).name
m = re.search(r"_[0-9]{1,2}_[0-9]{1,2}deg_", name)
zen_token = m.group(0) if m else ""
print("Gamma file:", name)
print("Zenith token:", zen_token or "(none)")

# 2) find proton files in DL2_DIR matching same zenith token (fallback: any proton_*.h5)
proton_files = sorted(glob.glob(str(Path(DL2_DIR) / f"proton*{zen_token}*.h5"))) if zen_token else []
if not proton_files:
    proton_files = sorted(glob.glob(str(Path(DL2_DIR) / "proton_*.h5")))
print(f"Found {len(proton_files)} proton files")
print("First few:", [Path(f).name for f in proton_files[:5]])

assert proton_files, "No proton files found."

pfile = proton_files[0]
print("Inspecting proton file:", Path(pfile).name)

# 3) find available parameter tables under /dl2/event/telescope/parameters
def list_parameter_tables(h5file):
    out = []
    with h5py.File(h5file, "r") as h5:
        base = "/dl2/event/telescope/parameters"
        if base in h5:
            for name in h5[base].keys():
                out.append(f"{base}/{name}")
    return out

pcands = list_parameter_tables(pfile)
print("Proton parameter tables:", pcands)

# prefer stereo if present
ptable = next((p for p in pcands if p.endswith("/stereo")), (pcands[0] if pcands else None))
assert ptable, "No proton parameter table found."

# 4) load table and inspect
ptab = read_table(pfile, ptable)
print(f"Proton rows: {len(ptab)} | cols: {len(ptab.colnames)}")
needed = ["reco_energy","gammaness","true_alt","true_az","reco_alt","reco_az"]
missing = [c for c in needed if c not in ptab.colnames]
print("Missing expected columns:", missing)

def to_value_tev(col):
    try:
        return np.asarray(col.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    try:
        return np.asarray(col.quantity.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    return np.asarray(col, dtype=float)

ereco = to_value_tev(ptab["reco_energy"]) if "reco_energy" in ptab.colnames else None
if ereco is not None:
    print("Proton reco_energy range [TeV]:", float(np.nanmin(ereco)), "→", float(np.nanmax(ereco)))
else:
    print("No proton 'reco_energy' to summarize.")


Gamma file: gamma_200_300E3GeV_20_20deg_testing_dl1_dl2.h5
Zenith token: _20_20deg_
Found 1 proton files
First few: ['proton_400_500E3GeV_20_20deg_testing_dl1_dl2.h5']
Inspecting proton file: proton_400_500E3GeV_20_20deg_testing_dl1_dl2.h5
Proton parameter tables: ['/dl2/event/telescope/parameters/stereo']
Proton rows: 2754879 | cols: 35
Missing expected columns: []
Proton reco_energy range [TeV]: 0.5921603736083508 → 284.53092990263553


In [43]:
import numpy as np
import astropy.units as u
from pyirf.cuts import evaluate_binned_cut
from pyirf.binning import create_bins_per_decade

# Inputs reused from previous cells:
# - tab : gamma table (already loaded)
# - ptab: proton table (already loaded)
# - true_bins, e_true_edges_TeV : true-energy bin edges (from earlier)
# - gh_cut, th_cut : per-true-E-bin cuts we computed on gamma
# - theta_from_altaz, to_value_rad, to_value_tev : helpers defined earlier (redefine briefly here)

def to_value_rad(col):
    try:
        return np.asarray(col.to_value(u.rad))
    except Exception:
        pass
    try:
        return np.asarray(col.quantity.to_value(u.rad))
    except Exception:
        pass
    return np.asarray(col)

def to_value_tev(col):
    try:
        return np.asarray(col.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    try:
        return np.asarray(col.quantity.to_value(u.TeV), dtype=float)
    except Exception:
        pass
    return np.asarray(col, dtype=float)

def theta_from_altaz(true_alt, true_az, reco_alt, reco_az):
    talt = to_value_rad(true_alt); taz = to_value_rad(true_az)
    ralt = to_value_rad(reco_alt); raz = to_value_rad(reco_az)
    cos_th = (np.sin(talt)*np.sin(ralt) + np.cos(talt)*np.cos(ralt)*np.cos(taz-raz))
    cos_th = np.clip(cos_th, -1.0, 1.0)
    return np.arccos(cos_th)

# 1) Selection masks on protons using the gamma-derived cuts (per TRUE-E bin)
p_true = to_value_tev(ptab["true_energy"])
p_gh   = np.asarray(ptab["gammaness"], dtype=float)
p_th   = np.rad2deg(theta_from_altaz(ptab["true_alt"], ptab["true_az"], ptab["reco_alt"], ptab["reco_az"]))

try:
    p_sel_gh = evaluate_binned_cut(values=p_gh, bins=e_true_edges_TeV, bin_values=p_true, cut=gh_cut, operator=">=")
    p_sel_th = evaluate_binned_cut(values=p_th, bins=e_true_edges_TeV, bin_values=p_true, cut=th_cut, operator="<=")
except Exception:
    idx = np.digitize(p_true, e_true_edges_TeV) - 1
    p_sel_gh = np.zeros_like(p_gh, dtype=bool); p_sel_th = np.zeros_like(p_gh, dtype=bool)
    good = (idx >= 0) & (idx < len(gh_cut))
    p_sel_gh[good] = p_gh[good] >= gh_cut[idx[good]]
    p_sel_th[good] = p_th[good] <= th_cut[idx[good]]

p_sel = p_sel_gh & p_sel_th
print(f"Proton selected: {int(np.count_nonzero(p_sel))} / {len(p_sel)} "
      f"({100*np.count_nonzero(p_sel)/len(p_sel):.1f}%)")

# 2) Background shape over RECO-energy bins (distribute total rate)
p_reco = to_value_tev(ptab["reco_energy"])
# sensible reco binning from data range
pr_emin = max(0.05, float(np.nanpercentile(p_reco, 0.5)))
pr_emax = min(60.0, float(np.nanpercentile(p_reco, 99.5)))
e_reco_bins = create_bins_per_decade((pr_emin*u.TeV), (pr_emax*u.TeV), bins_per_decade=8).to_value(u.TeV)

counts_sel, _ = np.histogram(p_reco[p_sel], bins=e_reco_bins)
total_rate_hz = 20.0  # choose a realistic ROI total rate for a quick sanity check
rates = total_rate_hz * counts_sel.astype(float) / max(1, counts_sel.sum())
rates = np.maximum(rates, 1e-12)

print("Reco-energy bins:", len(e_reco_bins)-1, "  Nonzero rate bins:", int(np.count_nonzero(rates)))
print("First 10 bins summary:")
for i in range(min(10, len(rates))):
    print(f"  [{e_reco_bins[i]:.3f}, {e_reco_bins[i+1]:.3f})  N_sel={counts_sel[i]:7d}  rate={rates[i]:.6f} Hz")

# Also report display cuts (medians) that we would store in the background CSV
gh_display = float(np.nanmedian(gh_cut))
th_display = float(np.nanmedian(th_cut))
print(f"Display cuts → Gammaness_cut={gh_display:.3f}, Theta_cut_deg={th_display:.3f}")


Proton selected: 60298 / 2754879 (2.2%)
Reco-energy bins: 13   Nonzero rate bins: 13
First 10 bins summary:
  [1.201, 1.602)  N_sel=   2708  rate=0.904491 Hz
  [1.602, 2.136)  N_sel=   4763  rate=1.590875 Hz
  [2.136, 2.848)  N_sel=   4955  rate=1.655004 Hz
  [2.848, 3.798)  N_sel=   4874  rate=1.627950 Hz
  [3.798, 5.065)  N_sel=   5161  rate=1.723810 Hz
  [5.065, 6.754)  N_sel=   5728  rate=1.913192 Hz
  [6.754, 9.007)  N_sel=   6485  rate=2.166035 Hz
  [9.007, 12.011)  N_sel=   6996  rate=2.336712 Hz
  [12.011, 16.016)  N_sel=   7271  rate=2.428564 Hz
  [16.016, 21.358)  N_sel=   6443  rate=2.152007 Hz
Display cuts → Gammaness_cut=0.698, Theta_cut_deg=13.270


In [45]:
import re
import numpy as np
import pandas as pd
from pathlib import Path
import astropy.units as u

# --- inputs reused from previous cells ---
# first_file (gamma), pfile (proton), tab (gamma table), ptab (proton table)
# true_bins, aeff_sel
# mu_loc, mu_scale, mu_a, model (from migration fit)
# e_reco_bins, rates, gh_display, th_display (from background shape step)

# 1) derive zenith from filename
name = Path(first_file).name
m = re.search(r"_([0-9]{1,2})_([0-9]{1,2})deg_", name)
zenith = int(m.group(1)) if m else -1
print("Zenith angle (deg):", zenith)

# 2) edges for gamma CSV
e_true_edges_TeV = true_bins.to_value(u.TeV)

# 3) assemble gamma CSV rows
gamma_rows = []
for i in range(len(e_true_edges_TeV) - 1):
    row = {
        "ZD_deg": float(zenith),
        "Etrue_min_TeV": float(e_true_edges_TeV[i]),
        "Etrue_max_TeV": float(e_true_edges_TeV[i+1]),
        "Aeff_m2": float(max(aeff_sel[i], 0.0)),
        "emig_mu_loc": float(mu_loc[i]) if np.isfinite(mu_loc[i]) else 0.0,
        "emig_mu_scale": float(mu_scale[i]) if np.isfinite(mu_scale[i]) else 0.2,
        "emig_mu_a": float(mu_a[i]) if np.isfinite(mu_a[i]) else np.nan,
        "emig_model": str(model[i]),
    }
    gamma_rows.append(row)
df_gamma = pd.DataFrame(gamma_rows)

# 4) assemble background CSV rows
back_rows = []
for i in range(len(e_reco_bins) - 1):
    row = {
        "ZD_deg": float(zenith),
        "Ereco_min_TeV": float(e_reco_bins[i]),
        "Ereco_max_TeV": float(e_reco_bins[i+1]),
        "BckgRate_per_second": float(rates[i]),
        "Theta_cut_deg": float(th_display),
        "Gammaness_cut": float(gh_display),
    }
    back_rows.append(row)
df_back = pd.DataFrame(back_rows)

# 5) save to disk (adjust OUTPUT_DIR if you like)
OUTPUT_DIR = Path(r".\SST1M_csv")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

gh_eff = 0.70
th_eff = 0.70
gamma_csv = OUTPUT_DIR / f"SST1M_gamma_irf_gheffi_{gh_eff:.2f}_theffi_{th_eff:.2f}.csv"
backg_csv = OUTPUT_DIR / f"SST1M_backg_irf_gheffi_{gh_eff:.2f}_theffi_{th_eff:.2f}.csv"

# append if files exist, otherwise write with header
if gamma_csv.exists():
    df_gamma.to_csv(gamma_csv, mode="a", header=False, index=False)
else:
    df_gamma.to_csv(gamma_csv, index=False)

if backg_csv.exists():
    df_back.to_csv(backg_csv, mode="a", header=False, index=False)
else:
    df_back.to_csv(backg_csv, index=False)

print("Saved gamma CSV:", gamma_csv)
print("Saved backg CSV:", backg_csv)
print("Gamma head:\n", df_gamma.head(3))
print("Backg head:\n", df_back.head(3))


Zenith angle (deg): 20
Saved gamma CSV: SST1M_csv\SST1M_gamma_irf_gheffi_0.70_theffi_0.70.csv
Saved backg CSV: SST1M_csv\SST1M_backg_irf_gheffi_0.70_theffi_0.70.csv
Gamma head:
    ZD_deg  Etrue_min_TeV  Etrue_max_TeV       Aeff_m2  emig_mu_loc  \
0    20.0       0.988613       1.244591  5.523159e+05     0.032257   
1    20.0       1.244591       1.566847  1.168805e+06     0.001696   
2    20.0       1.566847       1.972543  1.915582e+06    -0.007345   

   emig_mu_scale  emig_mu_a      emig_model  
0       0.100580    1.98594  log10_skewnorm  
1       0.090290    1.54035  log10_skewnorm  
2       0.040725        NaN     log10_moyal  
Backg head:
    ZD_deg  Ereco_min_TeV  Ereco_max_TeV  BckgRate_per_second  Theta_cut_deg  \
0    20.0       1.201065       1.601645             0.904491       13.27004   
1    20.0       1.601645       2.135828             1.590875       13.27004   
2    20.0       2.135828       2.848173             1.655004       13.27004   

   Gammaness_cut  
0       

In [49]:
# ONE-CELL PIPELINE: build IRF CSVs for all zeniths using gamma + gamma_point (signal) and proton (background)
# Efficiencies: 0.40, 0.70, 0.90
# Output: ./final_CSV/SST1M_gamma_irf_gheffi_XX_theffi_XX.csv and SST1M_backg_irf_gheffi_XX_theffi_XX.csv

import re, glob, inspect
from pathlib import Path
import numpy as np
import pandas as pd
import astropy.units as u
from astropy.table import vstack, Table
import h5py

from ctapipe.io import read_table

from pyirf.cuts import calculate_percentile_cut, evaluate_binned_cut
from pyirf.binning import create_bins_per_decade
from pyirf.irf import effective_area_per_energy
from pyirf.simulations import SimulatedEventsInfo

# ---------------- utilities ----------------
def zd_from_filename(path):
    m = re.search(r"_([0-9]{1,2})_([0-9]{1,2})deg_", Path(path).name)
    if not m:
        return None
    return float(int(m.group(1)))

def to_value_tev(col):
    for attr in ("to_value", "quantity"):
        try:
            if attr == "to_value": return np.asarray(col.to_value(u.TeV), dtype=float)
            return np.asarray(col.quantity.to_value(u.TeV), dtype=float)
        except Exception: pass
    return np.asarray(col, dtype=float)

def to_value_rad(col):
    for attr in ("to_value", "quantity"):
        try:
            if attr == "to_value": return np.asarray(col.to_value(u.rad))
            return np.asarray(col.quantity.to_value(u.rad))
        except Exception: pass
    return np.asarray(col)

def theta_from_altaz(true_alt, true_az, reco_alt, reco_az):
    talt = to_value_rad(true_alt); taz = to_value_rad(true_az)
    ralt = to_value_rad(reco_alt); raz = to_value_rad(reco_az)
    cos_th = (np.sin(talt)*np.sin(ralt) + np.cos(talt)*np.cos(ralt)*np.cos(taz-raz))
    return np.arccos(np.clip(cos_th, -1.0, 1.0))

def read_param_table(path):
    with h5py.File(path, "r") as h5:
        base = "/dl2/event/telescope/parameters"
        if base not in h5:
            raise KeyError(f"{base} missing in {path}")
        # prefer stereo
        cand = [f"{base}/stereo"] + [f"{base}/{k}" for k in h5[base].keys() if k != "stereo"]
        for p in cand:
            if p in h5:
                return read_table(path, p)
    raise KeyError("No parameters table found")

def stack_tables(paths):
    tabs = []
    for p in paths:
        try:
            t = read_param_table(p)
            t.meta = {}
            tabs.append(t)
        except Exception as e:
            print(f"[skip] {Path(p).name}: {e}")
    if not tabs:
        return None
    return vstack(tabs, metadata_conflicts="silent")

def read_shower_info(path):
    try:
        sd = read_table(path, "/simulation/service/shower_distribution")
    except Exception:
        return None
    out = {}
    out["n_entries"] = int(np.nansum(np.asarray(sd["n_entries"]))) if "n_entries" in sd.colnames else 0
    if "bins_energy" in sd.colnames:
        eb = sd["bins_energy"][0]
        out["emin"] = float(np.min(eb)); out["emax"] = float(np.max(eb))
    if "bins_core_dist" in sd.colnames:
        cb = sd["bins_core_dist"][0]
        out["rmax"] = float(np.max(cb))
    return out

def aggregate_sim_info(paths):
    tot_n = 0; emin = None; emax = None; rmax = None
    for p in paths:
        info = read_shower_info(p)
        if not info: continue
        tot_n += info.get("n_entries", 0)
        if "emin" in info:
            emin = info["emin"] if emin is None else min(emin, info["emin"])
        if "emax" in info:
            emax = info["emax"] if emax is None else max(emax, info["emax"])
        if "rmax" in info:
            rmax = info["rmax"] if rmax is None else max(rmax, info["rmax"])
    # conservative fallbacks
    if emin is None: emin = 0.001
    if emax is None: emax = 1000.0
    if rmax is None: rmax = 300.0
    return dict(n_showers=tot_n, emin=emin, emax=emax, rmax=rmax)

def sim_info_from_agg(agg):
    sig = inspect.signature(SimulatedEventsInfo)
    params = set(sig.parameters.keys())
    kwargs = dict(
        n_showers=int(max(agg["n_showers"], 1)),
        energy_min=float(agg["emin"]) * u.TeV,
        energy_max=float(agg["emax"]) * u.TeV,
        max_impact=float(agg["rmax"]) * u.m,
        spectral_index=-2.0,
    )
    if {"viewcone_min","viewcone_max"} <= params:
        kwargs.update(viewcone_min=0.0*u.deg, viewcone_max=0.0*u.deg)
    elif "viewcone" in params:
        kwargs.update(viewcone=0.0*u.deg)
    return SimulatedEventsInfo(**kwargs)

def as_float_array(x):
    if hasattr(x, 'colnames'):
        name = 'cut' if 'cut' in x.colnames else x.colnames[0]
        col = x[name]
        try: return np.asarray(col.to_value(u.one), dtype=float)
        except Exception: return np.asarray(col, dtype=float)
    if isinstance(x, np.ndarray) and x.dtype.names:
        fld = 'cut' if 'cut' in x.dtype.names else x.dtype.names[0]
        return np.asarray(x[fld], dtype=float)
    try: return np.asarray(x.to_value(u.one), dtype=float)
    except Exception: return np.asarray(x, dtype=float)

def percentile_cut_compat(values, bin_values, edges, eff_keep, upper_tail=False):
    sig = inspect.signature(calculate_percentile_cut)
    params = set(sig.parameters.keys())
    kwargs = dict(values=np.asarray(values, float),
                  bins=np.asarray(edges, float),
                  bin_values=np.asarray(bin_values, float))
    target = (1.0 - eff_keep) if upper_tail else eff_keep
    if "efficiency" in params:
        kwargs["efficiency"] = target
    else:
        kwargs["percentile"] = target * 100.0
    if "fill_value" in params:
        kwargs["fill_value"] = np.nan
    return as_float_array(calculate_percentile_cut(**kwargs))

def eval_binned(values, bin_values, edges, cut, op):
    cut_arr = as_float_array(cut)
    try:
        return evaluate_binned_cut(values=np.asarray(values,float),
                                   bins=np.asarray(edges,float),
                                   bin_values=np.asarray(bin_values,float),
                                   cut=cut_arr,
                                   operator=op)
    except Exception:
        idx = np.digitize(bin_values, edges) - 1
        sel = np.zeros_like(values, dtype=bool)
        good = (idx>=0) & (idx<len(cut_arr))
        if op == ">=": sel[good] = values[good] >= cut_arr[idx[good]]
        else:          sel[good] = values[good] <= cut_arr[idx[good]]
        return sel

# ---------------- gather files by zenith ----------------
DL2 = Path(DL2_DIR)
gamma_files       = sorted(glob.glob(str(DL2 / "gamma_*.h5")))
gamma_point_files = sorted(glob.glob(str(DL2 / "gamma_point_*.h5")))
proton_files      = sorted(glob.glob(str(DL2 / "proton_*.h5")))

def group_by_zenith(paths):
    d = {}
    for p in paths:
        z = zd_from_filename(p)
        if z is None: continue
        d.setdefault(z, []).append(p)
    return d

G = group_by_zenith(gamma_files)
GP = group_by_zenith(gamma_point_files)
P = group_by_zenith(proton_files)

zeniths = sorted(set(G.keys()) | set(GP.keys()) | set(P.keys()))
if not zeniths:
    raise RuntimeError("No DL2 files grouped by zenith found.")

# ---------------- build CSVs per efficiency, appending all zeniths ----------------
OUT = Path("./final_CSV")
OUT.mkdir(parents=True, exist_ok=True)
eff_list = [0.40, 0.70, 0.90]

for eff in eff_list:
    gamma_csv = OUT / f"SST1M_gamma_irf_gheffi_{eff:.2f}_theffi_{eff:.2f}.csv"
    backg_csv = OUT / f"SST1M_backg_irf_gheffi_{eff:.2f}_theffi_{eff:.2f}.csv"
    if gamma_csv.exists(): gamma_csv.unlink()
    if backg_csv.exists(): backg_csv.unlink()

    for zd in zeniths:
        g_paths = G.get(zd, []) + GP.get(zd, [])
        p_paths = P.get(zd, [])
        if not g_paths or not p_paths:
            print(f"[skip zd={zd}] gamma_paths={len(g_paths)} proton_paths={len(p_paths)}")
            continue

        g_tab = stack_tables(g_paths)
        p_tab = stack_tables(p_paths)
        if g_tab is None or p_tab is None:
            print(f"[skip zd={zd}] could not stack tables")
            continue

        g_true = to_value_tev(g_tab["true_energy"]); g_reco = to_value_tev(g_tab["reco_energy"])
        g_gh = np.asarray(g_tab["gammaness"], float)
        g_th = np.rad2deg(theta_from_altaz(g_tab["true_alt"], g_tab["true_az"], g_tab["reco_alt"], g_tab["reco_az"]))

        p_true = to_value_tev(p_tab["true_energy"]); p_reco = to_value_tev(p_tab["reco_energy"])
        p_gh = np.asarray(p_tab["gammaness"], float)
        p_th = np.rad2deg(theta_from_altaz(p_tab["true_alt"], p_tab["true_az"], p_tab["reco_alt"], p_tab["reco_az"]))

        sim_agg = aggregate_sim_info(g_paths)
        sim_g = sim_info_from_agg(sim_agg)

        t_emin = max(0.05, float(np.nanpercentile(g_true, 1)))
        t_emax = min(40.0, float(np.nanpercentile(g_true, 99.5)))
        true_bins = create_bins_per_decade((t_emin*u.TeV), (t_emax*u.TeV), bins_per_decade=10)
        e_true_edges = true_bins.to_value(u.TeV)

        r_emin = max(0.05, float(np.nanpercentile(p_reco, 0.5)))
        r_emax = min(80.0, float(np.nanpercentile(p_reco, 99.7)))
        reco_bins = create_bins_per_decade((r_emin*u.TeV), (r_emax*u.TeV), bins_per_decade=8)
        e_reco_edges = reco_bins.to_value(u.TeV)

        gh_cut_reco = percentile_cut_compat(g_gh, g_reco, e_reco_edges, eff_keep=eff, upper_tail=True)
        th_cut_reco = percentile_cut_compat(g_th, g_reco, e_reco_edges, eff_keep=eff, upper_tail=False)

        p_sel_gh = eval_binned(p_gh, p_reco, e_reco_edges, gh_cut_reco, op=">=")
        p_sel_th = eval_binned(p_th, p_reco, e_reco_edges, th_cut_reco, op="<=")
        p_sel = p_sel_gh & p_sel_th

        counts_sel, _ = np.histogram(p_reco[p_sel], bins=e_reco_edges)
        total_rate_hz = 20.0
        rates = total_rate_hz * counts_sel.astype(float) / max(1, counts_sel.sum())
        rates = np.maximum(rates, 1e-12)

        df_back = pd.DataFrame({
            "ZD_deg": np.full(len(e_reco_edges)-1, zd, dtype=float),
            "Ereco_min_TeV": e_reco_edges[:-1].astype(float),
            "Ereco_max_TeV": e_reco_edges[1:].astype(float),
            "BckgRate_per_second": rates.astype(float),
            "Theta_cut_deg": th_cut_reco.astype(float),
            "Gammaness_cut": gh_cut_reco.astype(float),
        })
        df_back.to_csv(backg_csv, mode="a", header=not backg_csv.exists(), index=False)

        gh_cut_true = percentile_cut_compat(g_gh, g_true, e_true_edges, eff_keep=eff, upper_tail=True)
        th_cut_true = percentile_cut_compat(g_th, g_true, e_true_edges, eff_keep=eff, upper_tail=False)

        g_sel_gh = eval_binned(g_gh, g_true, e_true_edges, gh_cut_true, op=">=")
        g_sel_th = eval_binned(g_th, g_true, e_true_edges, th_cut_true, op="<=")
        g_sel = g_sel_gh & g_sel_th

        t_selected = Table()
        t_selected["true_energy"] = (g_true[g_sel] * u.TeV)
        aeff_sel = effective_area_per_energy(t_selected, sim_g, true_bins).to_value(u.m**2)

        # energy-migration fit on ratio Ereco/Etrue per true-E bin
        mu_loc = np.full(len(e_true_edges)-1, np.nan)
        mu_scale = np.full(len(e_true_edges)-1, np.nan)
        mu_a = np.full(len(e_true_edges)-1, np.nan)
        model = np.array(["moyal"]*(len(e_true_edges)-1), dtype=object)

        from scipy.stats import moyal, skewnorm
        for i in range(len(e_true_edges)-1):
            m = g_sel & (g_true >= e_true_edges[i]) & (g_true < e_true_edges[i+1])
            if np.count_nonzero(m) < 20:
                continue
            ratio = np.clip(g_reco[m] / np.clip(g_true[m], 1e-20, None), 1e-6, 1e6)
            med = float(np.nanmedian(ratio))
            mad = float(np.nanmedian(np.abs(ratio - med)))
            sigma = max(1.4826 * mad, 1e-3)
            clean = ratio[np.abs(ratio - med) < 3.0 * sigma]
            if clean.size < 20: clean = ratio
            try:
                loc_m, scale_m = moyal.fit(clean, loc=med, scale=sigma)
            except Exception:
                loc_m, scale_m = med, sigma
            ok_m = np.isfinite(loc_m) and np.isfinite(scale_m) and (0.01 < scale_m < 3.0)
            try:
                a_s, loc_s, scale_s = skewnorm.fit(clean, loc=med, scale=sigma)
                ok_s = (np.isfinite(a_s) and np.isfinite(loc_s) and np.isfinite(scale_s)
                        and abs(a_s) < 30 and (0.01 < scale_s < 3.0))
            except Exception:
                ok_s = False; a_s = loc_s = scale_s = np.nan
            if ok_s and (abs(a_s) > 0.5):
                mu_loc[i], mu_scale[i], mu_a[i] = float(loc_s), float(scale_s), float(a_s); model[i] = "skewnorm"
            elif ok_m:
                mu_loc[i], mu_scale[i], mu_a[i] = float(loc_m), float(scale_m), np.nan; model[i] = "moyal"
            else:
                mu_loc[i], mu_scale[i], mu_a[i] = med, sigma, np.nan; model[i] = "moyal"

        df_gamma = pd.DataFrame({
            "ZD_deg": np.full(len(e_true_edges)-1, zd, dtype=float),
            "Etrue_min_TeV": e_true_edges[:-1].astype(float),
            "Etrue_max_TeV": e_true_edges[1:].astype(float),
            "Aeff_m2": aeff_sel.astype(float),
            "emig_mu_loc": mu_loc.astype(float),
            "emig_mu_scale": mu_scale.astype(float),
            "emig_mu_a": mu_a.astype(float),
            "emig_model": model,
        })
        df_gamma.to_csv(gamma_csv, mode="a", header=not gamma_csv.exists(), index=False)

        print(f"[zd={zd:>2.0f} eff={eff:.2f}] rows: back={len(df_back)}, gamma={len(df_gamma)}")

print("Done. CSVs in:", OUT.resolve())


[zd=20 eff=0.40] rows: back=14, gamma=16
[zd=30 eff=0.40] rows: back=13, gamma=15
[zd=40 eff=0.40] rows: back=12, gamma=13
[zd=60 eff=0.40] rows: back=7, gamma=7
[zd=20 eff=0.70] rows: back=14, gamma=16
[zd=30 eff=0.70] rows: back=13, gamma=15
[zd=40 eff=0.70] rows: back=12, gamma=13
[zd=60 eff=0.70] rows: back=7, gamma=7
[zd=20 eff=0.90] rows: back=14, gamma=16
[zd=30 eff=0.90] rows: back=13, gamma=15
[zd=40 eff=0.90] rows: back=12, gamma=13
[zd=60 eff=0.90] rows: back=7, gamma=7
Done. CSVs in: C:\Users\aminnakh\Desktop\nakhle backup2\a.nakhle\sst1mpipe\sst1mpipe\source_simulation\scripts\final_CSV
