In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import simweights
import pickle
import os, sys
import re
import numpy as np
import matplotlib as mat
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.colors as colors
import matplotlib.gridspec as gridspec
import pandas as pd
import tables
import h5py
import math
from scipy.stats import mstats
import matplotlib as mpl
import matplotlib.font_manager as font_manager


In [5]:
sys.path.append("/data/user/tvaneede/GlobalFit/reco_processing/notebooks/weighting")
from weights import *
from utils import *
from selections import selection_mask
from fonts import *
from plot_utils import *

In [6]:
# Make the reco_processing directory importable so that `datasets` resolves.
sys.path.append("/data/user/tvaneede/GlobalFit/reco_processing")

# Module holding one attribute per reconstruction version.
from datasets import datasets

# Reconstruction versions to compare; each name is looked up on `datasets`.
reco_versions = ["evtgen_v2_rec_v5", "spice_tau_reco"]

# Collect the dataset description of every requested version, keyed by name.
simulation_datasets = {}
for reco_version in reco_versions:
    selected_dataset = getattr(datasets, reco_version)
    simulation_datasets[reco_version] = selected_dataset

In [7]:
# Livetime in years.
livetime_yr = 11.687
# The same livetime in seconds, using 365.25-day years.
livetime_s  = livetime_yr * 365.25 * 24 * 3600 # 11.687 yr expressed in seconds

In [8]:
# Output directory used by this notebook (named for plots).
plotting_path = "/data/user/tvaneede/GlobalFit/reco_processing/notebooks/compare_spice_ftp/compare_spice_ftp"
# Create the directory (and parents) without spawning a shell: no quoting
# problems with the path, and a failure raises instead of returning an exit
# status that is silently ignored.
os.makedirs(plotting_path, exist_ok=True)

0

In [9]:
# weight functions
spline_file = '/data/ana/Diffuse/NNMFit/MCEq_splines/v1.2.1/MCEq_splines_PRI-Gaisser-H4a_INT-SIBYLL23c_allfluxes.pickle'

# conventional
flux_keys_conv = [
    'conv_antinumu', 'conv_numu',
    'conv_antinue', 'conv_nue',
    'conv_antinutau', 'conv_nutau',
]
spline_object_conv = SplineHandler(spline_file, flux_keys_conv)
conv_flux = spline_object_conv.return_weight


def generator_conv(pdgid, energy, cos_zen):
    """Forward to `conv_flux`; the parameter names are part of the interface."""
    return conv_flux(pdgid, energy, cos_zen)


# prompt
flux_keys_pr = [
    'pr_antinumu', 'pr_numu',
    'pr_antinue', 'pr_nue',
    'pr_antinutau', 'pr_nutau',
]
spline_object_pr = SplineHandler(spline_file, flux_keys_pr)
pr_flux = spline_object_pr.return_weight


def generator_pr(pdgid, energy, cos_zen):
    """Forward to `pr_flux`; the parameter names are part of the interface."""
    return pr_flux(pdgid, energy, cos_zen)

# astro
gamma_astro = 2.87
per_flavor_norm = 2.12


def AstroFluxModel(pdgid, energy, cos_zen):
    """Single power-law flux in energy.

    Returns 0.5 * (per_flavor_norm * 1e-18) * (energy / 1e5) ** -gamma_astro.
    `pdgid` and `cos_zen` are accepted but not used, so the value is the same
    for every particle type and direction.
    NOTE(review): the factor 0.5 presumably splits the per-flavor
    normalisation between neutrinos and antineutrinos -- confirm.
    """
    normalisation = 0.5*(per_flavor_norm*1e-18)
    return normalisation*(energy/1e5)**-gamma_astro

In [10]:
def open_datasets( simulation_dataset, keys_to_merge ):
    """Open the HDF inputs, build and merge the weighters, and attach weights.

    Parameters
    ----------
    simulation_dataset : dict
        Maps a sample key to a dict holding at least 'hdf_file_path' and
        'nfiles'. The dict is modified in place: every such entry gains
        'hdf_file' and 'weighter', and every entry (including merged ones)
        gains 'weights_astro', 'weights_conv' and 'weights_pr'.
    keys_to_merge : dict
        Maps a new key to the list of existing keys whose weighters are added
        together. Entries are processed in order, so a merged key may be used
        as a component of a later entry.

    Returns
    -------
    dict
        The same `simulation_dataset` object that was passed in.

    Relies on the module-level names AstroFluxModel, generator_conv,
    generator_pr and livetime_s.
    """

    # open the files
    for key, entry in simulation_dataset.items():
        # Keys listed in keys_to_merge are derived below and have no file of
        # their own. Skipping them makes the function safe to call again on a
        # dict it already processed (previously a KeyError on 'hdf_file_path').
        if key in keys_to_merge:
            continue
        print(f"----- Extracting files for {key}")
        entry['hdf_file'] = pd.HDFStore(entry['hdf_file_path'], 'r')
        entry['weighter'] = simweights.NuGenWeighter(entry['hdf_file'], nfiles=entry['nfiles'])

    # merging files
    for new_key, component_keys in keys_to_merge.items():
        print(f"----- Creating new key {new_key}")
        merged_weighter = None

        for key in component_keys:
            print(f"Using {key}")
            component = simulation_dataset[key]['weighter']
            if merged_weighter is None:
                merged_weighter = component
            else:
                # Explicit `+` (not `+=`): the first component's weighter is
                # also stored under its own key and must never be modified.
                merged_weighter = merged_weighter + component

        simulation_dataset[new_key] = {
            'variables': {},
            'weighter': merged_weighter,
        }

    # calculate weights
    for entry in simulation_dataset.values():
        weighter = entry["weighter"]
        entry['weights_astro'] = weighter.get_weights(AstroFluxModel) * livetime_s
        entry['weights_conv'] = weighter.get_weights(generator_conv) * livetime_s
        entry['weights_pr'] = weighter.get_weights(generator_pr) * livetime_s

    return simulation_dataset



In [11]:
# For each reconstruction version: merged key -> keys whose weighters are summed.
# "NuAll" is listed last because it is built from the merged flavor keys above it.
keys_to_merge = {
    "evtgen_v2_rec_v5": {
        "NuE": ["NuE_midE", "NuE_highE"],
        "NuMu": ["NuMu_midE", "NuMu_highE"],
        "NuTau": ["NuTau_midE", "NuTau_highE"],
        "NuAll": ["NuE", "NuMu", "NuTau"],
    },
    "spice_tau_reco": {
        "NuE": ["NuE_midE1", "NuE_highE1", "NuE_midE2", "NuE_highE2"],
        "NuMu": ["NuMu_midE1", "NuMu_highE1", "NuMu_midE2", "NuMu_highE2"],
        "NuTau": ["NuTau_midE1", "NuTau_highE1", "NuTau_midE2", "NuTau_highE2"],
        "NuAll": ["NuE", "NuMu", "NuTau"],
    },
}

In [12]:
# Open, merge and weight every reconstruction version.
for key in simulation_datasets:
    simulation_datasets[key] = open_datasets(
        simulation_datasets[key],
        keys_to_merge[key],
    )

----- Extracting files for NuTau_midE
----- Extracting files for NuTau_highE
----- Extracting files for NuE_midE
----- Extracting files for NuE_highE
----- Extracting files for NuMu_midE
----- Extracting files for NuMu_highE
----- Creating new key NuE
Using NuE_midE
Using NuE_highE
----- Creating new key NuMu
Using NuMu_midE
Using NuMu_highE
----- Creating new key NuTau
Using NuTau_midE
Using NuTau_highE
----- Creating new key NuAll
Using NuE
Using NuMu
Using NuTau
----- Extracting files for NuTau_midE1
----- Extracting files for NuTau_highE1
----- Extracting files for NuTau_midE2
----- Extracting files for NuTau_highE2
----- Extracting files for NuE_midE1
----- Extracting files for NuE_highE1
----- Extracting files for NuE_midE2
----- Extracting files for NuE_highE2
----- Extracting files for NuMu_midE1
----- Extracting files for NuMu_highE1
----- Extracting files for NuMu_midE2
----- Extracting files for NuMu_highE2
----- Creating new key NuE
Using NuE_midE1
Using NuE_highE1
Using Nu

In [13]:
# Display label for each reconstruction version.
dataset_label_dict = dict(
    evtgen_v2_rec_v5="ftp-v3",
    spice_tau_reco="spice v3.2.1",
)

Check that the merged weighters work: the total rates computed below look fine.

In [14]:
# One table row per dataset: summed weights per flux component, each with
# sqrt(sum(w^2)) as its uncertainty.
data = {}

for key, simulation_dataset in simulation_datasets.items():

    channel_data = {}

    # Astrophysical flux, one column per flavor.
    for flavor in ('NuE', "NuMu", "NuTau"):
        weights = simulation_dataset[flavor]["weighter"].get_weights(AstroFluxModel) * livetime_s
        rate, error = np.sum(weights), np.sqrt(np.sum(weights**2))
        channel_data[f"astro_{flavor}"] = f"{rate:.2f} ± {error:.2f}"

    # The atmospheric components use the all-flavor weighter.
    flavor = "NuAll"
    weighter_all = simulation_dataset[flavor]["weighter"]

    # Conventional: weights are multiplied by the matching passing-fraction column.
    ConventionalAtmosphericPassingFractions = weighter_all.get_column("ConventionalAtmosphericPassingFractions", "value")
    weights_conv = weighter_all.get_weights(generator_conv) * livetime_s * ConventionalAtmosphericPassingFractions
    rate_conv, err_conv = np.sum(weights_conv), np.sqrt(np.sum(weights_conv**2))
    channel_data["conv"] = f"{rate_conv:.2f} ± {err_conv:.2f}"

    # Prompt: same treatment with the prompt passing-fraction column.
    PromptAtmosphericPassingFractions = weighter_all.get_column("PromptAtmosphericPassingFractions", "value")
    weights_prompt = weighter_all.get_weights(generator_pr) * livetime_s * PromptAtmosphericPassingFractions
    rate_prompt, err_prompt = np.sum(weights_prompt), np.sqrt(np.sum(weights_prompt**2))
    channel_data["prompt"] = f"{rate_prompt:.2f} ± {err_prompt:.2f}"

    data[ dataset_label_dict[key] ] = channel_data

# Fixed column order: the three astro flavors, then conv and prompt.
columns_order = [f"astro_{flavor}" for flavor in ['NuE', 'NuMu', 'NuTau']] + ["conv", "prompt"]

# Rows are the dataset labels.
df = pd.DataFrame.from_dict(data, orient='index')[columns_order]

# Display as string table
print(df.to_string())

                 astro_NuE    astro_NuMu   astro_NuTau          conv       prompt
ftp-v3        56.92 ± 0.55  20.60 ± 0.25  35.03 ± 0.40  25.68 ± 0.99  7.07 ± 0.09
spice v3.2.1  56.77 ± 0.56  20.42 ± 0.22  34.89 ± 0.43  23.04 ± 0.80  6.93 ± 0.09


Let's check the efficiency and purity of the selections.

In [15]:
def analyze_classification_rates_with_cut(
    simulation_dataset,
    classification_vars=["FinalTopology"],
    energy_cut_threshold=None
):
    """Print rate, purity and efficiency tables per reconstructed class.

    For every variable in `classification_vars`, events are grouped by the
    value of that column (1 = "Cascade", 2 = "Double cascade", 3 = "Track").
    For each class the function prints, as plain text and as LaTeX:

    * Rates: summed weights with sqrt(sum(w^2)) as uncertainty, for the
      astrophysical flux per flavor (NuE, NuMu, NuTau) and for the
      conventional and prompt fluxes (from the "NuAll" weighter, multiplied
      by the matching passing-fraction column).
    * Purity: astro rate of a flavor in the class divided by the total rate
      in that class (all astro flavors + conventional + prompt).
    * Efficiency: astro rate of a flavor in the class divided by that
      flavor's total astro rate (after the energy cut, if any).

    Parameters
    ----------
    simulation_dataset : dict
        Must contain the keys 'NuE', 'NuMu', 'NuTau' and 'NuAll', each with
        a 'weighter' entry providing `get_weights` and `get_column`.
    classification_vars : list of str
        Column names to classify on. The default list is never modified.
    energy_cut_threshold : float or None
        When not None, only events with RecoETot > threshold are used.
        A threshold of 0 is applied as a real cut (it used to be silently
        treated as "no cut").

    Returns
    -------
    None. Everything is printed.

    Relies on the module-level names AstroFluxModel, generator_conv,
    generator_pr and livetime_s.
    """
    flavors = ['NuE', 'NuMu', 'NuTau']
    channels = list(zip(["Cascade", "Double cascade", "Track"], [1, 2, 3]))
    has_cut = energy_cut_threshold is not None

    def energy_selection(weighter):
        """Boolean array of events passing the energy cut, or None without a cut."""
        if not has_cut:
            return None
        return weighter.get_column("RecoETot", "value") > energy_cut_threshold

    def combine(mask, cut):
        """AND the class mask with the energy cut when one is requested."""
        return mask if cut is None else mask & cut

    def rate_and_error(weights, mask):
        """Summed weight and sqrt(sum(w^2)) of the selected events."""
        selected = weights[mask]
        return np.sum(selected), np.sqrt(np.sum(selected**2))

    # Everything below depends neither on the classification variable nor on
    # the class, so it is computed once instead of once per class.
    weighter_all = simulation_dataset["NuAll"]["weighter"]
    conv_passing = weighter_all.get_column("ConventionalAtmosphericPassingFractions", "value")
    prompt_passing = weighter_all.get_column("PromptAtmosphericPassingFractions", "value")
    weights_conv = weighter_all.get_weights(generator_conv) * livetime_s * conv_passing
    weights_prompt = weighter_all.get_weights(generator_pr) * livetime_s * prompt_passing
    cut_all = energy_selection(weighter_all)

    astro_weights = {}
    astro_cut = {}
    total_rates = {}
    for flavor in flavors:
        weighter = simulation_dataset[flavor]["weighter"]
        astro_weights[flavor] = weighter.get_weights(AstroFluxModel) * livetime_s
        astro_cut[flavor] = energy_selection(weighter)
        if astro_cut[flavor] is None:
            total_rates[flavor] = np.sum(astro_weights[flavor])
        else:
            total_rates[flavor] = np.sum(astro_weights[flavor][astro_cut[flavor]])

    for classification_var in classification_vars:
        print(20 * "--", classification_var)

        # The classification columns only depend on the variable, not the class.
        classification_all = weighter_all.get_column(classification_var, "value")
        classification_astro = {
            flavor: simulation_dataset[flavor]["weighter"].get_column(classification_var, "value")
            for flavor in flavors
        }

        data = {}
        purity = {}
        efficiency = {}

        for name, ch in channels:
            channel_data = {}
            rates = {"total": 0}

            # Conventional + Prompt (NuAll)
            mask = combine(classification_all == ch, cut_all)

            rate_conv, err_conv = rate_and_error(weights_conv, mask)
            channel_data["Conventional"] = f"{rate_conv:.2f} ± {err_conv:.2f}"
            rates["total"] += rate_conv

            rate_prompt, err_prompt = rate_and_error(weights_prompt, mask)
            channel_data["Prompt"] = f"{rate_prompt:.2f} ± {err_prompt:.2f}"
            rates["total"] += rate_prompt

            # Astro components
            for flavor in flavors:
                mask = combine(classification_astro[flavor] == ch, astro_cut[flavor])
                rate, err = rate_and_error(astro_weights[flavor], mask)
                channel_data[f"Astro {flavor}"] = f"{rate:.2f} ± {err:.2f}"
                rates[flavor] = rate
                rates["total"] += rate

            data[name] = channel_data

            # Purity and Efficiency
            purity[name] = {
                f: f"{rates[f]/rates['total']:.2f}" for f in flavors
            }
            efficiency[name] = {
                f: f"{rates[f]/total_rates[f]:.2f}" for f in flavors
            }

        # Build DataFrames
        df = pd.DataFrame.from_dict(data, orient='index')
        column_order = [f"Astro {f}" for f in flavors] + ["Conventional", "Prompt"]
        df = df[column_order]

        purity_df = pd.DataFrame(purity).T.round(4)
        purity_df.columns = ['NuE', 'NuMu', 'NuTau']
        purity_df.index.name = "Class"

        efficiency_df = pd.DataFrame(efficiency).T.round(4)
        efficiency_df.columns = ['NuE', 'NuMu', 'NuTau']
        efficiency_df.index.name = "Class"

        # Plain-text tables.
        print("\n=== Rates ===\n")
        print(df.to_string())

        print("\n=== Purity ===\n")
        print(purity_df)

        print("\n=== Efficiency ===\n")
        print(efficiency_df)

        # The same three tables again, as LaTeX.
        print("\n=== Rates ===\n")
        print(df.to_latex(column_format="l" + "c" * len(df.columns), escape=False))

        print("\n=== Purity ===\n")
        print(purity_df.to_latex(column_format="lccc", escape=False))

        print("\n=== Efficiency ===\n")
        print(efficiency_df.to_latex(column_format="lccc", escape=False))

In [16]:
# Per-flavor tables for the "ftp-v3" sample (evtgen_v2_rec_v5), no energy cut.
analyze_classification_rates_with_cut(simulation_datasets["evtgen_v2_rec_v5"])

---------------------------------------- FinalTopology

=== Rates ===

                   Astro NuE    Astro NuMu   Astro NuTau  Conventional       Prompt
Cascade         55.58 ± 0.54   9.21 ± 0.17  29.05 ± 0.37  15.32 ± 0.83  6.03 ± 0.08
Double cascade   0.43 ± 0.05   0.56 ± 0.04   4.00 ± 0.10   0.66 ± 0.15  0.16 ± 0.01
Track            0.91 ± 0.07  10.83 ± 0.17   1.98 ± 0.08   9.70 ± 0.52  0.89 ± 0.03

=== Purity ===

                 NuE  NuMu NuTau
Class                           
Cascade         0.48  0.08  0.25
Double cascade  0.07  0.10  0.69
Track           0.04  0.45  0.08

=== Efficiency ===

                 NuE  NuMu NuTau
Class                           
Cascade         0.98  0.45  0.83
Double cascade  0.01  0.03  0.11
Track           0.02  0.53  0.06

=== Rates ===

\begin{tabular}{lccccc}
\toprule
 & Astro NuE & Astro NuMu & Astro NuTau & Conventional & Prompt \\
\midrule
Cascade & 55.58 ± 0.54 & 9.21 ± 0.17 & 29.05 ± 0.37 & 15.32 ± 0.83 & 6.03 ± 0.08 \\
Double cascade &

In [17]:
# Per-flavor tables for the "spice v3.2.1" sample (spice_tau_reco), no energy cut.
analyze_classification_rates_with_cut(simulation_datasets["spice_tau_reco"])

---------------------------------------- FinalTopology

=== Rates ===

                   Astro NuE    Astro NuMu   Astro NuTau  Conventional       Prompt
Cascade         53.80 ± 0.55   8.27 ± 0.15  29.17 ± 0.40  11.93 ± 0.55  5.76 ± 0.09
Double cascade   1.15 ± 0.07   0.35 ± 0.03   2.96 ± 0.09   0.42 ± 0.10  0.15 ± 0.01
Track            1.82 ± 0.10  11.80 ± 0.17   2.77 ± 0.11  10.69 ± 0.57  1.02 ± 0.03

=== Purity ===

                 NuE  NuMu NuTau
Class                           
Cascade         0.49  0.08  0.27
Double cascade  0.23  0.07  0.59
Track           0.06  0.42  0.10

=== Efficiency ===

                 NuE  NuMu NuTau
Class                           
Cascade         0.95  0.40  0.84
Double cascade  0.02  0.02  0.08
Track           0.03  0.58  0.08

=== Rates ===

\begin{tabular}{lccccc}
\toprule
 & Astro NuE & Astro NuMu & Astro NuTau & Conventional & Prompt \\
\midrule
Cascade & 53.80 ± 0.55 & 8.27 ± 0.15 & 29.17 ± 0.40 & 11.93 ± 0.55 & 5.76 ± 0.09 \\
Double cascade &

In [18]:
# "ftp-v3" sample, keeping only events with RecoETot > 60e3 (presumably GeV -- confirm).
analyze_classification_rates_with_cut(simulation_datasets["evtgen_v2_rec_v5"], energy_cut_threshold=60e3)

---------------------------------------- FinalTopology

=== Rates ===

                   Astro NuE   Astro NuMu   Astro NuTau Conventional       Prompt
Cascade         37.17 ± 0.39  5.66 ± 0.11  17.67 ± 0.24  4.45 ± 0.23  3.41 ± 0.06
Double cascade   0.25 ± 0.03  0.40 ± 0.03   3.70 ± 0.09  0.30 ± 0.06  0.12 ± 0.01
Track            0.57 ± 0.05  7.60 ± 0.12   1.38 ± 0.06  3.75 ± 0.21  0.52 ± 0.02

=== Purity ===

                 NuE  NuMu NuTau
Class                           
Cascade         0.54  0.08  0.26
Double cascade  0.05  0.08  0.78
Track           0.04  0.55  0.10

=== Efficiency ===

                 NuE  NuMu NuTau
Class                           
Cascade         0.98  0.41  0.78
Double cascade  0.01  0.03  0.16
Track           0.02  0.56  0.06

=== Rates ===

\begin{tabular}{lccccc}
\toprule
 & Astro NuE & Astro NuMu & Astro NuTau & Conventional & Prompt \\
\midrule
Cascade & 37.17 ± 0.39 & 5.66 ± 0.11 & 17.67 ± 0.24 & 4.45 ± 0.23 & 3.41 ± 0.06 \\
Double cascade & 0.25 ± 0

In [19]:
# "spice v3.2.1" sample, keeping only events with RecoETot > 60e3 (presumably GeV -- confirm).
analyze_classification_rates_with_cut(simulation_datasets["spice_tau_reco"], energy_cut_threshold=60e3)

---------------------------------------- FinalTopology

=== Rates ===

                   Astro NuE   Astro NuMu   Astro NuTau Conventional       Prompt
Cascade         40.44 ± 0.42  6.00 ± 0.11  20.91 ± 0.29  5.37 ± 0.24  3.84 ± 0.06
Double cascade   0.89 ± 0.06  0.28 ± 0.02   2.69 ± 0.08  0.21 ± 0.05  0.12 ± 0.01
Track            1.34 ± 0.06  9.38 ± 0.13   2.04 ± 0.08  5.45 ± 0.26  0.71 ± 0.02

=== Purity ===

                 NuE  NuMu NuTau
Class                           
Cascade         0.53  0.08  0.27
Double cascade  0.21  0.07  0.64
Track           0.07  0.50  0.11

=== Efficiency ===

                 NuE  NuMu NuTau
Class                           
Cascade         0.95  0.38  0.82
Double cascade  0.02  0.02  0.10
Track           0.03  0.60  0.08

=== Rates ===

\begin{tabular}{lccccc}
\toprule
 & Astro NuE & Astro NuMu & Astro NuTau & Conventional & Prompt \\
\midrule
Cascade & 40.44 ± 0.42 & 6.00 ± 0.11 & 20.91 ± 0.29 & 5.37 ± 0.24 & 3.84 ± 0.06 \\
Double cascade & 0.89 ± 0

In [20]:
def analyze_classification_trueclass_rates_with_cut(
    simulation_dataset,
    classification_vars=["FinalTopology"],
    energy_cut_threshold=None
):
    """Print rate, purity and efficiency tables split by true interaction class.

    For every variable in `classification_vars`, events of the "NuAll"
    weighter are grouped by the value of that column (1 = "Cascade",
    2 = "Double cascade", 3 = "Track") and, for the astrophysical flux, also
    by the MCInteractionEventclass column (1 = 'TrueCascade',
    2 = 'TrueDouble', 3 = 'TrueTrack'). For each reconstructed class the
    function prints, as plain text and as LaTeX:

    * Rates: summed weights with sqrt(sum(w^2)) as uncertainty, for the
      astrophysical flux per true class and for the conventional and prompt
      fluxes (multiplied by the matching passing-fraction column).
    * Purity: astro rate of a true class in the reconstructed class divided
      by the total rate in that reconstructed class (the three true classes
      + conventional + prompt).
    * Efficiency: astro rate of a true class in the reconstructed class
      divided by the total astro rate of that true class (after the energy
      cut, if any).

    Parameters
    ----------
    simulation_dataset : dict
        Must contain the key 'NuAll' with a 'weighter' entry providing
        `get_weights` and `get_column`.
    classification_vars : list of str
        Column names to classify on. The default list is never modified.
    energy_cut_threshold : float or None
        When not None, only events with RecoETot > threshold are used.
        A threshold of 0 is applied as a real cut (it used to be silently
        treated as "no cut").

    Returns
    -------
    None. Everything is printed.

    Relies on the module-level names AstroFluxModel, generator_conv,
    generator_pr and livetime_s.
    """
    true_class_names = ['TrueCascade', 'TrueDouble', 'TrueTrack']
    true_classes = list(zip(true_class_names, [1, 2, 3]))
    channels = list(zip(["Cascade", "Double cascade", "Track"], [1, 2, 3]))

    # All quantities come from the same weighter and depend neither on the
    # classification variable nor on the class, so they are computed once
    # instead of once per (class, true class) combination.
    weighter_all = simulation_dataset["NuAll"]["weighter"]
    weights_astro = weighter_all.get_weights(AstroFluxModel) * livetime_s
    conv_passing = weighter_all.get_column("ConventionalAtmosphericPassingFractions", "value")
    prompt_passing = weighter_all.get_column("PromptAtmosphericPassingFractions", "value")
    weights_conv = weighter_all.get_weights(generator_conv) * livetime_s * conv_passing
    weights_prompt = weighter_all.get_weights(generator_pr) * livetime_s * prompt_passing
    true_class = weighter_all.get_column("MCInteractionEventclass", "value")

    if energy_cut_threshold is not None:
        passes_cut = weighter_all.get_column("RecoETot", "value") > energy_cut_threshold
    else:
        passes_cut = None

    def combine(mask):
        """AND the mask with the energy cut when one is requested."""
        return mask if passes_cut is None else mask & passes_cut

    def rate_and_error(weights, mask):
        """Summed weight and sqrt(sum(w^2)) of the selected events."""
        selected = weights[mask]
        return np.sum(selected), np.sqrt(np.sum(selected**2))

    # Denominators of the efficiency: astro rate per true class.
    total_rates = {
        true_name: np.sum(weights_astro[combine(true_class == true_int)])
        for true_name, true_int in true_classes
    }

    for classification_var in classification_vars:
        print(20 * "--", classification_var)

        # The classification column only depends on the variable, not the class.
        classification = weighter_all.get_column(classification_var, "value")

        data = {}
        purity = {}
        efficiency = {}

        for name, ch in channels:
            channel_data = {}
            rates = {"total": 0}
            in_channel = (classification == ch)

            # Conventional + Prompt (NuAll)
            mask = combine(in_channel)

            rate_conv, err_conv = rate_and_error(weights_conv, mask)
            channel_data["Conventional"] = f"{rate_conv:.2f} ± {err_conv:.2f}"
            rates["total"] += rate_conv

            rate_prompt, err_prompt = rate_and_error(weights_prompt, mask)
            channel_data["Prompt"] = f"{rate_prompt:.2f} ± {err_prompt:.2f}"
            rates["total"] += rate_prompt

            # true classifications
            for true_name, true_int in true_classes:
                mask = combine(in_channel & (true_class == true_int))
                rate, err = rate_and_error(weights_astro, mask)
                channel_data[f"{true_name}"] = f"{rate:.2f} ± {err:.2f}"
                rates[true_name] = rate
                rates["total"] += rate

            data[name] = channel_data

            # Purity and Efficiency
            purity[name] = {
                f: f"{rates[f]/rates['total']:.2f}" for f in true_class_names
            }
            efficiency[name] = {
                f: f"{rates[f]/total_rates[f]:.2f}" for f in true_class_names
            }

        # Build DataFrames
        df = pd.DataFrame.from_dict(data, orient='index')
        column_order = [f"{f}" for f in true_class_names] + ["Conventional", "Prompt"]
        df = df[column_order]

        purity_df = pd.DataFrame(purity).T.round(4)
        purity_df.columns = ['TrueCascade', 'TrueDouble', 'TrueTrack']
        purity_df.index.name = "Class"

        efficiency_df = pd.DataFrame(efficiency).T.round(4)
        efficiency_df.columns = ['TrueCascade', 'TrueDouble', 'TrueTrack']
        efficiency_df.index.name = "Class"

        # Plain-text tables.
        print("\n=== Rates ===\n")
        print(df.to_string())

        print("\n=== Purity ===\n")
        print(purity_df)

        print("\n=== Efficiency ===\n")
        print(efficiency_df)

        # The same three tables again, as LaTeX.
        print("\n=== Rates ===\n")
        print(df.to_latex(column_format="l" + "c" * len(df.columns), escape=False))

        print("\n=== Purity ===\n")
        print(purity_df.to_latex(column_format="lccc", escape=False))

        print("\n=== Efficiency ===\n")
        print(efficiency_df.to_latex(column_format="lccc", escape=False))


In [21]:
# True-class tables for the "ftp-v3" sample (evtgen_v2_rec_v5), no energy cut.
analyze_classification_trueclass_rates_with_cut(simulation_datasets["evtgen_v2_rec_v5"])

---------------------------------------- FinalTopology

=== Rates ===

                 TrueCascade    TrueDouble     TrueTrack  Conventional       Prompt
Cascade         63.94 ± 0.56  23.32 ± 0.34   6.58 ± 0.16  15.32 ± 0.83  6.03 ± 0.08
Double cascade   0.57 ± 0.06   3.88 ± 0.10   0.53 ± 0.04   0.66 ± 0.15  0.16 ± 0.01
Track            1.06 ± 0.08   0.80 ± 0.05  11.86 ± 0.18   9.70 ± 0.52  0.89 ± 0.03

=== Purity ===

               TrueCascade TrueDouble TrueTrack
Class                                          
Cascade               0.56       0.20      0.06
Double cascade        0.10       0.67      0.09
Track                 0.04       0.03      0.49

=== Efficiency ===

               TrueCascade TrueDouble TrueTrack
Class                                          
Cascade               0.98       0.83      0.35
Double cascade        0.01       0.14      0.03
Track                 0.02       0.03      0.63

=== Rates ===

\begin{tabular}{lccccc}
\toprule
 & TrueCascade & TrueDoubl

In [22]:
# True-class tables for the "spice v3.2.1" sample (spice_tau_reco), no energy cut.
analyze_classification_trueclass_rates_with_cut(simulation_datasets["spice_tau_reco"])

---------------------------------------- FinalTopology

=== Rates ===

                 TrueCascade    TrueDouble     TrueTrack  Conventional       Prompt
Cascade         62.10 ± 0.58  23.46 ± 0.37   5.68 ± 0.13  11.93 ± 0.55  5.76 ± 0.09
Double cascade   1.35 ± 0.07   2.81 ± 0.09   0.30 ± 0.03   0.42 ± 0.10  0.15 ± 0.01
Track            2.14 ± 0.10   1.28 ± 0.08  12.98 ± 0.18  10.69 ± 0.57  1.02 ± 0.03

=== Purity ===

               TrueCascade TrueDouble TrueTrack
Class                                          
Cascade               0.57       0.22      0.05
Double cascade        0.27       0.56      0.06
Track                 0.08       0.05      0.46

=== Efficiency ===

               TrueCascade TrueDouble TrueTrack
Class                                          
Cascade               0.95       0.85      0.30
Double cascade        0.02       0.10      0.02
Track                 0.03       0.05      0.68

=== Rates ===

\begin{tabular}{lccccc}
\toprule
 & TrueCascade & TrueDoubl

In [23]:
# True-class tables, "ftp-v3" sample, only events with RecoETot > 60e3 (presumably GeV -- confirm).
analyze_classification_trueclass_rates_with_cut(simulation_datasets["evtgen_v2_rec_v5"], energy_cut_threshold=60e3)

---------------------------------------- FinalTopology

=== Rates ===

                 TrueCascade    TrueDouble    TrueTrack Conventional       Prompt
Cascade         42.38 ± 0.40  14.12 ± 0.22  4.00 ± 0.10  4.45 ± 0.23  3.41 ± 0.06
Double cascade   0.34 ± 0.03   3.63 ± 0.09  0.40 ± 0.03  0.30 ± 0.06  0.12 ± 0.01
Track            0.66 ± 0.05   0.63 ± 0.03  8.27 ± 0.13  3.75 ± 0.21  0.52 ± 0.02

=== Purity ===

               TrueCascade TrueDouble TrueTrack
Class                                          
Cascade               0.62       0.21      0.06
Double cascade        0.07       0.76      0.08
Track                 0.05       0.05      0.60

=== Efficiency ===

               TrueCascade TrueDouble TrueTrack
Class                                          
Cascade               0.98       0.77      0.32
Double cascade        0.01       0.20      0.03
Track                 0.02       0.03      0.65

=== Rates ===

\begin{tabular}{lccccc}
\toprule
 & TrueCascade & TrueDouble & True

In [24]:
# True-class tables, "spice v3.2.1" sample, only events with RecoETot > 60e3 (presumably GeV -- confirm).
analyze_classification_trueclass_rates_with_cut(simulation_datasets["spice_tau_reco"], energy_cut_threshold=60e3)

---------------------------------------- FinalTopology

=== Rates ===

                 TrueCascade    TrueDouble     TrueTrack Conventional       Prompt
Cascade         46.33 ± 0.44  16.75 ± 0.27   4.27 ± 0.10  5.37 ± 0.24  3.84 ± 0.06
Double cascade   1.05 ± 0.06   2.58 ± 0.08   0.23 ± 0.02  0.21 ± 0.05  0.12 ± 0.01
Track            1.57 ± 0.07   0.96 ± 0.05  10.23 ± 0.14  5.45 ± 0.26  0.71 ± 0.02

=== Purity ===

               TrueCascade TrueDouble TrueTrack
Class                                          
Cascade               0.61       0.22      0.06
Double cascade        0.25       0.62      0.06
Track                 0.08       0.05      0.54

=== Efficiency ===

               TrueCascade TrueDouble TrueTrack
Class                                          
Cascade               0.95       0.83      0.29
Double cascade        0.02       0.13      0.02
Track                 0.03       0.05      0.69

=== Rates ===

\begin{tabular}{lccccc}
\toprule
 & TrueCascade & TrueDouble & 