In [None]:
import os
import sys

import numpy as np
import math
import uproot as uproot
import pickle
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.colors
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import ticker
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
from matplotlib import gridspec

# Add the repository head directory (three levels above the current working directory) to sys.path
# so that the repo-local packages imported below can be found.
workspace_root = os.getcwd()  
sys.path.insert(0, workspace_root + "/../../../")

# import this repo's classes
import pyanalib.pandas_helpers as ph
from makedf.util import *

import dunestyle.matplotlib as dunestyle


In [None]:
# NOTE(review): despite its name this stores the shell PATH variable, not a virtualenv location,
# and it is not read by any cell visible in this notebook -- confirm whether it is still needed.
venv_path = os.getenv("PATH")

In [None]:
# Directory holding the cafpyana output dataframes, plus the MC and data files read from it.
input_path = "/exp/sbnd/data/users/sungbino/sbnd_samples/cafpyana_outputs/"
mc_file = f"{input_path}pandoradf_mc2025a_bnb_cosmics.df"
data_file = f"{input_path}pandoradf_2025a_gold_run_bnbzerobias_cosmics.df"

In [None]:
# Load the tables stored in the MC file: 'hdr' (used below for the POT sum),
# 'evt' (reconstructed slice variables) and 'mcnu' (truth neutrino variables).
mc_hdr_df = pd.read_hdf(mc_file, key='hdr')
mc_evt_df = pd.read_hdf(mc_file, key='evt')
mc_nu_df = pd.read_hdf(mc_file, key='mcnu')

# The data file provides only the 'hdr' and 'evt' tables here (no truth information is read).
data_hdr_df = pd.read_hdf(data_file, key='hdr')
data_evt_df = pd.read_hdf(data_file, key='evt')

In [None]:
# Inspect the MC 'hdr' table (its 'pot' column is summed in a later cell).
mc_hdr_df

In [None]:
# Inspect the data 'hdr' table (its 'pot' column is summed in a later cell).
data_hdr_df

In [None]:
# Total protons-on-target of each sample, obtained by summing the 'pot' column of its header table.
mc_tot_pot = mc_hdr_df['pot'].sum()
data_tot_pot = data_hdr_df['pot'].sum()

print("mc_tot_pot: %e, data_tot_pot: %e" %(mc_tot_pot, data_tot_pot))
# NOTE(review): POT scaling is currently disabled; target_POT / this_pot are not defined in the visible cells.
#POT_scale = target_POT / this_pot
#print(POT_scale)

In [None]:
# Inspect the MC 'mcnu' (truth neutrino) table.
mc_nu_df

In [None]:
# List the columns available in the truth neutrino table.
mc_nu_df.columns

In [None]:
# Truth-level interaction category of every MC neutrino:
#   -1: outside the FV, 0: NC, 1: CC COH, 2: CC QE, 3: CC 2p2h, 4: CC RES, 5: CC DIS,
#    8: default, kept by in-FV CC interactions with any other genie_mode.
# NOTE(review): category 8 has no entry in the plotting mode_list, so those interactions
# are not drawn in the stacked histograms -- confirm this is intended.
is_fv = InFV(mc_nu_df.position, 0, 0, 0, 0, "SBND")
is_cc = mc_nu_df.iscc
genie_mode = mc_nu_df.genie_mode
is_fv_cc = is_fv & is_cc

nuint_categ = pd.Series(8, index=mc_nu_df.index)
category_masks = [
    (-1, ~is_fv),                          # Out of FV
    (0, is_fv & ~is_cc),                   # NC
    (1, is_fv_cc & (genie_mode == 3)),     # CCCOH
    (2, is_fv_cc & (genie_mode == 0)),     # CCQE
    (3, is_fv_cc & (genie_mode == 10)),    # 2p2h
    (4, is_fv_cc & (genie_mode == 1)),     # RES (== 1 already excludes modes 0, 3 and 10)
    (5, is_fv_cc & (genie_mode == 2)),     # DIS (== 2 already excludes modes 0, 3 and 10)
]
for code, mask in category_masks:
    nuint_categ[mask] = code

In [None]:
# Store the category on the truth table so it is carried along when the truth table is merged onto the slices.
mc_nu_df['nuint_categ'] = nuint_categ

In [None]:
# Inspect the MC 'evt' table.
mc_evt_df

In [None]:
# Quick look at the slice vertex x position in data, before any selection.
# The axis labels previously described the MC neutrino-energy plot (GeV, MC POT); they now
# describe what is actually drawn: data slice vertex x, with the data POT.
var = data_evt_df.slc.vertex.x
# Unit weight per entry, built in one vectorised call instead of a per-row Python loop.
plt.hist(var, bins=np.linspace(-210., 210., 43), weights=np.ones(len(var)), histtype="step", label=["all"])
plt.xlabel("Slice Vertex X [cm]")
plt.ylabel(f"Slices (POT = {data_tot_pot:.2e})")
plt.legend()
plt.show()

In [None]:
# Inspect the data 'evt' table.
data_evt_df

In [None]:
# True nu variables

## 1) nu.E -- energy of every MC truth neutrino, no selection applied
var = mc_nu_df.E
unit_weights = np.ones(len(var))  # every neutrino enters with weight 1
plt.hist(var,
         bins=np.linspace(0., 8., 81),
         weights=unit_weights,
         histtype="step",
         label=["all"])
plt.xlabel("Neutrino Energy (GeV)")
plt.ylabel(f"Neutrinos (POT = {mc_tot_pot:.2e})")
plt.legend()
plt.show()

In [None]:
# Attach truth neutrino information to every reconstructed MC slice: rows are matched on the
# event 'entry' and on the slice's truth-match index ('slc', 'tmatch', 'idx') against the
# neutrino index ('rec.mc.nu..index').
mc_matchdf = ph.multicol_merge(mc_evt_df.reset_index(), mc_nu_df.reset_index(),
                            left_on=[("entry", "",""), ("slc","tmatch", "idx", "", "", "")],
                            right_on=[("entry", "",""), ("rec.mc.nu..index", "","")], 
                            how="left") ## -- left join: keep all slices, including those without a matched neutrino

In [None]:
# Inspect the merged slice + truth table.
mc_matchdf

In [None]:
## -- Give slices without a truth match (NaN category after the left merge) nuint_categ = -2
is_unmatched = pd.isna(mc_matchdf.nuint_categ)
matcheddf = mc_matchdf[~is_unmatched]
# Take an explicit copy before writing: assigning into a boolean-mask selection of mc_matchdf
# raises SettingWithCopyWarning and relies on pandas' copy/view heuristics.
notmatcheddf = mc_matchdf[is_unmatched].copy()
notmatcheddf["nuint_categ"] = -2
# Same row order as before: matched slices first, then the unmatched ones.
mc_matchdf = pd.concat([matcheddf, notmatcheddf])

In [None]:
# Inspect the merged table again, now with unmatched slices flagged as nuint_categ = -2.
mc_matchdf

In [None]:
## -- Label definitions for plotting
# The three lists are parallel: entry i of mode_labels / colors describes category mode_list[i].
# They are listed in stacking order (first entry is drawn at the bottom of the stack).
mode_list = [1, 0, 4, 5, 3, 2, -1, -2]
# Category 5 is DIS; the label previously read "CC IDS".
mode_labels = ["CC COH", "NC", "CC RES", "CC DIS", "CC 2p2h", "CC QE", "Non-FV", "Others"]
colors = [#'#d62728',  # Red
          '#1f77b4',  # Blue
          '#ff7f0e',  # Orange
          '#2ca02c',  # Green
          '#17becf',  # Teal
          '#9467bd',  # Purple
          '#8c564b',  # Brown
          '#e377c2',  # Pink
          '#7f7f7f']  # Gray
          #'#bcbd22',  # Yellow-green
          #'#17becf']  # Teal

def draw_reco_stacked_hist(var, is_logy, title_x, title_y, x_min, x_max, nbins, outname, data_overlay = False, var_data = None):
    """Draw a stacked MC histogram split by category, optionally overlaid with area-normalised data.

    Parameters
    ----------
    var : sequence of array-like
        One array of values per category, in the same order as the module-level
        ``mode_labels`` and ``colors`` lists.
    is_logy : bool
        Draw the y axis in log scale.
    title_x, title_y : str
        Axis titles. ``title_y`` is replaced by "A.U." when ``data_overlay`` is True.
    x_min, x_max : float
        Histogram range.
    nbins : int
        Number of equal-width bins between ``x_min`` and ``x_max``.
    outname : str
        Base name for the output file; only used by the (currently disabled) savefig call.
    data_overlay : bool, optional
        If True, the MC stack is normalised to unit area (``density=True``) and ``var_data``
        is drawn on top as area-normalised points with sqrt(N) error bars.
    var_data : array-like, optional
        Data values; only read when ``data_overlay`` is True. ``None`` is treated as empty.

    Notes
    -----
    The percentages in the legend are each category's share of the entries that fall
    inside [x_min, x_max].
    """
    plt.figure(figsize=(800/100, 600/100), dpi=100)
    ax = plt.gca()
    bin_edges = np.linspace(x_min, x_max, nbins + 1)
    POT_scale = 1.

    # The MC stack is drawn in both modes; data_overlay only switches it to a unit-area density.
    hist_data, _, patches = ax.hist(var,
                                    bins=bin_edges,
                                    weights=[np.full(len(data), POT_scale) for data in var],
                                    stacked=True,
                                    color=colors,
                                    label=mode_labels,
                                    edgecolor='none',
                                    linewidth=0,
                                    density=data_overlay,
                                    histtype='stepfilled')

    # With stacked=True every returned array is already cumulative, so the last array is the
    # top of the stack. Summing the arrays per bin would overestimate the height.
    max_y = float(np.max(hist_data[-1]))

    ax.set_xlabel(title_x)
    ax.set_ylabel(title_y)

    data_handle = None
    if data_overlay:
        ax.set_ylabel("A.U.")
        # Normalize data by area
        counts, _ = np.histogram([] if var_data is None else var_data, bins=bin_edges)
        total_data = np.sum(counts)
        # Nothing to draw (and nothing to divide by) when no data entry falls in range.
        if total_data > 0:
            bin_widths = np.diff(bin_edges)
            bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

            # -- only draw bins that contain data
            nonzero_mask = counts > 0
            norm = total_data * bin_widths[nonzero_mask]
            norm_counts = counts[nonzero_mask] / norm
            errors = np.sqrt(counts[nonzero_mask]) / norm

            # Plot data with error bars
            data_handle = ax.errorbar(bin_centers[nonzero_mask], norm_counts, yerr=errors,
                                      fmt='o', color='black', label='Data',
                                      markersize=5, capsize=3, linewidth=1.5)
            # Keep the data points (including error bars) inside the y range.
            max_y = max(max_y, float(np.max(norm_counts + errors)))

    ax.set_xlim(x_min, x_max)
    # Leave the automatic y range in place when the histogram is empty.
    has_content = np.isfinite(max_y) and max_y > 0
    if is_logy:
        if has_content:
            ax.set_ylim(0.1, max_y * 600)
        ax.set_yscale('log')
    elif has_content:
        ax.set_ylim(0.0, max_y * 1.5)

    for spine in ax.spines.values():
        spine.set_linewidth(2)
    ax.tick_params(width=2, length=10)

    # hist_data is cumulative, so per-category totals are the differences between
    # consecutive stack levels.
    accum_sum = [0.] + [float(np.sum(level)) for level in hist_data]
    total_sum = accum_sum[-1]
    individual_sums = np.diff(accum_sum)
    if np.isfinite(total_sum) and total_sum > 0:
        fractions = individual_sums / total_sum * 100
    else:
        fractions = np.zeros(len(individual_sums))

    # Pair every legend entry with its own artist explicitly instead of relying on the order
    # in which matplotlib inserted the artists. The legend lists the top of the stack first.
    legend_handles = [group[0] for group in patches][::-1]
    legend_labels = [f"{label} ({frac:.1f}%)" for label, frac in zip(mode_labels[::-1], fractions[::-1])]
    if data_handle is not None:
        legend_handles.append(data_handle)
        legend_labels.append("Data")
    ax.legend(legend_handles, legend_labels, loc='upper left', fontsize=11, frameon=False, ncol=3, bbox_to_anchor=(0.05, 0.98))

    ax.text(0.00, 1.02, "SBND Sample (2025A Dev.), Preliminary", transform=ax.transAxes,
            fontsize = 14, fontweight = 'bold')
    #plt.savefig("../output/plots/reco_slc/" + outname + ".pdf", format='pdf')
    plt.show()

In [None]:
def draw_mc_data_shape_comp_per_slc(mc_df, data_df, column, x_title, y_title, x_min, x_max, n_bins, out_name):
    """Compare the MC and data shapes of one variable using one value per slice.

    Both frames are grouped by ('entry', 'rec.slc..index') and the first value of each group
    is kept. The MC values are then split by ``nuint_categ`` following ``mode_list`` and
    passed, together with the data values, to ``draw_reco_stacked_hist`` with linear y axis
    and the data overlay enabled.
    """
    slice_keys = ['entry', 'rec.slc..index']
    category_key = ('nuint_categ', '', '', '', '', '')

    # One row per slice
    mc_slices = mc_df.groupby(slice_keys)[[column, category_key]].first()
    data_slices = data_df.groupby(slice_keys)[[column]].first()

    # One MC array per interaction category, in mode_list order
    mc_by_mode = []
    for mode in mode_list:
        in_mode = mc_slices.nuint_categ == mode
        mc_by_mode.append(mc_slices[in_mode][column])

    draw_reco_stacked_hist(mc_by_mode, False, x_title, y_title, x_min, x_max, n_bins, out_name, True, data_slices[column])

In [None]:
def draw_reco_valid_plots(mc_df, data_df, suffix):
    """Draw the MC/data shape comparisons for the slice validation variables.

    Plots, in order: 1) clear-cosmic flag, 2) nu score, 3) slice vertex x, y and z.
    ``suffix`` is prepended to the output name of every plot.
    """
    # (column, x-axis title, x min, x max, number of bins, output-name tag)
    plot_specs = [
        ## -- 1) Clear cosmic
        (('slc', 'is_clear_cosmic', '', '', '', ''), "Is Clear Cosmic", -0.5, 1.5, 2, "_slc_is_clear_cosmic"),
        ## -- 2) nu score
        (('slc', 'nu_score', '', '', '', ''), "Nu Score", -1.1, 1.1, 44, "_slc_nu_score"),
        ## -- 3) vertex x,y,z
        (('slc', 'vertex', 'x', '', '', ''), "Slice Vertex X [cm]", -300, 300, 100, "_slc_vtx_x"),
        (('slc', 'vertex', 'y', '', '', ''), "Slice Vertex Y [cm]", -300, 300, 100, "_slc_vtx_y"),
        (('slc', 'vertex', 'z', '', '', ''), "Slice Vertex Z [cm]", -100, 600, 100, "_slc_vtx_z"),
    ]

    for column, x_title, x_min, x_max, n_bins, tag in plot_specs:
        draw_mc_data_shape_comp_per_slc(mc_df, data_df, column, x_title, "A.U.", x_min, x_max, n_bins, suffix + tag)

    

In [None]:
## Reco 1) Nocut
# Validation plots for all slices, before any selection is applied.
draw_reco_valid_plots(mc_matchdf, data_evt_df, "nocut")

In [None]:
## Reco 2) FV cut
# Keep only slices whose reconstructed vertex passes InFV.
is_fv_mc = InFV(mc_matchdf.slc.vertex, 0, 0, 0, 0, "SBND")
is_fv_data = InFV(data_evt_df.slc.vertex, 0, 0, 0, 0, "SBND")

# NOTE: the frames are overwritten in place; the next selection cell starts from these
# FV-selected frames, so the cells must be run in order.
mc_matchdf = mc_matchdf[is_fv_mc]
data_evt_df = data_evt_df[is_fv_data]

# Use a dedicated output suffix: reusing "nocut" here would make these plots overwrite the
# no-cut ones once saving is enabled.
draw_reco_valid_plots(mc_matchdf, data_evt_df, "fv_cut")

In [None]:
## Reco 3) !is_clear_cosmic
# Keep only slices that are NOT flagged as clear cosmics (is_clear_cosmic == 0).
isnt_clear_cosmic_mc = (mc_matchdf.slc.is_clear_cosmic == 0)
isnt_clear_cosmic_data = (data_evt_df.slc.is_clear_cosmic == 0)

# The frames are overwritten in place, on top of whatever selection earlier cells applied to them.
mc_matchdf = mc_matchdf[isnt_clear_cosmic_mc]
data_evt_df = data_evt_df[isnt_clear_cosmic_data]

# NOTE(review): the suffix reads "is_clear_cosmic" although the selection keeps the slices that
# are not clear cosmics -- confirm the intended output name.
draw_reco_valid_plots(mc_matchdf, data_evt_df, "is_clear_cosmic")