In [1]:
#Boiler plate imports
import pandas as pd
import sys
from time import time
import matplotlib.pyplot as plt
import numpy as np

from tqdm import tqdm

#SBND imports
s0 = time()
sys.path.append('/exp/sbnd/app/users/brindenc/analyze_sbnd/pyana')
from sbnd.cafclasses.slice import CAFSlice
from sbnd.cafclasses.pfp import PFP

#Constants
from sbnd.numu.numu_constants import *
from sbnd.constants import *
from sbnd.prism import PRISM_BINS

#Plotters
from sbnd.plotlibrary import makeplot
from sbnd.general import plotters

s1 = time()
print(f'SBND imports: {s1-s0:.2f} s')

%load_ext autoreload
%autoreload 2

SBND imports: 36.19 s


In [2]:
#Constants/variables
CUT_MODE = 'roc'
#DATA_DIR  = '/exp/sbnd/data/users/brindenc/analyze_sbnd/numu/v09_78_04_wc_pandora'
#DATA_DIR = '/exp/sbnd/data/users/brindenc/ML/test_fcl/debug_trackid/v4'
DATA_DIR = '/exp/sbnd/data/users/brindenc/analyze_sbnd/numu/v09_82_02_01_pds_gain'
FNAME = f'nom_processed_{CUT_MODE}.df'
HDR_FNAME = 'nom.df'
#FNAME = f'single_processed_{CUT_MODE}.df'
#FNAME = 'single_cut_cosmics_processed.df'
#HDR_FNAME = 'single.df'
#FNAME = f'all_processed_{CUT_MODE}.df'
#FNAME = 'all_processed_roc.df'
#HDR_FNAME = 'all.df'
#FNAME = 'test_processed.df'
#HDR_FNAME = 'test.df'
#NOM_POT = 0.6e20 # stats for first run
LABEL = 'SBND Work in Progress\n0.6e20 POT'
PLOT_DIR = f'Plots/reco_{plotters.day}_{CUT_MODE}/xsec'
SAVE_PLOT = True

In [3]:
#Bins from MicroBooNE_CCInc_XSec_2DPcos_nu - https://github.com/NUISANCEMC/nuisance/blob/master/src/MicroBooNE/MicroBooNE_CCInc_XSec_2DPcos_nu.cxx
#Momentum bin edges - 
EdgesP = [
    [0.00, 0.18, 0.30, 0.45, 0.77, 2.50], # -1 to -0.5
    [0.00, 0.18, 0.30, 0.45, 0.77, 2.50], # -0.5 to 0
    [0.00, 0.18, 0.30, 0.45, 0.77, 2.50], # 0 to 0.27
    [0.00, 0.30, 0.45, 0.77, 2.50], # 0.27 to 0.45 - removed dummy
    [0.00, 0.30, 0.45, 0.77, 2.50], #  0.45 to 0.62 - removed dummy
    [0.00, 0.30, 0.45, 0.77, 2.50], # 0.62 to 0.76 - removed dummy
    [0.00, 0.30, 0.45, 0.77, 1.28, 2.50], # 0.76 to 0.86 
    [0.00, 0.30, 0.45, 0.77, 1.28, 2.50], # 0.86 to 0.94
    [0.00, 0.30, 0.45, 0.77, 1.28, 2.50], # 0.94 to 1
    ]
#Why tf does microboone use these bins? why? only god knows

In [4]:
#Get data and POT
pfp = PFP.load(f'{DATA_DIR}/{FNAME}','pfp'
               ,prism_bins=PRISM_BINS
               ,momentum_bins=MOMENTUM_BINS
               ,costheta_bins=COSTHETA_BINS
               ,pot=NOM_POT)
slc = CAFSlice.load(f'{DATA_DIR}/{FNAME}','slice'
                    ,prism_bins=PRISM_BINS
                    ,pot=NOM_POT)
hdr = pd.read_hdf(f'{DATA_DIR}/{HDR_FNAME}',key='hdr')
SAMPLE_POT = hdr.pot.sum()

#Scale to nominal POT
print(f'--scaled by {NOM_POT/SAMPLE_POT:.2f}')
pfp.scale_to_pot(NOM_POT,SAMPLE_POT)
slc.scale_to_pot(NOM_POT,SAMPLE_POT)

--scaled by 8.05
--scaling to POT: 7.45e+18 -> 6.00e+19
--scaling to POT: 7.45e+18 -> 6.00e+19


In [5]:
CUTS = ['cosmic','fv','trk','has_muon']
pur,eff,f1 = slc.get_pur_eff_f1(['cosmic','fv','trk','has_muon'])
pur,eff,f1

(array([0.22224323, 0.66998959, 0.81309471, 0.85877658, 0.92076491]),
 array([1.        , 0.7206205 , 0.61017594, 0.57319694, 0.49579174]),
 array([1.        , 0.69438333, 0.69717004, 0.68751007, 0.64453142]))

## 2 Get XSec for bins

In [6]:
#First assign binnings, to get efficiency per bin - use truth value
slc.assign_costheta_bins(key='best_muon.costheta',assign_key='best_muon.costheta_bin',costheta_bins=COSTHETA_BINS)

9it [01:02,  6.92s/it]


In [7]:
slc.assign_momentum_bins(key='best_muon.p',assign_key='best_muon.momentum_bin',momentum_bins=MOMENTUM_BINS)

9it [00:51,  5.76s/it]


In [8]:
#Get variables for double differential cross section
d2x_dpdc_dict = {} # keys are (costheta_bin,momentum_bin)

for i,c in tqdm(enumerate(COSTHETA_BINS)):
    if i == len(COSTHETA_BINS)-1: continue
    mom_bins = EdgesP[i]
    _slc = slc.copy()
    _slc.data = _slc.data[_slc.data.best_muon.costheta_bin == i]
    _slc.assign_momentum_bins(momentum_bins=mom_bins) #have to reassign since microboone uses wtf bins
    for j,p in enumerate(mom_bins): #We will use MicroBoone binning for now
        if j == len(mom_bins)-1: continue
        dc = COSTHETA_BINS[i+1] - c
        dp = mom_bins[j+1] - p
        #Get efficiency
        _slc_ = _slc.copy()
        _slc_.data = _slc_.data[_slc_.data.best_muon.momentum_bin == j]
        pur,eff,_ = _slc_.get_pur_eff_f1(CUTS)
        #Get n_i,b_i
        _slc_cut = _slc_.copy()
        for cut in CUTS:
            _slc_cut.apply_cut(cut)
        n_i = _slc_cut.data.genweight[_slc_cut.data.truth.event_type == 0].sum()
        b_i = _slc_cut.data.genweight[_slc_cut.data.truth.event_type != 0].sum()
        #Get d2dx_dpdc
        d2x_dpdc = (n_i-b_i)/(dc*dp*eff[-1]*NUMBER_TARGETS_FV*NUMU_INTEGRATED_FLUX)
        #Get unc
        stat_unc = 1/np.sqrt(n_i + b_i)
        d2x_dpdc_dict[(i,j)] = {'dc':dc,'dp':dp,'eff':eff[-1],'pur':pur[-1],'n_i':n_i,'b_i':b_i,'stat_unc':stat_unc,'d2x_dpdc':d2x_dpdc,'mom_bins':mom_bins}


5it [00:00,  9.01it/s]
5it [00:00,  5.39it/s]
5it [00:02,  2.06it/s]
4it [00:01,  2.61it/s]
4it [00:01,  2.36it/s]
4it [00:01,  2.39it/s]
5it [00:01,  2.55it/s]
5it [00:01,  3.16it/s]
5it [00:01,  3.56it/s]
10it [00:45,  4.52s/it]


In [9]:
#Get variables for single differential cross section
dx_dp_dict = {} # keys are (momentum_bin)
for i,p in tqdm(enumerate(MOMENTUM_BINS)):
    if i == len(MOMENTUM_BINS)-1: continue
    dp = MOMENTUM_BINS[i+1] - p
    #Get efficiency
    _slc = slc.copy()
    _slc.data = _slc.data[_slc.data.best_muon.momentum_bin == i]
    pur,eff,_ = _slc.get_pur_eff_f1(CUTS)
    #Get n_i,b_i
    _slc_cut = _slc.copy()
    for cut in CUTS:
        _slc_cut.apply_cut(cut)
    n_i = _slc_cut.data.genweight[_slc_cut.data.truth.event_type == 0].sum()
    b_i = _slc_cut.data.genweight[_slc_cut.data.truth.event_type != 0].sum()
    #Get xsec
    dx_dp = (n_i-b_i)/(dp*eff[-1]*NUMBER_TARGETS_FV*NUMU_INTEGRATED_FLUX)
    #Get unc
    stat_unc = 1/np.sqrt(n_i + b_i)
    dx_dp_dict[i] = {'dp':dp,'eff':eff[-1],'pur':pur[-1],'n_i':n_i,'b_i':b_i,'stat_unc':stat_unc,'dx_dp':dx_dp}

10it [00:18,  1.83s/it]


In [10]:
dx_dc_dict = {} # keys are (costheta_bin)
for i,c in tqdm(enumerate(COSTHETA_BINS)):
    if i == len(COSTHETA_BINS)-1: continue
    dc = COSTHETA_BINS[i+1] - c
    #Get efficiency
    _slc = slc.copy()
    _slc.data = _slc.data[_slc.data.best_muon.costheta_bin == i]
    pur,eff,_ = _slc.get_pur_eff_f1(CUTS)
    #Get n_i,b_i
    _slc_cut = _slc.copy()
    for cut in CUTS:
        _slc_cut.apply_cut(cut)
    n_i = _slc_cut.data.genweight[_slc_cut.data.truth.event_type == 0].sum()
    b_i = _slc_cut.data.genweight[_slc_cut.data.truth.event_type != 0].sum()
    #Get xsec
    dx_dc = (n_i-b_i)/(dc*eff[-1]*NUMBER_TARGETS_FV*NUMU_INTEGRATED_FLUX)
    #Get unc
    stat_unc = 1/np.sqrt(n_i + b_i)
    dx_dc_dict[i] = {'dc':dc,'eff':eff[-1],'pur':pur[-1],'n_i':n_i,'b_i':b_i,'stat_unc':stat_unc,'dx_dc':dx_dc}

0it [00:00, ?it/s]

10it [00:18,  1.84s/it]


In [11]:
d2x_dpdc_dict[(0,0)],dx_dc_dict[8],dx_dp_dict[8]

({'dc': 0.5,
  'dp': 0.18,
  'eff': 0.4074074074074073,
  'pur': 0.6111111111111109,
  'n_i': 88.5315527469295,
  'b_i': 56.33826083895514,
  'stat_unc': 0.08308278564283073,
  'd2x_dpdc': 4.237008493644134e-41,
  'mom_bins': [0.0, 0.18, 0.3, 0.45, 0.77, 2.5]},
 {'dc': 0.06000000000000005,
  'eff': 0.7918276072372433,
  'pur': 0.9197166469893743,
  'n_i': 31348.217995390034,
  'b_i': 2736.429812177821,
  'stat_unc': 0.005416523036918624,
  'dx_dc': 2.906220011878148e-38},
 {'dp': 1.0,
  'eff': 0.7602230483271376,
  'pur': 0.9359267734553777,
  'n_i': 3291.7640975903787,
  'b_i': 225.35304335582063,
  'stat_unc': 0.01686190287498597,
  'dx_dp': 1.9465013849046564e-40})

## 3. Single Differential Plots
$\frac{d\sigma}{dx} = \frac{N_i-B_i}{\epsilon_i n_t \Phi dx_i}$

In [15]:
#dx_dc
dx_dcs = np.zeros(len(COSTHETA_BINS)-1)
errs = dx_dcs.copy()
scale = 1e38
for i in range(len(COSTHETA_BINS)-1):
    dx_dcs[i] = dx_dc_dict[i]['dx_dc']*scale
    #TODO: incorperate systematic unc
    errs[i] = dx_dc_dict[i]['stat_unc']*dx_dc_dict[i]['dx_dc']*scale

fig,ax = plt.subplots(figsize=(6,4))
makeplot.plot_hist_edges(COSTHETA_BINS,dx_dcs,errs,'',ax=ax)
ax.set_xlabel(r'$\cos\theta_\mu$')
ax.set_ylabel(r'$\frac{d\sigma}{d\cos\theta_\mu}$ [$10^{-38}$ cm$^2$]')
ax.set_title(r'$\nu_\mu$ CC Inclusive')
plotters.set_style(ax)
plotters.add_label(ax,LABEL+'\nNo Folding',where='topleft')
if SAVE_PLOT:
    plotters.save_plot(f'dx_dc',fig=fig,folder_name=PLOT_DIR)
    #plt.close()

In [16]:
#dx_dp
dx_dps = np.zeros(len(MOMENTUM_BINS)-1)
errs = dx_dps.copy()
scale = 1e38
for i in range(len(MOMENTUM_BINS)-1):
    dx_dps[i] = dx_dp_dict[i]['dx_dp']*scale
    errs[i] = dx_dp_dict[i]['stat_unc']*dx_dp_dict[i]['dx_dp']*scale

fig,ax = plt.subplots(figsize=(6,4))
makeplot.plot_hist_edges(MOMENTUM_BINS,dx_dps,errs,'',ax=ax)
ax.set_xlabel(r'$p_\mu$ [GeV]')
ax.set_ylabel(r'$\frac{d\sigma}{dp_\mu}$ [$10^{-38}$ cm$^2$]')
ax.set_title(r'$\nu_\mu$ CC Inclusive')
plotters.set_style(ax)
plotters.add_label(ax,LABEL+'\nNo Folding',where='topright')
if SAVE_PLOT:
    plotters.save_plot(f'dx_dp',fig=fig,folder_name=PLOT_DIR)
    #plt.close()

## 4. Double Differential

In [17]:
#d2x_dpdc
d2x_dpdc = np.zeros((len(COSTHETA_BINS)-1,len(MOMENTUM_BINS)-1))
errs = d2x_dpdc.copy()
scale = 1e38
for i in range(len(COSTHETA_BINS)-1):
    mom_bins = EdgesP[i]
    for j in range(len(mom_bins)-1):
        d2x_dpdc[i,j] = d2x_dpdc_dict[(i,j)]['d2x_dpdc']*scale
        errs[i,j] = d2x_dpdc_dict[(i,j)]['stat_unc']*d2x_dpdc_dict[(i,j)]['d2x_dpdc']*scale#*8.05

#Make plots in costheta bins
for i,c in enumerate(COSTHETA_BINS):
    if i == len(COSTHETA_BINS)-1: continue
    mom_bins = np.array(EdgesP[i])
    _d2x_dpdc = d2x_dpdc[i][:len(mom_bins)-1]
    _errs = errs[i][:len(mom_bins)-1]
    fig,ax = plt.subplots(figsize=(6,4))
    
    makeplot.plot_hist_edges(mom_bins,_d2x_dpdc,_errs,'',ax=ax)
    ax.set_xlabel(r'$p_\mu$ [GeV]')
    ax.set_ylabel(r'$\frac{d^2\sigma}{dp_\mu \ d\cos\theta_\mu}$ [$10^{-38}$ cm$^2$]')
    ax.set_title(r'$\nu_\mu$ CC Inclusive')
    plotters.set_style(ax)
    plotters.add_label(ax,LABEL+'\nNo Folding\n'+fr'{c} < $\cos\theta_\mu$ < {COSTHETA_BINS[i+1]}',where='topright')
    #plotters.add_label(ax,f'$\cos\\theta_{{\mu}}$ bin {i}',where='topleft')
    if SAVE_PLOT:
        plotters.save_plot(f'd2dx_dpdc_costheta_{i}',fig=fig,folder_name=PLOT_DIR)
        #plt.close()


In [134]:
d2x_dpdc_dict[(0,0)]

{'dc': 0.5,
 'dp': 0.18,
 'eff': 0.5789855072463767,
 'pur': 0.5301924353019243,
 'n_i': 6430.610058617878,
 'b_i': 5698.212667711462,
 'stat_unc': 0.009080100925773547,
 'd2x_dpdc': 6.782689928324905e-40,
 'mom_bins': [0.0, 0.18, 0.3, 0.45, 0.77, 2.5]}

In [111]:
slc.data.best_muon.keys()

MultiIndex([(    'cont_tpc',            '',              '',    ''),
            (    'costheta',            '',              '',    ''),
            (      'dazzle',   'muonScore',              '',    ''),
            (      'dazzle',         'pdg',              '',    ''),
            (      'dazzle',   'pionScore',              '',    ''),
            (      'dazzle', 'protonScore',              '',    ''),
            (         'dir',           'x',              '',    ''),
            (         'dir',           'y',              '',    ''),
            (         'dir',           'z',              '',    ''),
            (         'end',           'x',              '',    ''),
            (         'end',           'y',              '',    ''),
            (         'end',           'z',              '',    ''),
            (      'energy',            '',              '',    ''),
            (         'len',            '',              '',    ''),
            (           'p',      