In [None]:
# Notebook setup: lower the kernel's scheduling priority and make the local
# helper scripts in ../pyscript importable.
import os
import sys

# Nice the process so it can run with lots of cores on low priority
os.nice(20)

# NOTE(review): '_' is presumably exported by the launching shell. Use .get so a
# kernel started without it does not die with KeyError; hnlDIR is then None.
hnlDIR = os.environ.get('_')

# Add local paths (guarded so re-running this cell does not append duplicates)
if '../pyscript' not in sys.path:
    sys.path.append('../pyscript')

# From pyscript Library
from Plotting import *
from Dictionary import *
from HelperFunctions import *
from CutFunctions import *

import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Run-time switches for this notebook.
ifTune = False  # not referenced in the cells visible here — TODO confirm it is still needed
ifSave = True  # when True, the plotting cells write their figures as PNG files
savePath = "../plot_files/06Feb24/"  # prefix prepended to every saved figure name; NOTE(review): presumably must already exist

<h1> Read in PKL Dataframe </h1>

<h3>HNL</h3>

In [None]:
# Load the pickled HNL dataframe.
# NOTE: pickle.load can execute arbitrary code; only open pickle files you trust.
hnl_path = "./df_hnl.pkl"

# The context manager closes the handle; the earlier bare `file.close`
# (without parentheses) never actually closed it.
with open(hnl_path, 'rb') as file:
    df_hnl = pickle.load(file)
del hnl_path

In [None]:
# Keep only the columns the rest of this notebook reads from the HNL frame.
hnl_keep_columns = ["run", "subrun", "event", "slc_idx", "scale_pot", "mod_t", 'slc_comp', 'slc_true_event_type']
df_hnl = df_hnl.loc[:, hnl_keep_columns]

<h3>TPC Neutrino</h3>

In [None]:
# Load the pickled TPC-neutrino dataframe.
# NOTE: pickle.load can execute arbitrary code; only open pickle files you trust.
nu_path = "./df_nu.pkl"

# The context manager closes the handle; the earlier bare `file.close`
# (without parentheses) never actually closed it.
with open(nu_path, 'rb') as file:
    df_nu = pickle.load(file)
del nu_path

In [None]:
# Keep only the columns the rest of this notebook reads from the neutrino frame.
nu_keep_columns = ["run", "subrun", "event", "slc_idx", "scale_pot", "mod_t", 'slc_comp', 'slc_true_event_type']
df_nu = df_nu.loc[:, nu_keep_columns]

<h3>Intime Cosmics</h3>

In [None]:
# Load the pickled in-time cosmics dataframe.
# NOTE: pickle.load can execute arbitrary code; only open pickle files you trust.
cos_path = "./df_cos.pkl"

# The context manager closes the handle; the earlier bare `file.close`
# (without parentheses) never actually closed it.
with open(cos_path, 'rb') as file:
    df_cos = pickle.load(file)
del cos_path

In [None]:
# Keep only the columns the rest of this notebook reads from the cosmics frame.
cos_keep_columns = ["run", "subrun", "event", "slc_idx", "scale_pot", "mod_t", 'slc_comp', 'slc_true_event_type']
df_cos = df_cos.loc[:, cos_keep_columns]

<h1> Load Flux Systematics</h1>

<h3> HNL </h3>

In [None]:
# Attach the flux-weight columns to the HNL slices.
# Each flux pickle is inner-merged with df_hnl on the slice identifiers, and the
# merged pieces are concatenated once at the end instead of growing a frame
# inside the loop.
# NOTE: this cell overwrites df_hnl, so it is not safe to run twice in a row.
hnl_flux_frames = []

for i in range(0, 2):
    hnl_path = "../pkl_files/hnl_m200_50k__flxw_{}.pkl".format(i)
    print(hnl_path)

    # `with` closes the handle even if loading raises.
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(hnl_path, 'rb') as file:
        df = pickle.load(file)

    hnl_flux_frames.append(
        df.merge(df_hnl, how='inner', on=['run','subrun','event','slc_idx'])
    )

df_hnl = pd.concat(hnl_flux_frames, ignore_index=True)

del hnl_flux_frames

<h3> TPC Neutrino </h3>

In [None]:
# Attach the flux-weight columns to the TPC-neutrino slices.
# Each flux pickle is inner-merged with df_nu on the slice identifiers, and the
# merged pieces are concatenated once at the end instead of growing a frame
# inside the loop.
# NOTE: this cell overwrites df_nu, so it is not safe to run twice in a row.
nu_flux_frames = []

for i in range(0, 4):
    nu_path = "../pkl_files/nu_tpc_100k_flxw_{}.pkl".format(i)
    print(nu_path)

    # `with` closes the handle even if loading raises.
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(nu_path, 'rb') as file:
        df = pickle.load(file)

    nu_flux_frames.append(
        df.merge(df_nu, how='inner', on=['run','subrun','event','slc_idx'])
    )

df_nu = pd.concat(nu_flux_frames, ignore_index=True)

del nu_flux_frames

<h1>Plot Flux Weights</h1>

In [None]:
# Dataframe columns holding the individual flux-systematic weights.
# NOTE: 'slc_flux_weight_pioninexsex' is spelled this way everywhere in the
# notebook, so it is kept as the column name.
flux_list = [
    'slc_flux_weight_expskin',
    'slc_flux_weight_horncurrent',
    'slc_flux_weight_kminus',
    'slc_flux_weight_kplus',
    'slc_flux_weight_kzero',
    'slc_flux_weight_nucleoninexsec',
    'slc_flux_weight_nucleonqexsec',
    'slc_flux_weight_nucleontotxsec',
    'slc_flux_weight_piminus',
    'slc_flux_weight_pioninexsex',
    'slc_flux_weight_pionqexsec',
    'slc_flux_weight_piontotxsec',
    'slc_flux_weight_piplus',
]

# Axis titles, in the same order as flux_list (the two are zipped when plotting).
# 'Ineslastic' typo in the nucleon entry corrected to 'Inelastic'.
flux_name = [
    'Exposure Skin Flux Weight',
    'Horn Current Flux Weight',
    'Kaon Minus Flux Weight',
    'Kaon Plus Flux Weight',
    'Neutral Kaon Flux Weight',
    'Nucleon Inelastic Cross Section Flux Weight',
    'Nucleon Quasi-Elastic Cross Section Flux Weight',
    'Nucleon Total Cross Section Flux Weight',
    'Pion Minus Flux Weight',
    'Pion Inelastic Cross Section Flux Weight',
    'Pion Quasi-Elastic Cross Section Flux Weight',
    'Pion Total Cross Section Flux Weight',
    'Pion Plus Flux Weight',
]

In [None]:
# Draw one un-normalised step histogram per flux-weight column of the HNL sample.
for var, name in zip(flux_list, flux_name):
    # Each row holds a list-like of weights; explode flattens it to one entry per weight.
    pltdf = df_hnl[var].explode()

    # Histogram range and binning; the axis limits follow the same range.
    xmin, xmax, xnbin = 0.75, 2, 40
    xlimmin, xlimmax = xmin, xmax

    fig, ax1 = plt.subplots(1, figsize=(6, 4))
    plot_1dhist(
        pltdf, ax1,
        xmin, xmax, xnbin,
        xlimmin, xlimmax,
        ifnorm=False,
        histtype='step',
        linecolor=col_dict['Teal'],
        linewidth=2,
        xtitle=name,
        ytitle="Entries",
        ifstatbox=True,
        loc='best',
    )
    fig.tight_layout()

    if ifSave:
        plt.savefig(savePath + var + ".png", dpi=200)

    plt.show()

<h1>Save The Universes Of Flux Weights</h1>

In [None]:
def check_flxw(x):
    """Report a row whose 'slc_flux_weight_expskin' entry has fewer than 1000 weights.

    Parameters
    ----------
    x : mapping-like row
        Must provide 'slc_flux_weight_expskin' (a sized object) and the
        'run', 'subrun', 'event' and 'slc_idx' identifiers.

    Returns
    -------
    None
        The identifiers are printed for short rows; nothing is printed otherwise.
    """
    n_weights = len(x['slc_flux_weight_expskin'])
    if n_weights >= 1000:
        return

    identifiers = (x['run'], x['subrun'], x['event'], x['slc_idx'])
    print('run {0:} subrun {1:} event {2:} slc idx {3:}'.format(*identifiers))

In [None]:
def time_flxw(x, n_universes=1000):
    """Multiply the individual flux weights of one row into one weight per universe.

    Parameters
    ----------
    x : mapping-like row
        Must provide the thirteen 'slc_flux_weight_*' entries listed below, each
        indexable by universe number.
    n_universes : int, optional
        Number of universes to combine (default 1000, the value that was
        previously hard-coded).

    Returns
    -------
    list
        Always of length ``n_universes``. Entry ``i`` is the product of all
        flux weights for universe ``i``. If the 'slc_flux_weight_expskin' entry
        has fewer than ``n_universes`` weights, every entry is 1.

    Notes
    -----
    The short-row fallback used to return a single-element list. When the
    lists are later expanded into columns that left NaN in every universe
    column but the first; returning a full list of ones avoids that.
    """
    columns = (
        'slc_flux_weight_expskin',
        'slc_flux_weight_horncurrent',
        'slc_flux_weight_kminus',
        'slc_flux_weight_kplus',
        'slc_flux_weight_kzero',
        'slc_flux_weight_nucleoninexsec',
        'slc_flux_weight_nucleonqexsec',
        'slc_flux_weight_nucleontotxsec',
        'slc_flux_weight_piminus',
        'slc_flux_weight_pioninexsex',
        'slc_flux_weight_pionqexsec',
        'slc_flux_weight_piontotxsec',
        'slc_flux_weight_piplus',
    )

    # Only the first column's length is checked, as before.
    if len(x['slc_flux_weight_expskin']) < n_universes:
        return [1] * n_universes

    w_arr = []
    for i in range(n_universes):
        # Multiply left to right in the listed column order.
        w = x[columns[0]][i]
        for col in columns[1:]:
            w = w * x[col][i]
        w_arr.append(w)

    return w_arr

In [None]:
def make_df_flxw(df):
    """Replace the individual flux-weight columns with one column per universe.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the thirteen 'slc_flux_weight_*' columns listed below.
        It is not modified; previously a temporary 'flxw' column was written
        into the caller's frame.

    Returns
    -------
    pandas.DataFrame
        A new frame with the 'slc_flux_weight_*' columns removed, the combined
        weights from ``time_flxw`` expanded into 'flxw_0', 'flxw_1', ... columns,
        and a nominal 'flxw_nom' column equal to 1. For an empty input only
        'flxw_nom' is added, because there are no rows to expand.
    """
    flux_columns = [
        'slc_flux_weight_expskin',
        'slc_flux_weight_horncurrent',
        'slc_flux_weight_kminus',
        'slc_flux_weight_kplus',
        'slc_flux_weight_kzero',
        'slc_flux_weight_nucleoninexsec',
        'slc_flux_weight_nucleonqexsec',
        'slc_flux_weight_nucleontotxsec',
        'slc_flux_weight_piminus',
        'slc_flux_weight_pioninexsex',
        'slc_flux_weight_pionqexsec',
        'slc_flux_weight_piontotxsec',
        'slc_flux_weight_piplus',
    ]

    # drop() returns a new frame, so the caller's dataframe stays untouched
    out = df.drop(columns=flux_columns)

    # A row-wise apply on an empty frame does not return a Series of lists, so
    # handle the empty placeholder frames explicitly.
    if df.empty:
        out['flxw_nom'] = 1
        return out

    # multiply all the flux weights together since they're correlated
    universe_weights = df.apply(lambda row: time_flxw(row), axis=1)

    # expand the per-row lists into one column per universe
    df_flxw = pd.DataFrame(universe_weights.tolist(), index=df.index).add_prefix('flxw_')

    # put the identifier columns and the universe columns side by side
    out = pd.concat([out, df_flxw], axis=1)

    # add nominal flux weight = 1
    out['flxw_nom'] = 1

    return out

In [None]:
# Replace the individual flux-weight columns of the HNL frame with the per-universe columns.
# NOTE: df_hnl is overwritten, so this cell cannot be re-run without reloading df_hnl first.
df_hnl = make_df_flxw(df_hnl)

In [None]:
# These dataframes are empty dummy placeholders for now
#df_nu = make_df_flxw(df_nu)
#df_cos = make_df_flxw(df_cos)

<h1>SAVE THIS DATAFRAME</h1>

In [None]:
# Write the three samples to disk with pickle protocol 5.
for frame, out_path in (
    (df_hnl, "./df_hnl_flxw.pkl"),
    (df_nu, "./df_nu_flxw.pkl"),
    (df_cos, "./df_cos_flxw.pkl"),
):
    frame.to_pickle(out_path, protocol=5)

<h1> START FROM HERE!!! LOAD PKL FILE WITH FLX WEIGHT ADDED </h1>

<h3>HNL</h3>

In [None]:
# Reload the HNL frame that already has the flux-weight universes attached.
# `with` closes the handle even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only open pickle files you trust.
with open("./df_hnl_flxw.pkl", 'rb') as file:
    df_hnl = pickle.load(file)

<h3>TPC Neutrino</h3>

In [None]:
# Reload the TPC-neutrino frame saved by the save cell.
# `with` closes the handle even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only open pickle files you trust.
with open("./df_nu_flxw.pkl", 'rb') as file:
    df_nu = pickle.load(file)

<h3>Intime Cosmics</h3>

In [None]:
# Reload the in-time cosmics frame saved by the save cell.
# `with` closes the handle even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only open pickle files you trust.
with open("./df_cos_flxw.pkl", 'rb') as file:
    df_cos = pickle.load(file)

<h1> Make Beam Bucket - Post PID</h1>

In [None]:
#Copy from text file that contains slice count before clear cosmics cut

# Slice counts copied by hand from the text file that holds the counts before
# the clear-cosmics cut. Each value is written as the sum of two copied numbers.
true_signals, true_nonfv_signals = 17442 + 17207, 7608 + 7443
start_signals, start_nonfv_signals = 16653 + 16435, 6370 + 6261
total_true_signals = 25050 + 24650
total_start_signals = 23023 + 22696

# The totals are what the beam-bucket plot below is given.
true_counts, start_counts = total_true_signals, total_start_signals
print(f"true counts = {true_counts}")
print(f"start counts = {start_counts}")

In [None]:
# Scale factor passed to plot_slc_var for the HNL sample in the beam-bucket plot.
scaleHNLPlot = 80

In [None]:
# Value quoted in the x-axis titles of this and the later beam-bucket plots.
width = 19.02

# Beam-bucket distribution of 'mod_t' for the three samples; the returned bin
# contents and edges are kept in `hist` and `bins`.
hist, bins = plot_slc_var(
    df_hnl, df_nu, df_cos,
    true_counts, start_counts,
    'mod_t',
    scaleHNLPlot,
    xmin=0, xmax=19, xnbin=19,
    xtitle=f'Opt0 Time Corrected Z % {width} [ns]',
)
plt.show()

In [None]:
# Preview the first rows only instead of dumping the whole frame.
df_hnl.head()

<h1>Statistical Uncertainty</h1>

<h3>Scaling to ~10 events for hypothesis testing </h3>

In [None]:
# Coupling used in the simulation.
simU = 1e-7
# Coupling matching the plotted sample: reuse scaleHNLPlot rather than
# repeating the literal 80, so the plot scale and this value cannot drift apart.
plotU = getUfromScaleFactor(simU, scaleHNLPlot)

# Extra scale applied on top of the plot scale for the fit.
scaleFit= 1/100
fitU = getUfromScaleFactor(plotU, scaleFit)


print("Simulated U = " + str(simU))
print("Plot U = " + str(plotU))
print("Fit U = " + str(fitU))

In [None]:
# Store the fit scale as a constant column so it can be multiplied into the per-row weights.
df_hnl['scaleFit'] = scaleFit

In [None]:
# Side-by-side 'mod_t' histograms of the HNL sample: left with the scale_pot
# weights only, right with the extra scaleFit factor applied. The bin contents
# are kept in n_stat_noScale / n_stat for the uncertainty cells below.
fig, (ax1, ax2) = plt.subplots(1,2, figsize = (10,4))

xmin, xmax, xnbin = 0, 19, 19
xlimmin, xlimmax = xmin, xmax

pltdf = df_hnl['mod_t']
weights = df_hnl['scale_pot'] 

#-----------------------------------------------------------------#
# The legend text writes "\\mu" so the string still contains a literal \mu for
# mathtext; a bare "\m" in a non-raw string is an invalid escape sequence.
n_stat_noScale, bins, _ = ax1.hist(
                            pltdf,
                            bins = np.arange(xmin, xmax+(xmax-xmin)/xnbin, (xmax-xmin)/xnbin),
                            weights = weights,
                            density = False,
                            histtype="step",
                            edgecolor = col_dict["Flamingo"],
                            linestyle = "-",
                            linewidth = 2,
                            label = "M = 200 MeV\n|U$_{\\mu4}|^{2} = $" +str("{:.3g}".format(plotU))
                        )
ax1.set_xlim(10, 19)
ax1.set_ylim(0, 2200)
ax1.legend(loc = 'upper right',fontsize = 14)
plot_tick(ax1, 16)
plot_title(ax1, "", 'Opt0 Time Corrected Z % ' + str(width) + ' [ns]',  "Slices (1x10$^{21}$ POT)", 16)

#-----------------------------------------------------------------#
weights = df_hnl['scale_pot'] * df_hnl['scaleFit']
n_stat, _, _ = ax2.hist(
                            pltdf,
                            bins = np.arange(xmin, xmax+(xmax-xmin)/xnbin, (xmax-xmin)/xnbin),
                            weights = weights,
                            density = False,
                            histtype="step",
                            edgecolor = col_dict["Flamingo"],
                            linestyle = "-",
                            linewidth = 2,
                            label = "M = 200 MeV\n|U$_{\\mu4}|^{2} = $" +str("{:.3g}".format(fitU))
                        )
ax2.set_xlim(10, 19)
ax2.set_ylim(0, 2200*scaleFit)
ax2.legend(loc = 'upper right',fontsize = 14)
plot_tick(ax2, 16)
plot_title(ax2, "", 'Opt0 Time Corrected Z % ' + str(width) + ' [ns]',  "Slices (1x10$^{21}$ POT)", 16)

fig.tight_layout()

plt.show()

In [None]:
# Statistical uncertainty of the scaled signal histogram.
print(n_stat)
# Keep bins 10 onward, matching the 10-19 x-range of the plots.
# NOTE: n_stat is overwritten, so re-running this cell slices it again.
n_stat = n_stat[10:]
# Absolute per-bin uncertainty sqrt(N). The previous 1/sqrt(N) is the
# fractional error, yet stats_arr is used downstream as an absolute error bar,
# divided by the bin content to form the percentage, and saved as 'sig_stats'.
stats_arr = np.sqrt(n_stat)
print(stats_arr)

In [None]:
# Statistical uncertainty of the unscaled signal histogram.
print(n_stat_noScale)
# Keep bins 10 onward, matching the 10-19 x-range of the plots.
# NOTE: n_stat_noScale is overwritten, so re-running this cell slices it again.
n_stat_noScale = n_stat_noScale[10:]
# Absolute per-bin uncertainty sqrt(N). The previous 1/sqrt(N) is the
# fractional error, yet this array is drawn as an absolute error bar on the counts.
stats_noScale_arr = np.sqrt(n_stat_noScale)
print(stats_noScale_arr)

In [None]:
#make array for plotting
n_stat_noScale_plot = np.insert(n_stat_noScale, 0, 0)
bins = bins[10:]
bins_mid = np.convolve(bins, [0.5, 0.5], "valid")

n_stat_plot = np.insert(n_stat, 0, 0)

In [None]:
# Redraw the unscaled (left) and scaled (right) signal histograms as step
# plots, with the statistical-uncertainty arrays drawn as error bars at the bin centres.
fig, (ax1, ax2) = plt.subplots(1,2, figsize = (10,4))

xmin, xmax, xnbin = 0, 19, 19
xlimmin, xlimmax = xmin, xmax

pltdf = df_hnl['mod_t']
weights = df_hnl['scale_pot'] 

#-----------------------------------------------------------------#
#nStat NoScale
# The legend text writes "\\mu" so the string still contains a literal \mu for
# mathtext; a bare "\m" in a non-raw string is an invalid escape sequence.
ax1.step(bins, n_stat_noScale_plot, color = col_dict['Flamingo']
         , label =  "M = 200 MeV\n|U$_{\\mu4}|^{2} = $" +str("{:.3g}".format(plotU))
        )

ax1.errorbar(bins_mid, n_stat_noScale, stats_noScale_arr,
            ls='none',
            color = col_dict['Mauve'],
            capsize=8
            ,label = "Statistics Uncertainty"
           )

ax1.set_xlim(10, 19)
ax1.set_ylim(0, 2200)
ax1.legend(loc = 'upper right',fontsize = 14)
plot_tick(ax1, 16)
plot_title(ax1, "", 'Opt0 Time Corrected Z % ' + str(width) + ' [ns]',  "Slices (1x10$^{21}$ POT)", 16)

#-----------------------------------------------------------------#
weights = df_hnl['scale_pot'] * df_hnl['scaleFit']
ax2.step(bins, n_stat_plot, color = col_dict['Flamingo']
         , label =  "M = 200 MeV\n|U$_{\\mu4}|^{2} = $" +str("{:.3g}".format(fitU))
        )

ax2.errorbar(bins_mid, n_stat, stats_arr,
            ls='none',
            color = col_dict['Mauve'],
            capsize=8
            ,label = "Statistics Uncertainty"
           )


ax2.set_xlim(10, 19)
ax2.set_ylim(0, 2200*scaleFit)
ax2.legend(loc = 'upper right',fontsize = 14)
plot_tick(ax2, 16)
plot_title(ax2, "", 'Opt0 Time Corrected Z % ' + str(width) + ' [ns]',  "Slices (1x10$^{21}$ POT)", 16)

fig.tight_layout()

if ifSave:
    plt.savefig(savePath+str("beam_bucket_scale_statistics.png"), dpi=200)
    
plt.show()

<h1>Flux Weights</h1>

In [None]:
# Histogram 'mod_t' once per flux-weight universe, collecting the bin contents
# in n_arr, then overlay the central-value histogram (nominal weight column).
n_arr = []

fig, ax = plt.subplots(1, 1, figsize=(8, 6))

xmin, xmax, xnbin = 0, 19, 19
xlimmin, xlimmax = xmin, xmax

pltdf = df_hnl['mod_t']

for i in range(0, 1000):
    weights = df_hnl['flxw_{}'.format(i)] * df_hnl['scale_pot'] * df_hnl['scaleFit']

    # Only the first universe carries a legend entry.
    label = "Universes" if i == 0 else ''

    n, _, _ = ax.hist(
        pltdf,
        bins=np.arange(xmin, xmax + (xmax - xmin) / xnbin, (xmax - xmin) / xnbin),
        weights=weights,
        density=False,
        histtype="step",
        edgecolor=col_dict["PastelGreen"],
        linestyle="-",
        linewidth=2,
        label=label,
    )
    n_arr.append(n)

# Central value: same histogram weighted with the nominal flux-weight column.
weights = df_hnl['scale_pot'] * df_hnl['flxw_nom'] * df_hnl['scaleFit']

n_cv, bins, _ = ax.hist(
    pltdf,
    bins=np.arange(xmin, xmax + (xmax - xmin) / xnbin, (xmax - xmin) / xnbin),
    weights=weights,
    density=False,
    histtype="step",
    edgecolor=col_dict["Flamingo"],
    linestyle="-",
    linewidth=2,
    label="Central Value \n(Flux Weight = 1)",
)

ax.legend(loc='upper right', fontsize=14)

plot_tick(ax, 16)
plot_title(ax, "", f'Opt0 Time Corrected Z % {width} [ns]', "Slices (1x10$^{21}$ POT)", 16)

ax.set_xlim(10, 19)
ax.set_ylim(0, 2500 * scaleFit)
fig.tight_layout()

if ifSave:
    plt.savefig(savePath + str("beam_bucket_flux_weight_universe.png"), dpi=200)
plt.show()

<h3>Calculate Mean and Std</h3>

In [None]:
#check xbin range
print("xbins range")
print(bins)

n_arr = np.array(n_arr)
print("bins")
print(n_arr)
#check shape
print("shape")
print(n_arr.shape)

#transpose 
n_trans = np.transpose(n_arr)

#get mean
mean_arr = n_trans.mean(axis=1)
mean_arr
print("mean")
print(mean_arr)

#get std
std_arr = np.array(n_trans).std(1)
std_arr
print("std")
print(std_arr)

<h3>Keep only relevant bins</h3>

In [None]:
# Show, then keep, only entries 10 onward of each array (the plots use the 10-19 x-range).
# NOTE: every array is overwritten in place, so re-running this cell slices again.
print(mean_arr[10:])
print(std_arr[10:])
print(n_cv[10:])
print(bins[10:])

n_cv = n_cv[10:]
bins = bins[10:]

mean_arr = mean_arr[10:]
std_arr = std_arr[10:]

In [None]:
#make array for plotting
n_cv_plot = np.insert(n_cv, 0, 0)

bins_mid = np.convolve(bins, [0.5, 0.5], "valid")

mean_arr_plot = np.insert(mean_arr, 0, 0)

In [None]:
# Central value, universe spread (as error bars) and universe mean on one axis.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))

# Central-value histogram drawn as a step line.
ax.step(
    bins, n_cv_plot,
    color=col_dict['Flamingo'],
    label="Central Value \n(Flux Weight = 1)",
)

# Universe standard deviation drawn around the central value at the bin centres.
ax.errorbar(
    bins_mid, n_cv, std_arr,
    ls='none',
    color=col_dict['PastelGreen'],
    capsize=8,
    label="Universes 1 Sigma",
)

# Mean over the universes.
ax.step(
    bins, mean_arr_plot,
    color=col_dict['MintGreen'],
    label="Universes Mean",
)

# Titles and ticks.
plot_title(ax, "", f'Opt0 Time Corrected Z % {width} [ns]', "Slices (1x10$^{21}$ POT)", 16)
plot_tick(ax, 16)

ax.legend(loc="best", fontsize=14, fancybox=True, ncol=1)

ax.set_xlim(10, 19)
ax.set_ylim(0, 2200 * scaleFit)

fig.tight_layout()
if ifSave:
    plt.savefig(savePath + str("beam_bucket_flux_weight_mean_std.png"), dpi=200)
plt.show()

<h1>Plot Everything</h1>

<h3>Make Fractional Error</h3>

In [None]:
# Per-bin uncertainties as a percentage of the central value.
# NOTE(review): this division assumes stats_arr and std_arr are absolute uncertainties
# in the same units as n_cv — confirm against the cells where they are computed.
stats_frac_arr = stats_arr / n_cv * 100
std_frac_arr = std_arr / n_cv * 100

In [None]:
# Print the percentage arrays computed in the previous cell.
print(stats_frac_arr)
print(std_frac_arr)

In [None]:
#make array for plotting
stats_frac_plot = np.insert(stats_frac_arr, 0, 0)
std_frac_plot = np.insert(std_frac_arr, 0, 0)

In [None]:
# Two stacked panels sharing the x axis: the central-value histogram on top
# and the percentage uncertainties underneath.
f, (ax1, ax2) = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]}, figsize = (8, 8), sharex = True)

#nominal
# The legend text writes "\\mu" so the string still contains a literal \mu for
# mathtext; a bare "\m" in a non-raw string is an invalid escape sequence.
ax1.step(bins, n_cv_plot, color = col_dict['Flamingo']
         , label = "M = 200 MeV\n|U$_{\\mu4}|^{2} = $" +str("{:.3g}".format(fitU))
        )

ax2.step(bins, stats_frac_plot, color = col_dict['Mauve'], label = "Statistics")
ax2.step(bins, std_frac_plot, color = col_dict['PastelGreen'], label = "Flux")

#tick stuff
plot_title(ax1,"", '',"Slices (1x10$^{21}$ POT)" , 16)
plot_tick(ax1, 16)

plot_title(ax2,"", 'Opt0 Time Corrected Z % ' + str(width) + ' [ns]', "Fractional Error [%]", 16)
plot_tick(ax2, 16)

ax1.legend(loc="best", fontsize=14, fancybox=True, ncol = 1)
ax2.legend(loc="best", fontsize=14, fancybox=True, ncol = 1)

ax1.set_xlim(10,19)
ax1.set_ylim(0, 2200*scaleFit)

ax2.set_xlim(10,19)
ax2.set_ylim(0, 30)

# Lay out THIS figure. The previous `fig.tight_layout()` acted on the stale
# `fig` left over from an earlier cell, because this figure is bound to `f`.
f.tight_layout()

if ifSave:
    plt.savefig(savePath+str("beam_bucket_fractional_error.png"), dpi=200)
plt.show()

<h1>Make Fake Background For Now</h1>

In [None]:
# Placeholder background: an all-zero array with one entry per signal bin.
bkg = np.zeros(len(n_cv))

In [None]:
# Print the placeholder background next to the signal central value.
print(bkg)
print(n_cv)

<h1>Save Array</h1>

In [None]:
# Result bundle for the 200 mass point: coupling, signal bin contents, their
# flux and statistical uncertainties, and the placeholder background.
# Each background entry gets its own copy so that editing one array later
# cannot silently change the other two (they used to alias the same object).
hnl_dict= {
    '200': {
        'U': fitU,
        'sig': n_cv,
        'sig_flux': std_arr,
        'sig_stats': stats_arr,
        'bkg': bkg.copy(),
        'bkg_flux': bkg.copy(),
        'bkg_stats': bkg.copy()
    }
}

In [None]:
# Print the result bundle before it is written to disk.
print(hnl_dict)

In [None]:
# Write the result dictionary to ./HNL.npy.
# NOTE(review): np.save presumably stores a dict as a pickled object array, so
# readers would need np.load(..., allow_pickle=True).item() — confirm with the consumer.
np.save('./HNL.npy', hnl_dict) 