In [4]:
# This notebook is used to turn ROOT toys to numpy arrays,
# to be used in NumpyToysToMeasurements.ipynb or IntegratedXSec.ipynb
# Separated into parts (with some overlaps and repetitions)

# Integrated cross section - 4 GeV cut toys

In [None]:
# Prerequisites:
# 1) norms.txt - a file with the estimated yearly event rate according to the run plan specified in the paper.
# 2) Gaussian fluxes

In [4]:
import os
import ROOT
import numpy as np
import uproot4 as uproot
from ipynb.fs.full.CoefficientsCalcPlus import GetCoefficientsFlux
from ipynb.fs.full.CoefficientsCalcPlus import get_normalization
from sklearn import linear_model
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import inspect
#plt.style.use('seaborn-colorblind')

%jsroot on

In [None]:
# Comment:
# Toy production occasionally fails because of grid issues, so the output files were inspected to find a
# size threshold above which every file is a successful production.
# A failed production left in the list would make the conversion to .npy fail, so the threshold has to be
# re-derived by hand for each new set of toys (the size plot produced below helps with that).

toy_files = []
min_size = 13255*1024 # Size threshold in bytes; should be updated per toy set, see comment above
directory = "IntegratedROOTToys/"

sizes = []
for file in os.listdir(directory):
    size = os.path.getsize(os.path.join(directory,file))
    sizes.append(size)
    if size < min_size:
        continue
    if file.startswith("Toy"):
        toy_files.append(directory+file)
plt.plot(sorted(sizes)[::-1])
# Toy identifier used in the histogram names: the 4th field of the path after splitting on "y" and ".".
# NOTE(review): this depends on the exact file-naming scheme of the toys - verify for a new production.
toy_nums = [toy_path.replace(".","y").split("y")[3] for toy_path in toy_files]

# One normalization value per line. The file handle is closed deterministically, and blank lines are
# skipped so the last value is kept whether or not the file ends with a newline.
with open("norms.txt", "r") as norms_file:
    normalization = np.array([float(line) for line in norms_file.read().splitlines() if line.strip()])

In [None]:
# Convert every selected ROOT toy into a [58 off-axis bins] x [16000 omega bins] numpy array on disk.
n_oa_bins = 58
n_hist_bins = 16000
for toy, toy_path in enumerate(toy_files):
    if toy % 100 == 0:
        print(toy)  # coarse progress indicator
    toy_file = ROOT.TFile.Open(toy_path, "read")
    toy_res = []
    for oa_bin in range(n_oa_bins):
        h = toy_file.Get(f'OmegaRecoHistToy{toy_nums[toy]}OAB{oa_bin+1}')
        # In the generated simulation, off-axis bin 56 had 9e6 events instead of 1e7 (technical issue).
        if oa_bin + 1 == 56:
            entries = 9e6
        else:
            entries = 1e7
        h.Scale(normalization[oa_bin] / entries)
        # ROOT bin indices start at 1; index 0 is the underflow bin.
        toy_res.append([h.GetBinContent(i) for i in range(1, n_hist_bins + 1)])
    np.save(f"NumpyOmega4000CutFixed/OmegaToy{toy}", np.array(toy_res))
    toy_file.Close()

In [None]:
# Gather all converted toys into a single array indexed [toy][off-axis bin][omega bin].
# Each .npy file is read exactly once; indexing np.load(...) inside a per-bin comprehension would
# re-read the same file from disk for every one of the 58 off-axis bins.
full_res = np.array([np.load("NumpyOmega4000CutFixed/OmegaToy"+str(k)+".npy")[:58] for k in range(len(toy_files))])
np.save("NumpyOmega4000CutFixed/FullResults.npy",full_res)

# Integrated cross section - no cut on visible energy

In [None]:
import numpy as np
from ipynb.fs.full.CoefficientsCalcPlus import GetCoefficientsFlux
from ipynb.fs.full.CoefficientsCalcPlus import get_normalization
import sklearn.linear_model as linear_model
import scipy
import matplotlib.pyplot as plt
import os
import ROOT

In [None]:
def merge_toy_files(folder_path):
    """Load every ``ELepToy<N>.npy`` file in ``folder_path`` and stack them in numeric order.

    Only files whose name is exactly ``ELepToy`` + digits + ``.npy`` are used, so other
    arrays stored in the same folder (for example a previously saved merged result) are
    ignored instead of breaking the numeric sort.

    Parameters
    ----------
    folder_path : str
        Directory that holds the per-toy arrays.

    Returns
    -------
    numpy.ndarray
        The toy arrays stacked along a new first axis, ordered by increasing toy number.

    Raises
    ------
    ValueError
        If the folder contains no matching file, or (from ``np.stack``) if the toy
        arrays do not all share one shape.
    """
    import re  # local import: the import cell of this section does not provide ``re``

    toy_name = re.compile(r'ELepToy(\d+)\.npy')

    # Pair each matching file with its integer toy number.
    numbered_files = []
    for file_name in os.listdir(folder_path):
        match = toy_name.fullmatch(file_name)
        if match is not None:
            numbered_files.append((int(match.group(1)), file_name))

    if not numbered_files:
        raise ValueError("No ELepToy<N>.npy files found in " + repr(folder_path))

    # Numeric (not lexicographic) order, so ELepToy2 comes before ELepToy10.
    numbered_files.sort(key=lambda pair: pair[0])

    # Load each numpy file, then stack all arrays along a new first axis.
    toy_arrays = [np.load(os.path.join(folder_path, file_name)) for _, file_name in numbered_files]
    merged_array = np.stack(toy_arrays, axis=0)

    return merged_array

# Merge all ELep toys found in the toy folder and keep the result both on disk and in memory.
folder_path = 'NumpyELep'  # Folder where ELep toys are stored
full_res_no_cut = merge_toy_files(folder_path)
merged_array = full_res_no_cut  # same array under the name used for the quick shape check
print(merged_array.shape)  # Print the shape of the merged array to verify

np.save('NumpyELep/full_res', full_res_no_cut)

In [None]:
# Seed embedded in each NuWro output file name, keyed as seeds[flux width][flux location].
# The keys are turned into path fragments with str(), so integer keys (1, 2) must stay integers:
# str(1) -> "1" while str(1.0) -> "1.0" would point at a different file name.
seeds = {
    0.07: {0.5: 30288, 0.75: 16134, 1: 29554, 1.25: 23129, 1.5: 22920, 1.75: 22516, 2: 25490},
    0.1: {0.5: 28604, 0.75: 17160, 1: 12196, 1.25: 29728, 1.5: 19473},
    0.078: {0.75: 27454},
}

# xsecs[width][loc] = 1e6 * fScaleFactor, read from the first entry of each flat tree.
xsecs = {}
for width in seeds:
    xsecs[width] = {}
    for loc in seeds[width]:
        locstring = str(loc).replace(".","")
        widthstring = str(width).replace(".","")
        tag = "test"+locstring+"l"+widthstring+"w"
        file_name = "NuWro/"+tag+"/flat_"+tag+"_Ar_SF_numu_NuWroOut_"+str(seeds[width][loc])+".root"
        file = ROOT.TFile.Open(file_name)
        # Fail with a readable message instead of an obscure null-pointer error further down.
        if not file or file.IsZombie():
            raise OSError("Could not open NuWro output file: " + file_name)
        t = file.Get("FlatTree_VARS")
        for event in t:
            # Only the first entry is read; the loop is left immediately afterwards.
            xsecs[width][loc] = 1e6*event.fScaleFactor
            break
        else:
            file.Close()
            raise ValueError("FlatTree_VARS has no entries in " + file_name)
        file.Close()

# Reference values and their locations for the 0.07-wide fluxes (insertion order of the dict).
vals = list(xsecs[0.07].values())
locs = xsecs[0.07].keys()

In [None]:
# Integrated cross section without a visible-energy cut: for each probed energy, combine the rows of
# the toy arrays with flux coefficients, giving one value per systematic toy and per statistical throw.
# Per-energy accumulators (one entry appended per element of `energies`).
xsec_no_cut = []
dxsec_sys_no_cut = []   # not filled in this cell
dxsec_stat_no_cut = []  # not filled in this cell
energies = np.array([1e3*loc for loc in locs])  # 1e3 x each flux location (presumably GeV -> MeV; confirm)
denergies = []
sys_results_no_cut = []
stat_results_no_cut = []
years = 5  # exposure multiplier used for the statistical throws below

# Mean over axis 0 (the toy axis of the stacked array), then sum over the last axis:
# one total per remaining row (presumably one per off-axis bin; confirm).
full_res_cv_total_no_cut = full_res_no_cut.mean(axis=0).sum(axis=1)
# 1000 Poisson throws around years x the totals above.
# NOTE(review): no random seed is set, so these throws differ between runs.
res_stat_no_cut = np.array([np.random.poisson(years*full_res_cv_total_no_cut) for i in range(1000)])

for energy in energies:
    alpha = 1e-12  # passed to GetCoefficientsFlux together with the Ridge model (presumably the regularization strength; confirm)
    # 1e-3*energy undoes the 1e3 factor applied when `energies` was built.
    coeffs, std = GetCoefficientsFlux(1e-3*energy,0.07,alpha,model=linear_model.Ridge,years=1)
    norm = get_normalization(coeffs)  # not used below; ftilde calls get_normalization again
    
    rebin_factor = 1
    # NOTE(review): E and nucleons look like an exposure and a target-nucleon count - confirm meaning and units.
    E = (12/201)*1.1e21
    nucleons = 1.3954*(2*3*0.574)*1e3/1.66e-27
    # Overall factor (including 1e38) that multiplies the coefficient-weighted sums below.
    ftilde = 1e38*(1/(E*nucleons*get_normalization(coeffs)))/(1e-3*rebin_factor)
    
    denergies.append(1e3*std)

    ## Sys: one coefficient-weighted total per systematic toy
    expanded_coeffs = np.expand_dims(coeffs, axis=1)  # trailing length-1 axis so it broadcasts across the last axis
    result = full_res_no_cut * expanded_coeffs
    result = np.sum(result, axis=1)      # sum over axis 1 (the axis weighted by the coefficients)
    sys_result = np.sum(result, axis=1)  # sum over the remaining last axis -> one number per toy
    sys_results_no_cut.append(sys_result)
    
    # Central value for this energy: mean over toys of the scaled totals.
    xsec_no_cut.append((ftilde*sys_result).mean()*0.001)
    
    ## Stat: the same weighting applied to the Poisson throws, divided by `years`
    temp_res = res_stat_no_cut @ expanded_coeffs
    stat_result = temp_res * ftilde * 0.001 / years
    stat_results_no_cut.append(np.squeeze(stat_result,axis=1))  # drop the length-1 axis left by the matrix product

# After conversion the first axis of both arrays indexes the energies.
sys_results_no_cut = np.array(sys_results_no_cut)
stat_results_no_cut = np.array(stat_results_no_cut)

In [None]:
# Central values and reference cross sections (the latter scaled by 1e38).
cv = np.asarray(xsec_no_cut)
true = 1e38 * np.array(vals)

# Rescale the energy axis by 1e-3 exactly once: the check on the first entry keeps a re-run of
# this cell from applying the factor a second time.
if energies[0] == 500:
    energies = 1e-3 * np.array(energies)
    denergies = 1e-3 * np.array(denergies)

## Systematics
# Rows are toys, columns are energies. Each row is divided by its own sum (shape part); the last
# column is then overwritten with that sum so it carries the normalization instead.
sys_results_t = sys_results_no_cut.T  # Shape: [#toys, #energies]
row_sums = sys_results_t.sum(axis=1)
expanded_sums = row_sums[:, np.newaxis]
sys_results_normed = sys_results_t / expanded_sums
sys_results_normed[:, -1] = row_sums

cov_test = np.cov(sys_results_normed.T)
sums_mean = row_sums.mean()
# Relative shape uncertainty for every column except the last (which holds the normalization).
shape = np.sqrt(cov_test.diagonal()[:-1]) / (sys_results_normed.T[:-1, :].mean(axis=1))
shape_unc = np.abs(cv * np.append(shape, 0))
norm_unc = cv * np.sqrt(cov_test[-1, -1]) / sums_mean

## Statistics
stat_unc = np.sqrt(np.cov(stat_results_no_cut).diagonal())

## Stat + Shape
stat_shape_unc = np.sqrt(stat_unc**2 + shape_unc**2)

## At this point: cv holds the central values, energies the x axis, and shape_unc / norm_unc the
## decomposed systematic uncertainties.

# Covariance of the decomposed (shape + normalization) toys, kept as an independent array.
deco_cov = cov_test.copy()

In [None]:
# Rows of the saved array: central value, stat+shape uncertainty, shape-only uncertainty.
final_res = np.stack((cv, stat_shape_unc, shape_unc), axis=0)
np.save('IntegratedNoCutRes.npy', final_res)

#  ELep toys (systematic) for differential cross section analysis

In [None]:
import os
import ROOT
import numpy as np
import uproot4 as uproot
from ipynb.fs.full.CoefficientsCalcPlus import GetCoefficientsFlux
from ipynb.fs.full.CoefficientsCalcPlus import get_normalization
from sklearn import linear_model
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import inspect
import re
#plt.style.use('seaborn-colorblind')

%jsroot on

In [None]:
# Comment:
# Toy production occasionally fails because of grid issues, so the output files were inspected to find a
# size threshold above which every file is a successful production.
# A failed production left in the list would make the conversion to .npy fail, so the threshold has to be
# re-derived by hand for each new set of toys (the size plot produced below helps with that).

toy_files = []
min_size = 13255*1024 # Size threshold in bytes; should be updated per toy set, see comment above
directory = "IntegratedROOTToys/"

sizes = []
for file in os.listdir(directory):
    size = os.path.getsize(os.path.join(directory,file))
    sizes.append(size)
    if size < min_size:
        continue
    if file.startswith("Toy"):
        toy_files.append(directory+file)
plt.plot(sorted(sizes)[::-1])
# Toy identifier used in the histogram names: the 4th field of the path after splitting on "y" and ".".
# NOTE(review): this depends on the exact file-naming scheme of the toys - verify for a new production.
toy_nums = [toy_path.replace(".","y").split("y")[3] for toy_path in toy_files]

# One normalization value per line. The file handle is closed deterministically, and blank lines are
# skipped so the last value is kept whether or not the file ends with a newline.
with open("norms.txt", "r") as norms_file:
    normalization = np.array([float(line) for line in norms_file.read().splitlines() if line.strip()])

In [None]:
# True toys - they're unused if the analysis doesn't include unfolding
# Each toy file becomes a [58 off-axis bins] x [16000 omega bins] array saved under NumpyTrueOmega/.
for toy in range(len(toy_files)):
    toy_res = []
    if toy%100 == 0:
        print(toy)
    toy_file = ROOT.TFile.Open(toy_files[toy],"read")
    for oa_bin in range(58):
        h = toy_file.Get('OmegaTrueHistToy'+toy_nums[toy]+'OAB'+str(oa_bin+1))
        # Normalize to the expected rate of this off-axis bin.
        h.Scale(normalization[oa_bin]/h.GetEntries())
        # ROOT bin indices start at 1; index 0 is the underflow bin.
        toy_res.append([h.GetBinContent(i) for i in range(1,16000+1)])
    # Bin centres are read once per toy file (from the last histogram, while the file is still open)
    # instead of once per off-axis bin; the value left in omega_bins at the end is unchanged.
    omega_bins = [h.GetBinCenter(i) for i in range(1,16000+1)]
    np.save("NumpyTrueOmega/TrueToy"+str(toy),np.array(toy_res))
    toy_file.Close()

In [None]:
#ELep toys
# Each toy file becomes a [58 off-axis bins] x [16000 histogram bins] array saved under NumpyELep/.
for toy in range(len(toy_files)):
    toy_res = []
    if toy%100 == 0:
        print(toy)
    toy_file = ROOT.TFile.Open(toy_files[toy],"read")
    for oa_bin in range(58):
        h = toy_file.Get('ELepHistToy'+toy_nums[toy]+'OAB'+str(oa_bin+1))
        # Normalize to the expected rate of this off-axis bin.
        h.Scale(normalization[oa_bin]/h.GetEntries())
        # ROOT bin indices start at 1; index 0 is the underflow bin.
        toy_res.append([h.GetBinContent(i) for i in range(1,16000+1)])
    # Bin centres are read once per toy file (from the last histogram, while the file is still open)
    # instead of once per off-axis bin; the value left in omega_bins at the end is unchanged.
    omega_bins = [h.GetBinCenter(i) for i in range(1,16000+1)]
    np.save("NumpyELep/ELepToy"+str(toy),np.array(toy_res))
    toy_file.Close()

# ELep toys (statistical) for differential cross section analysis

In [None]:
# Prerequisite: ELep toys in the format "NumpyELep/ELepToy"+str(i)+".npy", created by the "#ELep toys" cell in the section above

In [None]:
# Central value of the ELep toys: element-wise mean over the first 900 systematic toys.
toy_paths = ["NumpyELep/ELepToy"+str(i)+".npy" for i in range(900)]
full = [np.load(toy_path) for toy_path in toy_paths]
full_res = np.mean(np.array(full), axis=0)
np.save("off_axis_ELep_CV_1MeV", full_res)

In [None]:
# Statistical toys: 1000 independent Poisson throws around 20 x the central-value array,
# each written to its own file.
off_axis_cvs = np.load("off_axis_ELep_CV_1MeV.npy")
expected_counts = 20*off_axis_cvs
toys = [np.random.poisson(expected_counts) for throw in range(1000)]
for i, stat_toy in enumerate(toys):
    np.save(f'NumpyELepStat/ELepStatToy{i}.npy', stat_toy)

# Mode histograms (CV)

In [None]:
# This block creates .npy files that store, for each interaction mode (CCQE, 2p2h, RES, Other), the histogram contents of every off-axis bin

In [None]:
# Input: one merged flat tree per off-axis bin (58 files).
# An earlier lookup of 'flat*.root' files under
# /eos/project-n/neutrino-generators/generatorOutput/NuWro/PRISM_studies/SF/proj<N> is not performed:
# its result was overwritten by the merged-file list before being used, and it could fail when /eos is
# not mounted or a directory holds no matching file.
# NOTE(review): `path` and the prefix used in `files` point at different relative locations - confirm.
path = '../SWAN_projects/XSec_ROOT/TTreeMerges/'
files = ["../XSec_ROOT/TTreeMerges/proj"+str(i+1)+"_merged.root" for i in range(58)]

# `linear_model` is the name bound by the import cells; the bare name `sklearn` is never imported.
coeffs, std = GetCoefficientsFlux(0.75,0.07,1e-12,model=linear_model.Ridge,years=1)
coeffs = np.array(coeffs)
norm = get_normalization(coeffs)
years = 20
res = [] #[toy][omega bin]
rebin = 60
omega_bins = [rebin*i+rebin/2 for i in range(int(-8000/rebin),int(8000/rebin))]
with open("norms.txt", "r") as norms_file:
    normalization = np.array([float(line) for line in norms_file.read().splitlines() if line.strip()])

total_Nbins = 16000
bin_min = -8000
bin_max = 8000

# TTree selection string for each interaction-mode category.
modes = {"CCQE": "Mode == 1",
         "2p2h": "Mode == 2",
         "RES": "Mode == 11 || Mode == 12 || Mode == 13",
         "Other": "Mode != 1 && Mode != 2 && Mode != 11 && Mode != 12 && Mode != 13"}

# Coefficient-weighted sums over all off-axis bins.
reco_hist = ROOT.TH1F("reco","reco",total_Nbins,bin_min,bin_max)
true_hist = ROOT.TH1F("true","true",total_Nbins,bin_min,bin_max)
mode_hists = [ROOT.TH1F(mode,mode,total_Nbins,bin_min,bin_max) for mode in modes]

modes_arrays = [[] for mode in modes]  # [mode][off-axis bin][histogram bin]
bins = 58
for i in range(bins):
    if i%10 == 0:
        print(i)
    file = ROOT.TFile.Open(files[i],"read")
    # The per-bin histograms are created after the file is opened and filled by name via TTree::Project;
    # keep this order.
    reco_added = ROOT.TH1F("reco_added"+str(i),"added",total_Nbins,bin_min,bin_max)
    true_added = ROOT.TH1F("true_added"+str(i),"added",total_Nbins,bin_min,bin_max)
    modes_added = [ROOT.TH1F(mode+"_added"+str(i),"added",total_Nbins,bin_min,bin_max) for mode in modes]
    t = file.Get("FlatTree_VARS")
    t.Project("reco_added"+str(i),"750-ELep") #reco
    t.Project("true_added"+str(i),"Enu_true-ELep") #true
    scale = years*normalization[i]/t.GetEntries()
    reco_added.Scale(scale)
    true_added.Scale(scale)
    for mode in modes:
        t.Project(mode+"_added"+str(i),"Enu_true-ELep",modes[mode])
    for mode_index, mode_added in enumerate(modes_added):
        mode_added.Scale(scale)
        modes_arrays[mode_index].append([mode_added.GetBinContent(j) for j in range(1,mode_added.GetNbinsX()+1)])
    # ROOT numbers the regular bins 1..N (0 is the underflow bin), so the loop covers
    # 1..total_Nbins inclusive.
    for j in range(1,total_Nbins+1):
        reco_added.SetBinError(j,np.sqrt(reco_added.GetBinContent(j)))
    reco_hist.Add(reco_added,coeffs[i])
    true_hist.Add(true_added,coeffs[i])
    for j in range(len(mode_hists)):
        mode_hists[j].Add(modes_added[j],coeffs[i])
    file.Close()
reco_hist.Rebin(rebin)
true_hist.Rebin(rebin)
for mode_hist in mode_hists:
    mode_hist.Rebin(rebin)


In [None]:
# Write one .npy file per interaction mode, named after the mode key.
for i, mode_key in zip(range(4), list(modes.keys())):
    print(mode_key)
    name = "Mode" + str(mode_key) + "Numpy_20Years"
    print(name)
    mode_array = np.array(modes_arrays[i])
    np.save(name, mode_array)

In [None]:
## Create numpy arrays of -Elep for each off-axis angle
# Input: one merged flat tree per off-axis bin (58 files).
# An earlier lookup of 'flat*.root' files under
# /eos/project-n/neutrino-generators/generatorOutput/NuWro/PRISM_studies/SF/proj<N> is not performed:
# its result was overwritten by the merged-file list before being used, and it could fail when /eos is
# not mounted or a directory holds no matching file.
# NOTE(review): `path` and the prefix used in `files` point at different relative locations - confirm.
path = '../SWAN_projects/XSec_ROOT/TTreeMerges/'
files = ["../XSec_ROOT/TTreeMerges/proj"+str(i+1)+"_merged.root" for i in range(58)]

years = 20
res = [] #[toy][omega bin]
rebin = 60
omega_bins = [rebin*i+rebin/2 for i in range(int(-8000/rebin),int(8000/rebin))]
with open("norms.txt", "r") as norms_file:
    normalization = np.array([float(line) for line in norms_file.read().splitlines() if line.strip()])

total_Nbins = 16000
bin_min = -8000
bin_max = 8000

# TTree selection string for each interaction-mode category.
modes = {"CCQE": "Mode == 1",
         "2p2h": "Mode == 2",
         "RES": "Mode == 11 || Mode == 12 || Mode == 13",
         "Other": "Mode != 1 && Mode != 2 && Mode != 11 && Mode != 12 && Mode != 13"}

# Created for parity with the mode-histogram cell; nothing is added to these histograms here.
mode_hists = [ROOT.TH1F(mode,mode,total_Nbins,bin_min,bin_max) for mode in modes]

modes_arrays = [[] for mode in modes]  # [mode][off-axis bin][histogram bin]
bins = 58
for i in range(bins):
    if i%10 == 0:
        print(i)
    file = ROOT.TFile.Open(files[i],"read")
    # The per-bin histograms are created after the file is opened and filled by name via TTree::Project;
    # keep this order.
    modes_added = [ROOT.TH1F(mode+"_added"+str(i),"added",total_Nbins,bin_min,bin_max) for mode in modes]
    t = file.Get("FlatTree_VARS")
    for mode in modes:
        t.Project(mode+"_added"+str(i),"-ELep",modes[mode])
    for mode_index, mode_added in enumerate(modes_added):
        mode_added.Scale(years*normalization[i]/t.GetEntries())
        modes_arrays[mode_index].append([mode_added.GetBinContent(j) for j in range(1,mode_added.GetNbinsX()+1)])
    file.Close()

In [22]:
# np.save returns None, so its result is not assigned: binding it to `cv` would overwrite the
# central-value array of the same name with None.
np.save("ModesELepCV.npy", np.array(modes_arrays))