In [None]:
# This notebook turns numpy toys from different off-axis bins (events per energy * off-axis angle * universe)
# into virtual flux measurements (cross section per energy * universe arrays). Virtual flux coefficients and
# statistical uncertainties are calculated on the fly. The results are then used to generate the plots in DiffXSec.

# Prerequisites:
# 1) Statistical ELep toys in numpy form, location: NumpyELepStat/ELepStatToy{i}.npy
# 2) Systematics ELep toys in numpy form, location: NumpyELep/ELepToy{i}.npy
# 3) True E_nu-ELep toys in numpy form, location: NumpyTrueOmega/TrueToy{i}.npy - not needed for the final plot (used mostly for unfolding studies); the code that loads them should be commented out if they are unused
# 4) Mode histograms in numpy form, location: "Mode"+modes[i]+"Numpy_20Years.npy"

In [1]:
# Imports and helper functions

import datetime
import os
import random as rnd
import sys
import warnings; warnings.simplefilter('ignore')

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import scipy
import sklearn.linear_model
import sklearn.linear_model as linear_model
import uproot4 as uproot
from matplotlib.pyplot import figure
#import cvxpy as cp

from ipynb.fs.full.CoefficientsCalcPlus import GetCoefficientsFlux
from ipynb.fs.full.CoefficientsCalcPlus import get_normalization

def rebin(arr, factor):
    """Sum consecutive groups of `factor` entries of a numpy array.

    Entries at the end that do not fill a complete group are dropped before
    summing. The array is reshaped to rows of length `factor` and each row is
    summed, so the result has one entry per complete group.
    """
    leftover = arr.size % factor

    # Trim the incomplete trailing group, if there is one.
    trimmed = arr if leftover == 0 else arr[:-leftover]

    grouped = trimmed.reshape(-1, factor)
    return grouped.sum(axis=1)

def rebinned_bin_centers(nbins, xmin, xmax, factor):
    """Return the centers of the bins obtained by merging `factor` bins at a time.

    The original histogram has `nbins` uniform bins on [xmin, xmax]. Merged
    bins start at `xmin` and each spans `factor` original bins. When `nbins`
    is not a multiple of `factor`, the trailing incomplete group is not
    covered; this matches `rebin`, which drops those trailing entries.

    Parameters:
        nbins:  number of bins in the original histogram.
        xmin:   lower edge of the original histogram.
        xmax:   upper edge of the original histogram.
        factor: number of original bins merged into one new bin.

    Returns:
        numpy array with `nbins // factor` bin centers.
    """
    new_nbins = int(nbins // factor)

    # Width of one merged bin: `factor` original bins.
    new_bin_width = factor * (xmax - xmin) / nbins

    # Build the edges from the merged width rather than stretching them over
    # [xmin, xmax]; the two differ whenever nbins % factor != 0.
    new_edges = xmin + new_bin_width * np.arange(new_nbins + 1)

    return (new_edges[1:] + new_edges[:-1]) / 2

def shift(toy,energy):
    """Shift `toy` along its last axis by `energy` entries, zero-filling the gap.

    A positive `energy` moves the content towards higher indices and a negative
    one towards lower indices. Entries that np.roll wraps around to the other
    end are set to 0, so nothing re-enters on the opposite side. `energy == 0`
    returns an unchanged copy. The input array is not modified.

    Parameters:
        toy:    numpy array; the shift is applied along the last axis.
        energy: integer number of entries to shift by.

    Returns:
        A new array with the same shape as `toy`.
    """
    shifted = np.roll(toy,energy,axis=-1)
    if energy > 0:
        # Rolling right wraps the tail to the front: clear the front.
        shifted[..., :energy] = 0
    elif energy < 0:
        # Rolling left wraps the head to the back: clear the back.
        shifted[..., energy:] = 0
    return shifted

In [None]:
def virtual_measurement(rebin_factor,energy,energy_width,input_flux_alpha=-1):
    """Build the virtual-flux measurement for one target flux and save it to disk.

    Coefficients from GetCoefficientsFlux are used as weights to combine the
    rows of every systematic, true and statistical toy into a single histogram
    per toy. The histograms are scaled by `ftilde` (events -> cross section)
    and written as .npy files.

    Parameters:
        rebin_factor:     number of original histogram bins merged into one output bin.
        energy:           position of the virtual flux; passed (times 1e-3) to
                          GetCoefficientsFlux and also used as the number of bins the
                          reco toys are shifted by (presumably 1 MeV per bin - TODO confirm).
        energy_width:     width of the virtual flux; passed (times 1e-3) to GetCoefficientsFlux.
        input_flux_alpha: third argument of GetCoefficientsFlux (presumably the Ridge
                          regularisation strength - TODO confirm). The sentinel -1
                          selects the default 1e-12.

    Returns:
        None.

    Side effects:
        Writes "TrueEnergy{energy}BinWidth{rebin_factor}.npy" in the working directory
        and the Sys/Stat reco arrays under "VirtualMeasurementResults/" (created if
        missing). Prints progress messages.
    """
    n_sys_toys = 733      # number of systematic (and true) toy files that are read
    n_stat_toys = 1000    # number of statistical toy files that are read
    years = 20            # stat toys are based on 20 years of data
    results_dir = "VirtualMeasurementResults"

    # Make sure the output directory exists before doing the expensive work.
    os.makedirs(results_dir, exist_ok=True)

    # -1 is the "not supplied" sentinel and maps to the default value.
    flux_alpha = 1e-12 if input_flux_alpha == -1 else input_flux_alpha

    coeffs, _std = GetCoefficientsFlux(1e-3*energy,1e-3*energy_width,flux_alpha,model=sklearn.linear_model.Ridge,years=1)
    coeffs = np.array(coeffs)

    ## Get toys
    # Reco toys are -ELep histograms and are shifted to E-ELep histograms where E is the mode of the virtual flux
    reco_toys = []
    for i in range(n_sys_toys):
        toy = np.load(f'NumpyELep/ELepToy{i}.npy')
        toy = shift(toy,energy)
        toy = np.apply_along_axis(rebin,1,toy,rebin_factor)
        reco_toys.append(toy)

    # True toys are E_nu-ELep histograms
    true_toys = []
    for i in range(n_sys_toys):
        toy = np.load(f'NumpyTrueOmega/TrueToy{i}.npy')[:,:8000]
        # Pad 8000 cells of 0 at the start of each row.
        toy = np.pad(toy, [(0, 0), (8000, 0)], mode='constant')
        toy = np.apply_along_axis(rebin,1,toy,rebin_factor)
        true_toys.append(toy)

    print("Done creating correctly binned toys")

    reco_toys_array = np.array(reco_toys)
    true_toys_array = np.array(true_toys)

    # Reco distributions are the weighted sums of measurements (= virtual flux measurements)
    reco_array = np.array([np.sum(toy * coeffs[:, np.newaxis], axis=0) for toy in reco_toys_array])

    # Same for true distributions
    true_array = np.array([np.sum(toy * coeffs[:, np.newaxis], axis=0) for toy in true_toys_array])

    # Normalization used to convert events to cross section
    E = (12/201)*1.1e21
    nucleons = 1.3954*(2*3*0.574)*1e3/1.66e-27
    ftilde = 1e38*(1/(E*nucleons*get_normalization(coeffs)))/(1e-3*rebin_factor)

    true = ftilde*true_array.mean(axis=0)
    np.save("TrueEnergy"+str(energy)+"BinWidth"+str(rebin_factor)+".npy",true)

    # Stat toys are toys based on 20 years of data, hence the division by `years`.
    rand_recos_stat = []
    for i in range(n_stat_toys):
        if i % 100 == 0:
            print("Getting reco stat toy "+str(i)+" at "+str(datetime.datetime.now()))
        toy = np.load(f'NumpyELepStat/ELepStatToy{i}.npy')
        toy = shift(toy,energy)
        toy = np.apply_along_axis(rebin,1,toy,rebin_factor)
        virtual_flux = toy.transpose().dot(coeffs)
        rand_recos_stat.append(ftilde*virtual_flux/years)
    rand_recos_stat = np.array(rand_recos_stat)

    rand_recos_sys = ftilde*reco_array

    print("Recos stat length = "+str(rand_recos_stat.shape))
    print("Recos sys length = "+str(rand_recos_sys.shape))

    print("Done getting stat and sys toys")

    suffix = "Energy"+str(energy)+"BinWidth"+str(rebin_factor)+"FluxWidth"+str(energy_width)+"Alpha"+str(flux_alpha)+".npy"
    np.save(results_dir+"/SysReco"+suffix,rand_recos_sys.transpose())
    np.save(results_dir+"/StatReco"+suffix,rand_recos_stat.transpose())

    print("Done!")

In [None]:
# This script calls virtual_measurement to generate results for different gaussians with different binning,
# and then saves the true per-mode histograms for the same virtual flux.

modes = ['CCQE','RES','2p2h','Other']
modes_arrays = [np.load("Mode"+mode+"Numpy_20Years.npy") for mode in modes]

# NOTE(review): `years` was used below without being defined in or before this cell, so a
# fresh Restart & Run All failed with NameError. 20 is assumed because the mode files are
# named "..._20Years" - confirm this is the intended exposure.
years = 20

# np.save does not create missing directories.
os.makedirs("FlexibleUnfoldingResults", exist_ok=True)

# Constants of the events -> cross section normalization.
E = (12/201)*1.1e21
nucleons = 1.3954*(2*3*0.574)*1e3/1.66e-27

# res = []
for energy in [500,625,750,875,1000]: #[500,625,750,875,1000] MeV gaussian peaks (modes)
    for rebin_factor in [40,60,80,100]: #[40,60,80,100] MeV bins
        for width in [70,100,130]: #[70,100,130] MeV gaussian sigma
            print('('+str(energy)+','+str(width)+','+str(rebin_factor)+')')
            virtual_measurement(rebin_factor,energy,width,input_flux_alpha=1e-12)
            # After calling virtual_measurement results are saved.

            ## This is for creating mode hists
            # The coefficients do not depend on the interaction mode, so fit them once per
            # (energy, width) instead of once per mode.
            coeffs, std = GetCoefficientsFlux(1e-3*energy,1e-3*width,1e-12,model=sklearn.linear_model.Ridge,years=1)
            coeffs = np.array(coeffs)
            normalization = get_normalization(coeffs)
            ftilde = 1e38*(1/(E*nucleons*normalization))/(1e-3*rebin_factor*years)

            for i in range(len(modes)):
                true_res_modes = rebin(ftilde*np.sum(modes_arrays[i] * coeffs[:, np.newaxis], axis=0),rebin_factor)
                np.save("FlexibleUnfoldingResults/TrueEnergy"+str(energy)+"BinWidth"+str(rebin_factor)+"FluxWidth"+str(width)+"Mode"+modes[i]+".npy",true_res_modes)

In [None]:
# This part creates mode CV histograms based on the .npy defined in ToysROOTtoNumpy.

# Per-mode histograms: the loops further down index the first axis by mode and shift along axis 2.
modes_res = np.load("ModesELepCV.npy")

def modify_array(arr, k):
    """Shift a 1-D array right by `k` entries, zero-filling the front.

    The result always has the same length as `arr`: the last `k` entries are
    dropped and `k` zeros are prepended. `k == 0` returns the values unchanged
    and `k >= arr.size` returns all zeros. A negative `k` raises ValueError
    (from np.zeros).

    Parameters:
        arr: 1-D numpy array.
        k:   non-negative integer number of entries to shift by.

    Returns:
        A new array of length `arr.size` (float zeros are concatenated, so
        integer input is promoted to float).
    """
    # Never pad with more zeros than the array can hold, so the length is preserved.
    n_zeros = min(k, arr.size)
    # Slice by explicit end index: arr[:-0] would be empty rather than the full array.
    return np.concatenate((np.zeros(n_zeros), arr[:arr.size - n_zeros]))
    
years = 1
modes = ['CCQE','RES','2p2h','Other']

# np.save does not create missing directories.
os.makedirs("FlexibleUnfoldingResults", exist_ok=True)

# Constants of the events -> cross section normalization.
E = (12/201)*1.1e21
nucleons = 1.3954*(2*3*0.574)*1e3/1.66e-27

for energy in [500,625,750,875,1000]: #[500,625,750,875,1000]
    # The shift depends only on the peak energy, so do it once per energy rather than
    # once per (rebin_factor, width) combination.
    modified_modes_res = np.apply_along_axis(modify_array, axis=2, arr=modes_res, k=energy)
    for rebin_factor in [40,60,80,100]: #[40,60,80,100]
        for width in [70,100,130]: #[70,100,130]
            print('('+str(energy)+','+str(width)+','+str(rebin_factor)+')')

            ## This is for creating mode hists
            # The coefficients do not depend on the interaction mode, so fit them once per
            # (energy, width) instead of once per mode.
            coeffs, std = GetCoefficientsFlux(1e-3*energy,1e-3*width,1e-12,model=linear_model.Ridge,years=1)
            coeffs = np.array(coeffs)
            normalization = get_normalization(coeffs)
            ftilde = 1e38*(1/(E*nucleons*normalization))/(1e-3*rebin_factor*years)

            for i in range(len(modes)):
                shifted_mode_res = rebin(ftilde*np.sum(modified_modes_res[i] * coeffs[:, np.newaxis], axis=0),rebin_factor)
                print(shifted_mode_res.shape)
                np.save("FlexibleUnfoldingResults/OmegaReco"+str(energy)+"BinWidth"+str(rebin_factor)+"FluxWidth"+str(width)+"Mode"+modes[i]+".npy",shifted_mode_res)