In [1]:
# This notebook can be used to plot our results for the integrated cross section analysis.
# Prerequisites:
# 1) "Perfect" gaussian NuWro files,
# NuWro/test"+locstring+"l"+widthstring+"w/flat_test"+locstring+"l"+widthstring+"w_Ar_SF_numu_NuWroOut_"+str(seeds[width][loc])+".root
# And see the relevant block for more details on the format
# 2) Thin gaussian NuWro generated .root files, same format
# 3) Full set of toys with E_recoil < 4 GeV in the format of a numpy array with shape (N_toys, 58 off-axis bins, 16000 energy bins)
# 4) Background rates, given in "background.txt" and generated with CalculateBackground.txt.
# 5) Prepared results without the E_recoil cut, with shape (3,7) where in the 0 axis,
# res[0] = measurement
# res[1] = uncertainty (stat+shape)
# res[2] = uncertainty (shape only)
# Item 5) is not strictly necessary: the no-cut results could be computed on the fly. However, the cached-memory limits of the SWAN configuration made it
# impossible to load two sets of toys in the same session, so the results were precalculated instead. The same calculation can be done with the
# code here, with simple modifications. The code that was used to produce them is in ToysROOTtoNumpy.ipynb.

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from ipynb.fs.full.CoefficientsCalcPlus import GetCoefficientsFlux
from ipynb.fs.full.CoefficientsCalcPlus import get_normalization
import sklearn.linear_model as linear_model
import scipy
import warnings; warnings.simplefilter('ignore')
import os
import re
from matplotlib.pyplot import figure
import random as rnd
import matplotlib.ticker as ticker
#import cvxpy as cp
import uproot4 as uproot
import scipy.stats
import seaborn as sns
import ROOT
from scipy.interpolate import make_interp_spline

Welcome to JupyROOT 6.30/04


In [3]:
# Helper functions

# Default location of the flux file read by the normalization helpers below.
_FLUX_FILE_PATH = "/eos/home-a/amgruber/SWAN_projects/DUNEPRISM_XSec_SDClone_EditedVersion/Fluxes.ND.root"


def _load_normalized_oa_flux(flux_path=_FLUX_FILE_PATH):
    """Read the 'LBNF_numu_flux' histogram and rescale it to the reference normalization.

    The histogram values are transposed so that row 0 can be summed, divided by a
    single global factor chosen such that ``row0.sum()*0.01`` equals
    ``first_bin_flux``, and transposed back to the histogram's original orientation.

    The rescaled row-0 integral is printed (kept from the original code as a
    sanity check).

    Parameters
    ----------
    flux_path : str
        Path of the ROOT file holding 'LBNF_numu_flux'.

    Returns
    -------
    numpy.ndarray
        Rescaled flux array in the same orientation as ``values()`` returned it.
    """
    with uproot.open(flux_path) as fFluxes:
        oa_flux = fFluxes['LBNF_numu_flux'].values()
    oa_flux = oa_flux.transpose()
    # Reference value that the integral of the first transposed row is scaled to.
    first_bin_flux = 8.9769702e-08
    # 0.01 multiplies every flux sum in this notebook
    # (presumably the energy bin width in GeV -- TODO confirm).
    norm = (oa_flux[0].sum()*0.01)/first_bin_flux
    oa_flux = oa_flux/norm
    print(oa_flux[0].sum()*0.01)
    return oa_flux.transpose()


def get_normalization(coeffs, flux_path=_FLUX_FILE_PATH):
    """Integrate the linear combination of fluxes defined by ``coeffs``.

    NOTE: this definition shadows the ``get_normalization`` imported from
    CoefficientsCalcPlus at the top of the notebook.

    Parameters
    ----------
    coeffs : array-like
        Coefficients multiplied onto the flux matrix with ``np.matmul``
        (one per column of the flux histogram).
    flux_path : str, optional
        Flux file to read; defaults to the path originally hard-coded here.

    Returns
    -------
    float
        Sum of the combined flux over all bins, times 0.01.
    """
    lin_combo = np.matmul(_load_normalized_oa_flux(flux_path), coeffs)
    return lin_combo.sum()*0.01


def get_normalization_cutoff(coeffs,cutoff, flux_path=_FLUX_FILE_PATH):
    """Same as ``get_normalization`` but only summing bins below ``cutoff``.

    Parameters
    ----------
    coeffs : array-like
        Coefficients multiplied onto the flux matrix with ``np.matmul``.
    cutoff : float
        Cutoff in GeV; converted to a bin index with 800/8 = 100 bins per unit.
    flux_path : str, optional
        Flux file to read; defaults to the path originally hard-coded here.

    Returns
    -------
    float
        Sum of the combined flux over bins ``[0, cutoff_bin)``, times 0.01.
    """
    # Cutoff in GeV
    cutoff_bin = int(cutoff*(800/8))
    lin_combo = np.matmul(_load_normalized_oa_flux(flux_path), coeffs)
    return np.sum(lin_combo[:cutoff_bin])*0.01

def get_gaussian_std(prediction,energy_bin_centers,oa_events):
    """Fit ``gaussian`` to ``prediction`` and return the fitted width and mean.

    Parameters
    ----------
    prediction : array-like
        Values to fit, sampled at ``energy_bin_centers``.
    energy_bin_centers : array-like
        Abscissa of the fit.
    oa_events : any
        Unused; kept so existing callers keep working.

    Returns
    -------
    list
        ``[scale, loc]`` -- note the order: width first, then mean.
    """
    # Local import: the notebook imports `scipy` and `scipy.stats` but never binds
    # the bare name `optimize`, so the original call raised NameError.
    from scipy import optimize

    gfit = optimize.curve_fit(gaussian,energy_bin_centers,prediction)[0]
    loc,scale = gfit[0],gfit[1]
    return [scale,loc]


def gaussian(x,loc,scale,*,rescaling=None):
    """Normal probability density at ``x``, multiplied by ``rescaling``.

    Parameters
    ----------
    x : float or numpy.ndarray
        Evaluation point(s).
    loc : float
        Mean of the gaussian.
    scale : float
        Standard deviation of the gaussian.
    rescaling : float, optional (keyword-only)
        Overall multiplicative factor. Keyword-only so that ``curve_fit`` still
        sees exactly two fit parameters (``loc`` and ``scale``). When omitted, a
        module-level ``rescaling`` is used if one exists (the original code relied
        on such a global, which this notebook never defines), otherwise 1.0.
    """
    if rescaling is None:
        rescaling = globals().get("rescaling", 1.0)
    return (1/(scale*((2*np.pi)**0.5)))*np.exp((-((x-loc)/scale)**2)/2)*rescaling

def get_virtual_flux(energy,width,plot=True,alpha=1e-12):
    """Fit a linear combination of the flux histogram columns to a gaussian target.

    The target is a normal pdf with mean ``1e-3*energy`` and standard deviation
    ``1e-3*width``, scaled by the maximum of the flux histogram. A Ridge
    regression without intercept is fitted with the flux matrix as design matrix,
    so the prediction is the flux matrix times the fitted coefficients.

    Parameters
    ----------
    energy : float
        Target mean; multiplied by 1e-3 before use (so presumably MeV, given the
        energy axis is commented as GeV -- TODO confirm).
    width : float
        Target standard deviation, same units as ``energy``.
    plot : bool, optional
        If true, plot target and fitted ("virtual") flux.
    alpha : float, optional
        Ridge regularization strength.

    Returns
    -------
    tuple
        ``(energy_bin_centers, virtual_flux)``: bin centres of histogram axis 0
        and the fitted flux evaluated on them.
    """
    # Get DUNE near detector muon neutrino flux as a function of off-axis angle
    with uproot.open("Fluxes.ND.root") as fFluxes :
        flux_hist = fFluxes['LBNF_numu_flux']
        oa_flux = flux_hist.values()
        energy_bins = flux_hist.axis(0).edges() # in GeV

    target_loc = 1e-3*energy
    target_scale = 1e-3*width
    # Scale the unit-area gaussian to the magnitude of the flux values.
    rescaling = oa_flux.max()
    print(rescaling)
    energy_bin_centers = np.add(energy_bins[:-1], energy_bins[1:])/2.
    target_flux = scipy.stats.norm.pdf(energy_bin_centers, loc = target_loc, scale = target_scale)*rescaling

    clf = linear_model.Ridge(alpha=alpha,fit_intercept=False) #when fit_intercept=False OA(dot)x = predict(x)
    clf.fit(oa_flux,target_flux)
    # Evaluate the fitted combination once and reuse it for the plot and the return value.
    virtual_flux = clf.predict(oa_flux)

    if plot:
        plt.plot(energy_bin_centers, target_flux, label = "Target Flux")
        plt.plot(energy_bin_centers, virtual_flux, label = "Virtual Flux")
        plt.xlabel(r"E$_{\nu}$ [GeV]")

        plt.legend();
        plt.grid(True, color = "grey", linewidth = "1", linestyle = "-")
        plt.show()

    return energy_bin_centers, virtual_flux

def shift(toy,energy):
    """Roll ``toy`` by ``energy`` bins along its last axis and zero the tail.

    ``np.roll`` returns a new array, so the input is never modified.

    Parameters
    ----------
    toy : array-like
        Array whose last axis is shifted.
    energy : int
        Number of bins to roll by. A shift of 0 returns an unchanged copy.

    Returns
    -------
    numpy.ndarray
        Rolled array with ``toy[..., -energy:]`` set to 0 (for ``energy != 0``).
    """
    toy = np.roll(toy,energy,axis=-1)
    if energy == 0:
        # `toy[..., -0:]` is the whole axis: without this guard a zero shift
        # would wipe every entry instead of leaving the data untouched.
        return toy
    # NOTE(review): for energy > 0, np.roll wraps the last `energy` entries to the
    # FRONT, while this slice zeroes the last `energy` entries of the rolled array.
    # Kept as in the original; confirm whether `toy[..., :energy] = 0` was intended.
    toy[..., -energy:] = 0
    return toy

In [None]:
# Fetch 70 MeV gaussian cross section (using fScaleFactor, which is a NuWro output)
# Seeds are required using the naming convention we used in our NuWro-generated .root files and should be updated manually

# seeds[width][loc] -> seed that appears in the NuWro output file name.
seeds = {0.07:{0.5:30288,0.75:16134,1:29554,1.25:23129,1.5:22920,1.75:22516,2:25490},0.1:{0.5:28604,0.75:17160,1:12196,1.25:29728,1.5:19473},0.078:{0.75:27454}}

# xsecs[width][loc] -> 1e6 * fScaleFactor of the first entry in the file's FlatTree_VARS tree.
xsecs = {}
for width in seeds:
    xsecs[width] = {}
    widthstring = str(width).replace(".","")
    for loc in seeds[width]:
        locstring = str(loc).replace(".","")
        # Folder and file share the same "test<loc>l<width>w" tag.
        tag = "test"+locstring+"l"+widthstring+"w"
        nuwro_path = "NuWro/"+tag+"/flat_"+tag+"_Ar_SF_numu_NuWroOut_"+str(seeds[width][loc])+".root"
        file = ROOT.TFile.Open(nuwro_path)
        if not file or file.IsZombie():
            # Fail with the offending path instead of a null-pointer error on file.Get.
            raise OSError("Could not open NuWro file: "+nuwro_path)
        try:
            t = file.Get("FlatTree_VARS")
            # Only the first entry is read; the loop is left immediately.
            for event in t:
                xsecs[width][loc] = 1e6*event.fScaleFactor
                break
            else:
                # Reached only when the tree has no entries at all.
                raise RuntimeError("FlatTree_VARS is empty in "+nuwro_path)
        finally:
            file.Close()

# Materialized as lists (same order as the dict) so they can be indexed and reused.
vals = list(xsecs[0.07].values())
locs = list(xsecs[0.07].keys())

In [None]:
# Input files for this analysis.
full_res_path = "../FluxTest/NumpyOmega4000CutFixed/FullResults.npy"
background_path = "background.txt"

# Full set of toys needed to simulate the uncertainties:
# numpy array of shape (N_toys, 58 off-axis bins, 16000 energy bins).
full_res = np.load(full_res_path)

# Background rates.
background = np.loadtxt(background_path)

In [None]:
# Calculate cross sections based on full_res

xsec = []
dxsec_sys = []   # NOTE(review): never filled in this notebook; kept in case other cells use it
dxsec_stat = []  # NOTE(review): never filled in this notebook; kept in case other cells use it
energies = np.array([1e3*loc for loc in locs])
denergies = [] # Gaussian widths
sys_results = []
stat_results = []
years = 5
n_stat_throws = 1000
stat_seed = 12345  # fixed seed so the statistical throws are reproducible on re-run

rng = np.random.default_rng(stat_seed)

# Central-value event totals per off-axis bin (mean over toys, summed over energy bins).
full_res_cv_total = full_res.mean(axis=0).sum(axis=1)
# Poisson throws of the `years`-scaled central value: shape (n_stat_throws, n off-axis bins).
res_stat = rng.poisson(years*full_res_cv_total, size=(n_stat_throws, full_res_cv_total.shape[0]))

# Per-toy event totals per off-axis bin, shape (N_toys, n off-axis bins). This does not
# depend on the target energy, so it is computed once; it also avoids building a
# temporary as large as full_res on every iteration.
toy_totals = full_res.sum(axis=2)

# Normalization constants (independent of the target energy)
rebin_factor = 1
E = (12/201)*1.1e21
nucleons = 1.3954*(2*3*0.574)*1e3/1.66e-27

for energy in energies:
    alpha = 1e-12
    coeffs, std = GetCoefficientsFlux(1e-3*energy,0.07,alpha,model=linear_model.Ridge,years=1)
    # NOTE(review): `norm` is not used below; the call is kept because it prints a
    # sanity-check value and other cells may read `norm`.
    norm = get_normalization(coeffs)

    # Normalization (flux integral restricted to below 4 GeV)
    ftilde = 1e38*(1/(E*nucleons*get_normalization_cutoff(coeffs,4)))/(1e-3*rebin_factor)

    denergies.append(1e3*std)

    # Coefficients with the background fraction removed, as a column vector.
    expanded_coeffs = np.expand_dims(coeffs*(1-background), axis=1)

    ## Sys: coefficient-weighted event total for every toy, shape (N_toys,)
    sys_result = np.squeeze(toy_totals @ expanded_coeffs, axis=1)
    sys_results.append(sys_result)

    xsec.append((ftilde*sys_result).mean()*0.001)

    ## Stat: same weighting applied to the Poisson throws, converted back to one year
    temp_res = res_stat @ expanded_coeffs
    stat_result = temp_res * ftilde * 0.001 / years
    stat_results.append(np.squeeze(stat_result,axis=1))

# Shapes: (n energies, N_toys) and (n energies, n_stat_throws)
sys_results = np.array(sys_results)
stat_results = np.array(stat_results)

In [6]:
# Central values of the measurement and the "perfect gaussian" reference.
cv = np.array(xsec)
true = 1e38*np.array(vals)

# Convert energies and widths by a factor 1e-3, but only on the first execution:
# once converted, the first energy is no longer 500 and the branch is skipped.
needs_conversion = energies[0] == 500
if needs_conversion:
    energies = 1e-3*np.array(energies)
    denergies = 1e-3*np.array(denergies)

## Systematics, using Norm-Shape decomposition
sys_results_t = np.transpose(sys_results) # Shape: [#toys, #energies]
row_sums = sys_results_t.sum(axis=1)
expanded_sums = row_sums[:, np.newaxis]
# Every toy is divided by its own sum over energies ...
sys_results_normed = sys_results_t / expanded_sums
# ... and the last column is replaced by that sum (the normalization component).
sys_results_normed[:,-1] = row_sums

cov_test = np.cov(sys_results_normed.T)
sums_mean = row_sums.mean()
# Relative shape uncertainty for all but the last energy point.
shape_sigma = np.sqrt(cov_test.diagonal()[:-1])
shape_mean = sys_results_normed.T[:-1,:].mean(axis=1)
shape = shape_sigma/shape_mean
# The last point carries the normalization, so its shape uncertainty is set to 0.
shape_unc = np.abs(cv*np.append(shape,0))
norm_unc = cv*np.sqrt(cov_test[-1,-1])/sums_mean

## Statistics
stat_cov = np.cov(stat_results)
stat_unc = np.sqrt(np.diagonal(stat_cov))

## Stat + Shape
stat_shape_unc = np.sqrt(stat_unc**2+shape_unc**2)

## By the end of this block, cv = reco; centers = x axis; shape_unc and norm_unc are the decomposed uncertainties

deco_cov = np.cov(sys_results_normed.T)

In [8]:
# Get true cross section, based on thin fluxes (effectively 0.0001 GeV width gaussians)

# Base directory of the NuWro outputs; the thin-flux folders are read from
# thin_fluxes_path + 'ThinFluxes' at the end of this cell.
thin_fluxes_path = "NuWro/"

def process_flat_file(file_path):
    """Read the scale factor of the first entry of a flat-tree ROOT file.

    Parameters
    ----------
    file_path : str
        Path of a ROOT file containing a 'FlatTree_VARS' tree.

    Returns
    -------
    list
        ``[1e38 * 1e6 * fScaleFactor]`` of the first tree entry, or an empty
        list if the tree has no entries.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    values = []
    file = ROOT.TFile.Open(file_path, "READ")
    if not file or file.IsZombie():
        # Report the offending path instead of failing later on a null file.
        raise OSError(f"Could not open ROOT file: {file_path}")
    try:
        t = file.Get("FlatTree_VARS")
        # Only the first entry is needed; leave the loop right away.
        for event in t:
            values.append(1e38 * 1e6 * event.fScaleFactor)
            break
    finally:
        # Close the file even if reading the tree raised.
        file.Close()
    return values

def convert_folder_name_to_float(folder_name):
    """Turn the digits following 'test' in a folder name into a float.

    The first digit becomes the integer part and the remaining digits the
    fractional part, e.g. 'test075' -> 0.75 and 'test1' -> 1.0.
    """
    match = re.search(r'test(\d+)', folder_name)
    digits = match.group(1)
    integer_part, fractional_part = digits[0], digits[1:]
    return float(integer_part + "." + fractional_part)

def collect_values(target_directory):
    """Collect one value per 'test<digits>' sub-folder of ``target_directory``.

    For every sub-folder whose name starts with 'test<digits>', the first file
    (in sorted name order) starting with 'flat' is passed to
    ``process_flat_file``. Folders without such a file, or whose file yields no
    value, are skipped entirely so that both returned lists always stay aligned.

    Parameters
    ----------
    target_directory : str
        Directory containing the 'test<digits>' folders.

    Returns
    -------
    tuple of (list, list)
        ``(all_values, folder_floats)``, index-aligned and sorted by increasing
        folder float (``os.listdir`` order is arbitrary, and interpolation over
        these points needs increasing x values).
    """
    pairs = []
    for item in sorted(os.listdir(target_directory)):
        item_path = os.path.join(target_directory, item)
        if not (os.path.isdir(item_path) and re.match(r'test\d+', item)):
            continue
        flat_files = sorted(name for name in os.listdir(item_path) if name.startswith('flat'))
        if not flat_files:
            # Previously the folder float was still recorded here, shifting every
            # later value against its energy.
            continue
        values = process_flat_file(os.path.join(item_path, flat_files[0]))
        if not values:
            continue
        pairs.append((convert_folder_name_to_float(item), values[0]))

    pairs.sort(key=lambda pair: pair[0])
    folder_floats = [pair[0] for pair in pairs]
    all_values = [pair[1] for pair in pairs]
    return all_values, folder_floats

target_directory = thin_fluxes_path+'ThinFluxes'  # Change this to your target directory
all_values, folder_floats = collect_values(target_directory)

if len(all_values) != len(folder_floats):
    # The energy -> value mapping below relies on the two lists being index-aligned.
    raise ValueError("collect_values returned lists of different lengths")

# Sort both lists by energy: directory listing order is arbitrary and
# make_interp_spline requires increasing x values.
_order = np.argsort(folder_floats)
folder_floats = [folder_floats[i] for i in _order]
all_values = [all_values[i] for i in _order]

# Thin-flux value at each analysis energy. Matching is done with a tolerance
# because `energies` went through a 1e3 / 1e-3 round trip and need not be
# bit-identical to the floats parsed from the folder names.
_folder_energies = np.asarray(folder_floats, dtype=float)
true_thin = []
for val in energies:
    _nearest = int(np.argmin(np.abs(_folder_energies - val)))
    if not np.isclose(_folder_energies[_nearest], val):
        raise ValueError(f"No thin-flux folder found for energy {val}")
    true_thin.append(all_values[_nearest])

# Spline between datapoints
energy_spline = np.linspace(np.array(folder_floats).min(), np.array(folder_floats).max(), 1000)
true_spline = make_interp_spline(folder_floats, all_values)(energy_spline)

In [10]:
# Integrated result without the recoil-energy cut (precomputed).
# Rows: 0 = measurement, 1 = stat+shape uncertainty, 2 = shape-only uncertainty.

nocutres = np.load('../FluxTest/IntegratedNoCutRes.npy')
xsec_nocut, dxsec_stat_shape_nocut, dxsec_shape_nocut = (nocutres[row] for row in range(3))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as mcolors

# Set global plotting parameters
plt.rcParams['text.usetex'] = True
plt.rcParams['font.size'] = 32
plt.rcParams['xtick.labelsize'] = 32
plt.rcParams['ytick.labelsize'] = 32

# Plotting configurations
lwidth = 2.5
capwidth = 1.5
col = 'mediumvioletred'
colors = ['#377eb8', '#ff7f00', '#4daf4a', '#f781bf', '#a65628',
          '#984ea3', '#999999', '#e41a1c', '#dede00']

# Lighter colors for the total-uncertainty bars: every RGB channel is moved
# halfway towards 1, alpha is kept.
lighter_colors = []
for base_color in colors:
    red, green, blue, alpha_channel = mcolors.to_rgba(base_color)
    lighter_colors.append(((1 + red) / 2, (1 + green) / 2, (1 + blue) / 2, alpha_channel))

# Total uncertainty = (stat + shape) and normalization added in quadrature.
# NOTE(review): `total_uncertainty` was used below without ever being defined in
# this notebook; this definition follows the "total" vs "stat and shape" split
# of the error bars -- confirm it matches the intended quantity.
total_uncertainty = np.sqrt(stat_shape_unc**2 + norm_unc**2)

# The last energy point is not drawn (it carries the normalization in the
# norm-shape decomposition).
n_plotted = len(energies) - 1

# Create a figure with two subplots (main plot and residuals)
fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True, figsize=(12, 8),
                               gridspec_kw={'height_ratios': [4, 1]}, dpi=300)
fig.set_facecolor('white')

# Residual calculation: measured (PRISM) minus true thin-flux value
residuals = cv - np.asarray(true_thin)

# Plotting the no-cut data (background gray lines)
nocutcolor = str(0.4)
ax1.errorbar(energies[:-1], xsec_nocut[:-1], xerr=denergies[:-1], yerr=dxsec_stat_shape_nocut[:-1],
             color=nocutcolor, fmt='_', linewidth=lwidth, capsize=3, capthick=2, ecolor=nocutcolor)

# Dummy point that only provides the legend entry
legend_label = 'PRISM flux (no cut) \n Shape (inner) $\\bigoplus$ Stat (outer) Unc'
ax1.errorbar([0], [0], xerr=[0], yerr=[0], color='black', fmt='_',
             label=legend_label, linewidth=lwidth, capsize=2, capthick=capwidth, ecolor='black')

# Plot each energy point with error bars
for i in range(n_plotted):
    # Plot the total uncertainty (lighter color)
    ax1.errorbar(energies[i], cv[i], yerr=total_uncertainty[i],
                 color=lighter_colors[i], fmt=' ', linewidth=lwidth, capsize=3,
                 capthick=capwidth, ecolor=lighter_colors[i], zorder=10)

    # Plot statistical and shape uncertainties (darker color)
    ax1.errorbar(energies[i], cv[i], xerr=denergies[i], yerr=stat_shape_unc[i],
                 color=colors[i], fmt='_', linewidth=lwidth, capsize=3,
                 capthick=capwidth, ecolor=colors[i], zorder=10)

    # Plot residuals with error bars
    ax2.errorbar(energies[i], residuals[i], yerr=total_uncertainty[i],
                 color=lighter_colors[i], fmt=' ', linewidth=lwidth, capsize=3,
                 capthick=capwidth, ecolor=lighter_colors[i], zorder=10)

# Plot the true Gaussian flux for reference
ax1.errorbar(energies[:-1], true[:-1], xerr=[0.07] * len(true[:-1]), fmt='.',
             color='black', label='Perfect Gaussian flux')

# Plot the theoretical curve
ax1.plot(energy_spline, true_spline, color='black', linestyle='--',
         label=r'True ($\Phi(E_{\nu}) = \delta(E_{\nu}-\tilde{E}_{\nu})$)')

# Formatting the main plot (raw strings: same text, no invalid escape sequences)
ax1.set_xlim(0.3, 2)
ax1.set_ylim(0, 2.5)
ax1.set_ylabel(r"$\langle \sigma \rangle$ [$10^{-38}\mathrm{cm}^{2}$]")
ax1.grid(True, color="lightgray", linewidth=0.8, linestyle="--")

# Formatting the residuals plot
ax2.axhline(y=0, color='black', linestyle='--')
ax2.set_ylim(-0.25, 0.25)
ax2.set_xlabel(r"$E_{\nu}$ [$\mathrm{GeV}$]")
# NOTE(review): the label used to read "True - PRISM" while the plotted residuals
# are cv - true_thin; the label now matches the data. Flip the sign of
# `residuals` instead if "True - PRISM" is what should be shown.
ax2.set_ylabel(r"$\rm PRISM - True$")
ax2.grid(True, color="lightgray", linewidth=0.8, linestyle="--")

# Adding a text annotation
ax1.text(0.8, 0.94, r"DUNE Simulation", transform=ax1.transAxes, fontsize=18,
         color='grey', verticalalignment='bottom', horizontalalignment='left')

# Adding the legend with customized order and style. Only indices that exist
# are used: three artists carry a label here, so a fixed four-entry order would
# raise IndexError.
handles, labels = ax1.get_legend_handles_labels()
order = [idx for idx in [0, 2, 1, 3] if idx < len(handles)]
legend = ax1.legend([handles[idx] for idx in order], [labels[idx] for idx in order],
                    fontsize=22, frameon=False, loc='upper left', bbox_to_anchor=(0., 1.05))

fig.subplots_adjust(hspace=0.1)

# Display the plot
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# === Global Plot Settings ===
plt.rcParams['text.usetex'] = True
plt.rcParams['font.size'] = 30
plt.rcParams['xtick.labelsize'] = 28
plt.rcParams['ytick.labelsize'] = 28

# === Configuration ===
font = 30
ticks = np.arange(len(energies) - 1)
energy_tick_labels = [f"${energy}$" for energy in energies[:-1]]


def _plot_square_matrix(matrix, title, tick_labels, fontsize=font):
    """Draw ``matrix`` as a coolwarm image with the color scale fixed to [-1, 1].

    Parameters
    ----------
    matrix : 2-D array
        Matrix to draw.
    title : str
        Figure title.
    tick_labels : list of str
        One label per leading row/column; placed at positions 0..len-1.
    fontsize : int
        Font size for title and axis labels (tick labels use ``fontsize - 2``).

    Returns
    -------
    tuple
        ``(fig, ax, im)`` of the created figure.
    """
    fig, ax = plt.subplots(figsize=(8, 6), dpi=300)
    fig.set_facecolor('white')
    im = ax.imshow(matrix, cmap='coolwarm', vmin=-1, vmax=1)
    positions = np.arange(len(tick_labels))
    ax.set_xticks(positions)
    ax.set_xticklabels(tick_labels, rotation=45, fontsize=fontsize - 2)
    ax.set_yticks(positions)
    ax.set_yticklabels(tick_labels, fontsize=fontsize - 2)
    ax.set_title(title, fontsize=fontsize)
    ax.set_xlabel(r"$E_{\nu}$ [$\mathrm{GeV}$]", fontsize=fontsize)
    ax.set_ylabel(r"$E_{\nu}$ [$\mathrm{GeV}$]", fontsize=fontsize)
    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
    fig.tight_layout()
    plt.show()
    return fig, ax, im


# === 1. Statistical Correlation Matrix ===
print("Plotting Statistical Correlation Matrix...")
# [:-1, :-1] drops the last energy point and, as in the original code, also the
# last throw.
covariance_stat = np.cov(stat_results[:-1, :-1])
corrcoef_stat = np.corrcoef(stat_results[:-1, :-1])
fig, ax, im = _plot_square_matrix(corrcoef_stat, "Statistical Correlation", energy_tick_labels)


# === 2. Systematic Correlation Matrix ===
print("Plotting Systematic Correlation Matrix...")
# sys_results holds raw coefficient-weighted event sums, whereas stat_results is
# already in cross-section units. Since cv[i] is the per-energy factor times the
# toy mean of sys_results[i], cv / mean recovers that factor and brings the
# systematic toys to the same units, so the two covariances can be added below.
# (Correlations are unaffected by this per-row rescaling.)
sys_to_xsec = cv[:-1] / sys_results[:-1].mean(axis=1)
sys_results_xsec = sys_results[:-1, :-1] * sys_to_xsec[:, None]
covariance_sys = np.cov(sys_results_xsec)
corrcoef_sys = np.corrcoef(sys_results_xsec)
fig, ax, im = _plot_square_matrix(corrcoef_sys, "Systematic Correlation", energy_tick_labels)


# === 3. Total Correlation Matrix ===
print("Plotting Total Correlation Matrix...")
covariance_matrix = covariance_stat + covariance_sys
sqrt_diagonal = np.sqrt(np.diag(covariance_matrix))
correlation_matrix = covariance_matrix / (sqrt_diagonal[:, None] * sqrt_diagonal[None, :])
fig, ax, im = _plot_square_matrix(correlation_matrix, "Total Correlation", energy_tick_labels)


# === 4. Decomposed Shape and Normalization Covariance ===
print("Plotting Decomposed Shape and Normalization Covariance...")
deco_cov = np.cov(sys_results_normed.T)
# The matrix has one more row/column than there are plotted energies: the last
# entry is the normalization component, which previously had no tick label.
fig, ax, im = _plot_square_matrix(deco_cov, "Decomposed Shape and Normalization Covariance",
                                  energy_tick_labels + ["Norm"])
