In [None]:
# Import sys, then tell python where to find the nEXO-specific classes
import sys
sys.path.append('../../../modules')

In [None]:
# Import useful libraries for analysis
import pandas as pd
import hist
from hist import Hist
import numpy as np
from matplotlib import pyplot as plt
import os
from pathlib import Path 
import hashlib
import itertools
import glob
import io
from contextlib import redirect_stdout, redirect_stderr

import seaborn as sns
sns.set_style('white')
sns.set_context('poster')

plt.rcParams.update({'font.size': 18})
plt.rcParams['figure.figsize'] = (10,8)

from cycler import cycler
plt.rcParams['axes.prop_cycle'] = cycler(color='bgrcmyk')

In [None]:
# Import the nEXO sensitivity classes
import nEXOFitWorkspace
import nEXOFitModel
import nEXOFitLikelihood

# Setup

In [None]:
# Directories used by the rest of the notebook:
#   basedir   - holds the ComponentsTables read by get_expected_counts
#   outputdir - searched for the per-tag toy-fit h5 files
#   plotdir   - where the figures are written (created here if missing)
basedir = Path('/p/lustre2/nexouser/samuele/multivarstudy')
outputdir = basedir.joinpath('output')
plotdir = Path('plots')
plotdir.mkdir(parents=True, exist_ok=True)

In [None]:
# Read the DNN smoothing results obtained from the dnn_smoothing notebook,
# indexed by the DNN smoothing factor
dnn_factors_df = pd.read_csv('dnn_smoothing_results.csv').set_index('dnn_f')

# The chosen signal efficiency selects which '<name>_<efficiency>' columns
# become the default 'dnn_cut_value' and 'bkg_misID' columns
signal_efficiency = 80
dnn_factors_df = dnn_factors_df.assign(
    dnn_cut_value=dnn_factors_df[f'dnn_cut_value_{signal_efficiency}'],
    bkg_misID=dnn_factors_df[f'bkg_misID_{signal_efficiency}'],
)

display(dnn_factors_df)

In [None]:
# Parameter grid scanned by the study (a factor of 3. was also listed as an
# option for the three scale factors in earlier versions of this cell)
dnn_factors = dnn_factors_df.index
xe137_scale_factors = [1.0, 1.2, 1.5, 2.0]
rn222_scale_factors = [1.0, 1.2, 1.5, 2.0]
bkg_scale_factors = [1.0, 1.2, 1.5, 2.0]
energy_res_factors = [0.008, 0.01, 0.011, 0.012, 0.014]

# Empty summary table with one row per parameter combination, indexed by tag
df = pd.DataFrame(
    columns=[
        'tag',
        'dnn_f', 'bkg_misID', 'xe137_f', 'rn222_f', 'bkg_f',
        'energy_res_f', 'convergence_f',
        'median', 'sens', 'sens_1e28',
    ]
).set_index('tag')
display(df)

In [None]:
# HDF5 file holding the summary table from previous runs of this notebook
results_file = Path('multivar_study_results.h5')
# delete prior data if needed by uncommenting the following line
# this will result in all data being reloaded which may take some time:
# results_file.unlink()

In [None]:
# load prior data if available; otherwise `df` stays as it was before this cell
if results_file.is_file():
    df = pd.read_hdf(results_file) 
#     display(df)
# df = df.drop(index=['4A78FC', '1E85A6', 'AD42F2'])
# Tags already present in the table at this point; later cells skip these tags
# instead of reloading and reprocessing them
existing_tags = df.index
print(existing_tags)

In [None]:
def get_tag(xe137_scale_factor, rn222_scale_factor, dnn_scale_factor, bkg_scale_factor, energy_res):
    """Return the short tag identifying one parameter combination.

    The five values are written with four decimals into a descriptive string
    ('Xe137:... Rn222:... DNN:... Bkg:... ERes:...'); the tag is the first six
    hex digits (upper case) of the MD5 digest of that string.

    Returns:
        (tag, s): the 6-character tag and the descriptive string it was hashed from.
    """
    # This should match the hash string used in Compute90PercentLimit script
    # FIXME: hash should not be calculated both here and Compute90PercentLimit
    labelled_values = (
        ('Xe137', xe137_scale_factor),
        ('Rn222', rn222_scale_factor),
        ('DNN', dnn_scale_factor),
        ('Bkg', bkg_scale_factor),
        ('ERes', energy_res),
    )
    s = ' '.join(f'{label}:{value:0>4.4f}' for label, value in labelled_values)
    digest = hashlib.md5(s.encode('utf-8')).hexdigest()
    return digest[:6].upper(), s

In [None]:
def get_expected_counts(df, xe137_scale_factor, rn222_scale_factor, dnn_f, bkg_scale_factor, energy_res_f):
    """Compute the expected counts in the ROI for one parameter combination and store them in `df`.

    A workspace is built from the fine-binning config and the components table
    matching `dnn_f` and `energy_res_f`. The specific activities (and their
    errors) of the listed gamma-ray isotopes are multiplied by
    `bkg_scale_factor`, those of the Xe137 and Ar42 components by
    `xe137_scale_factor`, and the Rn222 model variable by `rn222_scale_factor`.

    The results are written into the row of `df` labelled with the tag returned
    by get_tag: the total goes to 'TotalROIBkg' and every model variable whose
    name does not contain 'Shape' gets a column named after it.

    Returns None in all cases. If the components table file does not exist the
    function returns early and `df` is left untouched.
    """
    
    # Note that I use ComponentsTable and Config file that define a much finer binning
    config = '/g/g92/samuele/nEXO/sensitivity/work/SensitivityPaper2020_scripts/' \
             'MultivariationStudy/Sensitivity2020_Optimized_DNN_Standoff_Binning_version1_fineBinning.yaml'
    input_table = basedir / 'ComponentsTables' / f'ComponentsTable_D-024_DNN_factor={dnn_f}_ERes={energy_res_f}_fineBinning.pkl.gz'    
    
    # Nothing to compute if there is no components table for this (dnn_f, energy_res_f)
    if not input_table.is_file(): 
        return
    
    # Only the tag is used below; the descriptive string `s` is not needed here
    tag, s = get_tag(xe137_scale_factor, rn222_scale_factor, dnn_f, bkg_scale_factor, energy_res_f)

    workspace = nEXOFitWorkspace.nEXOFitWorkspace(config)
    workspace.LoadComponentsTableFromFile(input_table)

    for index, row in workspace.df_components.iterrows():
        # Scale the gamma ray background components, except radon
        isotopes_to_leave_alone = ['Ar42', 'Xe137', 'bb2n', 'bb0n', 'B8nu', 'Rn222', ]  # just for bookkeeping
        isotopes_to_scale = ['K40', 'Co60', 'Al26', 'Th232', 'U238', 'Cs137']
        # The format is <isotope>_<part>, e.g. "Th232_HVCables"
        if row['PDFName'].split('_')[0] in isotopes_to_scale:
            print(f'Scaling {row["PDFName"]}...')
            workspace.df_components.loc[index, 'SpecActiv'] = bkg_scale_factor * row['SpecActiv']
            workspace.df_components.loc[index, 'SpecActivErr'] = bkg_scale_factor * row['SpecActivErr']

        # Scale the Xe137 and Ar42 components.
        if 'Xe137' in row['PDFName'] or 'Ar42' in row['PDFName']:
            workspace.df_components.loc[index, 'SpecActiv'] = xe137_scale_factor * row['SpecActiv']
            workspace.df_components.loc[index, 'SpecActivErr'] = xe137_scale_factor * row['SpecActivErr']

    workspace.CreateGroupedPDFs()

    # Define the ROI within the workspace
    # Note that the ROI energy depends on the resolution since it's +/- FWHM/2
    # The DNN cut is the 'dnn_cut_value' column of dnn_factors_df, i.e. the cut
    # for the signal efficiency configured in `signal_efficiency`
    fwhm = 2457 * energy_res_f * 2.35
    roi_dict = {'DNN': [dnn_factors_df.loc[dnn_f]['dnn_cut_value'], 1.],
                'Energy (keV)': [2457. - fwhm/2., 2457. + fwhm/2.],
                'Standoff (mm)': [104.5, 650.]}
    workspace.DefineROI(roi_dict)

    # Create the likelihood object
    likelihood = nEXOFitLikelihood.nEXOFitLikelihood()
    likelihood.AddPDFDataframeToModel(workspace.df_group_pdfs, workspace.histogram_axis_names)

    initial_guess = likelihood.GetVariableValues()

    # Scale the Rn222 component according to the input value
    rn222_idx = likelihood.model.GetVariableIndexByName('Rn222')
    initial_guess[rn222_idx] *= rn222_scale_factor

    # Update the model in the likelihood object
    likelihood.model.UpdateVariables(initial_guess)
    likelihood.model.GenerateModelDistribution()

    # Store the total number of events in the ROI
    df.at[tag, 'TotalROIBkg'] = likelihood.model.GetIntegralInBinRange(workspace.GetROIBinIndices())

    # Store the ROI counts of each component, skipping the 'Shape' variables
    for component in likelihood.model.variable_list:
        if 'Shape' in component['Name']:
            continue
        num_counts_in_roi = likelihood.model.GetComponentIntegralInBinRange(
            component['Name'], workspace.GetROIBinIndices())
        df.at[tag, component['Name']] = num_counts_in_roi

# Load Data

In [None]:
def load_fit_results(tag, s = None):
    """Load and concatenate all toy-fit h5 files in `outputdir` whose name contains `tag`.

    Args:
        tag: tag string searched for in the file names.
        s: optional description printed after the loading message.

    Returns:
        A single DataFrame with a fresh integer index holding the rows of all
        matching files, or None when no file matches.
    """
    matching_files = glob.glob(f'{outputdir}/*{tag}*.h5')
    if not matching_files:
        return None

    print(f'Loading data for tag {tag}')
    if s:
        print(s)

    toy_frames = []
    for filename in matching_files:
        toy_frames.append(pd.read_hdf(outputdir/Path(filename)))
    df_tmp = pd.concat(toy_frames, ignore_index=True)
    print(f'---> Loaded {len(df_tmp)} toy datasets')
    return df_tmp

In [None]:
# Toy-fit results loaded in this session, keyed by tag
dfdict = dict()
# Scan the full parameter grid; tags already in the cached table are skipped
for dnn_scale_factor, xe137_scale_factor, rn222_scale_factor, bkg_scale_factor, energy_res \
        in itertools.product(dnn_factors, xe137_scale_factors, rn222_scale_factors, bkg_scale_factors,
                             energy_res_factors):

    tag, s = get_tag(xe137_scale_factor, rn222_scale_factor, dnn_scale_factor, bkg_scale_factor, energy_res)
    
    if tag not in existing_tags: 
    
        # None when no h5 file matches this tag
        res = load_fit_results(tag)

        if res is not None:
            dfdict[tag] = res
            # Register the parameter values of this combination in the summary table
            data = {'dnn_f':dnn_scale_factor, 'bkg_misID':dnn_factors_df.loc[dnn_scale_factor]['bkg_misID'],
                    'xe137_f':xe137_scale_factor, 'rn222_f':rn222_scale_factor, 
                    'bkg_f':bkg_scale_factor, 'energy_res_f':energy_res, }
            df.loc[tag] = data
        
    
        # Only tags that have a row in the table get their expected ROI counts computed
        if tag in df.index:
            with redirect_stdout(io.StringIO()):    # capture output so it won't pollute the terminal
                with redirect_stderr(io.StringIO()):
                    get_expected_counts(df, xe137_scale_factor, rn222_scale_factor, dnn_scale_factor, bkg_scale_factor, energy_res)  
            # NOTE(review): get_expected_counts returns early without filling 'TotalROIBkg'
            # when the components table file is missing - confirm this lookup is safe then
            print(f"---> Loaded expected counts for {tag}. Total ROI counts: {df.at[tag, 'TotalROIBkg']}")
            print('---------------------------')

#     else:
#         print(f'No data found for tag {tag}')

In [None]:
# Show the summary table after the loading loop
display(df)

In [None]:
# df_tmp = dfdict['A516CD']
# df_tmp.head()
# print("Number of Toys:", len(df_tmp))

# Compute the convergence fraction

In [None]:
def get_good_fit_mask(df_fit):
    """Return a list of booleans flagging the rows of `df_fit` that pass the fit-quality selection.

    A row is rejected when any of the following holds:
      * the sum of 'fixed_fit_acc_covar' is smaller than its length minus 2,
      * 'best_fit_covar' is falsy,
      * '90CL_crossing' is below 0.01,
      * 'best_fit_nll' is positive.

    The returned list has one entry per row, in row order.
    """
    def _passes_selection(row):
        acc_covar = row['fixed_fit_acc_covar']
        # more than two entries of the per-fit flags are unset
        if np.sum(acc_covar) < len(acc_covar) - 2:
            return False
        if not row['best_fit_covar']:
            return False
        if row['90CL_crossing'] < 0.01:
            return False
        if row['best_fit_nll'] > 0.:
            return False
        return True

    return [_passes_selection(row) for _, row in df_fit.iterrows()]

In [None]:
# Fraction of toys passing the good-fit selection, for every tag loaded in this session.
# The masks are kept so that the median computation can reuse them.
good_fit_mask_dict = {}
for tag in dfdict:
    if tag in existing_tags:
        # don't process if already exists in dataframe
        continue
    df_fit = dfdict[tag]
    good_fit_mask = get_good_fit_mask(df_fit)
    convergence_f = np.sum(good_fit_mask)/len(df_fit)
    print(f'Tag {tag} convergence fraction: {convergence_f:3.3}')
    good_fit_mask_dict[tag] = good_fit_mask
    df.at[tag, 'convergence_f'] = convergence_f

In [None]:
# Parameter combinations for which fewer than 99% of the toys pass the good-fit selection
display(df.loc[df['convergence_f']<0.99])

# Compute Median and 90% CL Sensitivity

In [None]:
def calc_atoms_136(enrichment_fraction=0.9):
    """Number of Xe136 atoms in nEXO fiducial volume.

    Args:
        enrichment_fraction: atomic fraction 136 / (136 + 134).

    Returns:
        The fiducial mass divided by the mean molar mass of the 134/136 mixture,
        times Avogadro's number and the Xe136 atomic fraction.
    """
    avog_num = 6.022141E23  # Avogadro's number
    fid_mass = 3281         # mass of fiducial volume [kg]
    mmass134 = 0.133905395  # kg/mol 134
    mmass136 = 0.135907219  # kg/mol 136
    at_frac = enrichment_fraction

    # mean molar mass of the mixture [kg/mol]
    mean_molar_mass = (mmass136 * at_frac) + ((1 - at_frac) * mmass134)
    return (fid_mass * avog_num * at_frac) / mean_molar_mass


def ComputeHalflifeFromNumCounts(counts, enrichment_fraction=0.9, lifetime=10):
    """Calculate the sensitivity of nEXO in terms of half-life (years).

    Args:
        counts: number of signal counts.
        enrichment_fraction: Xe136 atomic fraction passed to calc_atoms_136.
        lifetime: multiplies the result linearly (default 10).

    Returns:
        eff * N_136 * lifetime * ln(2) / counts, with eff the hit efficiency.
    """
    eff = 0.9598  # hit efficiency
    return eff * calc_atoms_136(enrichment_fraction) * lifetime * np.log(2) / counts


def ComputeCountsFromHalflife(sensitivity, enrichment_fraction=0.9, lifetime=10):
    """Calculate the number of signal counts corresponding to a half-life (years).

    This is the inverse of ComputeHalflifeFromNumCounts for the same
    `enrichment_fraction` and `lifetime`.

    Args:
        sensitivity: half-life in years.
        enrichment_fraction: Xe136 atomic fraction passed to calc_atoms_136.
        lifetime: multiplies the result linearly (default 10).

    Returns:
        eff * N_136 * lifetime * ln(2) / sensitivity, with eff the hit efficiency.
    """
    eff = 0.9598  # hit efficiency
    return eff * calc_atoms_136(enrichment_fraction) * lifetime * np.log(2) / sensitivity
    

# def ComputeHalflifeFromNumCounts( counts, enrichment_fraction=0.9 ):
#     '''Returns the half life which would give (on average) the input number of signal counts'''
    
#     N_A = 6.02e23 # atoms/mol
#     FidMass = 3281 # kg
#     AtomicMass = 135.8 # g/mol
#     SigEfficiency = 0.96
#     Livetime = 10 # years
#     return FidMass*1.e3 / AtomicMass * N_A * enrichment_fraction * Livetime * \
#             SigEfficiency * np.log(2) / counts


In [None]:
# For every tag loaded in this session: histogram the per-toy 90% CL on the signal
# counts (good fits only), store the median and the corresponding half-life in df.
print('Tag\tMedian [cts]\tSensitivity [y]')
fig, ax = plt.subplots()
for tag, df_fit in dfdict.items():
    if tag in existing_tags:
        # don't process if already exists in dataframe
        continue
    values = df_fit['90CL_crossing'].loc[good_fit_mask_dict[tag]]
    h = Hist.new.Regular(121, 0, 60, name="signal_counts").Double()
    h.fill(values)

    median = np.median(values)
    df.at[tag, 'median'] = median
    df.at[tag, 'sens'] = ComputeHalflifeFromNumCounts(median)
    print(f'{tag}\t{median:.3f}\t{df.at[tag, "sens"]:.3e}')

    h.plot1d(ax=ax, label=f'{tag}')

ax.legend()
ax.set_xlabel('90% CL on signal counts for given toy')
ax.set_yscale('log')

In [None]:
# Express the sensitivity in units of 1e28 years and order the table by sensitivity
df = df.assign(sens_1e28=df['sens']/1e28).sort_values(by=['sens'])

In [None]:
# Show the final summary table and persist it: CSV for inspection, HDF5 as the
# cache that is read back at the top of the notebook.
display(df)
df.to_csv('multivar_study_results.csv')
# `key` is passed by keyword: positional use is deprecated in recent pandas
df.to_hdf(results_file, key='df')

In [None]:
# Scratch selection: rows with the Xe137 scale factor equal to 2, split by energy resolution
d = df.loc[df['xe137_f']==2.0]
display(d)
df_res_d = {}
for e_res in [0.008, 0.01, 0.012]:
    # Build the mask from `d` itself rather than from `df`, so that the
    # selection does not rely on implicit index alignment between the two frames
    dd = d.loc[d['energy_res_f'] == e_res]

In [None]:
# List the columns available in the summary table
df.columns

# Plots grouped by DNN Smearing factor

In [None]:
# One figure per DNN smearing factor: half-life sensitivity versus energy resolution,
# with one curve per background scaling factor. Each point is annotated with
# TotalROIBkg / 10.
colors = sns.color_palette('tab10')
alphas = [1.0, 0.6, 0.4, 0.2]
for i, dnn_f in enumerate([0., 0.15, 0.177, 0.2]):
    # Keep only the combinations where Rn222, Xe137 and the gamma backgrounds
    # were all scaled by the same factor
    d = df.loc[(df['dnn_f']==dnn_f) & (df['rn222_f'] == df['bkg_f']) & (df['rn222_f'] == df['xe137_f'])]

    fig, ax = plt.subplots()
    for j, bkg_f in enumerate([1.0, 1.2, 1.5, 2.0]):
        # Mask built from `d` itself (not from `df`), so that the selection does
        # not rely on implicit index alignment between the two frames
        dd = d.loc[d['bkg_f'] == bkg_f]
        dd = dd.sort_values(by=['energy_res_f'])
        if not len(dd):
            continue
        ax.plot(dd.energy_res_f*100, dd.sens_1e28, '-o', color=colors[i],
                markersize=10, linewidth=3, alpha=alphas[j], label=f"Bkg_f: {bkg_f}")
        for (x, y, z) in zip(dd.energy_res_f*100, dd.sens_1e28, dd.TotalROIBkg/10.):
            ax.annotate(f'{z:.2f}', xy=(x, y), xytext=(5, 5), textcoords='offset points',
                        color=colors[i], fontsize=14)

    ax.annotate(r'ROI Bkg values in units of SS cts/(FWHM$\cdot$2000kg$\cdot$y)'
                + '\n' + fr'SS cut based on {signal_efficiency}% $0\nu\beta\beta$ efficiency',
                xy=(9, 6), xycoords='axes points', color=colors[i], fontsize=12)
    ax.set_xlim((0.75, 1.45))
    ax.set_ylim((0.8, 1.45))
    ax.set_title(f"DNN Background MisID: {dnn_factors_df.loc[dnn_f]['bkg_misID']:.1f}% (f={dnn_f})")
    ax.set_xlabel(r'$\sigma/E$ Energy Resolution [%]')
    ax.set_ylabel(r'Halflife Sensitivity [$\times 10^{28}$ yrs]')
    handles, labels = ax.get_legend_handles_labels()  # get existing handles and labels
    ax.legend(handles, labels, fontsize=16)
    ax.axhline(y=1.0, color='black', linestyle='--', linewidth=3)
    ax.grid()
    fig.savefig(plotdir / f'Multivar_study_by_DNN_{dnn_f}.png', bbox_inches="tight",
                transparent=False, facecolor='white', dpi=200)
    plt.show()

# Plots grouped by Energy Resolution

In [None]:
# One figure per energy resolution: half-life sensitivity versus DNN background
# mis-ID, with one curve per background scaling factor. Each point is annotated
# with TotalROIBkg / 10.
colors = sns.color_palette('tab10')
alphas = [1.0, 0.7, 0.5, 0.3]
for i, energy_res_f in enumerate([0.008, 0.01, 0.011, 0.012]):
    # Keep only the combinations where Rn222, Xe137 and the gamma backgrounds
    # were all scaled by the same factor
    d = df.loc[(df['energy_res_f']==energy_res_f) & (df['rn222_f'] == df['bkg_f']) & (df['rn222_f'] == df['xe137_f'])]

    fig, ax = plt.subplots()
    for j, bkg_f in enumerate([1.0, 1.2, 1.5, 2.0]):
        # Mask built from `d` itself (not from `df`), so that the selection does
        # not rely on implicit index alignment between the two frames
        dd = d.loc[d['bkg_f'] == bkg_f]
        dd = dd.sort_values(by=['bkg_misID'])
        if not len(dd):
            continue
        ax.plot(dd.bkg_misID, dd.sens_1e28, '-o', color=colors[i],
                markersize=10, linewidth=3, alpha=alphas[j], label=f"Bkg_f: {bkg_f}")
        for (x, y, z) in zip(dd.bkg_misID, dd.sens_1e28, dd.TotalROIBkg/10.):
            ax.annotate(f'{z:.2f}', xy=(x, y), xytext=(5, 5), textcoords='offset points',
                        color=colors[i], fontsize=14)

    ax.annotate(r'ROI Bkg values in units of SS cts/(FWHM$\cdot$2000kg$\cdot$y)'
                + '\n' + fr'SS cut based on {signal_efficiency}% $0\nu\beta\beta$ efficiency',
                xy=(9, 6), xycoords='axes points', color=colors[i], fontsize=12)
    ax.set_xlim((None, 9.0))
    ax.set_ylim((0.8, 1.45))
    ax.set_title(f'Energy Resolution: {energy_res_f*100:.1f}%')
    ax.set_xlabel(fr'DNN Background MisID [%] at {signal_efficiency}% Signal ID')
    ax.set_ylabel(r'Halflife Sensitivity [$\times 10^{28}$ yrs]')
    ax.legend(fontsize=16)
    ax.axhline(y=1.0, color='black', linestyle='--', linewidth=3)
    ax.grid()
    fig.savefig(plotdir / f'Multivar_study_by_ERes_{energy_res_f}.png', bbox_inches="tight",
                transparent=False, facecolor='white', dpi=200)
    plt.show()

# Study convergence 

In [None]:
# Running median of the 90% CL crossing as a function of the number of toys.
#
# Given a vector V of length N, the median of V is the middle value
# of a sorted copy of V, V_sorted - i.e., V_sorted[(N-1)/2], when N
# is odd, and the average of the two middle values of V_sorted when
# N is even.
# To speed things up, I use `sortedcontainers`
from sortedcontainers import SortedList

for tag in df.loc[(df['energy_res_f'] == 0.011) & (df['dnn_f'] == 0.15)].index:

    # Tags restored from the results file have no toy data in memory yet
    if tag not in dfdict.keys():
        h5_files = glob.glob(f'{outputdir}/*{tag}*.h5')
        if h5_files:
            df_list = [pd.read_hdf(outputdir/Path(filename)) for filename in h5_files]
            dfdict[tag] = pd.concat(df_list, ignore_index=True)
        else:
            # Nothing to plot for this tag; avoid a KeyError on dfdict[tag] below
            print(f'No fit results found for tag {tag}; skipping')
            continue

    df_fit = dfdict[tag]

    # Same fit-quality selection as used for the sensitivity computation
    good_fit_mask = get_good_fit_mask(df_fit)
    values = df_fit['90CL_crossing'].loc[good_fit_mask]

    sl = SortedList()
    medians = []
    for n, v in enumerate(values):
        sl.add(v)
        # After adding the n-th value (0-based) the list holds n+1 entries, so
        # the parity test must be on the index n, not on the value v
        if n % 2 == 0:  # odd number of entries: single middle value
            medians.append(sl[n//2])
        else:  # even number of entries: mean of the two middle values
            medians.append((sl[(n-1)//2] + sl[(n+1)//2]) / 2.)
        if n % 100 == 0:  # progress print every 100 toys
            print(medians[-1], ComputeHalflifeFromNumCounts(medians[-1]))

    plt.plot(np.arange(len(values)), medians, '-', linewidth=3, label=df.loc[tag, 'bkg_f'])

plt.xlabel('Number of Toys')
plt.ylabel('Median Counts')
plt.ylim((6.5,10))
# plt.legend()
plt.grid()
# Secondary axis in units of 1e28 years; the inverse has to undo the 1e28 scaling
# so that the two functions are true inverses of each other
sec_ax = plt.gca().secondary_yaxis(
    'right',
    functions=(lambda x: ComputeHalflifeFromNumCounts(x)/1e28,
               lambda t: ComputeCountsFromHalflife(t*1e28)))
sec_ax.set_ylabel(r'Halflife Sensitivity [$\times 10^{28}$ yrs]')
plt.savefig(plotdir / 'Multivar_study_convergence.png', bbox_inches = "tight",
                transparent=False, facecolor='white', dpi=200)
plt.show()


In [None]:
# Inspect a few slices of the summary table
display(df[(df.energy_res_f==0.008) & (df.bkg_f==1.0)])
display(df[(df.dnn_f==0.15) & (df.bkg_f==1.5)])
# Energy resolution and total ROI background for the rows with dnn_f == 0 and bkg_f == 1
for tag in df[(df.dnn_f==0.0) & (df.bkg_f==1.0)].index:
    print(df.loc[tag][['energy_res_f','TotalROIBkg']])

In [None]:
# Overlay the distributions of the per-toy 90% CL on the signal counts (good fits
# only) for all tags with energy_res_f == 0.011 and bkg_f == 1.0
fig, ax = plt.subplots()
for tag in df[(df.energy_res_f==0.011) & (df.bkg_f==1.0)].index:
    df_fit = load_fit_results(tag)
    if df_fit is None:
        # load_fit_results returns None when no h5 file matches the tag
        print(f'No fit results found for tag {tag}; skipping')
        continue
    good_fit_mask = get_good_fit_mask(df_fit)
    h = Hist.new.Regular(121, 0, 60, name="signal_counts").Double()
    values = df_fit['90CL_crossing'].loc[good_fit_mask]
    h.fill(values)
    h.plot1d(ax=ax, label=f'{tag}')

plt.legend()
plt.xlabel('90% CL on signal counts for given toy')
plt.yscale('log')

# Sensitivity as a function of background

In [None]:
# Total ROI background divided by 10
# NOTE(review): presumably converts the 10-year total into counts per year
# (10 is the default `lifetime` of ComputeHalflifeFromNumCounts) - confirm
df['TotalROIBkgY'] = df['TotalROIBkg']/10.

from scipy.optimize import curve_fit
def f(x, A, exponent):
    """Power law A * x**(-exponent), used as the model function for curve_fit."""
    return A * x ** (-exponent)

# x values at which the fitted curves are drawn
xnew = np.linspace(0.4,3.1,1000)

In [None]:
# Sensitivity versus total ROI background for all parameter combinations, with a
# single power-law fit T_1/2 = A * B^(-exponent) through all points.
popt, pcov = curve_fit(f, df.TotalROIBkgY, df.sens_1e28)
perr = np.sqrt(np.diag(pcov))  # 1-sigma parameter uncertainties from the covariance diagonal

sns.scatterplot(data=df, x='TotalROIBkgY', y='sens_1e28', hue='energy_res_f',
                palette='deep', style='bkg_misID', size='bkg_f', alpha=1)

# Raw f-string: the LaTeX backslashes (\propto, \pm) are not valid Python escapes
plt.plot(xnew, f(xnew, popt[0], popt[1]), '--', color='gray',
         label=rf'Fit: $T_{{1/2}} \propto B^{{-({popt[1]:.2f} \pm {perr[1]:.2f})}}$')

plt.axhline(y=1.0, color='lightgray', linestyle='-', linewidth=3,)

# Reformat the legend produced by seaborn.
# NOTE(review): the hard-coded positions assume the legend layout
# [hue title, 5 energy resolutions, size title, 4 scale factors, style title,
#  4 mis-ID values, ...] - confirm if the scanned grid changes
handles, labels = plt.gca().get_legend_handles_labels()
new_labels = []
for k, label in enumerate(labels):
    if 1 <= k < 6:
        # energy resolution entries: fraction -> percent
        new_labels.append(f'{float(label)*100:.1f} %')
    elif 12 <= k < 16:
        # mis-ID entries are already in percent
        new_labels.append(f'{float(label):.1f} %')
    else:
        new_labels.append(label)
new_labels[0] = "Energy Resolution"
new_labels[6] = "Background\nScaling Factor"
new_labels[11] = "Topological\nDiscriminator MisID"
plt.legend(handles, new_labels, bbox_to_anchor=(1.01,1.02), loc="upper left", fontsize=16)

plt.xlabel(r'Total ROI Background [SS cts/(FWHM$\cdot$2000kg$\cdot$y)]')
plt.ylabel(r'Halflife Sensitivity [$\times 10^{28}$ yrs]')
plt.tick_params(axis='both', which='major', direction='in', reset=True)

plt.savefig(plotdir / 'Sensivitity_vs_background.png', bbox_inches = "tight",
                transparent=False, facecolor='white', dpi=200)
plt.show()

In [None]:
# Grouped and fitted by energy resolution: one power-law fit per resolution value
sns.scatterplot(data=df, x='TotalROIBkgY', y='sens_1e28', hue='energy_res_f', palette='deep', s=80)

# Draw each fit with the next colour of the same palette used for the scatter points
colors = iter(sns.color_palette('deep'))
for e_res in [0.008, 0.01, 0.011, 0.012, 0.014]:
    dtemp = df[df['energy_res_f']==e_res]
    popt, pcov = curve_fit(f, dtemp.TotalROIBkgY, dtemp.sens_1e28)
    perr = np.sqrt(np.diag(pcov))

    # Raw f-string: the LaTeX backslashes (\propto, \pm) are not valid Python escapes
    plt.plot(xnew, f(xnew, popt[0], popt[1]), '-', color=next(colors),
             label=rf'Fit: $T_{{1/2}} \propto B^{{-({popt[1]:.2f} \pm {perr[1]:.2f})}}$')

# Purely numeric legend labels are the energy resolutions: show them in percent
handles, labels = plt.gca().get_legend_handles_labels()
new_labels = []
for label in labels:
    if label.replace('.', '', 1).isdigit():
        new_labels.append(f'{float(label)*100:.1f} %')
    else:
        new_labels.append(label)
plt.legend(handles, new_labels, title="Energy Resolution", bbox_to_anchor=(1.01,1.02), loc="upper left", fontsize=16)

plt.xlabel(r'Total ROI Background [SS cts/(FWHM$\cdot$2000kg$\cdot$y)]')
plt.ylabel(r'Halflife Sensitivity [$\times 10^{28}$ yrs]')
plt.tick_params(axis='both', which='major', direction='in', reset=True)
plt.ylim(0.8, 1.5)

plt.savefig(plotdir / 'Sensivitity_vs_background_groupByERes.png', bbox_inches = "tight",
                transparent=False, facecolor='white', dpi=200)
plt.show()

In [None]:
# Grouped and fitted by DNN smearing factor: one power-law fit per factor
sns.scatterplot(data=df, x='TotalROIBkgY', y='sens_1e28', hue='bkg_misID', palette='deep', s=80)

# Draw each fit with the next colour of the same palette used for the scatter points
colors = iter(sns.color_palette('deep'))
for dnn_f in [0., 0.15, 0.177, 0.2]:
    dtemp = df[df['dnn_f']==dnn_f]
    popt, pcov = curve_fit(f, dtemp.TotalROIBkgY, dtemp.sens_1e28)
    perr = np.sqrt(np.diag(pcov))

    # Raw f-string: the LaTeX backslashes (\propto, \pm) are not valid Python escapes
    plt.plot(xnew, f(xnew, popt[0], popt[1]), '-', color=next(colors),
             label=rf'Fit: $T_{{1/2}} \propto B^{{-({popt[1]:.2f} \pm {perr[1]:.2f})}}$')

# Purely numeric legend labels are the mis-ID values: append the percent sign
handles, labels = plt.gca().get_legend_handles_labels()
new_labels = []
for label in labels:
    if label.replace('.', '', 1).isdigit():
        new_labels.append(f'{float(label):.1f} %')
    else:
        new_labels.append(label)
plt.legend(handles, new_labels, title="Background MisID", bbox_to_anchor=(1.01,1.02), loc="upper left", fontsize=16)

plt.xlabel(r'Total ROI Background [SS cts/(FWHM$\cdot$2000kg$\cdot$y)]')
plt.ylabel(r'Halflife Sensitivity [$\times 10^{28}$ yrs]')
plt.tick_params(axis='both', which='major', direction='in', reset=True)
plt.ylim(0.8, 1.5)

plt.savefig(plotdir / 'Sensivitity_vs_background_groupDNNF.png', bbox_inches = "tight",
                transparent=False, facecolor='white', dpi=200)
plt.show()

In [None]:
# Sensitivity versus a partial ROI background: the sum of the Th232 (vessel and
# internals), B8 neutrino and Xe137+Ar42 components, scaled by 0.1.
# Alternative combination that was tried:
# df['ROIbkg'] = 0.1 * (df['Num_Vessel_U238'] + df['Num_Internals_U238'] + df['Num_Rn222'] + df['Num_Far'])
df['ROIbkg'] = 0.1 * (df['Num_Vessel_Th232'] + df['Num_Internals_Th232'] + df['Num_B8nu'] + df['Num_Xe137_and_Ar42'])

popt, pcov = curve_fit(f, df.ROIbkg, df.sens_1e28)
perr = np.sqrt(np.diag(pcov))

sns.scatterplot(data=df, x='ROIbkg', y='sens_1e28', hue='energy_res_f',
                palette='deep', style='bkg_misID', size='bkg_f', alpha=1)

# The curve is drawn on xnew/12.
# NOTE(review): the factor 12 presumably shrinks the x range to the span of the
# partial background - confirm.
# Raw f-string: the LaTeX backslashes (\propto, \pm) are not valid Python escapes
plt.plot(xnew/12, f(xnew/12, popt[0], popt[1]), '--', color='gray',
         label=rf'Fit: $T_{{1/2}} \propto B^{{-({popt[1]:.2f} \pm {perr[1]:.3f})}}$')

handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles, labels, bbox_to_anchor=(1.01,1.02), loc="upper left", fontsize=16)

plt.xlabel(r'ROI Background ($^{232}$Th, $\nu$, $^{137}$Xe, $^{42}$Ar) [SS cts/(FWHM$\cdot$2000kg$\cdot$y)]')
plt.ylabel(r'Halflife Sensitivity [$\times 10^{28}$ yrs]')
plt.tick_params(axis='both', which='major', direction='in', reset=True)

# plt.savefig(plotdir / f'Sensivitity_vs_background-mod.png', bbox_inches = "tight", 
#                 transparent=False, facecolor='white', dpi=200)
plt.show()