In [None]:
import uproot
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import rcParams
import seaborn as sns
import scienceplots
from IPython.display import Image


import matplotlib.font_manager as font_manager


In [None]:
# Global matplotlib styling shared by every figure in this notebook.
# `rcParams` (imported from matplotlib) is the same object as `plt.rcParams`,
# so a single update covers all settings.
rcParams.update({
    # figure layout
    'figure.autolayout': True,
    'figure.figsize': [12, 8],
    # fonts
    'font.family': 'serif',
    'font.size': 20,
    'font.weight': 'bold',
    # axis labels, tick labels and legend text
    'axes.labelsize': 15,
    'axes.labelweight': 'bold',
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    'legend.fontsize': 15,
    # lines and grid
    'lines.linewidth': 2.5,
    'grid.linewidth': 2.5,
    'grid.linestyle': ':',
    'grid.alpha': 0.50,
})


In [None]:
# Toggle read by get_bar_plot and get_energy_spectrum: when True they add a title to the figure.
keep_title=True

# HELPER FUNCTIONS

In [None]:
def my_plotter(ax,ene,weight,label_value,yscale,xlabel='Energy [keV]',ylabel='Counts',iso="Th228",bins=200,density=True):
    """
    Draw a weighted step histogram on `ax` and decorate the axes.

    Parameters
    ----------
    ax : Axes
        The axes to draw to.

    ene : array-like
        The values to histogram (x data).

    weight : array-like or None
        Per-entry weights, forwarded to ``ax.hist(weights=...)``.

    label_value : str
        Legend label of the histogram.

    yscale : str
        Scale name forwarded to ``ax.set_yscale`` (e.g. "log", "linear").

    xlabel, ylabel : str
        Axis labels. Note that with ``density=True`` the y values are a
        normalised density, not raw counts, although the default label
        says 'Counts'.

    iso : str
        Name inserted into the axes title.

    bins : int or sequence, default 200
        Forwarded to ``ax.hist(bins=...)``.

    density : bool, default True
        Forwarded to ``ax.hist(density=...)``.

    Returns
    -------
    out
        The return value of ``ax.hist`` (for a matplotlib Axes: the bin
        values, the bin edges and the drawn patches).
    """
    out=ax.hist(ene, bins=bins, histtype='step', weights=weight, density=density, label=label_value)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_yscale(yscale)
    ax.set_title(f"Energy PDF for SS events  for {iso}")
    ax.legend()

    return out

In [None]:
#input the root file and output the (optionally filtered) dataframe
#example filter:
#apply_filter='passed_z_thresh & passed_xy_thresh & (n_x_ch_abovenoise>0) & (n_y_ch_abovenoise>0) & (m_nOPCal< (1.064*m_nQ+703)) & (m_nOPCal> (0.644*m_nQ-2411)) & (~NESTBugFound) & (m_DNNvalue>0.85) & (standoff > 100)'

def get_filtered(file,apply_filter,use_filter=False):
    '''
    Read the object named "tree" from a ROOT file into a pandas DataFrame.

    Parameters
    ----------
    file : str
        Path of the ROOT file; ":tree" is appended to select the tree.
    apply_filter : str or None
        pandas ``DataFrame.query`` expression. Only used when
        ``use_filter`` is True.
    use_filter : bool, default False
        When True, return only the rows selected by ``apply_filter``,
        with the index reset to 0..n-1. When False, return every row.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If ``use_filter`` is True but ``apply_filter`` is not a non-empty
        string. Raised before the file is read so the mistake is reported
        without paying for the load.
    '''
    if use_filter and not (isinstance(apply_filter,str) and apply_filter.strip()):
        raise ValueError("use_filter=True requires a non-empty query string in apply_filter")

    # The context manager closes the underlying file once the arrays are in memory.
    with uproot.open(file+':tree') as tree:
        df=tree.arrays(tree.keys(),library='pd')

    if use_filter:
        return df.query(apply_filter).reset_index(drop=True)
    return df
    
    


In [None]:
# draws (and shows) one energy histogram per isotope of the given dataframe
def plot_isotopes(df,df_label="PX",scale_kind="log",isotope_codes=None):
    '''
    Overlay the weighted energy histograms of selected isotopes on one axes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``isotope``, ``energy`` and ``weight``.
    df_label : str
        Prefix of every legend label ("<df_label> <isotope name>").
    scale_kind : str
        y-axis scale forwarded to my_plotter.
    isotope_codes : dict or None
        Maps the integer stored in ``df.isotope`` to the isotope name to
        plot. Defaults to Ra224, Pb212, Bi212 and Tl208 using the same
        numbering as print_isotopes / get_energy_spectrum
        (1=Th228, 2=Ra224, 3=Rn220, 4=Po216, 5=Pb212, 6=Bi212, 7=Tl208).
        The previous hard-coded codes 1-4 disagreed with that numbering
        and therefore attached the wrong names to the curves.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    '''
    if isotope_codes is None:
        isotope_codes={2:"Ra224",5:"Pb212",6:"Bi212",7:"Tl208"}

    fig, ax = plt.subplots(1, 1,figsize=(6,4))
    for code,iso_name in isotope_codes.items():
        subset=df[df["isotope"]==code]
        # Nothing to draw for an isotope that is absent from this frame.
        if subset.empty:
            continue
        my_plotter(ax,subset.energy,subset.weight,df_label+" "+iso_name,scale_kind)
    plt.show()
    

In [None]:
def display_fractions(df,cut,total_simulations,greek_name,use_filter=True):
    '''
    Print and return the fraction of simulated events that survive a cut.

    `df` is handed to get_filtered as its `file` argument (so it is the
    ROOT file path, not a DataFrame), `cut` is the query string, and the
    surviving row count is divided by `total_simulations`. `greek_name`
    is only used as the label in the printed report.
    '''
    selected=get_filtered(df,cut,use_filter=use_filter)
    n_events=selected.shape[0]
    fraction=n_events/total_simulations

    separator=30*'--'
    report=f"""
    Shape of  dataframe                    : {selected.shape}
    Number of rows (i.e. events count)     : {n_events}
    {greek_name}                           : {fraction}
    {greek_name} %                         : {fraction:.2%}

    """
    print(separator)
    print(report)
    print(separator)
    return fraction

In [None]:
def print_isotopes(df,name):
    '''
    Return the per-isotope event counts of `df` as a small table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide an integer-coded ``isotope`` column.
    name : str
        Label of the frame. Currently unused; kept for interface
        compatibility.

    Returns
    -------
    pandas.DataFrame
        Columns ``isonum`` (isotope code), ``isotopes`` (name) and
        ``counts``, sorted by ``isonum``. A code outside 1..7 is labelled
        ``unknown(<code>)``. The previous positional lookup
        ``isotopes[x-1]`` silently turned code 0 into 'Tl208' through
        negative indexing and raised IndexError for codes above 7.
    '''
    # Isotope code N corresponds to isotopes[N-1].
    isotopes=['Th228','Ra224','Rn220','Po216','Pb212','Bi212','Tl208']
    code_to_name={code:iso for code,iso in enumerate(isotopes,start=1)}

    counts=df['isotope'].value_counts()
    counts_df=pd.DataFrame({'isonum':counts.index.to_numpy(),'counts':counts.to_numpy()})\
    .sort_values(by=['isonum'])\
    .reset_index(drop=True)

    counts_df['isotopes']=[code_to_name.get(code,f'unknown({code})') for code in counts_df['isonum'].tolist()]
    return counts_df[['isonum','isotopes','counts']]


In [None]:
# bar graph plot
def get_bar_plot(df,title="test",save_name="test"):
    '''
    Draw the isotope counts as a log-scale bar chart, save it and show it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``isotopes`` (x) and ``counts`` (y),
        e.g. the table returned by print_isotopes.
    title : str
        Figure title; only drawn when the notebook-level `keep_title`
        flag is true.
    save_name : str
        The figure is written to ``f"{prefix}{save_name}.pdf"`` where
        `prefix` is the notebook-level output prefix.

    Returns
    -------
    None
    '''
    print(f"save_name: {prefix}{save_name}")

    # Create the figure explicitly and hand its axes to pandas. Without
    # `ax=`, DataFrame.plot.bar opens a figure of its own, so a preceding
    # plt.figure(figsize=...) is left empty and its size is never used.
    fig,ax=plt.subplots(figsize=(10,6))
    df.plot.bar(x='isotopes',y='counts',ax=ax)

    for c in ax.containers:
        # write the count on top of each bar
        ax.bar_label(c, fmt='%.0f', label_type='edge',rotation=30,fontsize=15,fontweight='bold')

    plt.xlabel('ISOTOPES',fontsize=15,fontweight='bold')
    #turns the title on/off
    if (keep_title):fig.suptitle(title,fontsize=20,fontweight='bold')

    plt.tick_params(bottom=True, top=True, left=True, right=True,direction='in')
    plt.ylabel('COUNTS',fontsize=15,fontweight='bold')
    plt.xticks(fontsize=15, fontweight='bold')
    plt.yticks(fontsize=15, fontweight='bold')
    plt.yscale('log')
    plt.grid()
    plt.tight_layout()
    fig.savefig(f'{prefix}{save_name}.pdf',dpi=600,bbox_inches='tight')

    plt.show()

In [None]:
#produces the energy spectrum
def get_energy_spectrum(df,loc,bins=1000,weights="weights",iso="test"):
    '''
    Plot, save and show the weighted energy spectrum, one curve per isotope.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``energy``, ``isotope`` and a weight column. Only
        rows with energy < 3000 are plotted. The caller's frame is not
        modified.
    loc : str
        Free-text location. It is used verbatim in the title, and its
        first three whitespace-separated words are concatenated (without
        separator) into the output file name.
    bins : int
        Number of histogram bins.
    weights : str
        Name of the weight column. If `df` has no such column (as with
        the historical default "weights"), the column "weight" is used,
        which is what this function always used before.
    iso : str
        Isotope label used in the file name and the title.

    Returns
    -------
    None. The figure is written to ``f"{prefix}{locc}{iso}.pdf"``.

    Notes
    -----
    The ``sns.set`` calls change global seaborn/matplotlib settings, so
    they also affect figures created after this function returns.
    '''

    sns.set(rc={'figure.figsize':(20,10)})
    # split() already removes all whitespace, so no further replacement is needed.
    locc="".join(loc.split()[0:3])
    save_name=f"{prefix}{locc}{iso}.pdf"
    print(f"save_name: {save_name}")

    #adding the isotope name column; .assign builds a new frame, so the
    #column is not written into a slice of the caller's dataframe
    iso_names={1:'Th228',2:'Ra224',3:'Rn220',4:'Po216',5:'Pb212',6:'Bi212',7:'Tl208'}
    df=df[df.energy<3000].assign(iso_name=lambda d:d['isotope'].map(iso_names))

    # Honour `weights` when it names an existing column, otherwise keep the old behaviour.
    weight_col=weights if (isinstance(weights,str) and weights in df.columns) else "weight"

    g=sns.histplot(data=df, x="energy",hue="iso_name",weights=weight_col,bins=bins,element="step",fill=False,linewidth=2.5)
    sns.set(font_scale=1.5)
    g.set_xlabel('Energy [keV]',fontsize=20,fontweight='bold')
    g.set_ylabel('Counts',fontsize=20,fontweight='bold')
    plt.yscale("log")

    # There is no legend when nothing was drawn (e.g. an empty selection).
    legend=g.get_legend()
    if legend is not None:
        plt.setp(legend.get_texts(), fontsize='22') # for legend text
        plt.setp(legend.get_title(), fontsize='22') # for legend title

    plt.xticks(fontsize=15, fontweight='bold')
    plt.yticks(fontsize=15, fontweight='bold')
    plt.tick_params(bottom=True, top=True, left=True, right=True,direction='in')
    plt.grid()
    if (keep_title):plt.title(f'Energy specturm pdf for {iso} at {loc}')
    plt.savefig(save_name,dpi=600)
    plt.show()

In [None]:
loc="test"
# str.replace returns a new string and leaves `loc` unchanged; the result is
# not stored, so this line only shows the converted text as the cell output.
loc.replace(" ","_")

# ROOT DATA FILES

In [None]:

# Prefix prepended to the name of every figure saved by the plotting helpers.
prefix='s17'

# Isotope label and the ROOT file that is read for it.
isotope='Th228'
th228_all='/home/thakur/slac_data/s17/s17_Th228_all.root'

In [None]:
# Echo the active input configuration so the printed output is self-describing.
config_banner=f"""
root file: {th228_all}
isotope  : {isotope}
prefix   : {prefix}
"""
print(config_banner)

# SIMULATION

In [None]:

# Beta selection (inner 1 tonne, MS and SS): standoff, threshold, channel,
# light/charge band and bug-veto cuts; no m_DNNvalue requirement.
# The pieces are adjacent string literals, so the value is one single-line
# expression wrapped in a leading and a trailing newline.
beta_filter=(
    "\n"
    "(standoff>201.086) &"
    "passed_xy_thresh &"
    "passed_z_thresh &"
    "(n_x_ch_abovenoise > 0) &"
    "(n_y_ch_abovenoise > 0) &"
    "(m_nOPCal < (1.077 * m_nQ + 313)) &"
    "(m_nOPCal > (0.597 * m_nQ - 216)) &"
    "~NESTBugFound &"
    "~NearAnodeBugFound"
    "\n"
)


# Gamma selection (inner 1 tonne, SS, peak events 2610 to 2620):
# the beta cuts plus m_DNNvalue>0.85 and |energy-2615| < 5.
# `m_DNNvalue>0.85&...` is left unparenthesised; it relies on DataFrame.query's
# default parser treating `&` with the precedence of `and`.
gamma_filter=(
    "\n"
    "(standoff>201.086) &"
    "m_DNNvalue>0.85&"
    "(abs(energy-2615)<5)&"
    "passed_xy_thresh &"
    "passed_z_thresh &"
    "(n_x_ch_abovenoise > 0) &"
    "(n_y_ch_abovenoise > 0) &"
    "(m_nOPCal < (1.077 * m_nQ + 313)) &"
    "(m_nOPCal > (0.597 * m_nQ - 216)) &"
    "~NESTBugFound &"
    "~NearAnodeBugFound"
    "\n"
)

# Gamma selection 1 (inner 1 tonne, SS, peak events close to 583, i.e. 578 to 588):
# identical to gamma_filter except for the energy window |energy-583| < 5.
# Reference values noted by the author: 583.187 2 	     85.0 % 3
gamma_filter1=(
    "\n"
    "(standoff>201.086) &"
    "m_DNNvalue>0.85&"
    "(abs(energy-583)<5)&"
    "passed_xy_thresh &"
    "passed_z_thresh &"
    "(n_x_ch_abovenoise > 0) &"
    "(n_y_ch_abovenoise > 0) &"
    "(m_nOPCal < (1.077 * m_nQ + 313)) &"
    "(m_nOPCal > (0.597 * m_nQ - 216)) &"
    "~NESTBugFound &"
    "~NearAnodeBugFound"
    "\n"
)

# Selection with standoff>100, the common threshold/channel/band/bug-veto cuts
# and m_DNNvalue>0.85.
# Written as adjacent string literals instead of backslash continuations inside
# a triple-quoted string: a stray space after one of the backslashes used to
# leave "\ " plus a line break inside the value, which is an invalid escape
# sequence and split the expression into two broken lines for DataFrame.query.
dec102020_filter=(
    "\n"
    "(standoff>100) &"
    "passed_xy_thresh &"
    "passed_z_thresh &"
    "(n_x_ch_abovenoise > 0) &"
    "(n_y_ch_abovenoise > 0) &"
    "(m_nOPCal < (1.077 * m_nQ + 313)) &"
    "(m_nOPCal > (0.597 * m_nQ - 216)) &"
    "~NESTBugFound &"
    "~NearAnodeBugFound &"
    "m_DNNvalue>0.85"
)




# Selection with standoff>100, m_DNNvalue>0.85 and |energy-2614| < 10.
# Unlike the other filters, the ~NESTBugFound / ~NearAnodeBugFound vetoes are
# not part of this expression.
jason_filter=(
    "\n"
    "(standoff>100) &"
    "m_DNNvalue>0.85 &"
    "passed_xy_thresh &"
    "passed_z_thresh &"
    "(n_x_ch_abovenoise > 0) &"
    "(n_y_ch_abovenoise > 0) &"
    "(m_nOPCal < (1.077 * m_nQ + 313)) &"
    "(m_nOPCal > (0.597 * m_nQ - 216))&"
    "(abs(energy-2614)<10)"
    "\n"
)


In [None]:
# Echo the beta and gamma selections with one condition per line ('&' -> newline),
# each preceded by a rule and followed by a closing rule.
rule=20*'=='
for heading,expr in (
    ("beta filter:\n",beta_filter),
    ("gamma filter:\n",gamma_filter),
    ("gamma filter1:\n",gamma_filter1),
):
    print(rule)
    print(heading,expr.replace('&','\n'))
print(rule)
# jason_filter and dec102020_filter are not echoed here.

# Total Simulations

In [None]:
# File analysed below and the number of simulated events it corresponds to;
# the latter is the denominator of raw_alpha, raw_beta and raw_gamma.
file_name=th228_all
total_simulations= 25e6

print(f"Total Simulations: {total_simulations:0.1e}")


# $\alpha$ (Any deposits in LXe)

In [None]:
# Load every row of the tree without any selection (the filter argument is
# ignored because use_filter=False).
df_alpha=get_filtered(file_name,None,use_filter=False)
g4tree_count=df_alpha.shape[0]
# Fraction of the simulated events that have a row in the tree.
raw_alpha=g4tree_count/total_simulations

print(f"""
Shape of  dataframe                        : {df_alpha.shape}
Number of rows (total events in g4tree)    : {g4tree_count:e}
raw_alpha                                  : {raw_alpha}
raw_alpha %                                : {raw_alpha:.3%}

""")

In [None]:
# Preview the first rows of the unfiltered frame.
df_alpha.head()

In [None]:
# Column names available in the unfiltered frame.
df_alpha.columns

In [None]:
# How often each value of n_x_ch_abovenoise occurs.
df_alpha['n_x_ch_abovenoise'].value_counts().to_frame() #plot(kind='bar')

In [None]:
# How often each value of n_y_ch_abovenoise occurs.
df_alpha['n_y_ch_abovenoise'].value_counts().to_frame() #plot(kind='bar')

In [None]:
#plt.plot(df_alpha['n_x_ch_abovenoise'],df_alpha['n_y_ch_abovenoise'])

In [None]:
# Work on a copy so that columns added later (z_diff) do not modify df_alpha.
# The m_DNNvalue>0.80 pre-selection is currently disabled.
df_alpha_dnn=df_alpha.copy()#query('m_DNNvalue>0.80')
df_alpha_dnn.head()

In [None]:
# Show the rows whose lower_z is +infinity.
df_alpha_dnn[df_alpha_dnn['lower_z']==np.inf]

In [None]:
# Per-row difference lower_z - upper_z.
z_diff=df_alpha_dnn['lower_z']-df_alpha_dnn['upper_z']
z_diff

In [None]:
# Store z_diff as a column of the working copy (df_alpha itself is not changed).
df_alpha_dnn['z_diff']=z_diff

In [None]:
# Display the working copy, now including the z_diff column.
df_alpha_dnn

In [None]:
# DNN score of every row; used as the x data of the DNN-vs-z_diff plot below.
dnn=df_alpha_dnn['m_DNNvalue']
dnn

In [None]:
# Scatter of DNN score (x) against z_diff (y) for all rows.
# ylabel_name is reused by later plotting cells.
ylabel_name=fr'z_diff= [lower_z-upper_z]'
# `total` is only referenced by the commented-out variant below.
total=4918023
#ax=plt.plot(dnn.head(total),z_diff.head(total),'bo')
# NOTE: plt.plot returns the list of drawn lines, so `ax` is not an Axes here.
ax=plt.plot(dnn,z_diff,'bo')
plt.xlabel('DNN');plt.ylabel(ylabel_name)
#plt.tick_params(axis='both',which='both')
plt.tick_params(bottom=True, top=True, left=True, right=True,direction='in')
plt.grid()
plt.show()

In [None]:
# Keep only the rows with 2610 < energy < 2620 (both bounds exclusive).
df_peak_energy=df_alpha_dnn.query('2610 < energy < 2620')
df_peak_energy

In [None]:
# Number of rows inside the peak-energy window.
df_peak_energy.shape[0]

In [None]:
# Scatter of DNN score against z_diff, restricted to the peak-energy rows.
ylabel_name=fr'z_diff= [lower_z-upper_z]'
# NOTE: plt.plot returns the list of drawn lines, so `ax` is not an Axes here.
ax=plt.plot(df_peak_energy.m_DNNvalue,df_peak_energy.z_diff,'ro')
plt.xlabel('DNN');plt.ylabel(ylabel_name)
plt.tick_params(bottom=True, top=True, left=True, right=True,direction='in')
plt.grid()
plt.show()

In [None]:
# Scatter of DNN score vs z_diff for the peak-energy rows with a filled 2-D
# KDE drawn on top.
# plt, sns and np come from the import cell at the top of the notebook. The
# random sample data and np.random.seed call of the original template were
# removed: x and y were overwritten before use, and seeding changed the global
# NumPy random state for no purpose.
sns.set(rc={'figure.figsize':(30,20)})

x=df_peak_energy.m_DNNvalue
y=df_peak_energy.z_diff

# Create scatter plot with density representation
sns.set(style="white")
sns.scatterplot(x=x, y=y, s=10, color="blue", alpha=0.6)
sns.kdeplot(x=x, y=y, cmap="Blues", levels=5, fill=True)

# Label the axes with the plotted quantities instead of template placeholders
plt.xlabel("DNN")
plt.ylabel("z_diff= [lower_z-upper_z]")
plt.title("DNN vs z_diff for peak-energy events (2610 < energy < 2620)")

# Show the plot
plt.show()



In [None]:
# 2-D histogram of DNN score vs z_diff for the peak-energy rows.
# NOTE(review): displot is a figure-level function; its size is presumably
# governed by its own height/aspect arguments rather than by the figsize rc
# set here - confirm.
sns.set(rc={'figure.figsize':(30,20)})
sns.displot(df_peak_energy,x='m_DNNvalue',y='z_diff')


In [None]:
# Same 2-D histogram with an explicit bin width of 0.5 on both axes and a colour bar.
# NOTE(review): a 0.5-wide bin looks coarse for the DNN axis - confirm it is intended.
sns.displot(df_peak_energy,x='m_DNNvalue',y='z_diff',binwidth=(.5,.5),cbar=True)


In [None]:
# Joint distribution of DNN score and z_diff for the peak-energy rows.
# sns.jointplot is figure-level: it creates its own figure, so a preceding
# plt.figure(figsize=...) only leaves an empty figure behind and does not
# change the plot size (use jointplot's `height` argument for that).
ax=sns.jointplot(df_peak_energy,x='m_DNNvalue',y='z_diff')
# The grid's figure is the current figure at this point, so this saves the joint plot.
plt.savefig('compare.pdf')

In [None]:
# Bivariate KDE of DNN score vs z_diff for the peak-energy rows.
sns.displot(df_peak_energy,x='m_DNNvalue',y='z_diff',kind='kde')


In [None]:
# Rows with fewer than `a` x-channels AND fewer than `a` y-channels above noise,
# reduced to the two channel counts and indexed by DNN score.
a=2
selected_columns=['m_DNNvalue','n_x_ch_abovenoise','n_y_ch_abovenoise']
above_noise=df_alpha_dnn.query('(n_x_ch_abovenoise<@a) & (n_y_ch_abovenoise<@a)')
above_noise_temp=above_noise[selected_columns].set_index('m_DNNvalue')

In [None]:
# Heat map of the raw (m_DNNvalue, z_diff) value pairs.
# NOTE(review): `data` has one row per event and two columns, so sns.heatmap
# colours the raw values row by row; it does not bin them into a 2-D
# histogram. Confirm this is the intended plot - it is also likely to be very
# slow on the full frame.
# i=0;step=-200
# temp_df=df_alpha_dnn[(z_diff<i) & (z_diff>i+step)]
temp_df=df_alpha_dnn.copy()
data=temp_df[['m_DNNvalue','z_diff']].to_numpy()#.round(2).set_index('z_diff')
# This bare expression is not the last statement of the cell, so it displays nothing.
data
# print(f'shape: {temp_df.shape}')
# dnn=temp_df['m_DNNvalue']
# z_dif=temp_df['z_diff']
# data=[dnn,z_dif]
ax=sns.heatmap(data)

In [None]:
#plot dnn vs z_diff
#different parts
# ylabel_name=fr'z_diff= [lower_z-upper_z]'
# for i in range(0,-800,-200):
#     ax=plt.plot(dnn,z_diff,'bo')
#     plt.xlabel('DNN');plt.ylabel(ylabel_name)
# #plt.tick_params(axis='both',which='both')
#     plt.tick_params(bottom=True, top=True, left=True, right=True,direction='in')
#     plt.grid()
#     plt.show()

In [None]:
# One DNN-vs-z_diff scatter per 200-wide z_diff window:
# (-200, 0), (-400, -200), (-600, -400), (-800, -600).
step=-200
for i in range(0,-800,step):
    # Both edges are exclusive, so rows with z_diff exactly on an edge
    # (0, -200, -400, ...) fall into no window.
    window_df=df_alpha_dnn[(z_diff<i) & (z_diff>i+step)]
    print(f'shape: {window_df.shape}')
    # Window-local names: assigning to `dnn` / `temp_df` here would overwrite
    # the full-length objects of earlier cells and break re-running them.
    dnn_window=window_df['m_DNNvalue']
    z_diff_window=window_df['z_diff']
    plt.plot(dnn_window,z_diff_window,'bo')
    plt.xlabel('DNN');plt.ylabel(ylabel_name)
    plt.tick_params(bottom=True, top=True, left=True, right=True,direction='in')
    plt.grid()
    plt.show()

In [None]:
# z_diff values strictly between -200 and 0.
z_diff[(z_diff<0) & (z_diff>-200)]


# Deposits outside field cage

In [None]:
# Rows with negative standoff, taken from the unfiltered frame.
df_out=df_alpha[df_alpha.standoff<0]
df_out

In [None]:
# df_out already contains only the standoff<0 rows, so its length is the count;
# filtering it a second time was redundant.
print(f"Total counts outside field cage: {len(df_out)}")

# Deposits inside field cage

In [None]:
# Rows with positive standoff. Rows with standoff exactly 0 are in neither
# df_in nor df_out.
df_in=df_alpha[df_alpha.standoff>0]
df_in

# $\beta$ (inner 1 TONNE, SS & MS)

In [None]:
# Re-read the file and keep the rows passing beta_filter; raw_beta is their
# fraction of all simulated events.
df_beta=get_filtered(file_name,beta_filter,use_filter=True)
raw_beta_count=df_beta.shape[0]
raw_beta=raw_beta_count/total_simulations

# The count and the ratio used to share the label "raw_beta"; they are now
# labelled like the corresponding lines of the gamma report.
print(f"""
Shape of  dataframe                        : {df_beta.shape}
Number of rows                             : {raw_beta_count:e}
raw_beta count                             : {raw_beta_count}
raw_beta ratio                             : {raw_beta:.3}
raw_beta  %                                : {raw_beta:.3%}

""")

In [None]:
# Per-isotope row counts of the beta selection (print_isotopes returns the table; it prints nothing).
beta_df=print_isotopes(df_beta,"df_beta")
beta_df

# $\gamma$ (INNER 1 TONNE, SS, PEAK ENERGY [2615 keV])

In [None]:
# Re-read the file and keep the rows passing gamma_filter; raw_gamma is their
# fraction of all simulated events.
df_gamma=get_filtered(file_name,gamma_filter,use_filter=True)
raw_gamma_count=df_gamma.shape[0]
raw_gamma=raw_gamma_count/total_simulations

print(f"""
Shape of  dataframe                        : {df_gamma.shape}
Number of rows                             : {raw_gamma_count:e}
raw_gamma count                            : {raw_gamma_count}
raw_gamma ratio                            : {raw_gamma}
raw_gamma %                                : {raw_gamma:.5%}

""")

In [None]:
# Per-isotope row counts of the gamma selection (label typo "df_gammaa" corrected).
gamma_df=print_isotopes(df_gamma,"df_gamma")
gamma_df