In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.colors import LogNorm
import multihist as mh
import _pickle as pickle
import scipy.interpolate as itp
import click

import strax, straxen, wfsim, cutax
import nestpy
import datetime, os
from tqdm.notebook import tqdm
from datetime import datetime

In [None]:
# Record the versions of the analysis packages this notebook was run with.
straxen.print_versions(('strax','straxen','cutax','wfsim'))

### Get high statistics SN data

In [None]:
# Simulation context, with output_folder pointing at the directory below.
st = cutax.contexts.xenonnt_sim_SR0v0_cmt_v5(
    output_folder='/dali/lgrandi/melih/sn_wfsim/strax_data',
)
st.set_config({'fax_config_override': {'field_distortion_on': False}})

rid = 'SN_wfsimdata_100k'
print(f'---> Data for "{rid}" ')
# Report, for each data kind, whether it is already stored for this run id.
for kind in ['truth', 'raw_records', 'peaks', 'peak_basics']:
    if st.is_stored(rid, kind):
        badge = click.style(" stored ", bold=True, bg="green")
    else:
        badge = click.style(" not stored ", bold=True, bg="red")
    click.echo(f'{kind:15s} is {badge}')

In [None]:
# Load the truth, peak-level and event-level tables of the high-statistics run.
truth = st.get_df(rid, 'truth')
peak_basics = st.get_df(rid, 'peak_basics')
event_info = st.get_df(rid, 'event_info')

### Also get a single SN event

In [None]:
# Re-configure the simulation context for the single-SN instruction file.
st.set_config(dict(nearby_window=5e8))
st.set_config(dict(fax_config_override=dict(field_distortion_on=False)))
st.set_config(dict(fax_file='/dali/lgrandi/melih/sn_wfsim/instructions/single_SN.csv'))
rid_single = 'sn_simple'

print(f'---> Data for "{rid_single}" ')
for kind in ['truth', 'raw_records', 'peaks', 'peak_basics']:
    # Query the run announced above (rid_single), not the high-statistics run id.
    is_stored = st.is_stored(rid_single, kind)
    click.echo(f'{kind:15s} is {click.style(" stored ", bold=True, bg="green") if is_stored else click.style(" not stored ",bold=True,bg="red")}')

In [None]:
# Load the same three tables for the single-SN run.
truth_single = st.get_df(rid_single, 'truth')
peak_basics_single = st.get_df(rid_single, 'peak_basics')
event_info_single = st.get_df(rid_single, 'event_info')

### Get a background run

In [None]:
# Context used to read the background run.
st_runs = cutax.xenonnt_v5(
    cuts_for='commissioning.commissioning_cuts',
    _minimum_run_number=17917,
)

In [None]:
# exclude_tags = ('flash', 'ramp_up', 'ramp_down', 'anode_off', 'hot_spot', 'missing_one_pmt', 'messy', 'bad')

# # Not tpc_radon
# runs_bkg = st_runs.select_runs(run_mode='tpc_bkg', include_tags=('_sr0_*'), 
#                                exclude_tags=exclude_tags,  available=('event_basics'), 
#                                ignore_underscore=True)

# bkg_run_id = runs_bkg.sample()['name'].values[0]

In [None]:
bkg_run_id = '019537'
# half a second in each direction
st_runs.set_config({'nearby_window': 5e8})
peaks_run = st_runs.get_df(bkg_run_id, ('peak_proximity', 'peak_basics'))
# Number of competing peaks divided by the configured proximity window.
peak_prox_rate = peaks_run['n_competing'].div(st_runs.config['nearby_window'])

**Auxiliary scripts**

In [None]:
def display_times(arr):
    """Print the first and last timestamp of ``arr`` and the span between them.

    Parameters
    ----------
    arr : array-like
        Timestamps in nanoseconds, interpreted as time since the Unix epoch.

    Prints
    ------
    Two lines with the first/last time as integer seconds and as a UTC
    date string, followed by the elapsed whole seconds between them and
    the ``timedelta`` resolution.
    """
    # Local import: the name `datetime` is bound to both the module and the
    # class at file level, so resolve the class explicitly here.
    from datetime import datetime, timezone

    arr = np.asarray(arr)
    ti = int(arr.min()/1e9)
    tf = int(arr.max()/1e9)
    start = datetime.fromtimestamp(ti, tz=timezone.utc)
    stop = datetime.fromtimestamp(tf, tz=timezone.utc)
    print(ti, start.strftime('%Y-%m-%d %H:%M:%S'))
    print(tf, stop.strftime('%Y-%m-%d %H:%M:%S'))
    timedelta = stop - start
    # total_seconds() includes whole days; `.seconds` alone wraps every 24 h.
    print(f'{int(timedelta.total_seconds())} seconds \n{timedelta.resolution} resolution')
    
def get_rate(arr, sampling=1e9):
    """Histogram timestamps into consecutive bins and return the rate per bin.

    Parameters
    ----------
    arr : array-like
        Timestamps, in the same unit as ``sampling`` (nanoseconds in this
        notebook).
    sampling : float, optional
        Bin width. The default 1e9 corresponds to one second when the
        timestamps are in nanoseconds.

    Returns
    -------
    times : numpy.ndarray
        Centre of every bin.
    rates : numpy.ndarray
        Number of entries in each bin divided by ``sampling``.

    Notes
    -----
    Bins start at ``arr.min()`` and are half-open ``[left, right)``, except
    the last one which also includes its right edge. Every entry, including
    the earliest one, is therefore counted exactly once. Empty input yields
    two empty arrays; a single timestamp yields one bin.
    """
    arr = np.asarray(arr)
    if arr.size == 0:
        return np.array([]), np.array([])

    start, stop = arr.min(), arr.max()
    # At least one bin, so that a zero-length span still counts its entries.
    n_bins = max(1, int(np.ceil((stop - start) / sampling)))
    edges = start + sampling * np.arange(n_bins + 1)
    # Guard against floating-point round-off leaving the latest entry outside.
    edges[-1] = max(edges[-1], stop)

    counts, _ = np.histogram(arr, bins=edges)
    times = edges[:-1] + sampling/2
    return times, counts/sampling

def inject_in(small_signal, big_signal, rng=None):
    """Return a copy of ``small_signal`` shifted in time to lie inside ``big_signal``.

    The earliest entry of ``small_signal`` is placed on a randomly chosen
    timestamp of ``big_signal``; relative times within ``small_signal`` are
    preserved. Only start positions for which the whole shifted signal lies
    strictly between the first and last ``big_signal`` timestamp are
    considered, so the result never overruns the run.

    Parameters
    ----------
    small_signal : pandas.DataFrame
        Frame with a ``'time'`` column; it is not modified.
    big_signal : pandas.DataFrame
        Frame with a ``'time'`` column that defines the target interval.
    rng : object with a ``choice`` method, optional
        Random source (e.g. ``np.random.default_rng(seed)``) for reproducible
        placement. Defaults to the global ``np.random`` state.

    Returns
    -------
    pandas.DataFrame
        Copy of ``small_signal`` with the shifted ``'time'`` column.

    Raises
    ------
    ValueError
        If either frame is empty or ``small_signal`` does not fit strictly
        inside the time range of ``big_signal``.
    """
    if len(small_signal) == 0 or len(big_signal) == 0:
        raise ValueError('inject_in needs non-empty small_signal and big_signal')

    times_bkg = big_signal['time'].values
    # Times relative to the first entry of the small signal.
    rel_times = small_signal['time'] - small_signal['time'].min()
    duration = rel_times.max()

    # Start positions that keep the shifted signal strictly inside the run.
    fits = (times_bkg > times_bkg.min()) & (times_bkg + duration < times_bkg.max())
    candidates = times_bkg[fits]
    if len(candidates) == 0:
        raise ValueError('small_signal does not fit inside the time range of big_signal')

    # The start is deliberately placed on an existing background timestamp, so
    # no exact-coincidence check is made: it would always fire for that entry.
    chooser = np.random if rng is None else rng
    injected = small_signal.copy()
    injected['time'] = rel_times + chooser.choice(candidates)
    return injected

In [None]:
# Time range covered by the peaks of the background run.
display_times(peaks_run['time'])

In [None]:
# Time range covered by the peaks of the simulated single-SN run.
display_times(peak_basics_single['time'])

We need to inject this inside the run's time interval.

In [None]:
# Place the single-SN peaks at a random position inside the background run.
# inject_in draws from np.random, so the position changes between executions
# unless a seed is set beforehand.
signal = inject_in(peak_basics_single, peaks_run)

In [None]:
# Time range of the injected signal, to compare with the background run above.
display_times(signal['time'])

Now it is inside our background run!

In [None]:
# Rate of all background peaks versus the injected SN peaks (default 1e9 ns bins).
t_run, r_run = get_rate(peaks_run['time'])
t_sn, r_sn = get_rate(signal['time'])

plt.plot(t_run, r_run, label='BG Rates')
# The SN curve is scaled by 10 purely for visibility.
plt.plot(t_sn, r_sn*10, label='SN signal x10', lw=3)
plt.xlabel('Time [ns]')
plt.ylabel('Rate [peaks / ns]')
# Without a legend the curve labels above are never shown.
plt.legend();

### Cleaning
The SN signal is only visible when amplified (by a factor of 10 in the plot above). We should clean the background run to have a more stable signal.

In [None]:
# st_runs.data_info('peak_basics')
# Distribution of the 90% width for all background peaks, restricted to 0-2000.
plt.hist(peaks_run['range_90p_area'], bins=50, range=(0, 2e3));

In [None]:
# First-pass cuts: type-2 peaks with small area and a 90% width between 300 and 1000.
m1 = peaks_run['type'].eq(2)
m2 = peaks_run['area'].lt(600)
m3 = peaks_run['range_90p_area'].lt(1000)
m4 = peaks_run['range_90p_area'].gt(300)
masks = m1 & m2 & m3 & m4
peaks_bg_filt = peaks_run.loc[masks]

In [None]:
# Rate of the filtered background peaks versus the injected SN peaks.
t_run, r_run = get_rate(peaks_bg_filt['time'])
t_sn, r_sn = get_rate(signal['time'])

plt.plot(t_run, r_run, label='BG Rates')
plt.plot(t_sn, r_sn, label='SN signal', lw=3)
plt.xlabel('Time [ns]')
plt.ylabel('Rate [peaks / ns]')
# Without a legend the curve labels above are never shown.
plt.legend();

Some initial cuts already give nice results. Tune these cuts using higher statistics data.

In [None]:
# Background type-2 peaks with an area below 2000.
s2_runs = peaks_run.loc[peaks_run['type'].eq(2) & peaks_run['area'].lt(2000)]

In [None]:
# Same selection as s2_runs, applied to the high-statistics SN simulation.
s2_peaks = peak_basics[(peak_basics['type']==2)&(peak_basics['area']<2000)]
s1_peaks = peak_basics[peak_basics['type']==1]

fig, ax = plt.subplots(ncols=2, nrows=3, figsize=(14,12))


def _overlay_hist2d(axis, xcol, ycol, xlabel, ylabel):
    """Draw background (red) and SN 2D histograms of two columns on one axis."""
    axis.hist2d(s2_runs[xcol], s2_runs[ycol], bins=(200,200), norm=LogNorm(), cmap='Reds')
    axis.hist2d(s2_peaks[xcol], s2_peaks[ycol], bins=(200,200), norm=LogNorm(), alpha=0.6)
    axis.set_xlabel(xlabel)
    axis.set_ylabel(ylabel)


def _overlay_hist1d(axis, col, xlabel, **hist_kwargs):
    """Draw normalised background and SN step histograms of one column, log y-axis."""
    axis.hist(s2_runs[col], bins=50, histtype='step', density=True, label='BG', **hist_kwargs)
    axis.hist(s2_peaks[col], bins=50, histtype='step', density=True, label='SN', **hist_kwargs)
    axis.set_xlabel(xlabel)
    axis.set_yscale('log')


_overlay_hist2d(ax[0,0], 'area', 'range_90p_area', 'S2 Area [P.E.]', 'S2 width')

_overlay_hist2d(ax[0,1], 'area', 'area_fraction_top', 'S2 Area [P.E.]', 'S2 AFT')
ax[0,1].axhline(0.68)

_overlay_hist1d(ax[1,0], 'area', 'S2 area')
_overlay_hist1d(ax[1,1], 'area_fraction_top', 'AFT')
_overlay_hist1d(ax[2,0], 'range_50p_area', 'range_50p_area', range=(0,15_000))

_overlay_hist2d(ax[2,1], 'range_90p_area', 'area_fraction_top', '90p width', 'AFT')

# Only the 1D panels carry labelled artists; calling legend() on the 2D
# panels would just emit a "no artists with labels" warning.
for a in ax.ravel():
    if a.get_legend_handles_labels()[0]:
        a.legend();

In [None]:
# st_runs.data_info('peak_basics')

In [None]:
def tune_param(signal1, signal2, param='area_fraction_top', cuts=None):
    """Return the cut value that best separates ``signal2`` from ``signal1``.

    For every candidate cut, the surviving fraction of a sample is the share
    of its values that are NOT below the cut. The chosen cut maximises
    ``surviving(signal2) - surviving(signal1)``; on ties the first candidate
    wins.

    Parameters
    ----------
    signal1 : array-like
        Values of the sample to reject (background in this notebook).
    signal2 : array-like
        Values of the sample to keep (SN in this notebook).
    param : str, optional
        Unused; kept so existing calls keep working.
    cuts : iterable of float, optional
        Candidate cut values to scan. Defaults to ``np.arange(0, 1, 0.05)``,
        which only makes sense for quantities living in [0, 1); pass a
        suitable grid for other quantities.

    Returns
    -------
    The best cut value, or ``0`` if no candidate gives a positive difference.

    Raises
    ------
    ValueError
        If either sample is empty.
    """
    reject = np.asarray(signal1)
    keep = np.asarray(signal2)
    if reject.size == 0 or keep.size == 0:
        raise ValueError('tune_param needs two non-empty samples')
    if cuts is None:
        cuts = np.arange(0, 1, 0.05)

    def surviving_fraction(values, threshold):
        # Fraction of entries that are not strictly below the threshold.
        return (values.size - np.count_nonzero(values < threshold)) / values.size

    best = 0
    cut_val = 0
    for cut in cuts:
        proxy = surviving_fraction(keep, cut) - surviving_fraction(reject, cut)
        # Strict comparison keeps the first of several equally good cuts.
        if proxy > best:
            best = proxy
            cut_val = cut
    return cut_val

In [None]:
# Tune the cut on area_fraction_top: signal1 is the background, signal2 the SN sample.
signal1 = s2_runs.loc[:, 'area_fraction_top']
signal2 = s2_peaks.loc[:, 'area_fraction_top']

# signal1 = s2_runs['range_90p_area']
# signal2 = s2_peaks['range_90p_area']

cut = tune_param(signal1=signal1, signal2=signal2)
cut

In [None]:
# Normalised distributions of the entries that fall below the tuned cut.
plt.hist(signal1.loc[signal1 < cut], range=(0, cut), bins=50, density=True, histtype='step', label='BG');
plt.hist(signal2.loc[signal2 < cut], range=(0, cut), bins=50, density=True, histtype='step', label='SN');
plt.xlabel('AFT');
plt.legend()
plt.yscale('log')

Let's try these cuts.

In [None]:
# Second set of cuts on the background run: type-2 peaks, small area, bounded
# 50% width, low area-fraction-top, and a time window around the SN signal.
m1 = peaks_run['type'] == 2
m2 = peaks_run['area'] < 600
m3 = peaks_run['range_50p_area'] < 15_000
m4 = peaks_run['range_50p_area'] > 0
m5 = peaks_run['area_fraction_top'] < 0.5
# NOTE(review): the window is +-5% of the absolute mean timestamp. If 'time' is
# in ns since the epoch (as display_times assumes), 5% of that value is a span
# of years, so m6 presumably keeps every peak of the run -- confirm the
# intended window width.
m6 = (peaks_run['time'] > signal['time'].mean()*.95) & (peaks_run['time'] < signal['time'].mean()*1.05)
masks = m1 & m2 & m3 & m4 & m5 & m6
peaks_bg_filt = peaks_run[masks]
peaks_bg_filt.shape

In [None]:
# Keep only the type-2 peaks of the injected signal.
signal_filt = signal.loc[signal['type'].eq(2)]

In [None]:
# Same comparison with finer bins (8e7 ns) after the tuned cuts.
t_run, r_run = get_rate(peaks_bg_filt['time'], 8e7)
t_sn, r_sn = get_rate(signal_filt['time'], 8e7)

plt.plot(t_run, r_run, label='BG Rates')
plt.plot(t_sn, r_sn, label='SN signal', lw=3)
plt.xlabel('Time [ns]')
plt.ylabel('Rate [peaks / ns]')
# Without a legend the curve labels above are never shown.
plt.legend();

Lower level data? Pulses? We should be able to trigger as we read the data. <br>

I think my current rate scan uses discrete, non-overlapping steps, but maybe I should use finer ones? That is, advance in 0.5 s steps and compute the rate over the following 5 seconds (a sliding window). That should be more efficient.

In [None]:
# NOTE(review): test_bg_data is first assigned in the next cell, so on a fresh
# kernel (Restart & Run All) this line raises NameError -- run it after that cell.
test_bg_data.keys()

In [None]:
# Bin centre with the highest background rate (the first one if several tie).
time_of_peak = t_run[np.argmax(r_run)]
test_bg_data = peaks_bg_filt.copy()
# NOTE(review): +-5% of an absolute timestamp; if 'time' is epoch-based this
# window is far wider than the run -- confirm the intended width.
test_bg_data = test_bg_data[test_bg_data['time'].gt(time_of_peak*.95) & test_bg_data['time'].lt(time_of_peak*1.05)]
plt.hist(test_bg_data['area'], bins=50);

In [None]:
# Distribution of n_competing_left for the filtered background peaks.
plt.hist(peaks_bg_filt['n_competing_left'], bins=50, range=(0, 600));

In [None]:
# Distribution of the positive t_to_nearest_peak values, restricted to 0-4000.
plt.hist(
    peaks_bg_filt.loc[peaks_bg_filt['t_to_nearest_peak'] > 0, 't_to_nearest_peak'],
    bins=50,
    range=(0, 4000),
);

In [None]:
# Inspect the 'peaks' data type (presumably lists its fields -- confirm).
st_runs.data_info('peaks')

In [None]:
# Inspect the 'raw_records' data type (presumably lists its fields -- confirm).
st_runs.data_info('raw_records')

In [None]:
# 90% width versus area-fraction-top: background (a, b) first, SN (c, d) drawn on top in red.
a = s2_runs['range_90p_area']
b = s2_runs['area_fraction_top']
c = s2_peaks['range_90p_area']
d = s2_peaks['area_fraction_top']
# a = a[(a>0) & (b>0)]
# b = b[(a>0) & (b>0)]
plt.hist2d(a, b, norm=LogNorm(), bins=(200, 200)); # , range=((0, 0.01e6),(0,1.3))
plt.hist2d(c, d, norm=LogNorm(), bins=(200, 200), cmap='Reds'); # , range=((0, 0.01e6),(0,1.3))
plt.xlabel('90p width')
plt.ylabel('aft')

In [None]:
# Distribution of the 90% width for all background peaks, restricted to 0-2000.
plt.hist(peaks_run['range_90p_area'], bins=50, range=(0, 2e3));

In [None]:
# 90% width split by peak type: type 1 (red), type 2 (blue) and all peaks (black).
plt.hist(peaks_run.loc[peaks_run['type'] == 1, 'range_90p_area'], bins=50, range=(0, 2e3), color='red', histtype='step');
plt.hist(peaks_run.loc[peaks_run['type'] == 2, 'range_90p_area'], bins=50, range=(0, 2e3), color='blue', histtype='step');
plt.hist(peaks_run['range_90p_area'], bins=50, range=(0, 2e3), color='k', histtype='step');

In [None]:
bins = 200
# log10 is only defined for strictly positive values, so restrict BOTH samples
# to rows where area and 50% width are positive (non-positive entries would
# turn into -inf/NaN and break the automatic histogram range).
s2_runs_pos = s2_runs[(s2_runs['area']>0)&(s2_runs['range_50p_area']>0)]
s2_peaks_pos = s2_peaks[(s2_peaks['area']>0)&(s2_peaks['range_50p_area']>0)]

plt.hist2d(np.log10(s2_runs_pos['area']),
           np.log10(s2_runs_pos['range_50p_area']),
           bins=(bins,bins), norm=LogNorm(), cmap='Reds');
plt.hist2d(np.log10(s2_peaks_pos['area']), np.log10(s2_peaks_pos['range_50p_area']),
           bins=(bins,bins), norm=LogNorm(), alpha=0.7);
# The plotted values are already log10-transformed, so the axes stay linear;
# a log scale on top would take the logarithm twice and hide values <= 0.
plt.xlabel('log(S2 area)'); plt.ylabel('log(S2 width)');