### Imports and configuration

In [None]:
import beamtest_analysis_helper as helper
import datetime
from pathlib import Path
import pandas as pd
import numpy as np
from glob import glob
from natsort import natsorted
from collections import defaultdict
import hist
import matplotlib.pyplot as plt
import mplhep as hep
from tqdm.notebook import tqdm
# Select mplhep's 'CMS' plotting style.
hep.style.use('CMS')

In [None]:
# ---------------------------------------------------------------------------
# Board description: every list below has one entry per board, ordered by
# board ID.
# ---------------------------------------------------------------------------
chip_names = [
    "ET2_EPIR_Pair1",
    "ET2_BAR_4",
    "ET2_BAR_6",
    "ET2_CNM_1-3",
]
chip_labels = list(range(len(chip_names)))
high_voltages = [260, 260, 260, 200]

# ---------------------------------------------------------------------------
# Run selection. Swap in the commented block to analyze the other run set.
# ---------------------------------------------------------------------------
run_names = ["run31", "run32"]
run_info = "cover_off_offset15"
offsets = [15, 15, 15, 15]

# run_names = ["run38", "run39"]
# run_info = "cover_off_offset10"
# offsets = [15, 10, 15, 10]

# Figure file names reuse the chip names (same list object); figure titles
# combine a per-board description with that board's high voltage and offset.
chip_fignames = chip_names
chip_figtitles = [
    f"{description} HV{voltage}V OS:{offset}"
    for description, voltage, offset in zip(
        (
            "(Trigger) EPIR Pair1",
            "(DUT1) Barcelona 4",
            "(Reference) Barcelona 6",
            "(DUT2) CNM (HPK Sensor) 1-3",
        ),
        high_voltages,
        offsets,
    )
]

# ---------------------------------------------------------------------------
# Analysis selection.
# ---------------------------------------------------------------------------
board_to_analyze = [0, 1, 2]   # board IDs entering the analysis
ignore_boards = [3]            # passed to helper.singlehit_event_clear
combi_tag = "dut1"             # tag used in input/output file names
toa_cut = [100, 500]           # [lower, upper] TOA window applied to board 0

# Columns loaded from each feather file.
columns_to_read = ['evt', 'board', 'row', 'col', 'toa', 'tot', 'cal']

### Clean up bad pixels

In [None]:
# Load the track candidates for this run configuration and pixel-combination
# tag, then print a summary of the table (columns, dtypes, memory).
track_csv = f"./track_selection/{run_info}_good_track_candidates_track2k_{combi_tag}_first2hours.csv"
track_df = pd.read_csv(track_csv)
track_df.info()

In [None]:
# Load the bad-pixel table for this run configuration and print its summary.
# NOTE(review): pickle files execute code on load -- only read files produced
# by a trusted source.
bad_pix_pickle = f'./track_selection/bad_pixels_{run_info}.pkl'
bad_pix_df = pd.read_pickle(bad_pix_pickle)
bad_pix_df.info()

In [None]:
# Remove every track candidate that passes through a known bad pixel on one
# of the analyzed boards. One boolean mask is built per bad pixel; a track is
# dropped as soon as any mask matches.
masks = []

for idx, irow in bad_pix_df.iterrows():
    # Bad pixels on boards that are not analyzed cannot affect the tracks.
    if irow['board_id'] not in board_to_analyze:
        continue

    mask = (
        (track_df[f"row_{irow['board_id']}"] == irow['row']) & (track_df[f"col_{irow['board_id']}"] == irow['col'])
    )
    masks.append(mask)

if masks:
    combined_mask = pd.concat(masks, axis=1).any(axis=1)
else:
    # pd.concat raises on an empty list; with no relevant bad pixel every
    # track is kept.
    combined_mask = pd.Series(False, index=track_df.index)

clean_track_df = track_df[~combined_mask].reset_index(drop=True)

# bad_pix_df is deliberately NOT deleted here: it is needed again further
# down, when bad-pixel hits are removed before fitting the diagonal cut.
del masks, combined_mask, track_df
clean_track_df.info()

### Load files and preprocessing

In [None]:
# Gather the feather files of every selected run; files are naturally sorted
# within a run and runs are kept in the order given by run_names.
files = []
for rn in run_names:
    run_pattern = f'./desy_TB_analyze/{run_info}/desy_TB_{rn}/*feather'
    files.extend(natsorted(glob(run_pattern)))

# Show the first and last two paths plus the total as a quick sanity check.
print(files[:2])
print(files[-2:])
print('Total:', len(files))

In [None]:
# Load every feather file, apply helper.singlehit_event_clear, and renumber
# the events so that event IDs are unique across all files.
frames = []
last_evt = 0  # first event ID that is still free for the next file

for ifile in tqdm(files):
    run_df = pd.read_feather(ifile, columns=columns_to_read)
    run_df = helper.singlehit_event_clear(run_df, ignore_boards=ignore_boards)

    # A file with no surviving event contributes nothing (and has no maximum
    # event ID to advance the offset with).
    if run_df.empty:
        del run_df
        continue

    # Re-label the events of this file as 0..N-1, then shift them past every
    # ID already handed out. The offset is (previous maximum + 1): reusing
    # the previous maximum itself would merge the first event of this file
    # with the last event of the previous one.
    run_df['evt'] = run_df['evt'].astype('category').cat.codes.astype('uint64') + last_evt
    last_evt = int(run_df['evt'].max()) + 1

    frames.append(run_df)
    del run_df

# Concatenate once instead of growing the frame inside the loop (which copies
# all previously loaded data on every iteration).
if frames:
    total_df = pd.concat(frames)
    total_df['evt'] = total_df['evt'].astype('uint64')
else:
    total_df = pd.DataFrame()
del frames

### Find parameters for diagonal cut

In [None]:
# --- Remove hits that sit on known bad pixels of the analyzed boards -------
# Left-merge with indicator: rows flagged 'left_only' have no bad-pixel match.
bad_pixels = bad_pix_df.loc[bad_pix_df['board_id'].isin(board_to_analyze), ['board_id', 'row', 'col']]
merged_df = total_df.merge(
    bad_pixels,
    how='left',
    left_on=['board', 'row', 'col'],
    right_on=['board_id', 'row', 'col'],
    indicator=True,
)

not_bad_pixel = merged_df['_merge'] == 'left_only'
clean_hit_df = merged_df[not_bad_pixel].drop(columns=['board_id', '_merge']).reset_index(drop=True)
del merged_df, bad_pixels, not_bad_pixel

# --- Keep events with exactly one hit on board 0 and exactly one on board 2 -
event_board_counts = clean_hit_df.groupby(['evt', 'board']).size().unstack(fill_value=0)

trig_selection = event_board_counts[0] == 1
ref_selection = event_board_counts[2] == 1
event_selection_col = trig_selection & ref_selection

selected_events = event_board_counts[event_selection_col].index
sub_clean_hit_df = clean_hit_df[clean_hit_df['evt'].isin(selected_events)].reset_index(drop=True)
del clean_hit_df, event_board_counts, event_selection_col, trig_selection, ref_selection, selected_events

## Selecting good hits
# board ID: [CAL LB, CAL UB, TOA LB, TOA UB, TOT LB, TOT UB]
# CAL window is +/-50 around the most frequent CAL code of each board; board 0
# gets a tighter TOA window than the other boards.
tdc_cuts = {}
for idx in board_to_analyze:
    cal_mode = sub_clean_hit_df.loc[sub_clean_hit_df['board'] == idx]['cal'].mode()[0]
    toa_window = [100, 500] if idx == 0 else [0, 1100]
    tdc_cuts[idx] = [cal_mode-50, cal_mode+50, toa_window[0], toa_window[1], 0, 600]

filtered_df = helper.tdc_event_selection(sub_clean_hit_df, tdc_cuts_dict=tdc_cuts)
del sub_clean_hit_df

# --- Straight-line fit of board-2 TOA versus board-0 TOA --------------------
# params[0] is the slope and params[1] the intercept (np.polyfit, degree 1).
# NOTE(review): pairing by position assumes helper.tdc_event_selection leaves
# boards 0 and 2 with the same events in the same order -- confirm.
trig_toa = filtered_df.loc[filtered_df['board'] == 0]['toa'].reset_index(drop=True)
ref_toa = filtered_df.loc[filtered_df['board'] == 2]['toa'].reset_index(drop=True)
params = np.polyfit(trig_toa, ref_toa, 1)
del filtered_df, trig_toa, ref_toa

In [None]:
# Build one pivot table (index: evt, columns: (quantity, board)) per track
# candidate. track_pivots maps the track's positional index in clean_track_df
# to a one-element list holding that table.
track_pivots = defaultdict(list)

for itrack in tqdm(range(clean_track_df.shape[0])):
    track_row = clean_track_df.iloc[itrack]

    ## Filter only the pixels of interest, dropping other hits on the boards of interest as well as boards not of interest
    pix_dict = {
        idx: [track_row[f'row_{idx}'], track_row[f'col_{idx}']]
        for idx in board_to_analyze
    }

    track_tmp_df = helper.pixel_filter(total_df, pix_dict)

    ## Selecting good hits with TDC cuts
    # board ID: [CAL LB, CAL UB, TOA LB, TOA UB, TOT LB, TOT UB]
    # CAL window is +/-3 around the most frequent CAL code of the pixel; only
    # board 0 uses the configurable toa_cut window.
    tdc_cuts = {}
    for idx in board_to_analyze:
        cal_modes = track_tmp_df.loc[track_tmp_df['board'] == idx]['cal'].mode()
        if cal_modes.empty:
            # No hit at all on this board's pixel: the CAL mode is undefined.
            break
        cal_mode = cal_modes[0]
        toa_window = toa_cut if idx == 0 else [0, 1100]
        tdc_cuts[idx] = [cal_mode-3, cal_mode+3, toa_window[0], toa_window[1], 0, 600]

    if len(tdc_cuts) != len(board_to_analyze):
        print(f'Track {itrack}: no hits on at least one board, skipping this track')
        del track_tmp_df
        continue

    track_tmp_df = helper.tdc_event_selection(track_tmp_df, tdc_cuts_dict=tdc_cuts)

    ## TOA correlation (diagonal) cut between board 0 and board 2
    # distance is the signed perpendicular distance of each (x, y) point from
    # the line y = params[0]*x + params[1].
    # NOTE(review): pairing by position assumes helper.tdc_event_selection
    # leaves boards 0 and 2 with the same events in the same order -- confirm.
    trig_hits = track_tmp_df.loc[track_tmp_df['board'] == 0].reset_index(drop=True)
    x = trig_hits['toa']
    y = track_tmp_df.loc[track_tmp_df['board'] == 2]['toa'].reset_index(drop=True)
    distance = (x*params[0] - y + params[1])/(np.sqrt(params[0]**2 + 1))

    # The distance is signed, so the cut must act on its magnitude; a plain
    # "distance < 3 sigma" would keep arbitrarily large negative outliers.
    evts = trig_hits[np.abs(distance) < 3.*np.std(distance)]['evt'].unique()

    track_tmp_df = track_tmp_df.loc[track_tmp_df['evt'].isin(evts)]

    ## Pivot Table to make tracks
    pivot_table = track_tmp_df.pivot(index=["evt"], columns=["board"], values=["row", "col", "toa", "tot", "cal"])
    track_pivots[itrack].append(pivot_table)
    del track_tmp_df, trig_hits, pivot_table

### Save track pivot tables

In [None]:
# Write each track's pivot table to its own pickle file. The output directory
# name encodes the run configuration, the combination tag and the TOA window.
save_dir = Path(f'./{run_info}_track_pivot_tables_{combi_tag}_TOA{toa_cut[0]}to{toa_cut[1]}_TOA_CorrelationCut')
save_dir.mkdir(exist_ok=True)

for key, val in tqdm(track_pivots.items()):
    # key is the positional index of the track in clean_track_df.
    track_row = clean_track_df.iloc[key]

    # One "<chip name>_R<row>C<col>" tag per analyzed board, in board order.
    # Built over board_to_analyze so the name stays complete for any number
    # of boards (identical to the previous hard-coded form for three boards).
    pixel_tags = [
        f"{chip_names[idx]}_R{track_row[f'row_{idx}']}C{track_row[f'col_{idx}']}"
        for idx in board_to_analyze
    ]
    fname = save_dir / f"track_{'_'.join(pixel_tags)}.pkl"

    # Each dictionary value is a one-element list holding the pivot table.
    val[0].to_pickle(fname)

### The full dataframe is no longer needed now that the pivot tables exist (free memory!)

In [None]:
# Drop the reference to the full hit table so its memory can be reclaimed;
# the remaining cells only read track_pivots.
del total_df

### Let's do bootstrapping

In [None]:
# Bootstrap the per-board time resolution of every track: repeatedly draw a
# random subset of the track's events, run the time-walk correction and the
# Gaussian fits on it, and record the resulting resolutions. The mean and the
# sample standard deviation over the successful rounds are stored in
# final_dict (one entry per track, column-wise).
final_dict = defaultdict(list)

# Bootstrap settings, shared by every track.
iteration = 100            # resampling rounds per track
sampling_fraction = 0.75   # fraction of events drawn (without replacement) per round

for ikey, itable in tqdm(track_pivots.items()):
    # Resolution values of the successful rounds, keyed like the dictionary
    # returned by the helper's return_resolution_* function.
    resolution_samples = defaultdict(list)
    counter = 0  # number of successful rounds

    for iloop in tqdm(range(iteration)):

        tdc_filtered_df = itable[0].reset_index()

        # Draw the random event subset for this round.
        n = int(sampling_fraction*tdc_filtered_df.shape[0])
        indices = np.random.choice(tdc_filtered_df['evt'].unique(), n, replace=False)
        tdc_filtered_df = tdc_filtered_df.loc[tdc_filtered_df['evt'].isin(indices)]

        if tdc_filtered_df.shape[0] < iteration/(3.*(1-sampling_fraction)):
            print('Warning!! Sampling size is too small. Skipping this track')
            break

        d = {
            'evt': tdc_filtered_df['evt'].unique(),
        }

        # Convert TDC codes to time using a bin size derived from the mean CAL
        # code of each board (presumably ns, matching the histogram axis label
        # below -- confirm against the helper/firmware documentation).
        for idx in board_to_analyze:
            bins = 3.125/tdc_filtered_df['cal'][idx].mean()
            d[f'toa_b{str(idx)}'] = 12.5 - tdc_filtered_df['toa'][idx] * bins
            d[f'tot_b{str(idx)}'] = (2*tdc_filtered_df['tot'][idx] - np.floor(tdc_filtered_df['tot'][idx]/32)) * bins

        df_in_time = pd.DataFrame(data=d)

        if(len(board_to_analyze)==3):
            corr_toas = helper.three_board_iterative_timewalk_correction(df_in_time, 5, 3, board_list=board_to_analyze)
        elif(len(board_to_analyze)==4):
            corr_toas = helper.four_board_iterative_timewalk_correction(df_in_time, 5, 3)
        else:
            print("You have less than 3 boards to analyze")
            break

        # Pairwise differences of the corrected TOAs, one per board pair a < b.
        diffs = {}
        for board_a in board_to_analyze:
            for board_b in board_to_analyze:
                if board_b <= board_a:
                    continue
                name = f"{board_a}{board_b}"
                diffs[name] = np.asarray(corr_toas[f'toa_b{board_a}'] - corr_toas[f'toa_b{board_b}'])

        hists = {}
        for key in diffs.keys():
            hists[key] = hist.Hist(hist.axis.Regular(80, -1.2, 1.2, name="TWC_delta_TOA", label=r'Time Walk Corrected $\Delta$TOA [ns]'))
            hists[key].fill(diffs[key])

        # A failed fit only invalidates this round, so any exception is
        # reported and the loop moves on. No `del` is used for clean-up: the
        # names are simply rebound on the next round, which avoids a NameError
        # inside the handler when the failure happens late in the block.
        try:
            fit_params_lmfit = {}
            for key in hists.keys():
                # Local name chosen so the polyfit result `params` from the
                # diagonal-cut cell is not overwritten.
                fit_result = helper.lmfit_gaussfit_with_pulls(diffs[key], hists[key], std_range_cut=0.4, width_factor=1.25, fig_title='',
                                                    chipNames='', use_pred_uncert=True, no_show_fit=False, no_draw=True, get_chisqure=False)
                fit_params_lmfit[key] = fit_result

            if(len(board_to_analyze)==3):
                resolutions = helper.return_resolution_three_board(fit_params_lmfit, var=list(fit_params_lmfit.keys()), board_list=board_to_analyze)
            elif(len(board_to_analyze)==4):
                resolutions = helper.return_resolution_four_board(fit_params_lmfit)
            else:
                print("You have less than 3 boards to analyze")
                break

            if any(np.isnan(val) for val in resolutions.values()):
                print('fit results is not good, skipping this iteration')
                continue

            for key, val in resolutions.items():
                resolution_samples[key].append(val)

            counter += 1

        except Exception as inst:
            print(inst)

    if counter != 0:
        for idx in board_to_analyze:
            final_dict[f'row{idx}'].append(itable[0]['row'][idx].unique()[0])
            final_dict[f'col{idx}'].append(itable[0]['col'][idx].unique()[0])

        for key, samples in resolution_samples.items():
            mean = np.mean(samples)
            # Sample standard deviation (ddof=1). It is undefined for a single
            # successful round, so NaN is stored instead of dividing by zero.
            std = np.std(samples, ddof=1) if counter > 1 else np.nan
            final_dict[f'res{key}'].append(mean)
            final_dict[f'err{key}'].append(std)
    else:
        print('Track is not validate for bootstrapping')

In [None]:
# Turn the column-wise bootstrap results into a table with one row per track.
final_df = pd.DataFrame(final_dict)

In [None]:
# Tag appended to the output CSV name to identify the selection. The TOA
# window is taken from toa_cut so the tag cannot drift from the cut that was
# actually applied. Swap in one of the commented lines for the other
# selections.
csv_tag = f"_trigTOA{toa_cut[0]}_{toa_cut[1]}_oneHitTrigDut1Ref_track2k_first2hours"
# csv_tag = f"_trigTOA{toa_cut[0]}_{toa_cut[1]}_oneHitTrigDut2Ref_track2k_first2hours"
# csv_tag = f"_trigTOA{toa_cut[0]}_{toa_cut[1]}_oneHitTrigDut1Dut2Ref_track2k_first2hours"

In [None]:
# Write the resolution table to a CSV named after the run configuration and
# the selection tag; the DataFrame index is not written.
final_df.to_csv(f'./{run_info}{csv_tag}_resolutions.csv', index=False)