In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [97]:
# Folder holding the calibration tables read by import_data (pedestals.csv and mip.csv).
calibration_folder="../../data/calibration"
# Folder holding the per-run files read by import_data (run_<n>.csv).
data_folder="../../data/runData"
# Number of the run to load; import_data turns it into the run_<n>.csv file name.
run_n=300

In [3]:
# Layer numbers 1..19 and strip numbers 0..11 (np.arange excludes the stop value).
# confirm_events iterates over every (layer, strip) pair built from these two arrays.
layers=np.arange(1,20)
strips=np.arange(0,12)

In [13]:
def drop_columns(df):
    """Return `df` restricted to the columns this notebook works with.

    Columns of `df` that are not on the keep-list are discarded; keep-list
    names that `df` does not contain are silently ignored.
    """
    wanted = ['layer', 'strip', 'pedestal', 'pf_event', 'adc_sum_end0', 'adc_sum_end1', 'end', 'mpv']
    present = df.columns.intersection(wanted)
    return df[present]

In [14]:
def import_data(n):
    """Load the pedestal table, the MIP table and the data of run `n`.

    The calibration files are read from the notebook-level
    `calibration_folder`, the run file from `data_folder`. Every table is
    passed through drop_columns before it is returned.

    Returns a (pedestals, mips, run) tuple of DataFrames.
    """
    ped_path = calibration_folder + "/pedestals.csv"
    mip_path = calibration_folder + "/mip.csv"
    run_path = data_folder + "/run_" + str(n) + ".csv"

    # Read in the same order as the returned tuple: pedestals, mips, run.
    tables = [drop_columns(pd.read_csv(path, sep=',')) for path in (ped_path, mip_path, run_path)]
    return tuple(tables)

In [5]:
def choose_bar(df,layer, strip):
    """Return the rows of `df` that belong to one bar, i.e. one (layer, strip) pair."""
    in_bar = (df["strip"] == strip) & (df["layer"] == layer)
    return df[in_bar]

In [6]:
def choose_layer(df,layer):
    """Return only the rows of `df` recorded in the given layer."""
    layer_mask = df['layer'].eq(layer)
    return df.loc[layer_mask]

In [109]:
# Uses the 1.2x pedestal criterion to remove "false" hits; essentially cut 0.
# Deliberately no switch for other selection methods here - that belongs in the caller.
def confirm_events(df, pedestals=None, ped_factor=1.2):
    """Keep only hits whose ADC sum exceeds `ped_factor` x pedestal on both bar ends.

    Parameters
    ----------
    df : pandas.DataFrame
        Run data with the columns 'pf_event', 'adc_sum_end0', 'layer',
        'strip' and 'adc_sum_end1'.
    pedestals : pandas.DataFrame, optional
        Pedestal table with 'layer' and 'strip' columns. When omitted, the
        notebook-level `ped` frame is used, so existing `confirm_events(df)`
        calls behave as before.
    ped_factor : float, optional
        Multiplier applied to the pedestal to obtain the threshold.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, ordered by layer and then strip, with a fresh
        0..n-1 index and 'pf_event' renamed to 'event'. Column dtypes of the
        input are preserved.

    Notes
    -----
    * Only bars whose layer/strip appear in the notebook-level `layers` and
      `strips` arrays are kept; hits in any other bar are dropped.
    * Bars that have no pedestal entry are passed through without a cut.
    """
    if pedestals is None:
        pedestals = ped  # notebook-level table produced by import_data

    known_layers = set(layers)
    known_strips = set(strips)

    # Split the pedestal table per bar once instead of rescanning it for every bar.
    # sort=False and groupby itself keep the original row order inside each bar.
    ped_by_bar = {
        bar: rows
        for bar, rows in pedestals.groupby(['layer', 'strip'], sort=False)
    }

    kept = []
    # A single pass over the run data; sort=True yields bars by layer, then strip.
    for (layer, strip), bar_hits in df.groupby(['layer', 'strip'], sort=True):
        if layer not in known_layers or strip not in known_strips:
            continue
        bar_ped = ped_by_bar.get((layer, strip))
        if bar_ped is not None:
            # NOTE(review): positional lookup - assumes the first pedestal row of a bar
            # is end 0, the second is end 1, and the pedestal value is the last column.
            # TODO confirm against pedestals.csv.
            end0_min = ped_factor * bar_ped.iloc[0, -1]
            end1_min = ped_factor * bar_ped.iloc[1, -1]
            # Both ends must register the hit.
            both_ends = (bar_hits["adc_sum_end0"] > end0_min) & (bar_hits["adc_sum_end1"] > end1_min)
            bar_hits = bar_hits[both_ends]
        if not bar_hits.empty:
            kept.append(bar_hits)

    if kept:
        confirmed_df = pd.concat(kept, ignore_index=True)
    else:
        # pd.concat refuses an empty list; return an empty frame with the same columns.
        confirmed_df = df.iloc[0:0].reset_index(drop=True)
    # Rename by label rather than by position so columns can never be mislabelled.
    confirmed_df = confirmed_df.rename(columns={'pf_event': 'event'})

    events_left=confirmed_df.event.unique()
    print("Initial pedestal-based selection performed.")
    print("Events left: "+str(len(events_left)))
    print("\n")
    return confirmed_df

In [110]:
def first_layer_cut(df):
    """Remove every event that has more than one hit in layer 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Hit table with at least the columns 'event' and 'layer'.

    Returns
    -------
    pandas.DataFrame
        The rows of `df` whose event has at most one layer-1 row. Events
        without any layer-1 row are kept.
    """
    # Number of layer-1 rows per event.
    first_layer_hits = df[df['layer'] == 1].groupby('event').size()
    events_to_remove = first_layer_hits[first_layer_hits > 1].index
    df = df[~df['event'].isin(events_to_remove)]

    events_left = df.event.unique()
    print("Events with multiple hits in first layer removed.")
    print("Events left: "+str(len(events_left)))
    print("\n")
    return df

In [111]:
def back_layer_cut(df, b_num):
    """Discard every event that has a hit in any of the last `b_num` layers.

    The back region is every layer number above 19 - b_num; all rows of an
    event with at least one hit there are removed.
    """
    last_front_layer = 19 - b_num
    back_hits = df[df['layer'] > last_front_layer]
    vetoed_events = back_hits.groupby('event').size().index
    keep_row = ~df['event'].isin(vetoed_events)
    df = df[keep_row]

    n_left = len(df.event.unique())
    print("Events with hits in the back " +str(b_num)+" layers removed.")
    print("Events left: "+str(n_left))
    print("\n")
    return df

In [115]:
def select_events(df,cut_first_layer=True, cut_back_layers=True, back_layers=7):
    """Run the event-selection chain on a run table and report the event count after each step.

    Parameters
    ----------
    df : pandas.DataFrame
        Run data; must contain a 'pf_event' column (used for the initial count).
    cut_first_layer : bool, optional
        When true, apply first_layer_cut after the pedestal-based selection.
    cut_back_layers : bool, optional
        When true, apply back_layer_cut with `back_layers`.
    back_layers : int, optional
        Number of back layers handed to back_layer_cut.

    Returns
    -------
    pandas.DataFrame
        The table returned by the last selection step that was applied.
    """
    print("Number of events: "+str(len(df.pf_event.unique())))
    print("\n")
    # The pedestal-based selection always runs; the two cuts below are optional.
    df=confirm_events(df)
    if cut_first_layer:
        df=first_layer_cut(df)
    if cut_back_layers:
        df=back_layer_cut(df,back_layers)
    return df

In [116]:
%%time
# Load the pedestal table, the MIP table and the data of run `run_n`.
# `ped` is read as a notebook-level global by confirm_events.
ped, mip, ru=import_data(run_n)

CPU times: user 11.2 s, sys: 1.99 s, total: 13.2 s
Wall time: 13.3 s


In [117]:
%%time
# NOTE: rebinding `ru` makes this cell non-idempotent. select_events reads the 'pf_event'
# column, but the frame it returns carries 'event' instead, so running this cell a second
# time fails unless `ru` is reloaded with import_data first.
ru=select_events(ru)

Number of events: 71416


Initial pedestal-based selection performed.
Events left: 71414


Events with multiple hits in first layer removed.
Events left: 66783


Events with hits in the back 7 layers removed.
Events left: 38453


CPU times: user 11.5 s, sys: 1.39 s, total: 12.9 s
Wall time: 12.9 s
