In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
def import_and_select(calibration_folder, data_folder, run_n):
    """Load one run with its calibration tables and apply the event selection.

    Parameters
    ----------
    calibration_folder : folder forwarded to ``import_data`` for the calibration CSVs.
    data_folder : folder forwarded to ``import_data`` for the run CSV.
    run_n : run number forwarded to ``import_data``.

    Returns
    -------
    The result of ``select_events`` applied to the loaded run and pedestals
    (called with its default cut settings).
    """
    # import_data returns (pedestals, mips, run); the MIP table is not needed here.
    pedestal_table, _unused_mips, raw_run = import_data(calibration_folder, data_folder, run_n)
    return select_events(raw_run, pedestal_table)

In [4]:
def drop_columns(df):
    """Return ``df`` restricted to the analysis columns it actually contains.

    Columns from the keep-list that are absent from ``df`` are ignored, so the
    same helper can be used on the pedestal, MIP and run tables.
    """
    wanted = [
        'layer', 'strip', 'pedestal', 'pf_event',
        'adc_sum_end0', 'adc_sum_end1', 'end', 'mpv',
    ]
    present = df.columns.intersection(wanted)
    return df[present]

In [5]:
def import_data(calibration_folder, data_folder, run_n):
    """Read the pedestal, MIP and run CSV files and trim them with ``drop_columns``.

    Parameters
    ----------
    calibration_folder : folder containing ``pedestals.csv`` and ``mip.csv``.
    data_folder : folder containing ``run_<run_n>.csv``.
    run_n : run number used to build the run file name.

    Returns
    -------
    tuple ``(pedestals, mips, run)`` of trimmed DataFrames.
    """
    def _load(csv_path):
        # All three files are comma-separated and get the same column trimming.
        return drop_columns(pd.read_csv(csv_path, sep=','))

    pedestal_table = _load(calibration_folder+"/pedestals.csv")
    mip_table = _load(calibration_folder+"/mip.csv")
    run_table = _load(data_folder+"/run_"+str(run_n)+".csv")
    return pedestal_table, mip_table, run_table

In [6]:
def choose_bar(df,layer, strip):
    """Return the rows of ``df`` belonging to one bar, i.e. one (layer, strip) pair."""
    on_strip = df["strip"]==strip
    on_layer = df["layer"]==layer
    return df[on_strip & on_layer]

In [7]:
def choose_layer(df,layer):
    """Return the rows of ``df`` whose ``layer`` column equals ``layer``."""
    in_layer = df['layer'].eq(layer)
    return df[in_layer]

In [8]:
# Uses the 1.2x pedestal criterion to remove "false" hits; essentially cut 0.
# Deliberately has no switch for alternative methods here - if one is needed it
# belongs somewhere else in the pipeline.
def confirm_events(df, pedestals):
    """Keep only hits whose two end readouts both exceed 1.2x the bar's pedestal.

    Parameters
    ----------
    df : DataFrame of hits with ``layer``, ``strip``, ``adc_sum_end0`` and
        ``adc_sum_end1`` columns. The result is rebuilt from the raw values and
        relabelled positionally, so ``df`` must carry exactly five columns in
        the order (event id, adc_sum_end0, layer, strip, adc_sum_end1).
    pedestals : DataFrame with ``layer`` and ``strip`` columns. For every bar the
        thresholds are read from the LAST column of its first row (compared with
        ``adc_sum_end0``) and of its second row (compared with ``adc_sum_end1``).
        NOTE(review): presumably the rows are ordered end 0, end 1 and the last
        column is the pedestal value - confirm against the calibration file.

    Returns
    -------
    DataFrame with columns ``event, adc_sum_end0, layer, strip, adc_sum_end1``
    and a fresh integer index. Only bars with layer 1..19 and strip 0..11 are
    kept; rows are ordered by (layer, strip) and keep their original relative
    order inside each bar. Bars without any pedestal entry pass through
    unfiltered.

    Raises
    ------
    IndexError
        If a bar that has hits in ``df`` has only a single pedestal row.
    """
    valid_layers = set(range(1, 20))
    valid_strips = set(range(0, 12))
    bar_keys = ['layer', 'strip']

    # Index the pedestal table by bar once instead of re-scanning it per bar.
    pedestal_by_bar = {bar: rows for bar, rows in pedestals.groupby(bar_keys)}

    confirmed_data = []
    # A single grouped pass replaces 19 x 12 full scans of the hit table.
    # groupby yields bars in ascending (layer, strip) order and preserves the
    # original row order within each bar, which is exactly the ordering the
    # nested layer/strip loops used to produce.
    for (layer, strip), df_slice in df.groupby(bar_keys):
        if layer not in valid_layers or strip not in valid_strips:
            continue
        pedestal_slice = pedestal_by_bar.get((layer, strip))
        if pedestal_slice is not None:
            # Both ends are required to register the hit; there could be a
            # switch for requiring only one of them.
            threshold_end0 = 1.2*pedestal_slice.iloc[0,-1]
            threshold_end1 = 1.2*pedestal_slice.iloc[1,-1]
            df_slice = df_slice[df_slice["adc_sum_end0"]>threshold_end0]
            df_slice = df_slice[df_slice["adc_sum_end1"]>threshold_end1]
        confirmed_data.extend(df_slice.values.tolist())

    confirmed_df = pd.DataFrame(confirmed_data, columns=['event', 'adc_sum_end0', 'layer', 'strip', 'adc_sum_end1'])
    events_left = confirmed_df.event.unique()
    print("Initial pedestal-based selection performed.")
    print("Events left: "+str(len(events_left)))
    print("\n")
    return confirmed_df

In [50]:
def first_layer_cut(df):
    """Remove every event that has more than one hit in layer 1.

    Expects ``event`` and ``layer`` columns. All rows of an offending event are
    dropped, not only its layer-1 hits. Prints the number of surviving events
    and returns the filtered DataFrame.
    """
    # Number of layer-1 hits recorded for each event.
    hits_per_event = df.loc[df['layer']==1, 'event'].value_counts()
    crowded_events = hits_per_event[hits_per_event>1].index.tolist()
    survivors = df[~df['event'].isin(crowded_events)]

    n_left = len(survivors['event'].unique())
    print("Events with multiple hits in first layer removed.")
    print("Events left: "+str(n_left))
    print("\n")
    return survivors

In [10]:
def back_layer_cut(df, b_num):
    """Remove every event that has at least one hit in the last ``b_num`` layers.

    A hit counts as "back" when its ``layer`` is greater than ``19 - b_num``.
    All rows of an offending event are dropped. Prints the number of surviving
    events and returns the filtered DataFrame.
    """
    in_back = df['layer']>19-b_num
    # Distinct (non-missing) event ids that reach the back layers.
    back_events = df.loc[in_back, 'event'].dropna().unique()
    survivors = df[~df['event'].isin(back_events)]

    n_left = len(survivors['event'].unique())
    print("Events with hits in the back " +str(b_num)+" layers removed.")
    print("Events left: "+str(n_left))
    print("\n")
    return survivors

In [47]:
def select_events(df,pedestals,cut_first_layer=True, cut_back_layers=True, back_layers=5):
    """Run the event-selection chain on a run table.

    Steps, in order: pedestal-based confirmation (``confirm_events``), then
    optionally the first-layer cut and the back-layer cut.

    Parameters
    ----------
    df : run DataFrame; must have a ``pf_event`` column.
    pedestals : pedestal table passed to ``confirm_events``.
    cut_first_layer : apply ``first_layer_cut`` when equal to ``True``.
    cut_back_layers : apply ``back_layer_cut`` when equal to ``True``.
    back_layers : number of back layers handed to ``back_layer_cut``.

    Returns
    -------
    The DataFrame left after all enabled cuts.
    """
    n_initial = len(df.pf_event.unique())
    print("Number of events: "+str(n_initial))
    print("\n")

    selected = confirm_events(df, pedestals)
    # The flags are compared with True rather than truth-tested, so a truthy
    # value that does not equal True leaves the corresponding cut disabled.
    if cut_first_layer == True:
        selected = first_layer_cut(selected)
    if cut_back_layers == True:
        selected = back_layer_cut(selected, back_layers)
    return selected

In [48]:
# Inputs for the selection run below: calibration folder, run-data folder
# (both relative paths) and the run number to process.
c_folder="../../data/calibration"
d_folder="../../data/runData"
r_n=300

In [49]:
%%time
# Load the configured run, apply the selection, and keep the result in `clean`;
# the %%time cell magic reports how long the whole cell took.
clean=import_and_select(c_folder,d_folder,r_n)

Number of events: 71416


Initial pedestal-based selection performed.
Events left: 71414


Events with multiple hits in first layer removed.
Events left: 50834


Events with hits in the back 5 layers removed.
Events left: 41643


CPU times: user 25.6 s, sys: 3.88 s, total: 29.5 s
Wall time: 29.9 s
