# Making a Drift Model Analysis Class

In [None]:
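""" Tools for a drift model analysis of tracker data.

This module contains two pieces:

* ``process_data`` converts UDD format data (a root file) into a csv file with one row per tracker hit.
* ``AnalyzeDrift`` holds the drift model parameters (a, b, tx) and the helper methods used to turn a measured
  drift time into a radius.

TODO: expand this introduction and explain in more detail what the module should be used for.
"""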

In [1]:
import numpy as np
import time as time
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import uproot

In [14]:
def process_data(file_name, num_events, all_events=False, verbose=True, name='events.csv', index=False):
    """ This function takes UDD format data (root file) and converts it to a pandas dataframe style csv file.

    Each row of the csv file is one tracker hit, with the columns 'Event', 'ID', 'Side', 'Layer', 'Column', 'R0' and
    'Trigger_times'. Nothing is returned; the result is only written to disk.

    :param file_name: Raw (commissioning) data, must be a root file.
    :type file_name: str
    :param num_events: Integer that indicates how many events you would like to process. If it is larger than the number
                       of events in the file, every event in the file is processed. Runtime grows with this number.
    :type num_events: int
    :param all_events: Option to process all events. WARNING: this can take a long time to run. If you choose
                       all_events=True, the num_events parameter will become obsolete.
    :type all_events: bool, optional
    :param verbose: Indicates whether you would like the function to print "n of N complete" after each event has been
                    processed. This will also print the time it takes for the function to run.
    :type verbose: bool, optional
    :param name: Determines what the resulting csv file will be called, default is 'events.csv'.
    :type name: str, optional
    :param index: Default of False does not include the index as a column in the csv file.
    :type index: bool, optional
    :raises IndexError: If a processed event has no calorimeter timestamp (the first one is used as the trigger time).
    """

    start = time.time()

    # Read every branch exactly once, up front. Each .array() call loads the whole branch (all events), so doing it
    # inside the event loop repeats the same expensive read for every single event.
    with uproot.open(file_name) as data:
        tree = data['SimData;1']
        id_branch = tree['digitracker.id'].array(library='np')  # Hit IDs
        side_branch = tree['digitracker.side'].array(library='np')  # French side = 1, Italian = 0
        layer_branch = tree['digitracker.layer'].array(library='np')  # Layer 0 near source foil, layer 8 by calo
        column_branch = tree['digitracker.column'].array(library='np')  # Column 0 on mountain side, col 112 on tunnel side
        R0_branch = tree['digitracker.anodetimestampR0'].array(library='np')  # R0 timestamps from anode (clock ticks),
                                                                              #  tail end of drift time
        calo_branch = tree['digicalo.timestamp'].array(library='np')  # calorimeter timestamps (clock ticks)

    total_events = len(id_branch)
    if all_events:
        n_events = total_events
    else:
        # never ask for more events than the file contains
        n_events = min(num_events, total_events)

    # initializing lists to be added as data in the final dataframe
    event_nums = []
    ids = []
    sides = []
    layers = []
    columns = []
    R0s = []
    trigger_times = []

    # loop to add all the datapoints into entries in the lists -- converts the root 'branch' format to a single
    # list for each variable (so different events no longer have separate arrays)
    for n in range(n_events):
        ev_ids = id_branch[n]
        num_hits = len(ev_ids)  # indicates how many hits occur in the given event

        # using the first calorimeter timestamp arbitrarily as the trigger time. The calorimeter clock runs twice as
        # fast as the tracker clock, so divide the timestamp by 2 and include an offset of - 44 ticks due to 125 ns
        # offset of calorimeter timestamp compared to the signal peak
        trigger_time = (calo_branch[n][0] - 44)/2

        # appending data to respective lists
        # use .extend() to append lists rather than individual items
        event_nums.extend([n] * num_hits)  # all hits in one event share the same event number
        trigger_times.extend([trigger_time] * num_hits)
        sides.extend(side_branch[n])
        layers.extend(layer_branch[n])
        columns.extend(column_branch[n])
        R0s.extend([r for [r] in R0_branch[n]])  # each entry is of the form [#], so get rid of the extra brackets
        ids.extend(ev_ids)

        if verbose:
            print(n + 1, 'of', n_events, 'complete')

    event_df = pd.DataFrame(data={'Event': event_nums, 'ID': ids, 'Side': sides, 'Layer': layers, 'Column': columns,
                                  'R0': R0s, 'Trigger_times': trigger_times})
    event_df.to_csv(name, index=index)

    end = time.time()

    if verbose:
        print('Runtime:', (end - start)/60, 'minutes')

In [13]:
class AnalyzeDrift():
    """ Holds a table of tracker hits and the helper methods of the drift model (drift time -> radius).

    :param file_name: csv file of hits, read with ``pandas.read_csv``. NOTE(review): presumably the csv file written by
                      ``process_data`` -- confirm.
    :type file_name: str
    :param index: Currently unused; kept so that existing calls keep working. TODO: document what it is meant for.
    :type index: int, optional
    """

    # Dictionaries!

    # ab_vals contains information on the parameters a and b that determine radius calculation and the value
    # tx that gives the threshold for a particle being in the 'inner' or 'outer' region of the cell. a, b, and tx
    # are pressure-dependent, which is why each entry has three tuples of three values.
    # It is a class attribute so that every method can reach it through self.

    # 'entry' : [(a_850, b_850, tx_850), (a_880, b_880, tx_880), (a_910, b_910, tx_910)]

    ab_vals = {'center_in': [(8.28, -0.9, 2.95), (8.53, -0.9, 2.97), (8.77, -0.9, 3.07)],
               'center_out': [(3.86, -1.99, 2.95), (4.19, -1.93, 2.97), (4.55, -1.9, 3.06)],
               'edge_in': [(8.05, -0.9, 3.73), (8.35, -0.92, 4.15), (8.56, -0.9, 4.12)],
               'edge_out': [(3.34, -2.04, 3.73), (3.39, -2.07, 4.15), (4.03, -1.91, 4.12)],
               'corner_in': [(7.66, -0.87, 3.34), (7.92, -0.87, 3.45), (8.16, -0.87, 3.59)],
               'corner_out': [(5.18, -1.4, 3.34), (4.94, -1.48, 3.45), (5.25, -1.45, 3.59)]
              }

    # position of each supported pressure inside the lists of ab_vals
    _pressure_index = {850: 0, 880: 1, 910: 2}

    def __init__(self, file_name, index=42):

        # add some data prep if needed
            # calculate drift time and radius here and add it to the dataframe automatically
            # maybe add options for more complex radius calculations?

        # list attributes below (initialize with = None if needed)
        self.original_df = pd.read_csv(file_name)  # the hits exactly as they are stored in the csv file
        self.drift_df = None  # placeholder, nothing fills this in yet

    #####################################################################################################################################

    def _lookup(self, cell_type, pressure):
        """ Returns the (a, b, tx) tuple of ab_vals for the given cell type and pressure.

        :param cell_type: An entry of the ab_vals dictionary, e.g. 'center_in'.
        :type cell_type: str
        :param pressure: The pressure within the tracker (850, 880, or 910).
        :type pressure: float
        :raises ValueError: If the cell type or the pressure is not one of the tabulated values.
        """
        if cell_type not in self.ab_vals:
            raise ValueError('unknown cell type %r, expected one of %s' % (cell_type, sorted(self.ab_vals)))
        if pressure not in self._pressure_index:
            raise ValueError('unsupported pressure %r, expected one of %s' % (pressure, sorted(self._pressure_index)))
        return self.ab_vals[cell_type][self._pressure_index[pressure]]

    def define_io(self, t_drift, tx):
        """ Defines whether the particle is in the inner or outer section of the cell.

        :param t_drift: The measured drift time (t1 - t0)
        :type t_drift: float
        :param tx: The reference value of t
        :type tx: float
        :return: True (inner) if t_drift is not larger than tx, False (outer) otherwise.
        :rtype: bool
        """
        return not t_drift > tx

    def find_params(self, inner, region, pressure):
        """ Consults the ab_vals dictionary to determine a and b parameter values.

        :param inner: Defines where the particle is within the cell.
        :type inner: bool
        :param region: Defines the cell region as 'center', 'edge', or 'corner'.
        :type region: str
        :param pressure: Defines the pressure within the tracker (850, 880, or 910).
        :type pressure: float
        :return: The tuple (a, b, tx) for this cell type and pressure.
        :rtype: tuple
        :raises ValueError: If the region or the pressure is not one of the tabulated values.
        """
        # strings have no .append(), so the dictionary key is built by concatenation
        cell_type = region + ('_in' if inner else '_out')
        return self._lookup(cell_type, pressure)

    # since defining the above function, can remove cell_type and pressure from calc_radius and just reference find_params instead

    def calc_radius(self, t_drift, cell_type='center_in', pressure=880):
        """ Calculates the radius of the particle based on drift time and cell conditions.

        :param t_drift: The measured drift time (t1 - t0)
        :type t_drift: float
        :param cell_type: indicates what type of cell the particle is in, references an entry in the ab_vals dictionary.
        :type cell_type: str, optional
        :param pressure: indicates the pressure in the tracking chamber, determines which tuple in the cell type dictionary
                         entry should be used to define a and b.
        :type pressure: float, optional
        :return: The radius (t_drift / a)**(1 / (1 + 0.9)).
        :rtype: float
        :raises ValueError: If the cell type or the pressure is not one of the tabulated values.
        """
        a, b, _tx = self._lookup(cell_type, pressure)

        # NOTE(review): b is looked up but the exponent uses a hard-coded 0.9. This matches -b only for the entries
        # where b == -0.9 (e.g. all 'center_in' values) -- confirm whether the exponent should depend on b.
        rad = (t_drift / a)**(1 / (1 + 0.9))
        return rad
