In [166]:
import numpy as np
import pandas as pd
pd.options.display.max_rows = 4000
import datetime as dt
import re

# Data Cleaning

Read in the text files with the timestamps and triggers

In [167]:
flags1 = pd.read_csv('akram_files/timing_test_events1.txt', delimiter = '\t',\
                   header = None, names = ['pin_num', 'label', 'date', 'time'])
flags2 = pd.read_csv('akram_files/timing_test_events2.txt', delimiter = '\t',\
                   header = None, names = ['pin_num', 'label', 'date', 'time'])

Combine the two text files

In [168]:
flags = pd.concat([flags1, flags2])
flags.tail()

Unnamed: 0,pin_num,label,date,time
410,sync summary event # 6,describe_trial_choice_start,03/06/2023,04:06:46.555 PM
411,sync summary event # 128,async summary event 128,03/06/2023,04:06:46.586 PM
412,sync summary event # 10,describe_trial_choice_end,03/06/2023,04:06:47.732 PM
413,sync summary event # 14,fixation,03/06/2023,04:06:48.767 PM
414,sync summary event # 17,end_experiment,03/06/2023,04:06:49.810 PM


## Deal with Time

Transform the timestamps into Timestamp objects

In [169]:
flags['time'] = pd.to_datetime(flags['time'])
flags.head()

Unnamed: 0,pin_num,label,date,time
0,BioLab,Acquisition Started,03/06/2023,2023-03-16 15:35:06.294
1,sync summary event # 13,start_experiment,03/06/2023,2023-03-16 15:35:17.438
2,sync summary event # 128,async summary event 128,03/06/2023,2023-03-16 15:35:17.469
3,sync summary event # 2,describe_trial_start,03/06/2023,2023-03-16 15:35:26.346
4,sync summary event # 128,async summary event 128,03/06/2023,2023-03-16 15:35:26.417


Create a new variable with the time in seconds with decimal places down to microseconds

In [170]:
microsec = []
for row in flags.time:
    microsec.append(float(row.strftime('%S.%f')))
flags['sec_microsec'] = microsec
flags.head()

Unnamed: 0,pin_num,label,date,time,sec_microsec
0,BioLab,Acquisition Started,03/06/2023,2023-03-16 15:35:06.294,6.294
1,sync summary event # 13,start_experiment,03/06/2023,2023-03-16 15:35:17.438,17.438
2,sync summary event # 128,async summary event 128,03/06/2023,2023-03-16 15:35:17.469,17.469
3,sync summary event # 2,describe_trial_start,03/06/2023,2023-03-16 15:35:26.346,26.346
4,sync summary event # 128,async summary event 128,03/06/2023,2023-03-16 15:35:26.417,26.417


## Isolate Triggers

Label the photo trigger

In [171]:
flags['label'] = flags['label'].replace('async summary event 128', 'photo_trigger')
flags.head()

Unnamed: 0,pin_num,label,date,time,sec_microsec
0,BioLab,Acquisition Started,03/06/2023,2023-03-16 15:35:06.294,6.294
1,sync summary event # 13,start_experiment,03/06/2023,2023-03-16 15:35:17.438,17.438
2,sync summary event # 128,photo_trigger,03/06/2023,2023-03-16 15:35:17.469,17.469
3,sync summary event # 2,describe_trial_start,03/06/2023,2023-03-16 15:35:26.346,26.346
4,sync summary event # 128,photo_trigger,03/06/2023,2023-03-16 15:35:26.417,26.417


Remove irrelevant triggers

In [172]:
def clean_the_triggers(df):
    ''' 
    Keep only rows with triggers that are followed by a photo trigger.

    Arguments:
        df: dataframe with triggers

    Returns: dataframe with triggers that have corresponding
        photo triggers only.
    '''

    clean_triggers = pd.DataFrame({'pin_num': [], 'label': [], 'date': [], \
                                   'time': [], 'sec_microsec': []})
    for loc in range(0, df.shape[0] - 1):
        if df.iloc[loc + 1].label == 'photo_trigger':
            clean_triggers = pd.concat([clean_triggers, df.iloc[[loc, loc+1],]])
    return clean_triggers

In [173]:
clean_triggers = clean_the_triggers(flags)
clean_triggers.head()

Unnamed: 0,pin_num,label,date,time,sec_microsec
1,sync summary event # 13,start_experiment,03/06/2023,2023-03-16 15:35:17.438,17.438
2,sync summary event # 128,photo_trigger,03/06/2023,2023-03-16 15:35:17.469,17.469
3,sync summary event # 2,describe_trial_start,03/06/2023,2023-03-16 15:35:26.346,26.346
4,sync summary event # 128,photo_trigger,03/06/2023,2023-03-16 15:35:26.417,26.417
5,sync summary event # 6,describe_trial_choice_start,03/06/2023,2023-03-16 15:35:36.385,36.385


In [174]:
clean_triggers['label'].unique()

array(['start_experiment', 'photo_trigger', 'describe_trial_start',
       'describe_trial_choice_start', 'imagine_trial_start',
       'imagine_trial_choice_start', 'describe_trial_choice_end',
       'describe_writeout_start', 'describe_writeout_choice_start',
       'imagine_writeout_start', 'imagine_writeout_choice_start',
       'imagine_writeout_choice_end', 'describe_writeout_choice_end'],
      dtype=object)

# Timing Test

Helper functions

In [175]:
def subset_triggers(df, label_of_interest):
    '''
    Pull out rows from original df with
      triggers of interest and their corresponding photo triggers.
    
    Arguments:
        df: dataframe with all the triggers
        label_of_interest: trigger label to pull out.

    Returns:
        Dataframe with the data for the trigger of interest and 
        their corresponding photo triggers.
    '''
    subset_df = pd.DataFrame({'pin_num': [], 'label': [], 'date': [], \
                              'time': [], 'sec_microsec': []})
    for loc in range(0, df.shape[0] - 1):
        if df.iloc[loc].label == label_of_interest:
            subset_df = pd.concat([subset_df, df.iloc[[loc, loc+1],]])

    return subset_df

In [176]:
def find_stats(df):
    ''' 
    Find the standard deviation of the differences between the triggers and the 
    corresponding photo triggers.

    Arguments:
        df: dataframe with only one trigger type
    
    Returns: standard deviation (float) of the differences.
    '''

    # compute the differences
    diffs = []
    for loc in range(0, df.shape[0] - 1):
        if df.iloc[loc].label != "photo_trigger" and \
            df.iloc[loc + 1].label == "photo_trigger":
            diff = df.iloc[loc + 1].sec_microsec- df.iloc[loc].sec_microsec
            diffs.append(diff)
    
    # compute the mean and sd
    M = np.mean(diffs)
    SD = np.std(diffs)

    return (M, SD)

## Find Means and SDs for each trigger type

In [177]:
labels = ['describe_trial_start',
       'describe_trial_choice_start', 'imagine_trial_start',
       'imagine_trial_choice_start',
       'describe_writeout_start', 'describe_writeout_choice_start',
       'imagine_writeout_start', 'imagine_writeout_choice_start']
means = {}
SDs = {}
for label in labels:
    temp_df = subset_triggers(clean_triggers, label)
    M, SD = find_stats(temp_df)
    means[label] = M
    SDs[label] = SD

In [178]:
means

{'describe_trial_start': 0.08198214285714253,
 'describe_trial_choice_start': 0.03838181818181807,
 'imagine_trial_start': 0.04566666666666659,
 'imagine_trial_choice_start': 0.03745614035087713,
 'describe_writeout_start': 0.06533333333333251,
 'describe_writeout_choice_start': 0.03933333333333322,
 'imagine_writeout_start': 0.03554545454545422,
 'imagine_writeout_choice_start': 0.03737500000000038}

In [179]:
SDs

{'describe_trial_start': 0.02141052067111329,
 'describe_trial_choice_start': 0.007966956551545239,
 'imagine_trial_start': 0.009205896915113546,
 'imagine_trial_choice_start': 0.00879806632550677,
 'describe_writeout_start': 0.017518244457960984,
 'describe_writeout_choice_start': 0.010241527663825562,
 'imagine_writeout_start': 0.00857441899821914,
 'imagine_writeout_choice_start': 0.009860749210886038}