# Imports

In [19]:
import pyxdf
import pandas as pd
import numpy as np
import sounddevice as sd
from glob import glob
from tqdm import tqdm
import datetime

# Load Data

In [20]:
sub_files = glob('../*/*.xdf')
sub_files

['../sub-P5318014/sub-P5318014_ses-S001_task-CUNY_run-001_mobi.xdf',
 '../sub-5182010/sub-P5182010_ses-S001_task-CUNY_run-001_mobi.xdf',
 '../sub-P5447527/sub-P5447527_ses-S001_task-CUNY_run-001_mobi.xdf',
 '../sub-5958030/sub-P5958030_ses-S001_task-CUNY_run-001_mobi.xdf',
 '../sub-P5548165/sub-P5548165_ses-S001_task-CUNY_run-001_MOBI.xdf']

In [21]:
xdf_path = sub_files[4]
data, header = pyxdf.load_xdf(xdf_path)
streams_collected = [stream['info']['name'][0] for stream in data]


'''
HELPERS
'''
def get_event_data(event, df, stim_df):
    return df.loc[(df.lsl_time_stamp >= stim_df.loc[stim_df.event == 'Onset_'+event, 'lsl_time_stamp'].values[0]) & 
                  (df.lsl_time_stamp <= stim_df.loc[stim_df.event == 'Offset_'+event, 'lsl_time_stamp'].values[0])]

def get_secs_between_triggers(trigger1, trigger2, stim_df):
    return stim_df.loc[stim_df.trigger == trigger1, 'time'].values[0] - stim_df.loc[stim_df.trigger == trigger2, 'time'].values[0]

                  
streams_collected

['Tobii',
 'Stimuli_Markers',
 'EGI NetAmp 0',
 'OpenSignals',
 'WebcamStream',
 'Microphone']

# Stimulus

In [22]:
stim_dat = data[streams_collected.index('Stimuli_Markers')]
stim_df = pd.DataFrame(stim_dat['time_series'])
stim_df.rename(columns={0: 'trigger'}, inplace=True)
events = {
    200: 'Onset_Experiment',
    10: 'Onset_RestingState',
    11: 'Offset_RestingState',
    500: 'Onset_StoryListening',
    501: 'Offset_StoryListening',
    100: 'Onset_10second_rest',
    101: 'Offset_10second_rest', 
    20: 'Onset_CampFriend',
    21: 'Offset_CampFriend',
    30: 'Onset_FrogDissection',
    31: 'Offset_FrogDissection',
    40: 'Onset_DanceContest',
    41: 'Offset_DanceContest',
    50: 'Onset_ZoomClass',
    51: 'Offset_ZoomClass',
    60: 'Onset_Tornado',
    61: 'Offset_Tornado',
    70: 'Onset_BirthdayParty',
    71: 'Offset_BirthdayParty',
    300: 'Onset_subjectInput',
    301: 'Offset_subjectInput',
    302: 'Onset_FavoriteStory',
    303: 'Offset_FavoriteStory',
    304: 'Onset_WorstStory',
    305: 'Offset_WorstStory',
    400: 'Onset_impedanceCheck',
    401: 'Offset_impedanceCheck',
    80: 'Onset_SocialTask',
    81: 'Offset_SocialTask',
    201: 'Offset_Experiment',
}

story_onsets = [20, 30, 40, 50, 60, 70]

# relabel the event if the trigger is in the events dictionary, else if 
stim_df['event'] = stim_df['trigger'].apply(lambda x: events[x] if x in events.keys() else 'Bx_input')

# relabel the event as a psychopy timestamp if the trigger is greater than 5 digits
stim_df.loc[stim_df.trigger.astype(str).str.len() > 5, 'event'] = 'psychopy_time_stamp'
stim_df['lsl_time_stamp'] = stim_dat['time_stamps']
stim_df['time'] = (stim_dat['time_stamps'] - stim_dat['time_stamps'][0])/1000
stim_df

Unnamed: 0,trigger,event,lsl_time_stamp,time
0,200,Onset_Experiment,690726.668239,0.000000e+00
1,1728397654,psychopy_time_stamp,690726.668251,1.269986e-08
2,10,Onset_RestingState,690748.380707,2.171247e-02
3,1728397676,psychopy_time_stamp,690748.380716,2.171248e-02
4,11,Offset_RestingState,691048.382576,3.217143e-01
...,...,...,...,...
256,1728400158,psychopy_time_stamp,693229.909103,2.503241e+00
257,201,Offset_Experiment,693234.914481,2.508246e+00
258,1728400163,psychopy_time_stamp,693234.914492,2.508246e+00
259,4,Bx_input,693234.914532,2.508246e+00


# Eye Tracking Data

In [23]:
ET = data[streams_collected.index('Tobii')]
et_dat = ET['time_series']
# Get the column names
column_labels = [ET['info']['desc'][0]['channels'][0]['channel'][i]['label'][0] for i in range(len(ET['info']['desc'][0]['channels'][0]['channel']))]

et_df = pd.DataFrame(data=et_dat, columns=column_labels)
et_df['lsl_time_stamp'] = ET['time_stamps']
et_df['time'] = (ET['time_stamps'] - ET['time_stamps'][0])/1000
et_df.columns

Index(['device_time_stamp', 'left_gaze_origin_validity',
       'right_gaze_origin_validity',
       'left_gaze_origin_in_user_coordinate_system_0',
       'left_gaze_origin_in_user_coordinate_system_1',
       'left_gaze_origin_in_user_coordinate_system_2',
       'right_gaze_origin_in_user_coordinate_system_0',
       'right_gaze_origin_in_user_coordinate_system_1',
       'right_gaze_origin_in_user_coordinate_system_2',
       'left_gaze_origin_in_trackbox_coordinate_system_0',
       'left_gaze_origin_in_trackbox_coordinate_system_1',
       'left_gaze_origin_in_trackbox_coordinate_system_2',
       'right_gaze_origin_in_trackbox_coordinate_system_0',
       'right_gaze_origin_in_trackbox_coordinate_system_1',
       'right_gaze_origin_in_trackbox_coordinate_system_2',
       'left_gaze_point_validity', 'right_gaze_point_validity',
       'left_gaze_point_in_user_coordinate_system_0',
       'left_gaze_point_in_user_coordinate_system_1',
       'left_gaze_point_in_user_coordinate_s

In [24]:
et_df

Unnamed: 0,device_time_stamp,left_gaze_origin_validity,right_gaze_origin_validity,left_gaze_origin_in_user_coordinate_system_0,left_gaze_origin_in_user_coordinate_system_1,left_gaze_origin_in_user_coordinate_system_2,right_gaze_origin_in_user_coordinate_system_0,right_gaze_origin_in_user_coordinate_system_1,right_gaze_origin_in_user_coordinate_system_2,left_gaze_origin_in_trackbox_coordinate_system_0,...,left_gaze_point_on_display_area_0,left_gaze_point_on_display_area_1,right_gaze_point_on_display_area_0,right_gaze_point_on_display_area_1,left_pupil_validity,right_pupil_validity,left_pupil_diameter,right_pupil_diameter,lsl_time_stamp,time
0,9.565256e+11,0.0,0.0,,,,,,,,...,,,,,0.0,0.0,,,690617.841304,0.000000
1,9.565256e+11,0.0,0.0,,,,,,,,...,,,,,0.0,0.0,,,690617.849638,0.000008
2,9.565256e+11,0.0,0.0,,,,,,,,...,,,,,0.0,0.0,,,690617.857972,0.000017
3,9.565256e+11,0.0,0.0,,,,,,,,...,,,,,0.0,0.0,,,690617.866305,0.000025
4,9.565256e+11,0.0,0.0,,,,,,,,...,,,,,0.0,0.0,,,690617.874639,0.000033
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187763,9.592304e+11,0.0,0.0,,,,,,,,...,,,,,0.0,0.0,,,693322.612947,2.704772
187764,9.592304e+11,0.0,0.0,,,,,,,,...,,,,,0.0,0.0,,,693322.621280,2.704780
187765,9.592304e+11,0.0,0.0,,,,,,,,...,,,,,0.0,0.0,,,693322.629614,2.704788
187766,9.592304e+11,0.0,0.0,,,,,,,,...,,,,,0.0,0.0,,,693322.637947,2.704797


# Physio Data

In [25]:
PS = data[streams_collected.index('OpenSignals')]
# Get the column names
column_labels = [PS['info']['desc'][0]['channels'][0]['channel'][i]['label'][0] for i in range(len(PS['info']['desc'][0]['channels'][0]['channel']))]
ps_df = pd.DataFrame(data=PS['time_series'], columns=column_labels)
ps_df['lsl_time_stamp'] = PS['time_stamps']
ps_df['time'] = (PS['time_stamps'] - PS['time_stamps'][0])/1000

# Microphone Data 

In [26]:
mic_data = data[streams_collected.index('Microphone')]
mic_df = pd.DataFrame(mic_data['time_series'], columns=['int_array'])
mic_df['bytestring'] = mic_df['int_array'].apply(lambda x: np.array(x).tobytes())
mic_df['duration'] = (mic_data['time_stamps'] - mic_data['time_stamps'][0])/441000

mic_df['lsl_time_stamp'] = mic_data['time_stamps']
mic_df['time_from_last'] = mic_df['lsl_time_stamp'].diff()

mic_df.head()

Unnamed: 0,int_array,bytestring,duration,lsl_time_stamp,time_from_last
0,159,b'\x9f\x00\x00\x00\x00\x00\x00\x00',0.0,691080.361552,
1,196,b'\xc4\x00\x00\x00\x00\x00\x00\x00',5.141967e-11,691080.361575,2.3e-05
2,204,b'\xcc\x00\x00\x00\x00\x00\x00\x00',1.028396e-10,691080.361597,2.3e-05
3,229,b'\xe5\x00\x00\x00\x00\x00\x00\x00',1.542593e-10,691080.36162,2.3e-05
4,238,b'\xee\x00\x00\x00\x00\x00\x00\x00',2.056792e-10,691080.361643,2.3e-05


# Video Data

In [27]:
# Identify the frames we're interested in
cam_data = data[streams_collected.index('WebcamStream')]

#cam_df = pd.DataFrame(cam['time_series'], columns=['frame'])
#cam_df['lsl_timestamps'] = cam['time_stamps']
#cam_df['time'] = cam_df['lsl_timestamps'] - cam_df['lsl_timestamps'][0]
cam_data['time_series'][0]
frame_nums = [int(i[0]) for i in cam_data['time_series']]
time_pre = [float(i[1]) for i in cam_data['time_series']]
time_evnt_ms = [float(i[2]) for i in cam_data['time_series']]
time_post = [float(i[3]) for i in cam_data['time_series']]


cam_df = pd.DataFrame({'frame_num': frame_nums, 
                    'time_pre': time_pre, 
                    'cap_time_ms': time_evnt_ms,
                    'time_post': time_post,
                    'lsl_time_stamp': cam_data['time_stamps']})



cam_df['frame_time_sec'] = (cam_df.cap_time_ms - cam_df.cap_time_ms[0])/1000
cam_df['lsl_time_sec'] = cam_df.lsl_time_stamp #- cam_df.lsl_time_stamp[0]
cam_df['time'] = (cam_df.lsl_time_stamp - cam_df.lsl_time_stamp[0])/1000

# EEG Data

In [28]:
eeg_dat = data[streams_collected.index('EGI NetAmp 0')]
eeg_df = pd.DataFrame(eeg_dat['time_series'])
eeg_df['lsl_time_stamp'] = eeg_dat['time_stamps']


# Durations for Each Experiment Part

In [29]:
streams = ['et', 'ps', 'mic', 'cam', 'eeg']

# get durations of certain experiment arm
def get_durations(ExperimentPart):

    # find expected duration
    exp_start = stim_df.loc[stim_df.event == 'Onset_'+ExperimentPart, 'lsl_time_stamp'].values[0]
    exp_end = stim_df.loc[stim_df.event == 'Offset_'+ExperimentPart, 'lsl_time_stamp'].values[0]
    exp_dur = round(exp_end - exp_start, 4)

    # expected mm:ss
    exp_dt = datetime.timedelta(seconds=exp_dur)
    exp_dt_dur = str(datetime.timedelta(seconds=round(exp_dt.total_seconds())))

    # make + populate df
    df = pd.DataFrame(columns = ['stream', 'duration', 'mm:ss', 'percent'])
    for i, stream in enumerate(streams):
        # don't include mic in resting state
        if ExperimentPart == 'RestingState' and stream == 'mic':
            continue
        # grab data for stream + experiment part
        event_data = get_event_data(ExperimentPart, globals()[stream+'_df'], stim_df)

        # print if no data
        if event_data.empty:
            df.loc[i] = [stream, 0, str(datetime.timedelta(seconds=0)), '0.00%']
            print(stream + ' has no ' + ExperimentPart + ' data') 
            continue
        # calculate duration
        start = event_data['lsl_time_stamp'].values[0]
        stop = event_data['lsl_time_stamp'].values[-1]
        dur = round(stop - start, 4)

        # calculate hh:mm:ss
        dt = datetime.timedelta(seconds=dur)
        dt_dur = str(datetime.timedelta(seconds=round(dt.total_seconds())))

        # calculate percent 
        percent = '{}%'.format(round(dur/exp_dur * 100, 2))
             
        df.loc[i] = [stream, dur, dt_dur, percent]

    # print which are short
    for i in df.iterrows():
        if i[1]['duration'] == 0:
            continue
        if i[1]['duration'] < (exp_dur - 5): # 5 second margin
            print(i[1]['stream'] + ' is shorter than expected for ' + ExperimentPart + ' by ' + str(round(exp_dur - i[1]['duration'], 2)) + ' seconds')
    
    # print df
    df.loc[df.index.max() + 1] = ['expected', exp_dur, exp_dt_dur, '100.0%']
    df.sort_values(by='duration', inplace=True)
    print('\n' + ExperimentPart + ' DataFrame')
    return df
    

In [30]:
get_durations('Experiment')

et is shorter than expected for Experiment by 1092.43 seconds
mic is shorter than expected for Experiment by 353.69 seconds

Experiment DataFrame


Unnamed: 0,stream,duration,mm:ss,percent
0,et,1415.8145,0:23:36,56.45%
2,mic,2154.5529,0:35:55,85.9%
3,cam,2508.2332,0:41:48,100.0%
1,ps,2508.2447,0:41:48,100.0%
4,eeg,2508.2454,0:41:48,100.0%
5,expected,2508.2462,0:41:48,100.0%


In [31]:
get_durations('RestingState')


RestingState DataFrame


Unnamed: 0,stream,duration,mm:ss,percent
0,et,299.9935,0:05:00,100.0%
3,cam,299.9953,0:05:00,100.0%
1,ps,300.0001,0:05:00,100.0%
4,eeg,300.0004,0:05:00,100.0%
5,expected,300.0019,0:05:00,100.0%


In [32]:
get_durations('StoryListening')

et is shorter than expected for StoryListening by 492.49 seconds
mic is shorter than expected for StoryListening by 31.98 seconds

StoryListening DataFrame


Unnamed: 0,stream,duration,mm:ss,percent
0,et,1094.0953,0:18:14,68.96%
2,mic,1554.6042,0:25:55,97.98%
3,cam,1586.5331,0:26:27,100.0%
1,ps,1586.5812,0:26:27,100.0%
4,eeg,1586.5823,0:26:27,100.0%
5,expected,1586.5831,0:26:27,100.0%


In [33]:
get_durations('SocialTask')

et has no SocialTask data

SocialTask DataFrame


Unnamed: 0,stream,duration,mm:ss,percent
0,et,0.0,0:00:00,0.00%
3,cam,300.4623,0:05:00,99.99%
1,ps,300.5025,0:05:01,100.0%
4,eeg,300.5035,0:05:01,100.0%
2,mic,300.5038,0:05:01,100.0%
5,expected,300.5038,0:05:01,100.0%


In [34]:
get_durations('CampFriend')


CampFriend DataFrame


Unnamed: 0,stream,duration,mm:ss,percent
3,cam,152.7995,0:02:33,99.97%
0,et,152.8468,0:02:33,100.0%
4,eeg,152.8493,0:02:33,100.0%
1,ps,152.8498,0:02:33,100.0%
2,mic,152.8509,0:02:33,100.0%
5,expected,152.8509,0:02:33,100.0%


In [35]:
streams = ['et', 'ps', 'mic', 'cam', 'eeg']

# get duration of entire recording for each stream
def whole_durations():
    df = pd.DataFrame(columns = ['stream', 'duration', 'mm:ss'])
  
    # populate df
    for i, stream in enumerate(streams):  
        duration = globals()[stream+'_df']['lsl_time_stamp'].iloc[-1]- globals()[stream+'_df']['lsl_time_stamp'].iloc[0]
        duration = round(duration, 4)
        # convert to mm:ss
        whole_dt = datetime.timedelta(seconds=duration)
        whole_dt_dur = str(datetime.timedelta(seconds=round(whole_dt.total_seconds())))
        df.loc[i] = [stream, duration, whole_dt_dur]
    
    df.sort_values(by = 'duration', inplace = True)

    # percent
    max_dur = df.duration.max()
    # df['percent'] = '{}%'.format(round((df.duration)/max_dur * 100, 2))
    df['percent'] = round(df['duration']/max_dur*100, 2).astype(str) + '%'
    # df['percent'] = df['duration'].map(lambda x: '{:.2%}'.format(x/max_dur))

    # print which are short
    for i in df.iterrows():
        if i[1]['duration'] == 0:
            continue
        if i[1]['duration'] < (max_dur - 30): # 30 second margin
            print(i[1]['stream'] + ' is shorter than expected by ' + str(round(max_dur - i[1]['duration'], 2)) + ' seconds')
    
        
    df.sort_values(by = 'duration', inplace = True)
    return(df)


whole_durations()

mic is shorter than expected by 525.52 seconds
cam is shorter than expected by 115.81 seconds


Unnamed: 0,stream,duration,mm:ss,percent
2,mic,2190.1369,0:36:30,80.65%
3,cam,2599.8524,0:43:20,95.74%
0,et,2704.805,0:45:05,99.6%
4,eeg,2715.6456,0:45:16,100.0%
1,ps,2715.6606,0:45:16,100.0%
