# Imports

In [1]:
import pyxdf
import pandas as pd
import numpy as np
import sounddevice as sd
from glob import glob
from tqdm import tqdm
import librosa
import datetime


# Load Data

In [2]:
sub_files = glob('../*/*.xdf')
sub_files

['../sub-P5318014/sub-P5318014_ses-S001_task-CUNY_run-001_mobi.xdf',
 '../sub-5182010/sub-P5182010_ses-S001_task-CUNY_run-001_mobi.xdf',
 '../sub-P5447527/sub-P5447527_ses-S001_task-CUNY_run-001_mobi.xdf',
 '../sub-P5548165/sub-P5548165_ses-S001_task-CUNY_run-001_MOBI.xdf']

In [3]:
xdf_path = sub_files[1]
data, header = pyxdf.load_xdf(xdf_path)
streams_collected = [stream['info']['name'][0] for stream in data]


'''
HELPERS
'''
def get_event_data(event, df, stim_df):
    return df.loc[(df.lsl_time_stamp >= stim_df.loc[stim_df.event == 'Onset_'+event, 'lsl_time_stamp'].values[0]) & 
                  (df.lsl_time_stamp <= stim_df.loc[stim_df.event == 'Offset_'+event, 'lsl_time_stamp'].values[0])]

def get_secs_between_triggers(trigger1, trigger2, stim_df):
    return stim_df.loc[stim_df.trigger == trigger1, 'time'].values[0] - stim_df.loc[stim_df.trigger == trigger2, 'time'].values[0]

                  
streams_collected

['WebcamStream',
 'Stimuli_Markers',
 'Tobii',
 'OpenSignals',
 'EGI NetAmp 0',
 'Microphone']

# Stimulus

In [4]:
stim_dat = data[streams_collected.index('Stimuli_Markers')]
stim_df = pd.DataFrame(stim_dat['time_series'])
stim_df.rename(columns={0: 'trigger'}, inplace=True)
events = {
    200: 'Onset_Experiment',
    10: 'Onset_RestingState',
    11: 'Offset_RestingState',
    500: 'Onset_StoryListening',
    501: 'Offset_StoryListening',
    100: 'Onset_10second_rest',
    101: 'Offset_10second_rest', 
    20: 'Onset_CampFriend',
    21: 'Offset_CampFriend',
    30: 'Onset_FrogDissection',
    31: 'Offset_FrogDissection',
    40: 'Onset_DanceContest',
    41: 'Offset_DanceContest',
    50: 'Onset_ZoomClass',
    51: 'Offset_ZoomClass',
    60: 'Onset_Tornado',
    61: 'Offset_Tornado',
    70: 'Onset_BirthdayParty',
    71: 'Offset_BirthdayParty',
    300: 'Onset_subjectInput',
    301: 'Offset_subjectInput',
    302: 'Onset_FavoriteStory',
    303: 'Offset_FavoriteStory',
    304: 'Onset_WorstStory',
    305: 'Offset_WorstStory',
    400: 'Onset_impedanceCheck',
    401: 'Offset_impedanceCheck',
    80: 'Onset_SocialTask',
    81: 'Offset_SocialTask',
    201: 'Offset_Experiment',
}

story_onsets = [20, 30, 40, 50, 60, 70]

# relabel the event if the trigger is in the events dictionary, else if 
stim_df['event'] = stim_df['trigger'].apply(lambda x: events[x] if x in events.keys() else 'Bx_input')

# relabel the event as a psychopy timestamp if the trigger is greater than 5 digits
stim_df.loc[stim_df.trigger.astype(str).str.len() > 5, 'event'] = 'psychopy_time_stamp'
stim_df['lsl_time_stamp'] = stim_dat['time_stamps']
stim_df['time'] = (stim_dat['time_stamps'] - stim_dat['time_stamps'][0])/1000
stim_df

Unnamed: 0,trigger,event,lsl_time_stamp,time
0,200,Onset_Experiment,652877.853621,0.000000e+00
1,1729108775,psychopy_time_stamp,652877.853632,1.079973e-08
2,10,Onset_RestingState,652903.462754,2.560913e-02
3,1729108800,psychopy_time_stamp,652903.462766,2.560914e-02
4,11,Offset_RestingState,653203.463529,3.256099e-01
...,...,...,...,...
256,1729111346,psychopy_time_stamp,655451.996951,2.574143e+00
257,201,Offset_Experiment,655456.997079,2.579143e+00
258,1729111351,psychopy_time_stamp,655456.997089,2.579143e+00
259,4,Bx_input,655456.997128,2.579144e+00


# Microphone Data 

In [5]:
mic_data = data[streams_collected.index('Microphone')]
mic_df = pd.DataFrame(mic_data['time_series'], columns=['int_array'])
mic_df['bytestring'] = mic_df['int_array'].apply(lambda x: np.array(x).tobytes())
mic_df['time_stamps'] = (mic_data['time_stamps'] - mic_data['time_stamps'][0])/441000

mic_df['lsl_time_stamp'] = mic_data['time_stamps']
mic_df['time'] = (mic_df.lsl_time_stamp - mic_df.lsl_time_stamp[0])
mic_df['time_delta'] = mic_df['lsl_time_stamp'].diff()

mic_df.tail()

Unnamed: 0,int_array,bytestring,time_stamps,lsl_time_stamp,time,time_delta
100124636,11,b'\x0b\x00\x00\x00\x00\x00\x00\x00',0.005148,655520.270591,2270.445501,2.3e-05
100124637,11,b'\x0b\x00\x00\x00\x00\x00\x00\x00',0.005148,655520.270613,2270.445524,2.3e-05
100124638,12,b'\x0c\x00\x00\x00\x00\x00\x00\x00',0.005148,655520.270636,2270.445546,2.3e-05
100124639,7,b'\x07\x00\x00\x00\x00\x00\x00\x00',0.005148,655520.270659,2270.445569,2.3e-05
100124640,8,b'\x08\x00\x00\x00\x00\x00\x00\x00',0.005148,655520.270681,2270.445592,2.3e-05


## Compare wav and LSL lengths

In [6]:
# get wav file
wav_files = glob('../*/*.wav')
wav_files

wav_path = wav_files[0]
wav_path

'../sub-P5318014/sub-P5318014_task-CUNY_run-001_mic.wav'

In [7]:
# calculate wav and lsl durations
wav_dur = round(librosa.get_duration(path=wav_path), 2)
lsl_dur = round(mic_df['lsl_time_stamp'].iloc[-1]- mic_df['lsl_time_stamp'].iloc[0], 2)

# convert to hh:mm:ss
wav_dt = datetime.timedelta(seconds=wav_dur)
wav_dur_dt = str(datetime.timedelta(seconds=round(wav_dt.total_seconds())))
lsl_dt = datetime.timedelta(seconds=lsl_dur)
lsl_dur_dt = str(datetime.timedelta(seconds=round(lsl_dt.total_seconds())))


# make df of durations
variables = {name: [globals()[name], globals()[name+'_dt']]  for name in ['wav_dur', 'lsl_dur']}
durations_df = pd.DataFrame.from_dict(variables, orient = 'index', columns = ['seconds', 'mm:ss'])
print(durations_df.to_string() + '\n')

# print if they are close or not (margin on 5 seconds rn)
if abs(wav_dur - lsl_dur) > 5:
    print(f'Warning: The duration of the wav file ({wav_dur}) and the duration of the lsl time stamps ({lsl_dur}) differ by more than 5 seconds. \n')
    if wav_dur > lsl_dur:
        print('The wav file is longer than the lsl time stamps by {} seconds'.format(wav_dur - lsl_dur))
    if lsl_dur > wav_dur:
        print('The lsl time stamps are longer than the wav file by {} seconds'.format(lsl_dur - wav_dur))
else: 
    print('The wav file and the lsl time stamps are the same length!')

         seconds    mm:ss
wav_dur  1989.40  0:33:09
lsl_dur  2270.45  0:37:50


The lsl time stamps are longer than the wav file by 281.0499999999997 seconds


## Slice parts we care about

In [8]:
slice_mic1 = mic_df.loc[(mic_df.lsl_time_stamp >= stim_df.loc[stim_df.event == 'Onset_StoryListening', 'lsl_time_stamp'].values[0]) & 
                  (mic_df.lsl_time_stamp <= stim_df.loc[stim_df.event == 'Onset_impedanceCheck', 'lsl_time_stamp'].values[0])]
slice_mic2 = mic_df.loc[(mic_df.lsl_time_stamp >= stim_df.loc[stim_df.event == 'Offset_impedanceCheck', 'lsl_time_stamp'].values[0]) & 
                  (mic_df.lsl_time_stamp <= stim_df.loc[stim_df.event == 'Offset_SocialTask', 'lsl_time_stamp'].values[0])]

slice_exp1 = stim_df.loc[(stim_df.lsl_time_stamp >= stim_df.loc[stim_df.event == 'Onset_BirthdayParty', 'lsl_time_stamp'].values[0]) & 
                  (stim_df.lsl_time_stamp <= stim_df.loc[stim_df.event == 'Onset_impedanceCheck', 'lsl_time_stamp'].values[0])]
slice_exp2 = stim_df.loc[(stim_df.lsl_time_stamp >= stim_df.loc[stim_df.event == 'Offset_impedanceCheck', 'lsl_time_stamp'].values[0]) & 
                  (stim_df.lsl_time_stamp <= stim_df.loc[stim_df.event == 'Offset_SocialTask', 'lsl_time_stamp'].values[0])]

## Compare experiment and mic durations with sliced data

In [9]:
slice_mic = mic_df.loc[(mic_df.lsl_time_stamp >= stim_df.loc[stim_df.event == 'Onset_BirthdayParty', 'lsl_time_stamp'].values[0]) & 
                  (mic_df.lsl_time_stamp <= stim_df.loc[stim_df.event == 'Offset_Experiment', 'lsl_time_stamp'].values[0])]

slice_exp = stim_df.loc[(stim_df.lsl_time_stamp >= stim_df.loc[stim_df.event == 'Onset_BirthdayParty', 'lsl_time_stamp'].values[0]) & 
                  (stim_df.lsl_time_stamp <= stim_df.loc[stim_df.event == 'Offset_Experiment', 'lsl_time_stamp'].values[0])]

In [10]:
mic_start = slice_mic['lsl_time_stamp'].values[0]
mic_stop = slice_mic['lsl_time_stamp'].values[-1]
mic_dur0 = round(mic_stop - mic_start, 4)
mic_dur0

2166.5484

In [11]:
exp_start = slice_exp['lsl_time_stamp'].values[0]
exp_stop = slice_exp['lsl_time_stamp'].values[-1]
exp_dur0 = round(exp_stop - exp_start, 4)
exp_dur0

2166.5484

In [12]:
mic_start1 = slice_mic1['lsl_time_stamp'].values[0]
mic_stop1 = slice_mic1['lsl_time_stamp'].values[-1]
mic_dur1 = round(mic_stop1 - mic_start1, 4)

mic_start2 = slice_mic2['lsl_time_stamp'].values[0]
mic_stop2 = slice_mic2['lsl_time_stamp'].values[-1]
mic_dur2 = round(mic_stop2 - mic_start2, 4)
mic_dur = mic_dur1 + mic_dur2
mic_dur

1648.4554

In [13]:
mic_dur1

1285.8061

In [14]:
mic_dur2

362.6493

In [15]:
exp_start1 = slice_exp1['lsl_time_stamp'].values[0]
exp_stop1 = slice_exp1['lsl_time_stamp'].values[-1]
exp_dur1 = round(exp_stop1 - exp_start1, 4)

exp_start2 = slice_exp2['lsl_time_stamp'].values[0]
exp_stop2 = slice_exp2['lsl_time_stamp'].values[-1]
exp_dur2 = round(exp_stop2 - exp_start2, 4)
exp_dur = exp_dur1 + exp_dur2
exp_dur

1607.832

In [16]:
exp_dur1

1245.1826

In [17]:
exp_dur2

362.6494

In [18]:
exp_start = slice_exp['lsl_time_stamp'].values[0]
exp_stop = slice_exp['lsl_time_stamp'].values[-1]
exp_dur = round(exp_stop - exp_start, 4)
exp_dur

2166.5484

In [19]:
np.quantile(mic_df['int_array'], 0.25)

-36.0

In [20]:
np.quantile(mic_df['int_array'], 0.75)

35.0