## Imports  

In [1]:
import pyxdf
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns 
import wave
#import pyaudio
import numpy as np
import sounddevice as sd
import cv2
from scipy.signal import iirnotch, filtfilt
from glob import glob
from tqdm import tqdm
from datetime import date
import calendar

In [2]:
# Preview the .xdf files matched one directory level below the parent folder.
glob('../*/*.xdf')

['../sub-P5318014/sub-P5318014_ses-S001_task-CUNY_run-001_mobi.xdf',
 '../sub-5182010/sub-P5182010_ses-S001_task-CUNY_run-001_mobi.xdf',
 '../sub-P5548165/sub-P5548165_ses-S001_task-CUNY_run-001_MOBI.xdf']

## Load Data

In [3]:
# Collect every .xdf recording in a stable (sorted) order.
subject_xdf_paths = sorted(glob('../*/*.xdf'))

# The subject ID is the first underscore-delimited token of the file name.
sub_ids = [path.split('/')[-1].split('_')[0] for path in subject_xdf_paths]

# One row per subject. Every expected stream starts at 0 and is set to 1 below
# once a stream with that name is found in the subject's recording.
expected_streams = [
    'EGI NetAmp 0',
    'OpenSignals',
    'WebcamStream',
    'Tobii',
    'Stimuli_Markers',
    'Microphone',
]
streams_df = pd.DataFrame({
    'Subject': sub_ids,
    **{name: [0] * len(sub_ids) for name in expected_streams},
})

# Keep the loaded streams of every subject around for the later cells.
xdfs = {}
for subject, xdf_path in zip(sub_ids, subject_xdf_paths):
    data, header = pyxdf.load_xdf(xdf_path)
    xdfs[subject] = data
    streams_collected = [stream['info']['name'][0] for stream in data]

    # Rows belonging to this subject (the mask stays valid while columns are added).
    is_subject = streams_df['Subject'] == subject
    streams_df.loc[is_subject, 'datetime'] = header['info']['datetime'][0]
    for s in streams_collected:
        # A stream name outside `expected_streams` creates a new column here.
        streams_df.loc[is_subject, s] = 1

# Replace the raw stream names with the modality they represent.
streams_df = streams_df.rename(columns={
    'EGI NetAmp 0': 'EEG',
    'OpenSignals': 'Physiology',
    'WebcamStream': 'Video',
    'Tobii': 'EyeTracker',
    'Stimuli_Markers': 'Stimuli',
    'Microphone': 'Audio',
})

KeyboardInterrupt: 

In [None]:
# Derive the calendar date of each recording and order the table by it.
# Each value is parsed on its own because the stored strings carry a UTC offset
# (e.g. "-04:00"): a column-wide pd.to_datetime() on strings whose offsets differ
# (recordings on both sides of a daylight-saving change) does not produce a
# datetime64 column, so the `.dt` accessor would fail. Parsing per value also
# keeps the date as written in the recording's own offset.
def _recording_date(value):
    """Return the calendar date of one datetime value, or NaT if it is missing."""
    parsed = pd.to_datetime(value)
    return pd.NaT if pd.isna(parsed) else parsed.date()


streams_df['date'] = streams_df['datetime'].map(_recording_date)
streams_df.sort_values(by='date', inplace=True)

In [None]:
# Display the per-subject stream-availability table.
streams_df

Unnamed: 0,Subject,Date,EEG,Physiology,Video,EyeTracker,Stimuli,Audio,datetime,date
2,sub-P5548165,0,1,1,1,1,1,1,2024-10-08 10:25:44-04:00,2024-10-08
0,sub-P5182010,0,1,1,1,1,1,1,2024-10-16 15:58:20-04:00,2024-10-16
1,sub-P5318014,0,1,1,1,1,1,1,2024-10-17 15:32:02-04:00,2024-10-17


In [None]:
# Summary statistics over the per-subject stream table, stored as display-ready
# values in `calculated_values`.
streams = ['EEG', 'Physiology', 'Video', 'EyeTracker', 'Stimuli', 'Audio']
n_subjects = len(streams_df)

# Percentage of subjects that have each stream
calculated_values = {s: f'{streams_df[s].sum() / n_subjects * 100} %' for s in streams}
calculated_values['N'] = streams_df.Subject.nunique()

# Number of distinct subjects whose recording date falls within the last 7 days
one_week_ago = date.today() - pd.Timedelta(7, 'D')
calculated_values['n_lastweek'] = streams_df.loc[streams_df.date >= one_week_ago].Subject.nunique()

# Percentage of subjects that have every stream listed in `streams`
# (compared against len(streams) so the check follows the list above).
has_all_streams = streams_df[streams].sum(axis=1) == len(streams)
calculated_values['all_streams'] = f'{len(streams_df.loc[has_all_streams]) / n_subjects * 100} %'

In [None]:
# count = number of participants recorded on or before each row's date
recorded_dates = streams_df.date
streams_df['count'] = [int((recorded_dates <= d).sum()) for d in recorded_dates]


In [None]:
# Display the table again to inspect the newly added `count` column.
streams_df

Unnamed: 0,Subject,Date,EEG,Physiology,Video,EyeTracker,Stimuli,Audio,datetime,date,has_RestingState,has_StoryListening,has_CampFriend,has_FrogDissection,has_DanceContest,has_ZoomClass,has_Tornado,has_BirthdayParty,has_SocialTask,count
2,sub-P5548165,0,1,1,1,1,1,1,2024-10-08 10:25:44-04:00,2024-10-08,True,True,True,True,True,True,True,True,True,1
0,sub-P5182010,0,1,1,1,1,1,1,2024-10-16 15:58:20-04:00,2024-10-16,True,True,True,True,True,True,True,True,True,2
1,sub-P5318014,0,1,1,1,1,1,1,2024-10-17 15:32:02-04:00,2024-10-17,True,True,True,True,True,True,True,True,True,3


## Subjects with each experimental block

In [None]:
def get_stim(xdfs, subject):
    """Build a labelled event table from one subject's ``Stimuli_Markers`` stream.

    Parameters
    ----------
    xdfs : dict
        Maps a subject ID to that subject's list of stream dicts. Each stream is
        read through ``stream['info']['name'][0]``, ``stream['time_series']`` and
        ``stream['time_stamps']``.
    subject : hashable
        Key of the subject in ``xdfs``.

    Returns
    -------
    pandas.DataFrame
        One row per marker sample with the columns ``trigger`` (raw marker
        value), ``event`` (label), ``lsl_time_stamp`` (raw time stamp) and
        ``time`` (time stamp relative to the first marker, divided by 1000).
        An empty frame with these four columns is returned when the stream
        holds no samples.

    Raises
    ------
    KeyError
        If ``subject`` is not a key of ``xdfs``.
    ValueError
        If the subject has no stream named ``Stimuli_Markers``.
    """
    # Trigger code -> event label.
    events = {
            200: 'Onset_ExperimentStart',
            10: 'Onset_RestingState',
            11: 'Offset_RestingState',
            500: 'Onset_StoryListening',
            501: 'Offset_StoryListening',
            100: 'Onset_10second_rest',
            101: 'Offset_10second_rest',
            20: 'Onset_CampFriend',
            21: 'Offset_CampFriend',
            30: 'Onset_FrogDissection',
            31: 'Offset_FrogDissection',
            40: 'Onset_DanceContest',
            41: 'Offset_DanceContest',
            50: 'Onset_ZoomClass',
            51: 'Offset_ZoomClass',
            60: 'Onset_Tornado',
            61: 'Offset_Tornado',
            70: 'Onset_BirthdayParty',
            71: 'Offset_BirthdayParty',
            300: 'Onset_subjectInput',
            301: 'Offset_subjectInput',
            302: 'Onset_FavoriteStory',
            303: 'Offset_FavoriteStory',
            304: 'Onset_WorstStory',
            305: 'Offset_WorstStory',
            400: 'Onset_impedanceCheck',
            401: 'Offset_impedanceCheck',
            80: 'Onset_SocialTask',
            81: 'Offset_SocialTask',
            201: 'Offset_ExperimentEnd',
        }

    data = xdfs[subject]
    streams_collected = [stream['info']['name'][0] for stream in data]
    if 'Stimuli_Markers' not in streams_collected:
        raise ValueError(
            f"No 'Stimuli_Markers' stream found for subject {subject!r}; "
            f"available streams: {streams_collected}"
        )
    stim_dat = data[streams_collected.index('Stimuli_Markers')]
    time_stamps = stim_dat['time_stamps']

    # A stream without samples has no first time stamp to subtract (and may not
    # even yield a column 0), so hand back an empty, correctly shaped table.
    if len(time_stamps) == 0:
        return pd.DataFrame({
            'trigger': pd.Series(dtype=float),
            'event': pd.Series(dtype=object),
            'lsl_time_stamp': pd.Series(dtype=float),
            'time': pd.Series(dtype=float),
        })

    stim_df = pd.DataFrame(stim_dat['time_series']).rename(columns={0: 'trigger'})

    # Relabel the event if the trigger is in the events dictionary; any other
    # trigger is labelled 'Bx_input'.
    stim_df['event'] = stim_df['trigger'].apply(lambda x: events.get(x, 'Bx_input'))
    # Relabel the event as a psychopy timestamp if the trigger's string form is
    # longer than 5 characters.
    stim_df.loc[stim_df['trigger'].astype(str).str.len() > 5, 'event'] = 'psychopy_time_stamp'
    stim_df['lsl_time_stamp'] = time_stamps
    # NOTE(review): the elapsed time is divided by 1000; LSL time stamps are
    # normally expressed in seconds - confirm the intended unit of `time`.
    stim_df['time'] = (time_stamps - time_stamps[0]) / 1000
    return stim_df

In [57]:
# Flag, per subject, which experimental blocks have their markers, then
# summarise the completion rate of each block.
trials = ['RestingState', 'StoryListening', 'CampFriend', 'FrogDissection', 'DanceContest', 'ZoomClass', 'Tornado', 'BirthdayParty','SocialTask']

# Start every completion flag at False so the columns keep a boolean dtype.
for trial in trials:
    streams_df['has_' + trial] = False

for subject in sub_ids:
    stim = get_stim(xdfs, subject)
    # Write only to this subject's row(s); assigning the scalar to the whole
    # column would leave every subject with the last subject's result.
    subject_rows = streams_df['Subject'] == subject
    for trial in trials:
        # A block counts as present when its Onset and Offset markers together
        # occur exactly twice in the subject's event column.
        n_onsets = stim.event.str.contains('Onset_' + trial).sum()
        n_offsets = stim.event.str.contains('Offset_' + trial).sum()
        streams_df.loc[subject_rows, 'has_' + trial] = bool(n_onsets + n_offsets == 2)

for t in trials:
    # Computed outside the f-string: reusing the same quote character inside an
    # f-string is a SyntaxError before Python 3.12.
    completed_pct = streams_df['has_' + t].sum() / len(streams_df) * 100
    calculated_values['Completed ' + t] = f'{completed_pct} %'