In [3]:
import os, os.path as op, pandas as pd, numpy as np, re
from glob import glob
from functools import partial

In [4]:
# Path to the log file analysed throughout this notebook.
file = "/Volumes/GoogleDrive/Mój dysk/packages/durons/logs/DEPHC_01-emreg_MIND_1_K.log"

# Each block starts with either a 'mindful' or an 'ogladaj' stimulus ...
blocks = 'mindful|ogladaj'
# ... and ends when a 'pytanie' stimulus happens.
until = 'pytanie'
# Durations and onsets are extracted for any stimulus whose name
# contains "NEU" or "SAD".
codes = 'NEU|SAD'

# Log-file parsing options: column separator and number of leading rows to skip.
sep = '\t'
skiprows = 3

In [9]:
def stimuli_from_log(file, participant=None, sep = '\t', skiprows = 3, sort = True):
    '''
    List the unique stimulus codes logged for one participant.

    Parameters
    ----------

    file : str
        Path to the log file
    participant : str, default None
        Participant's name - by default read from the file name up to the first hyphen (-), for example: p55_332-task1.log -> p55_332
    sep : str, default '\t'
        The separator used to read the log file
    skiprows : int, default 3
        Number of rows to skip at the top of the log file
    sort : bool, default True
        Sort the stimuli names; when False they are returned in order of first appearance

    Returns
    -------

    list
        The unique, non-missing values of the ``code`` column for the participant

    Raises
    ------

    AttributeError
        If the log has no ``subject`` column after the header names are normalised

    '''
    if participant is None:
        # "p55_332-task1.log" -> "p55_332"
        participant = op.split(file)[-1].split("-")[0]

    def fix_names(df):
        'Normalise column names: strip, lower-case, spaces and dots -> underscores, drop parentheses'
        df = df.copy()
        columns = df.columns.str.strip().str.lower()
        # regex=False: '(' , ')' and '.' must be treated literally on every pandas version
        for old, new in ((' ', '_'), ('(', ''), (')', ''), ('.', '_')):
            columns = columns.str.replace(old, new, regex=False)
        df.columns = columns
        return df

    def exclude_nonsubj_data(df, participant):
        'Keep only the rows whose subject column equals the participant'
        if 'subject' not in df.columns:
            raise AttributeError(
                "the log file has no 'subject' column - check `sep` and `skiprows`"
            )
        return df[df.subject == participant]

    data = pd.read_csv(file, sep = sep, skiprows = skiprows)
    data = fix_names(data)
    data = exclude_nonsubj_data(data, participant = participant)

    # Rows without a code are not stimuli; leaving NaN in would also make
    # sorted() fail when comparing float NaN with strings.
    stimuli = data.code.dropna().unique().tolist()
    if sort:
        stimuli = sorted(stimuli)
    return stimuli

##### Get all the stimuli names from log files

In [12]:
# All remaining arguments are left at their defaults
# (participant=None, sep='\t', skiprows=3, sort=True).
stimuli_from_log(file)

['10',
 '111',
 '20',
 '3',
 '4',
 '5',
 '6',
 '7',
 'Instrukcja',
 'NEU1',
 'NEU2',
 'NEU3',
 'NEU4',
 'NEU5',
 'NEU6',
 'NEU7',
 'NEU8',
 'SAD1',
 'SAD10',
 'SAD11',
 'SAD12',
 'SAD2',
 'SAD3',
 'SAD4',
 'SAD5',
 'SAD6',
 'SAD7',
 'SAD8',
 'SAD9',
 'fix',
 'mindful',
 'ogladaj',
 'pytanie',
 'short_fix']

In [43]:
def get_blocks(file, codes, blocks, until, participant=None, sep = '\t', skiprows = 3, sort = True):
    '''
    Map every stimulus that occurs inside a block to a "<block>_<code>" label.

    A block begins at a row whose code matches ``blocks`` and ends at the first
    following row whose code matches ``until``. If no such row occurs, the block
    ends at the next block start, or at the end of the log for the last block.

    Parameters
    ----------

    file : str
        Path to the log file
    codes : list or regex
        Names of the stimuli to be searched for within a block. The pattern is
        split on '|' and every alternative is matched separately; the matching
        alternative becomes the suffix of the label
    blocks : list or regex
        Names of the stimuli that mark the beginning of a block
    until : list or regex
        Names of the stimuli that mark the end of a block. Pass None or '' to
        let every block run until the next block start
    participant : str, default None
        Participant's name - by default read from the file name up to the first hyphen (-), for example: p55_332-task1.log -> p55_332
    sep : str, default '\t'
        The separator used to read the log file
    skiprows : int, default 3
        Number of rows to skip at the top of the log file
    sort : bool, default True
        Sort the resulting dictionary by stimulus name

    Returns
    -------

    dict
        Stimulus codes as keys and "<block start code>_<matched code pattern>"
        labels as values. A stimulus seen in several blocks keeps the label of
        the last one

    Raises
    ------

    AttributeError
        If the log has no ``subject`` column after the header names are normalised

    '''
    if participant is None:
        # "p55_332-task1.log" -> "p55_332"
        participant = op.split(file)[-1].split("-")[0]

    def fix_names(df):
        'Normalise column names: strip, lower-case, spaces and dots -> underscores, drop parentheses'
        df = df.copy()
        columns = df.columns.str.strip().str.lower()
        # regex=False: '(' , ')' and '.' must be treated literally on every pandas version
        for old, new in ((' ', '_'), ('(', ''), (')', ''), ('.', '_')):
            columns = columns.str.replace(old, new, regex=False)
        df.columns = columns
        return df

    def exclude_nonsubj_data(df, participant):
        'Keep only the rows whose subject column equals the participant'
        if 'subject' not in df.columns:
            raise AttributeError(
                "the log file has no 'subject' column - check `sep` and `skiprows`"
            )
        return df[df.subject == participant]

    def list2regex(l, pipe = '|'):
        'Join a list/tuple of names into one alternation pattern; pass anything else through'
        if isinstance(l, (list, tuple)):
            return pipe.join(l)
        return l

    data = pd.read_csv(file, sep = sep, skiprows = skiprows)
    data = fix_names(data)
    data = exclude_nonsubj_data(data, participant = participant)
    # After this, index labels equal row positions, so .iloc slices and index
    # labels can be used interchangeably below.
    data = data.reset_index(drop=True)

    codes = list2regex(codes)
    blocks = list2regex(blocks)
    until = list2regex(until)

    starts = list(data.index[data.code.str.contains(blocks, na=False)])
    # Default end of each block: the next block start; the last block runs to
    # the end of the log (len(data), so that the final row is included).
    stops = starts[1:] + [len(data)]

    if until:
        is_end = data.code.str.contains(until, na=False)
    else:
        is_end = pd.Series(False, index=data.index)

    blocks_final = {}
    for start, stop in zip(starts, stops):
        # Cut the block at the first end marker that follows its start row.
        end_rows = is_end.iloc[start + 1:stop]
        end_rows = end_rows[end_rows]
        if len(end_rows):
            stop = end_rows.index[0]

        block = data.iloc[start:stop]
        task = block.code.iloc[0]
        for c in codes.split('|'):
            # na=False: rows without a code never match
            matched = block.code[block.code.str.contains(c, na=False)]
            for s in matched:
                blocks_final[s] = f"{task}_{c}"

    if sort:
        blocks_final = dict(sorted(blocks_final.items()))
    return blocks_final

##### Export all the stimuli from pre-defined blocks

In [46]:
# All remaining arguments are left at their defaults
# (participant=None, sep='\t', skiprows=3, sort=True).
get_blocks(file, codes, blocks, until)

{'NEU1': 'ogladaj_NEU',
 'NEU2': 'ogladaj_NEU',
 'NEU3': 'ogladaj_NEU',
 'NEU4': 'ogladaj_NEU',
 'NEU5': 'ogladaj_NEU',
 'NEU6': 'ogladaj_NEU',
 'NEU7': 'ogladaj_NEU',
 'NEU8': 'ogladaj_NEU',
 'SAD1': 'mindful_SAD',
 'SAD10': 'ogladaj_SAD',
 'SAD11': 'mindful_SAD',
 'SAD12': 'mindful_SAD',
 'SAD2': 'mindful_SAD',
 'SAD3': 'mindful_SAD',
 'SAD4': 'mindful_SAD',
 'SAD5': 'mindful_SAD',
 'SAD6': 'mindful_SAD',
 'SAD7': 'ogladaj_SAD',
 'SAD8': 'ogladaj_SAD',
 'SAD9': 'ogladaj_SAD'}

In [52]:
def durons(file, stimuli=None, participant=None, sep = '\t', skiprows = 3, sort = True, s = True):

    '''
    Calculate durations and onsets per condition, relative to the first Pulse event

    Parameters
    ----------

    file : str
        Path to the log file
    stimuli : dict, default is None
        A dictionary with {stimulus:block_name}. Stimuli missing from the dictionary are dropped.
        Takes all unique stimuli as separate conditions if no dict is provided
    participant : str, default None
        Participant's name - by default read from the file name up to the first hyphen (-), for example: p55_332-task1.log -> p55_332
    sep : str, default '\t'
        The separator used to read the log file
    skiprows : int, default 3
        Number of rows to skip at the top of the log file
    sort : bool, default True
        Sort the conditions by name
    s : bool, default True
        Calculate the times in seconds (divide the times by 10000). When False,
        onsets and durations stay in the units of the log file

    Returns
    -------

    Dict of Pandas DataFrames
        A dictionary with conditions as keys and Pandas DataFrames as values. Each DataFrame
        has the columns ``onsets`` and ``durations``, ordered by onset. Events before the
        first Pulse and rows with a missing code, time or duration are left out.

    Raises
    ------

    AttributeError
        If the log has no ``subject`` column after the header names are normalised
    IndexError
        If the participant's data contains no event of type 'Pulse'

    '''
    if participant is None:
        # "p55_332-task1.log" -> "p55_332"
        participant = op.split(file)[-1].split("-")[0]

    def fix_names(df):
        'Normalise column names: strip, lower-case, spaces and dots -> underscores, drop parentheses'
        df = df.copy()
        columns = df.columns.str.strip().str.lower()
        # regex=False: '(' , ')' and '.' must be treated literally on every pandas version
        for old, new in ((' ', '_'), ('(', ''), (')', ''), ('.', '_')):
            columns = columns.str.replace(old, new, regex=False)
        df.columns = columns
        return df

    def exclude_nonsubj_data(df, participant):
        'Keep only the rows whose subject column equals the participant'
        if 'subject' not in df.columns:
            raise AttributeError(
                "the log file has no 'subject' column - check `sep` and `skiprows`"
            )
        return df[df.subject == participant]

    def calculate_times(df, s = True):
        'Shift times so that the first Pulse is at 0, optionally convert to seconds, drop earlier events'
        # Work on a copy: df is a filtered slice of the parsed log.
        df = df.copy()
        df['time'] = df['time'].astype(float)
        df['duration'] = df['duration'].astype(float)

        pulses = df.loc[df.event_type == 'Pulse', 'time']
        if pulses.empty:
            raise IndexError("no 'Pulse' event found - cannot determine the time origin")
        df['time'] = df['time'] - pulses.iloc[0]

        if s:
            df['time'] = df['time'] / 10000
            df['duration'] = df['duration'] / 10000
        return df[df['time'] >= 0]

    data = pd.read_csv(file, sep = sep, skiprows = skiprows)
    data = fix_names(data)
    data = exclude_nonsubj_data(data, participant = participant)
    # Forward the caller's choice of units (this used to be hard-coded to seconds).
    data = calculate_times(data, s = s)

    data = data[['code', 'time', 'duration']].copy()
    data.columns = ['names', 'onsets', 'durations']
    if stimuli is not None:
        # Stimuli without a mapping become NaN and are dropped below.
        data['names'] = data['names'].map(stimuli)
    data = data.dropna().sort_values(by='onsets', kind='mergesort')

    result = {
        name: group[['onsets', 'durations']].reset_index(drop=True)
        for name, group in data.groupby('names', sort=False)
    }
    if sort:
        result = dict(sorted(result.items()))
    return result

##### Calculate durations and onsets

In [53]:
# No stimulus->block mapping: every unique stimulus becomes its own condition.
# All remaining arguments are left at their defaults.
dur1 = durons(file)

In [54]:
# Display the dictionary returned by durons() when no stimulus mapping is given.
dur1

{'NEU1':    onsets  durations
 0  3.0625    10.0167, 'NEU2':     onsets  durations
 0  13.0959    10.0167, 'NEU3':      onsets  durations
 0  219.0804    10.0167, 'NEU4':      onsets  durations
 0  229.1138    10.0167, 'NEU5':      onsets  durations
 0  348.6811    10.0167, 'NEU6':      onsets  durations
 0  358.7145    10.0167, 'NEU7':      onsets  durations
 0  132.6632    10.0167, 'NEU8':      onsets  durations
 0  142.6966    10.0167, 'SAD1':     onsets  durations
 0  46.2627    10.0167, 'SAD10':      onsets  durations
 0  315.5142    10.0167, 'SAD11':      onsets  durations
 0  262.2806    10.0167, 'SAD12':     onsets  durations
 0  272.314    10.0167, 'SAD2':     onsets  durations
 0  56.2961    10.0167, 'SAD3':      onsets  durations
 0  175.8635    10.0167, 'SAD4':      onsets  durations
 0  185.8969    10.0167, 'SAD5':      onsets  durations
 0  391.8813    10.0167, 'SAD6':      onsets  durations
 0  401.9147    10.0167, 'SAD7':    onsets  durations
 0  89.463    10.0167, 'SAD

In [55]:
# Stimulus -> block-label mapping; remaining arguments are left at their defaults.
b = get_blocks(file, codes, blocks, until)

In [56]:
# Group the stimuli by the labels in `b`; remaining arguments are left at their defaults.
dur2 = durons(file, stimuli=b)

In [57]:
# Display the dictionary returned by durons() with the stimuli mapped through `b`.
dur2

{'mindful_SAD':      onsets  durations
 0   46.2627    10.0167
 1   56.2961    10.0167
 2  175.8635    10.0167
 3  185.8969    10.0167
 4  262.2806    10.0167
 5  272.3140    10.0167
 6  391.8813    10.0167
 7  401.9147    10.0167, 'ogladaj_NEU':      onsets  durations
 0    3.0625    10.0167
 1   13.0959    10.0167
 2  132.6632    10.0167
 3  142.6966    10.0167
 4  219.0804    10.0167
 5  229.1138    10.0167
 6  348.6811    10.0167
 7  358.7145    10.0167, 'ogladaj_SAD':      onsets  durations
 0   89.4630    10.0167
 1   99.4964    10.0167
 2  305.4809    10.0167
 3  315.5142    10.0167}

In [58]:
# Inspect the onsets/durations table stored under the 'ogladaj_NEU' key.
dur2['ogladaj_NEU']

Unnamed: 0,onsets,durations
0,3.0625,10.0167
1,13.0959,10.0167
2,132.6632,10.0167
3,142.6966,10.0167
4,219.0804,10.0167
5,229.1138,10.0167
6,348.6811,10.0167
7,358.7145,10.0167


In [102]:
def durons_savemat(durons_dict, filename, sort = True, output = False):
    '''
    Export durations and onsets to a .mat file

    The file contains three variables - ``names``, ``durations`` and ``onsets`` -
    each stored as a 1-D object array with one entry per condition, so that the
    layout is the same whether or not the conditions have equal numbers of events.

    Parameters
    ----------

    durons_dict : dict
        Dictionary of Pandas DataFrames with ``durations`` and ``onsets`` columns
    filename : str
        Filename or path where the mat file will be saved
    sort : bool or list, default True
        True sorts the conditions alphabetically, False keeps the order of
        ``durons_dict``, a non-empty list (or tuple) of condition names gives an explicit order
    output : bool, default False
        Return the final dictionary of arrays that was saved to the .mat file

    Returns
    -------

    dict or None
        The saved dictionary when ``output`` is True, otherwise None

    Raises
    ------

    KeyError
        If ``sort`` lists a condition that is not in ``durons_dict``
    '''
    from scipy.io import savemat

    if isinstance(sort, (list, tuple)) and len(sort) > 0:
        names = list(sort)
    elif sort:
        names = sorted(durons_dict.keys())
    else:
        names = list(durons_dict.keys())

    def as_cell(arrays):
        'Pack per-condition arrays into a 1-D object array, one element per condition'
        # np.array(list_of_arrays) would build a 2-D matrix for equal lengths and
        # raise for unequal lengths on recent NumPy; filling an object array
        # element by element gives the same structure in both cases.
        cell = np.empty(len(arrays), dtype=object)
        for position, values in enumerate(arrays):
            cell[position] = values
        return cell

    mat = {
        "names": as_cell(names),
        "durations": as_cell([np.asarray(durons_dict[name]['durations']) for name in names]),
        "onsets": as_cell([np.asarray(durons_dict[name]['onsets']) for name in names]),
    }
    savemat(file_name = filename, mdict = mat)
    if output:
        return mat

In [103]:
# Log file whose durations and onsets are exported below
# (the same path that is assigned to `file` in the configuration cell).
file = "/Volumes/GoogleDrive/Mój dysk/packages/durons/logs/DEPHC_01-emreg_MIND_1_K.log"

In [104]:
# Swap only the final extension for '.mat'. Splitting on the first '.' would
# truncate the path as soon as a directory or the base name contains a dot.
mat_filename = op.splitext(file)[0] + '.mat'

##### Export the durations and onsets to a MATLAB- and SPM-ready .mat file

In [105]:
# Alphabetical condition order (sort=True is the default).
durons_savemat(dur2, mat_filename, output=True)

{'names': array(['mindful_SAD', 'ogladaj_NEU', 'ogladaj_SAD'], dtype=object),
 'durations': array([array([10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167,
        10.0167]),
        array([10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167,
        10.0167]),
        array([10.0167, 10.0167, 10.0167, 10.0167])], dtype=object),
 'onsets': array([array([ 46.2627,  56.2961, 175.8635, 185.8969, 262.2806, 272.314 ,
        391.8813, 401.9147]),
        array([  3.0625,  13.0959, 132.6632, 142.6966, 219.0804, 229.1138,
        348.6811, 358.7145]),
        array([ 89.463 ,  99.4964, 305.4809, 315.5142])], dtype=object)}

In [88]:
# Keep the key order of `dur2` instead of sorting the condition names.
durons_savemat(dur2, mat_filename, sort=False, output=True)

{'names': array(['mindful_SAD', 'ogladaj_NEU', 'ogladaj_SAD'], dtype=object),
 'durations': array([array([10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167,
        10.0167]),
        array([10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167,
        10.0167]),
        array([10.0167, 10.0167, 10.0167, 10.0167])], dtype=object),
 'onsets': array([array([ 46.2627,  56.2961, 175.8635, 185.8969, 262.2806, 272.314 ,
        391.8813, 401.9147]),
        array([  3.0625,  13.0959, 132.6632, 142.6966, 219.0804, 229.1138,
        348.6811, 358.7145]),
        array([ 89.463 ,  99.4964, 305.4809, 315.5142])], dtype=object)}

In [89]:
# Explicit, user-defined condition order.
durons_savemat(
    dur2,
    mat_filename,
    sort=['ogladaj_NEU', 'ogladaj_SAD', 'mindful_SAD'],
    output=True,
)

{'names': array(['ogladaj_NEU', 'ogladaj_SAD', 'mindful_SAD'], dtype=object),
 'durations': array([array([10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167,
        10.0167]),
        array([10.0167, 10.0167, 10.0167, 10.0167]),
        array([10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167, 10.0167,
        10.0167])], dtype=object),
 'onsets': array([array([  3.0625,  13.0959, 132.6632, 142.6966, 219.0804, 229.1138,
        348.6811, 358.7145]),
        array([ 89.463 ,  99.4964, 305.4809, 315.5142]),
        array([ 46.2627,  56.2961, 175.8635, 185.8969, 262.2806, 272.314 ,
        391.8813, 401.9147])], dtype=object)}