### Create onsets
This notebook uses the behavioral logs to create the onset files for the first-level (run-level) analysis.

In [84]:
import numpy as np
import pandas as pd
import os
import csv
import warnings
from scipy.io import loadmat
import scipy.stats as stats
import glob
import math

def round_down(n, decimals=0):
    """Round ``n`` down (toward negative infinity) to ``decimals`` decimal places.

    The value is scaled by ``10 ** decimals``, floored, and scaled back, so
    ``round_down(2.789, 2)`` gives ``2.78`` and ``round_down(-1.25, 1)`` gives ``-1.3``.
    """
    scale = pow(10, decimals)
    floored = math.floor(n * scale)
    return floored / scale

#set pandas display options: never truncate rows, columns, line width or cell contents
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
#None (not -1) is the "no limit" value for max_colwidth: negative values were
#deprecated in pandas 1.0 and newer pandas releases no longer accept them
pd.set_option('display.max_colwidth', None)

In [85]:
# folders and parameters shared by the rest of the notebook:
server = '/jukebox'
project_dir = '{root}/niv/oded/FHSS_L'.format(root=server)
data_dir = '/'.join([project_dir, 'data'])
analysis_dir = '/'.join([data_dir, 'analysis'])
behav_dir = '/'.join([project_dir, 'behavior'])
# every subject - not all of them are included in the analysis, because of motion.
subjects = [
    103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
    114, 115, 119, 120, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 133, 134, 135, 136, 138, 139, 140,
    141, 142, 143,
]
sessions = [1, 2]

### Auxiliary functions for creating regressors:
These are used by all types of onsets, so always run this cell.

In [86]:
def add_error_onsets(curr_df,curr_er,prev_time,duration,min_time):
    """Append the onset/duration of the un-modelled period that precedes one error trial.

    ``prev_time`` and ``duration`` are lists that are extended in place (and also
    returned). ``curr_er`` is the index label of the error trial in ``curr_df``;
    neighbouring trials are addressed as ``curr_er-1`` / ``curr_er-2``, so the
    index labels are assumed to be consecutive integers.

    Three cases, decided from the 'btrial' and 'cond' columns:

    * first trial of the run (btrial == 1): the period starts at 8.7 s and ends
      0.5 s before the error trial's 'tstimon';
    * category switch ('DCSA' or 'DCDA'): the period starts ``min_time`` after the
      previous trial and ends 0.5 s before the error trial;
    * anything else: the period starts ``min_time`` after trial n-2 (or at 8.7 s
      when the error is the second trial) and ends 0.5 s before trial n-1.
      Nothing is added when the error is the second trial and the first trial's
      'tstimon' is below 10.

    Raises ``Exception`` when the computed duration is shorter than the minimum
    expected (``min_time``, or ``2*min_time - .5`` in the last case). In that
    case the onset has already been appended to ``prev_time``.
    """
    trial_num = curr_df.loc[curr_er,'btrial']

    # --- the error is the very first trial of the run ---
    if trial_num == 1:
        #will be marked 'SSSS'; 8.7 s is taken as the start of the period
        print('first trial is an error')
        prev_time.append(8.7)
        gap = curr_df.loc[curr_er,'tstimon'] - .5 - prev_time[-1]
        if gap < min_time: #a trial cannot be shorter than min_time
            raise Exception("something is wrong, first trial, error duration is too short")
        duration.append(gap)
        return prev_time,duration

    # --- the error is a category switch ---
    trial_cond = curr_df.loc[curr_er,'cond']
    if trial_cond in ('DCSA','DCDA'):
        #model the time between the previous trial and this error as error:
        #onset = previous trial onset + minimal trial timing,
        #duration = until .5 before the current trial
        print('this error is a category switch')
        prev_time.append(curr_df.loc[curr_er-1,'tstimon'] + min_time)
        gap = curr_df.loc[curr_er,'tstimon'] - .5 - prev_time[-1]
        if gap < min_time: #a trial cannot be shorter than min_time
            raise Exception("something is wrong, DC error duration is too short")
        duration.append(gap)
        return prev_time,duration

    # --- not a category switch: model the time between n-2 and n-1 ---
    print('this error is NOT a category switch')
    is_second_trial = trial_num == 2
    prev_stim_on = curr_df.loc[curr_er-1,'tstimon']
    first_was_early = prev_stim_on < 10
    if is_second_trial & first_was_early:
        print('this error is the second trial, first trial tstimon < 10, do not mark errors')
        return prev_time,duration

    if is_second_trial:
        print('this error is the second trial')
        prev_time.append(8.7)
    else:
        #onset = timing of the n-2 trial + minimal trial timing
        prev_time.append(curr_df.loc[curr_er-2,'tstimon'] + min_time)

    #the duration runs until .5 before the n-1 trial
    gap = prev_stim_on - .5 - prev_time[-1]
    print('check that dur is larger than min time *2: ' + format(gap, '.2f'))
    #two trials are missing here; the last fixation (.5) is deducted from that timing
    shortest_allowed = min_time*2 -.5
    if gap < shortest_allowed:
        raise Exception("something is wrong, error duration is too short")
    duration.append(gap)
    return prev_time,duration


def check_consec_ers(curr_df,er_t): #curr_df,er_t,ier
    """Return a float copy of ``er_t`` with some consecutive-error entries set to NaN.

    Two entries of ``er_t`` are treated as consecutive errors when their index
    labels differ by exactly 1. According to the messages printed below, an
    entry that stays non-NaN means "mark the trial before this error as an
    error"; an entry set to NaN means "do not".

    Parameters
    ----------
    curr_df : DataFrame
        The columns 'cond', 'congruency', 'btrial' and 'error' are read with
        ``.loc`` using the labels in ``er_t`` (and ``er_t[i]-1``), so the index
        labels are assumed to be consecutive integers - TODO confirm for new data.
    er_t : index-like of int
        Index labels of the trials logged as errors (presumably ascending).

    Returns
    -------
    numpy.ndarray of float, same length as ``er_t``.

    Rules for the FIRST of two consecutive errors:

    * kept if the first is 'DCSA', or the second is 'DCSA'/'DCDA';
    * kept if the first is 'DCDA' and incongruent and the second is 'SCDA';
    * both 'SCSA': NaN if the first is congruent; if incongruent, kept when the
      error codes are 2 then 1, otherwise NaN when the trial before the first
      is 'SCSA', kept when it is 'DCSA'/'DCDA', and only a warning message is
      printed (entry kept) in any other case;
    * any other combination: NaN.

    In addition, the SECOND of two consecutive errors is set to NaN when both
    are 'SCSA'.
    """
    er_tc=np.array(er_t).astype("float")
    #go through er_tc and remove the first error of two consecutive trials, or the second, if it meets the conditions below:
    #iterate over er_t (not er_tc), so that er_tc can be changed independently - no problems with marking nan in first/second
    for ier in list(range(len(er_t))):
        if ier < (len(er_t)-1): #check until the one before last (implemented like this and not in the loop bc of the part I commented out below)
            if er_t[ier+1] - er_t[ier]  == 1: #this is the first of two consecutive errors
                print('first of two consecuitive trials is: ' + curr_df.loc[er_t[ier],'cond'])
                print('second of two consecuitive trials is: ' + curr_df.loc[er_t[ier+1],'cond'])
                #if the second one is DCSA/DCDA, or the first is DCSA:
                if ((curr_df.loc[er_t[ier],'cond'] == 'DCSA') | 
                    (curr_df.loc[er_t[ier+1],'cond'] == 'DCSA') | (curr_df.loc[er_t[ier+1],'cond']=='DCDA')):
                    print('first/second of two consecuitive trials is a category switch')
                    print("Need to mark the trial before the first error as error (SO DO NOT exclude from er_tc)")
                elif ((curr_df.loc[er_t[ier],'cond'] == 'DCDA') & (curr_df.loc[er_t[ier+1],'cond'] == 'SCDA') & (curr_df.loc[er_t[ier],'congruency'] == 'incong')):
                    print('first of two consecuitive trials is DCDA and incong, second is SCDA')
                    print("Need to mark the trial before the first error as error (SO DO NOT exclude from er_tc)")
                elif ((curr_df.loc[er_t[ier],'cond'] == 'SCSA') & (curr_df.loc[er_t[ier+1],'cond'] == 'SCSA')):
                    print('curr trial: ' + str(curr_df.loc[er_t[ier],'btrial']) + ' error: ' + str(curr_df.loc[er_t[ier],'error']) + ' congruency: ' + str(curr_df.loc[er_t[ier],'congruency']))
                    print('next trial: ' + str(curr_df.loc[er_t[ier+1],'btrial']) + ' error: ' + str(curr_df.loc[er_t[ier+1],'error']) + ' congruency: ' + str(curr_df.loc[er_t[ier +1],'congruency']))
                    if (curr_df.loc[er_t[ier],'congruency'] == 'cong'):
                        print('cong first of two consecuitive trials that are SCSA')
                        print("DO NOT mark the trial before the first error as error (exclude from er_tc)")
                        er_tc[ier] = 'nan' #numpy converts the string 'nan' to a float NaN on assignment
                    else: #an incong trial:
                        if (curr_df.loc[er_t[ier],'error'] == 2) & (curr_df.loc[er_t[ier+1],'error'] == 1):
                            print('first of two consecuitive trials that are SCSA. errors are 2 then 1')
                            print("Need to mark the trial before the first error as error (SO DO NOT exclude from er_tc)")
                        elif (curr_df.loc[er_t[ier]-1,'cond'] == 'SCSA'):
                            #not 2,1 - depends on the trial before the first error; if it is SCSA, exclude the entry
                            print('first of two consecuitive trials that are SCSA. prev is SCSA') #errors are 2 then 1
                            print("DO NOT mark the trial before the first error as error (exclude from er_tc)")
                            er_tc[ier] = 'nan'
                        elif ((curr_df.loc[er_t[ier]-1,'cond'] == 'DCSA') | (curr_df.loc[er_t[ier]-1,'cond']=='DCDA')):
                            print('first of two consecuitive trials that are SCSA. prev is DCSA/DCDA')
                            print("Need to mark the trial before the first error as error (SO DO NOT exclude from er_tc)") 
                        else:
                              #unverified combination: only a message is printed, the entry is kept
                              print("might be a scenario I didn't check, check code")

                else: #if none of the above, generally do not mark:
                    print("DO NOT mark the trial before the first error as error (exclude from er_tc)")
                    er_tc[ier] = 'nan'

        #if this is the second of two consecutive errors and both are SCSA, exclude it so that we do not change the error from 2 to 1.
        if ((ier > 0) & ((er_t[ier] - er_t[ier-1])  == 1)): #this is the second of two consecutive errors
            if ((curr_df.loc[er_t[ier-1],'cond'] == 'SCSA') & (curr_df.loc[er_t[ier],'cond'] == 'SCSA')):
                print('second of two error trials where the first and second are SCSA')
                print("need to exclude the second trial from er_tc, so that it'll not change the 2 to 1")
                er_tc[ier] = 'nan'

    return er_tc

  
def add_all_errors_onsets(curr_df,er_t,min_time,subj,r):    
    """Collect onsets and durations of the un-modelled periods around all errors of a run.

    Walks over the error index labels in ``er_t`` and, for each one, decides
    whether to call ``add_error_onsets`` (which appends one onset/duration pair
    to the lists built here).

    Parameters
    ----------
    curr_df : DataFrame
        The columns 'btrial', 'tstimon', 'cond', 'congruency' and 'error' are
        read here; 'tstimon' is subtracted, so it must be numeric at this point.
    er_t : index-like of int
        Index labels of the trials logged as errors; two labels that differ by
        1 are treated as consecutive errors.
    min_time : float
        Passed through to ``add_error_onsets``.
    subj, r :
        Subject and run numbers; used only for the two hard-coded exceptions
        (subject 108 run 1, subject 134 run 2) in the three-in-a-row case.

    Returns
    -------
    (prev_time, duration) : two lists filled by ``add_error_onsets``.

    Decision rules:

    * last error in ``er_t``, or an error whose successor is not consecutive:
      always add;
    * first of consecutive errors that also directly follows another error
      (middle of three or more): add, except for the two subject-specific cases;
    * first of a pair: add if the second is 'DCSA'/'DCDA'; if both are 'SCSA',
      skip when the first is congruent, otherwise add when the error codes are
      2 then 1, skip when the preceding trial is 'SCSA', add when it is
      'DCSA'/'DCDA', and only print a message otherwise; any other pair: skip.
    """
    prev_time=[]
    duration=[]
    
    for ier in range(len(er_t)):
        curr_er = er_t[ier]
        print('working on error trial: ' + str(curr_df.loc[curr_er,'btrial']))
        if ier == (len(er_t)-1): #this is the last error
            #the last error cannot be the first of consecutive errors, just add the onsets:
            add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)
        else: #this is not the last error, check if it is the first of consecutive errors: 
            if er_t[ier+1] - curr_er  == 1: #if so: 
                print('the first of two consecuitive errors, time difference is:')
                print(curr_df.loc[er_t[ier+1],'tstimon'] - curr_df.loc[curr_er,'tstimon'])
                print('curr cond is: ' + curr_df.loc[er_t[ier],'cond'] + ' second is: ' + curr_df.loc[er_t[ier+1],'cond'])

                if ((ier > 0) & (curr_er - er_t[ier-1] == 1)): #this might be a middle one, check for middle of three consecutive errors - treat differently:
                    print('Middle trial in a SEQUENCE OF THREE ERROR, CHECK CODE. marks changes')
                    #if category switch - might be different, raise exception:
                    #I now checked it (subj 110) - need to mark previous changes anyway, left here for reference
                    #if ((curr_df.loc[curr_er,'cond'] == 'DCSA') | (curr_df.loc[curr_er,'cond']=='DCDA')):
                    #   raise Exception("Middle trial in a sequence of three errors is a category switch, check code")
                    if ((subj == 108) & (r == 1) & (curr_df.loc[curr_er,'btrial'] != 2)): #if it's the second one, need to mark the time before the first trial
                        print('sub 108, begining, no need to mark changes')
                    elif ((subj == 134) & (r == 2) & (curr_df.loc[er_t[ier],'cond'] == 'SCSA')): 
                        print('sub 134, 4 trials, no need to mark changes')
                    else:
                        #middle of three errors, need to mark previous changes:
                        add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)

                else: #ier == 0, so this cannot be the middle of three errors, or ier >0, but only the first of two:
                    if ((curr_df.loc[er_t[ier+1],'cond'] == 'DCSA') | (curr_df.loc[er_t[ier+1],'cond']=='DCDA')):
                        print('the first of two consecuitive errors (not middle of three), second is a category switch, mark previous errors')
                        add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)
                        #if  (curr_df.loc[er_t[ier+1],'cond']=='DCDA'):
                        #    print('next error is: ' + curr_df.loc[er_t[ier+1],'cond'] + ' check that indeed needs to mark errors')
                    elif ((curr_df.loc[er_t[ier],'cond'] == 'SCSA') & (curr_df.loc[er_t[ier+1],'cond'] == 'SCSA')):
                        if (curr_df.loc[er_t[ier],'congruency'] == 'cong'):
                            print('cong first of two consecuitive trials that are SCSA')
                            print('NOT marking additional errors')
                        else: #an incong trial:
                            #er_t[ier]+1 equals er_t[ier+1] here, because the two errors are consecutive
                            if (curr_df.loc[er_t[ier],'error'] == 2) & (curr_df.loc[er_t[ier]+1,'error'] == 1):
                                print('first of two consecuitive trials that are SCSA. errors are 2 then 1. mark previous errors')
                                add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)
                            elif (curr_df.loc[er_t[ier]-1,'cond'] == 'SCSA'):
                                #not 2,1 - depends on the trial before the first error; if it is SCSA, nothing is added
                                print('first of two consecuitive trials that are SCSA. prev is SCSA') #errors are 2 then 1
                                print('NOT marking additional errors')
                            elif ((curr_df.loc[er_t[ier]-1,'cond'] == 'DCSA') | (curr_df.loc[er_t[ier]-1,'cond']=='DCDA')):
                                print('first of two consecuitive trials that are SCSA. prev is DC. mark previous errors')
                                add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)
                            else:
                                  #unverified combination: only a message is printed, nothing is added
                                  print("might be a scenario I didn't check, check code")

                    else: #not a special case of two consecutive errors
                        print('first trial of two consecuitive errors, not a special case')
                        print('NOT marking additional errors')

            else: #if not the first of two consecutive errors (i.e., the second, or a single one), add onsets:
                add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)
    
    return prev_time,duration

#used only in the single trials models:
def create_onsets_for_single_trial_models(df_no_errs,all_states,model,sub_onsets_dir,r):
    """Write the onset files of the single-trial models for one run.

    For every row (trial) of ``df_no_errs`` this writes:

    * ``{model}_run{r}_trial{t}_model_trial.txt`` - that trial alone;
    * for each state in ``all_states``,
      ``{model}_run{r}_trial{t}_model_other_trials_{st}.txt`` - all trials of
      that state except the current one, or a single dummy line ``0<TAB>0<TAB>0``
      when no such trial exists.

    Files are tab-separated, without header or index, with the columns
    'tstimon', 'rt' and a constant modulation of 1. ``t`` is the trial's
    'btrial' value and files are created inside ``sub_onsets_dir``.
    """
    onset_cols = ['tstimon','rt']
    trial_tmpl = '{o_dir}/{m}_run{r}_trial{t}_model_trial.txt'
    others_tmpl = '{o_dir}/{m}_run{r}_trial{t}_model_other_trials_{st}.txt'

    for _, trial_row in df_no_errs.iterrows():
        trial_num = trial_row['btrial']

        #regressor holding only the current trial:
        trial_path = trial_tmpl.format(o_dir=sub_onsets_dir, m=model,r=r,t=trial_num)
        this_trial = pd.DataFrame(trial_row).T[onset_cols].assign(modulation=1)
        this_trial.to_csv(trial_path,sep='\t',index=False, header = False)

        #one regressor per state with every other trial of that state:
        not_this_trial = df_no_errs['btrial'] != trial_num
        for st in all_states:
            in_state = df_no_errs['state'] == st
            others = df_no_errs[in_state & not_this_trial]
            others_path = others_tmpl.format(o_dir=sub_onsets_dir, m=model,r=r,t=trial_num,st=st)

            if others.empty:
                #nothing left in this state: write a dummy (all zeros) file
                print('no other onsets state' + st) 
                with open(others_path, 'w') as f:
                    f.write('0\t0\t0')
                continue

            others[onset_cols].assign(modulation=1).to_csv(
                others_path,sep='\t',index=False, header = False)

### This function creates all regressors for one run of the all-16-states model

Category switch trials (DCDA, DCSA): I only miss the error trial, and the trial marked as an error is its repeat. Mark the trial n-1 from the error as an error as well (that's the trial that preceded the error; I decided to exclude it, see GitHub), and model the time between n-1 and n (the error) as error.

Other errors: go back two trials. 1) Mark the trial before the one that appears as an error as an error (that's a repeat). 2) Also model the gap between the last correct trial (n-2) and (n-1) as an error (two trials should be missing there).

Two consecutive errors:

This is where things got really funky - look at the file preprocessing/figuring_out_errors.xlsx to see exactly why all the decisions here in this code were made.


In [14]:
#Dec 23 2021: the 16-states regressors were originally produced with the consecutive-error
#logic written inline in this function. In Dec 2021, while working on the single-trials models,
#that logic was moved unchanged into the auxiliary functions check_consec_ers and
#add_all_errors_onsets (auxiliary-functions cell above). This function now calls them
#instead of keeping a second, identical copy.
#creates onsets per run:
def create_all16states_onsets(curr_df,sub_onsets_dir,r,chk_md,subj):
    """Create the onset files of the 'all_16states' model for one run.

    One tab-separated 3-column file (onset 'tstimon', duration 'rt', modulation 1)
    is produced per state, plus one "trash" file that collects the first trial,
    the error trials and the un-logged periods around errors.

    Parameters
    ----------
    curr_df : DataFrame
        Behavioral log of a single run. The columns 'rt', 'tstimon', 'dimA',
        'dimB', 'error', 'btrial', 'cond' and 'soa' are read. 'rt' is divided by
        1000 to obtain seconds. The frame is MODIFIED IN PLACE: 'rt' is
        reformatted, 'error' is updated, 'congruency' and 'state' are added and
        'tstimon' ends up numeric - so do not call this twice on the same frame.
        Index labels are assumed to be consecutive integers (trial n-1 is
        addressed as label-1) - TODO confirm for new data.
    sub_onsets_dir : str
        Directory the onset files are written to.
    r : int
        Run number (used in file names and for subject-specific exceptions).
    chk_md : int
        1: only print the number of trials per state, write no per-state files.
        0: write all files. Note that the per-state files are written for any
        value other than 1, while the trash file is written only for 0.
    subj : int
        Subject number (used for subject-specific exceptions in
        ``add_all_errors_onsets``).

    Raises
    ------
    ValueError
        If the run does not contain exactly one row with btrial == 1.
    Exception
        Propagated from ``add_error_onsets`` when a computed error period is
        shorter than expected.
    """
    #define the name of the model:
    model = 'all_16states'

    ### clean up ###
    #RTs need to be in seconds for the duration; align everything to 3 digits after the dot.
    #No thousands separator in the format: '1,000.000' could not be parsed back by pd.to_numeric below.
    curr_df['rt'] = (curr_df['rt']/1000).map('{:.3f}'.format)
    curr_df['tstimon'] = curr_df['tstimon'].map('{:.3f}'.format)

    ##add cong/incong: a trial is congruent when the second character of dimA and dimB match
    curr_df['congruency'] = ['cong' if a[1] == b[1] else 'incong'
                             for a, b in zip(curr_df['dimA'].values, curr_df['dimB'].values)]

    #get indices of where there are errors marked in the behavioral files:
    er_t = curr_df.index[curr_df['error'] != 0]
    if er_t.empty:
        print('no errors')
    print(er_t[:])

    #mark one trial before the trial that is logged as an error as an error too.
    #er_tc is a copy of er_t in which the entries that must NOT trigger this are NaN
    #(er_t itself is still needed, untouched, for the loop over errors further down):
    er_tc = check_consec_ers(curr_df, er_t)

    #the first trial always goes to the trash regressor:
    is_first_trial = curr_df['btrial'] == 1
    first_trial_error = curr_df.loc[is_first_trial, 'error']
    if len(first_trial_error) != 1:
        raise ValueError('expected exactly one row with btrial == 1, found {n}'.format(n=len(first_trial_error)))
    if first_trial_error.iloc[0] != 0:
        print('first trial is an error, remove from er_tc so it does not cause problems\n' +
              'with marking er_tc-1 trials')
        er_tc = er_tc[1:]
    else:
        #not already an error: include the first trial in the trash bin by marking it as one
        curr_df.loc[is_first_trial, 'error'] = 1

    er_tc = er_tc[~np.isnan(er_tc)].astype(int)
    print(er_tc)
    curr_df.loc[er_tc-1,'error'] = 1 #if empty - won't do a thing :)

    ### compute the state based on same and prior step, and attach to curr_df ####
    curr_dim = np.array(curr_df['dimA'][1:])
    prev_dim = np.array(curr_df['dimA'][0:-1])
    state = prev_dim + curr_dim #this is prev trial,curr trial
    #it's parallel to how it is in the paper: e.g., (Ho)Fo, only without the parentheses
    all_states = np.unique(state)
    state = np.insert(state,0,np.nan) #the first trial has no previous trial
    curr_df['state'] = state

    #the shortest pre-defined SOA is used to estimate the timing of the missing trials
    #(defining it from the observed onset differences did not work well, because those depend on RT):
    min_time = round_down(min(curr_df['soa'][1:]),2) #no trial should be shorter than that
    print('min time is: ' + str(min_time))

    ### for each state, create onsets file:
    for st in all_states:
        curr_st = curr_df[(curr_df['state'] == st) & (curr_df['error'] == 0)]
        onsets = curr_st[['tstimon','rt']].assign(modulation=1)
        if chk_md == 1:
            #print how many trials are in the current state:
            print('#trials in state {st}: {n}'.format(st=st,n=curr_st.shape[0]))
        else:
            #save file:
            onsetsfile = '{o_dir}/{m}_run{r}_{st}.txt'.format(o_dir=sub_onsets_dir, m=model,r=r,st=st)
            onsets.to_csv(onsetsfile,sep='\t',index=False, header = False)

    ### create the trash regressor for the first trial and mistakes:
    onsets = curr_df.loc[curr_df['error'] != 0, ['tstimon','rt']]

    #change 'tstimon' in curr_df to numeric so it can be added to and subtracted from:
    curr_df['tstimon'] = pd.to_numeric(curr_df['tstimon'],downcast="float")

    #add the onsets for the missing trials:
    prev_time, duration = add_all_errors_onsets(curr_df, er_t, min_time, subj, r)
    temp_ons = pd.DataFrame({"tstimon": prev_time,'rt': duration})
    #DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
    onsets = pd.concat([onsets, temp_ons])
    #change to numeric so it can be sorted:
    onsets['tstimon'] = pd.to_numeric(onsets['tstimon'],downcast="float")
    onsets = onsets.sort_values(by=['tstimon'])

    ## check first trial:
    first_onset = onsets.iloc[0,onsets.columns.get_loc('tstimon')]
    if first_onset > 10:
        print('*** CHECK FIRST TRIAL, dur is: %.3f ***' % first_onset)

    #format nicely:
    onsets['rt'] = pd.to_numeric(onsets['rt'],downcast="float").map('{:.3f}'.format)
    onsets['tstimon'] = onsets['tstimon'].map('{:.3f}'.format)

    #if there are three consecutive errors, it is hard to say in advance when to mark changes or not,
    #because it depends on the specific sequence of trials. So exclude periods that were marked twice:
    onsets = onsets.drop_duplicates()

    #add modulation
    onsets['modulation'] = 1
    print(onsets)
    #save to a file
    if chk_md == 0:
        onsetsfile = '{o_dir}/{m}_run{r}_trash.txt'.format(o_dir=sub_onsets_dir, m=model,r=r)
        onsets.to_csv(onsetsfile,sep='\t',index=False, header = False)

### Create onsets for all subjects, all 16 states model

In [17]:
warnings.filterwarnings('ignore')
#full subject list.
#NOTE(review): the original comment here read "in the study sample (n=27)", but this list has 33
#entries and is identical to the "all subjects" list in the configuration cell - confirm which is meant.
subjects = [103,104,105,106,107,108,109,110,111,112,
            114,115,119,120,122,123,124,125,126,127,
            128,129,130,133,134,135,136,138,139,140,141,142,143]
# #only with edata
# subjects = [110,111,112,
#            114,115,120,122,123,124,125,126,
#            128,129,130,133]

# incomplete edata: 119, 127
# I ran the same exact thing for all participants
#the list above is overridden here to run a single subject:
subjects = [108] #,114,115,120,122,123,124,125,126,128,129,130,133]
chk_md = 1 #1: only check the number of trials, 0: actually save files
#sessions = [2]
if chk_md:
    print('When running this code check the following:\n'
          '1. Sequences of more than 2 trials - will alert - check that makes sense against the behavioral file \n'
          '2. First and second trial errors - check that makes sense, and it did not miss. '
          '3. First trial too long - will alert')

#runs belonging to each scanning session; an unexpected session number raises a KeyError
#instead of silently reusing the runs of the previous session:
runs_per_session = {1: [1,2,3,4], 2: [5,6,7,8]}

for i in subjects:
    sub_onsets_dir = analysis_dir + '/sub-{sub}'.format(sub=i) + '/onsets'
    os.makedirs(sub_onsets_dir, exist_ok=True)

    for sess in sessions:
        tsv_file = '{b_dir}/FHSS_Learn_{s}_{sub}behavioral.txt'.format(b_dir=behav_dir,sub=i,s=sess)

        #the header line is split on commas, while the data rows are read as tab-separated
        with open(tsv_file) as f:
            header = np.array(f.readline().strip().split(','))
            #The header has more variables than the data, select the relevant ones:
            cols = list(range(10)) + [15,16,17,19,21,22,23]
            header = header[cols] #12 13 15:17
        #read the data (skipping the header line) and attach the selected header:
        df = pd.read_csv(tsv_file, sep='\t',skiprows=1,names=header)

        #set the runs numbers:
        runs = runs_per_session[sess]

        #run per run
        for r in runs:
            print('creating onsets for subj {sub}, sess {s}, run {r}:'.format(sub=i,s=sess,r=r))
            #keep only the current run; copy, because create_all16states_onsets modifies the frame in place:
            curr_df = df[df['block'] == r].copy()
            #run the function that creates the onsets files:
            create_all16states_onsets(curr_df,sub_onsets_dir,r,chk_md,i)

When running this code check the following:
1. Sequences of more than 2 trials - will alert - check that makes sense against the behavioral file 
2. First and second trial errors - check that makes sense, and it did not miss. 3. First trial too long - will alert
creating onsets for subj 108, sess 1, run 1:
Int64Index([0, 1, 2, 3, 10, 15, 29, 36, 44, 45, 52, 53, 58, 61, 67, 69, 74,
            84],
           dtype='int64')
first of two consecuitive trials is: SSSS
second of two consecuitive trials is: SCSA
DO NOT mark the trial before the first error as error (exclude from er_tc)
first of two consecuitive trials is: SCSA
second of two consecuitive trials is: SCSA
curr trial: 2 error: 2 congruency: incong
next trial: 3 error: 2 congruency: incong
might be a scenario I didn't check, check code
first of two consecuitive trials is: SCSA
second of two consecuitive trials is: SCSA
curr trial: 3 error: 2 congruency: incong
next trial: 4 error: 1 congruency: incong
first of two consecuitive tr

#trials in state FyFy: 8
#trials in state FyHo: 4
#trials in state FyHy: 3
#trials in state HoFo: 4
#trials in state HoFy: 3
#trials in state HoHo: 8
#trials in state HoHy: 8
#trials in state HyFo: 4
#trials in state HyFy: 4
#trials in state HyHo: 5
#trials in state HyHy: 8
working on error trial: 32
this error is a category switch
working on error trial: 43
this error is NOT a category switch
check that dur is larger than min time *2: 7.32
working on error trial: 96
this error is NOT a category switch
check that dur is larger than min time *2: 7.32
     tstimon     rt  modulation
291    9.636  0.900           1
321  118.110  0.967           1
0    120.870  4.041           1
322  125.411  1.196           1
1    159.523  7.324           1
332  167.347  1.607           1
333  170.630  1.248           1
2    355.803  7.324           1
385  363.627  1.705           1
386  366.911  1.673           1


### creates the RT regressors per run
To run this, run the cell above - we need the add_error_onsets function that identifies errors

In [4]:
def create_add_rt_reg_all16states_onsets(curr_df,sub_onsets_dir,r,subj):
    '''
    Create the onsets files of the RT regressors for one run.

    Background:
    1. Since for errors we didn't have RTs for some of the participants, no RT regressor
       is added for errors.
    2. This is a control analysis for the main analysis. Since in the main analysis the
       duration was the RT, it is kept here as well, for the modulated (RT) regressor and
       for the unmodulated (average response) condition.
    3. All the checks are done (and printed) by the function that creates the main onsets;
       this one prints very little, but keeps the same conditionals.

    For every state (dimA of the previous trial + dimA of the current trial) one file
    '{sub_onsets_dir}/RT_reg_all_16states_run{r}_{state}.txt' is written. With more than one
    non-error trial in the state, each row is: tstimon <tab> rt (seconds) <tab> z-scored rt.
    Otherwise a placeholder row of zeros is written and a note is printed.

    Parameters
    ----------
    curr_df : pandas.DataFrame
        The behavioral rows of a single run. Columns read: 'rt' (divided by 1000 to get
        seconds), 'tstimon', 'dimA', 'dimB', 'cond', 'error', 'btrial'. The index labels are
        used to find the trial before an error (label - 1). The frame is NOT modified: the
        function works on a copy, so it can be called again with the same input.
    sub_onsets_dir : str
        Folder in which the onsets files are written.
    r : int
        Run number, used in the file names.
    subj : int
        Subject id, only used in the printed note.

    Raises
    ------
    ValueError
        If the run does not contain exactly one row with btrial == 1.
    '''
    #define the name of the model:
    model = 'RT_reg_all_16states'

    #work on a private copy: the columns below are overwritten with formatted strings, which would
    #otherwise change the caller's frame and make a second call on the same frame fail.
    curr_df = curr_df.copy()

    ### clean up ###
    #RTs need to be in seconds for the modulation and lets align all to have 3 digits:
    curr_df['rt'] = (curr_df['rt']/1000).map('{:,.3f}'.format)
    #the timing of all should be aligned, so let's make it all up to 3 digits after the dot:
    curr_df['tstimon'] = curr_df['tstimon'].map('{:,.3f}'.format)

    ##add cong/incong: a trial is congruent when the second character of dimA and dimB match
    curr_df['congruency'] = ['cong' if a[1] == b[1] else 'incong'
                             for a, b in zip(curr_df['dimA'].values, curr_df['dimB'].values)]

    #get indices of where there are errors marked in the behavioral files:
    er_t = curr_df.index[curr_df['error'] != 0]

    #mark one trial before the trial that is marked as an error as error:
    #this should not be done for the last error, in cases where there are two consecutive errors.
    #er_tc is a float copy of er_t, so entries can be set to nan while the loop still walks er_t:
    er_tc = np.array(er_t).astype("float")
    n_err = len(er_t)
    for ier in range(n_err):
        if (ier < n_err-1) and (er_t[ier+1] - er_t[ier] == 1): #this is the first of two consecutive
            first_cond = curr_df.loc[er_t[ier],'cond']
            second_cond = curr_df.loc[er_t[ier+1],'cond']
            first_cong = curr_df.loc[er_t[ier],'congruency']
            #if the second one is DCSA/DCDA, or the first is DCSA - keep marking the trial before:
            if (first_cond == 'DCSA') or (second_cond == 'DCSA') or (second_cond == 'DCDA'):
                pass
            elif (first_cond == 'DCDA') and (second_cond == 'SCDA') and (first_cong == 'incong'):
                pass
            elif (first_cond == 'SCSA') and (second_cond == 'SCSA'):
                if first_cong == 'cong':
                    er_tc[ier] = np.nan
                else: #an incong trial:
                    if (curr_df.loc[er_t[ier],'error'] == 2) and (curr_df.loc[er_t[ier+1],'error'] == 1):
                        pass
                    elif curr_df.loc[er_t[ier]-1,'cond'] == 'SCSA':
                        er_tc[ier] = np.nan
                    elif curr_df.loc[er_t[ier]-1,'cond'] in ('DCSA','DCDA'):
                        pass
                    else:
                        print("might be a scenario I didn't check, check code")
            else: #if none of the above, generally do not mark:
                er_tc[ier] = np.nan

        #if the second of two consecutive SCSA errors, exclude so that we do not change the error from 2 to 1.
        if (ier > 0) and ((er_t[ier] - er_t[ier-1]) == 1): #this is the second of two consecutive
            if (curr_df.loc[er_t[ier-1],'cond'] == 'SCSA') and (curr_df.loc[er_t[ier],'cond'] == 'SCSA'):
                er_tc[ier] = np.nan

    first_trial = curr_df['btrial'] == 1
    #.item() requires exactly one first trial in the run (ValueError otherwise)
    if (curr_df.loc[first_trial, 'error'] != 0).item():
        #the first trial is an error: drop it from er_tc, there is no trial before it to mark
        er_tc = er_tc[1:]
    else:
        #if not already an error, include the first trial in a trash bin
        #by marking the first trial as an error:
        curr_df.loc[first_trial, 'error'] = 1

    er_tc = er_tc[~np.isnan(er_tc)].astype(int)
    if er_tc.size: #nothing to mark when there are no (remaining) errors
        curr_df.loc[er_tc-1,'error'] = 1

    ### compute the state based on same and prior step, and attach to curr_df ####
    curr_t = np.array(curr_df['dimA'][1:])
    prev_t = np.array(curr_df['dimA'][0:-1])
    state = prev_t + curr_t #this is prev trial,curr trial
    #it's parallel to how it is in the paper: e.g., (Ho)Fo, only without the parentheses
    all_states = np.unique(state)
    state = np.insert(state,0,np.nan) #the first trial has no previous trial, so no state
    curr_df['state'] = state

    ### for each state, create onsets file:
    for st in all_states:
        keep = (curr_df['state'] == st) & (curr_df['error'] == 0)
        onsets = curr_df.loc[keep, ['tstimon','rt']].copy()
        onsetsfile = '{o_dir}/{m}_run{r}_{st}.txt'.format(o_dir=sub_onsets_dir, m=model,r=r,st=st)
        if len(onsets) > 1: #can have an RT regressor, zscore and it will be ortho to the unmodulated regressor.
            #zscore rt, after converting it to a numpy float array
            #(note: identical RTs have zero variance, so their z-scores are nan)
            onsets['modulation'] = stats.zscore(onsets['rt'].to_numpy().astype(float))
            #save file:
            onsets.to_csv(onsetsfile,sep='\t',index=False, header = False)
        else: #cannot create an RT regressor that will be ortho to the unmodulated, bc fewer than two trials
            #create an empty file, and output a note:
            print('sub {s} run {r} state {st} has only 1 trial, creating empty RT onset file'.format(s=subj,r=r,st=st))
            # Creates a new file
            with open(onsetsfile, 'w') as f:
                f.write('0\t0\t0\t')

### Add RT-regressor onsets for all subjects (control RT model, all 16 states)

In [5]:
              
# Study sample only: N=27 participants who passed the motion criteria
# (see the github readme).
subjects = [103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
            114, 115, 119, 120, 122, 123, 124, 126, 127, 128,
            130, 135, 138, 139, 140, 141, 142]

# Columns to keep from the header line: the header lists more variables than
# the data rows contain.
cols = [*range(10), 15, 16, 17, 19, 21, 22, 23]

for i in subjects:
    # per-participant output folder, created on first use
    sub_onsets_dir = '{a_dir}/sub-{sub}/onsets'.format(a_dir=analysis_dir, sub=i)
    if not os.path.exists(sub_onsets_dir):
        os.makedirs(sub_onsets_dir)

    for sess in sessions:
        tsv_file = '{b_dir}/FHSS_Learn_{s}_{sub}behavioral.txt'.format(b_dir=behav_dir, sub=i, s=sess)

        # the first line is a comma-separated header; the data rows are tab-separated
        with open(tsv_file) as f:
            header = np.array(f.readline().strip().split(','))[cols]
        df = pd.read_csv(tsv_file, sep='\t', skiprows=1, names=header)

        # runs 1-4 belong to session 1, runs 5-8 to session 2
        if sess == 2:
            runs = [5, 6, 7, 8]
        elif sess == 1:
            runs = [1, 2, 3, 4]

        for r in runs:
            print('creating RT regressors onsets for subj {sub}, sess {s}, run {r}:'.format(sub=i, s=sess, r=r))
            # keep only the rows of the current run and write its RT-regressor onsets
            in_run = df['block'] == r
            curr_df = df[in_run]
            create_add_rt_reg_all16states_onsets(curr_df, sub_onsets_dir, r, i)

creating RT regressors onsets for subj 103, sess 1, run 1:
creating RT regressors onsets for subj 103, sess 1, run 2:
creating RT regressors onsets for subj 103, sess 1, run 3:
creating RT regressors onsets for subj 103, sess 1, run 4:
creating RT regressors onsets for subj 103, sess 2, run 5:
creating RT regressors onsets for subj 103, sess 2, run 6:
creating RT regressors onsets for subj 103, sess 2, run 7:
creating RT regressors onsets for subj 103, sess 2, run 8:
creating RT regressors onsets for subj 104, sess 1, run 1:
creating RT regressors onsets for subj 104, sess 1, run 2:
creating RT regressors onsets for subj 104, sess 1, run 3:
creating RT regressors onsets for subj 104, sess 1, run 4:
creating RT regressors onsets for subj 104, sess 2, run 5:
creating RT regressors onsets for subj 104, sess 2, run 6:
creating RT regressors onsets for subj 104, sess 2, run 7:
creating RT regressors onsets for subj 104, sess 2, run 8:
creating RT regressors onsets for subj 105, sess 1, run 

creating RT regressors onsets for subj 124, sess 2, run 5:
creating RT regressors onsets for subj 124, sess 2, run 6:
creating RT regressors onsets for subj 124, sess 2, run 7:
creating RT regressors onsets for subj 124, sess 2, run 8:
creating RT regressors onsets for subj 126, sess 1, run 1:
creating RT regressors onsets for subj 126, sess 1, run 2:
creating RT regressors onsets for subj 126, sess 1, run 3:
creating RT regressors onsets for subj 126, sess 1, run 4:
creating RT regressors onsets for subj 126, sess 2, run 5:
creating RT regressors onsets for subj 126, sess 2, run 6:
creating RT regressors onsets for subj 126, sess 2, run 7:
creating RT regressors onsets for subj 126, sess 2, run 8:
creating RT regressors onsets for subj 127, sess 1, run 1:
creating RT regressors onsets for subj 127, sess 1, run 2:
creating RT regressors onsets for subj 127, sess 1, run 3:
creating RT regressors onsets for subj 127, sess 1, run 4:
creating RT regressors onsets for subj 127, sess 2, run 

## splitting each state regressor to 2 vs. single trials
We wanted to get more data points for classification, so we first considered splitting each regressor
into two, to have 2 regressors per state per run. Eventually, Yael and I decided to go directly to single trials. Using 2 regressors per state would force us to make decisions about how to split the trials, and for some states it may not be very different from single trials, because some states would have only 2 trials in each regressor. To avoid that, and to allow many more data points for the classifier, we decided to try single trials first.


## creating LSS single-trial regressors

In [96]:
#creates onsets per run:
#create the onsets per trial number, then have a mapping between the trial and the state per run 
#to be used for classification/RSA
def create_16states_onsets_per_trial(curr_df,sub_onsets_dir,r,chk_md,subj):
    '''
    Create the single-trial onsets of one run, plus the trash regressor of that run.

    Steps:
    1. Format 'rt' (converted to seconds) and 'tstimon' to 3 digits, add a 'congruency' column.
    2. Decide which trials before an error are marked as errors too (check_consec_ers), and mark
       the first trial of the run as an error so that it goes to the trash regressor.
    3. Attach the state (dimA of the previous trial + dimA of the current trial) to each trial.
    4. Save the non-error trials to '{sub_onsets_dir}/trials_info_run{r}.txt' and pass them to
       create_onsets_for_single_trial_models. Both happen regardless of chk_md.
    5. Build the trash regressor from the error trials and the onsets returned by
       add_all_errors_onsets; it is written to
       '{sub_onsets_dir}/all_16states_per_trial_run{r}_trash.txt' only if chk_md == 0.

    Parameters
    ----------
    curr_df : pandas.DataFrame
        The behavioral rows of a single run. Columns read here: 'rt', 'tstimon', 'dimA', 'dimB',
        'error', 'btrial', 'soa' (the helpers may read more). The frame is NOT modified: the
        function works on a copy, so it can be called again with the same input.
    sub_onsets_dir : str
        Folder in which all the files are written.
    r : int
        Run number, used in the file names.
    chk_md : int
        0 - save the trash onsets file; any other value - check only, do not save it.
    subj : int
        Subject id, forwarded to add_all_errors_onsets.

    Raises
    ------
    ValueError
        If the run does not contain exactly one row with btrial == 1.
    '''
    #define the name of the model:
    model = 'all_16states_per_trial'

    #work on a private copy: the columns below are overwritten with formatted strings, which would
    #otherwise change the caller's frame and make a second call on the same frame fail.
    curr_df = curr_df.copy()

    ### clean up ###
    #RTs need to be in seconds for the duration, and lets align all to have 3 digits:
    curr_df['rt'] = (curr_df['rt']/1000).map('{:,.3f}'.format)
    #the timing of all should be aligned, so let's make it all up to 3 digits after the dot:
    curr_df['tstimon'] = curr_df['tstimon'].map('{:,.3f}'.format)

    ##add cong/incong: a trial is congruent when the second character of dimA and dimB match
    curr_df['congruency'] = ['cong' if a[1] == b[1] else 'incong'
                             for a, b in zip(curr_df['dimA'].values, curr_df['dimB'].values)]

    #get indices of where there are errors marked in the behavioral files:
    er_t = curr_df.index[curr_df['error'] != 0]
    if er_t.empty:
        print('no errors')

    #mark one trial before the trial that is marked as an error as error.
    #this should not be done for the last error, in cases where there are two consecutive errors,
    #so check consecutive errors (er_t itself is kept as is, it is needed for the trash regressor below):
    er_tc = check_consec_ers(curr_df,er_t)

    first_trial = curr_df['btrial'] == 1
    #.item() requires exactly one first trial in the run (ValueError otherwise)
    if (curr_df.loc[first_trial, 'error'] != 0).item():
        print('first trial is an error, remove from er_tc so it does not cause problems\n' +
        'with marking er_tc-1 trials')
        er_tc = er_tc[1:]
    else:
        #if not already an error, include the first trial in a trash bin
        #by marking the first trial as an error:
        curr_df.loc[first_trial, 'error'] = 1

    er_tc = er_tc[~np.isnan(er_tc)].astype(int)
    curr_df.loc[er_tc-1,'error'] = 1 #if empty - won't do a thing :)

    ### compute the state based on same and prior step, and attach to curr_df ####
    curr_t = np.array(curr_df['dimA'][1:])
    prev_t = np.array(curr_df['dimA'][0:-1])
    state = prev_t + curr_t #this is prev trial,curr trial
    #it's parallel to how it is in the paper: e.g., (Ho)Fo, only without the parentheses
    all_states = np.unique(state)
    state = np.insert(state,0,np.nan) #the first trial has no previous trial, so no state
    curr_df['state'] = state

    #the shortest time gap - used to calculate the timing of the missing trials.
    #taken from the pre-defined soa (and not from the tstimon differences, which depend on RT);
    #it defines the minimal time to pass - no trial should be shorter than that:
    min_time = round_down(min(curr_df['soa'][1:]),2)

    #####################################
    #Go through all the trials, if it's an error, don't create onsets file.
    #then, in the code that runs the model, the models run only if an onset file exists.

    #create a dataframe w/O errors to loop through:
    df_no_errs = curr_df[curr_df['error'] == 0]
    #save to be able to match the data later on:
    file_name = '{o_dir}/trials_info_run{r}.txt'.format(o_dir=sub_onsets_dir,r=r)
    df_no_errs.to_csv(file_name,sep='\t',index=False)

    #loop through each row and create the onsets for all regressors per this trial:
    create_onsets_for_single_trial_models(df_no_errs,all_states,model,sub_onsets_dir,r)

    #####################################
    ### create the trash regressor for the first trial and mistakes:
    #(taken before 'tstimon' is converted below, so these rows still hold the formatted strings)
    curr_st = curr_df[(curr_df['error'] != 0)]
    onsets = curr_st[['tstimon','rt']]

    #change 'tstimon' in curr_df to numeric so it can be added to and subtracted from:
    curr_df['tstimon'] = pd.to_numeric(curr_df['tstimon'],downcast="float")

    #add the onsets for the missing trials:
    prev_time, duration = add_all_errors_onsets(curr_df,er_t,min_time,subj,r)

    temp_ons = pd.DataFrame({"tstimon": prev_time,'rt': duration})
    #pd.concat instead of DataFrame.append, which no longer exists in pandas >= 2.0:
    onsets = pd.concat([onsets, temp_ons])
    #change to numeric so it can be sorted:
    onsets['tstimon'] = pd.to_numeric(onsets['tstimon'],downcast="float")
    onsets = onsets.sort_values(by=['tstimon'])

    ## check first trial: alert if the earliest onset is later than 10
    first_onset = onsets.iloc[0,onsets.columns.get_loc('tstimon')]
    if first_onset > 10:
        print('*** CHECK FIRST TRIAL, dur is: %.3f ***' % first_onset)

    #format nicely:
    onsets['rt'] = pd.to_numeric(onsets['rt'],downcast="float").map('{:,.3f}'.format)
    onsets['tstimon'] = onsets['tstimon'].map('{:,.3f}'.format)

    #if there are three consecutive errors, hard to say in advance when to mark changes or not
    #because it depends on the specific sequence of trials. So exclude rows that were marked twice:
    onsets = onsets.drop_duplicates()

    #add modulation
    onsets['modulation'] = 1
    #save to a file
    if chk_md == 0:
        #save file:
        onsetsfile = '{o_dir}/{m}_run{r}_trash.txt'.format(o_dir=sub_onsets_dir, m=model,r=r)
        onsets.to_csv(onsetsfile,sep='\t',index=False, header = False)


### Create onsets for all subjects, 16 states_per_trial

In [97]:
# Silence library warnings for this batch run.
warnings.filterwarnings('ignore')

# Full participant list (33 IDs). NOTE: this is NOT the n=27 study sample.
subjects = [103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
            114, 115, 119, 120, 122, 123, 124, 125, 126, 127,
            128, 129, 130, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143]
# Alternative selection, participants with edata only (incomplete edata: 119, 127):
#   subjects = [110,111,112,114,115,120,122,123,124,125,126,128,129,130,133]
# The exact same procedure was run for all participants. To inspect one participant:
#   subjects = [110] #,114,115,120,122,123,124,125,126,128,129,130,133]

# Forwarded to create_16states_onsets_per_trial: 0 = actually save the files,
# non-zero = check mode (trial checks).
chk_md = 0
# sessions = [1]   # uncomment to restrict the sessions processed below

if chk_md:
    checklist = ('When running this code check the following:\n'
                 '1. Sequences of more than 2 trials - will alert - check that makes sense against the behavioral file \n'
                 '2. First and second trial errors - check that makes sense, and it did not miss. '
                 '3. First trial too long - will alert')
    print(checklist)

# Columns to keep from the header line: the header lists more variables than
# the data rows contain.
cols = [*range(10), 15, 16, 17, 19, 21, 22, 23]

for i in subjects:
    # per-participant single-trial output folder, created on first use
    sub_onsets_dir = '{a_dir}/sub-{sub}/onsets/single_trial'.format(a_dir=analysis_dir, sub=i)
    if not os.path.exists(sub_onsets_dir):
        os.makedirs(sub_onsets_dir)

    for sess in sessions:
        tsv_file = '{b_dir}/FHSS_Learn_{s}_{sub}behavioral.txt'.format(b_dir=behav_dir, sub=i, s=sess)

        # the first line is a comma-separated header; the data rows are tab-separated
        with open(tsv_file) as f:
            header = np.array(f.readline().strip().split(','))[cols]
        df = pd.read_csv(tsv_file, sep='\t', skiprows=1, names=header)

        # runs 1-4 belong to session 1, runs 5-8 to session 2
        if sess == 2:
            runs = [5, 6, 7, 8]
        elif sess == 1:
            runs = [1, 2, 3, 4]

        for r in runs:
            print('creating onsets for subj {sub}, sess {s}, run {r}:'.format(sub=i, s=sess, r=r))
            # keep only the rows of the current run and build its single-trial onsets
            in_run = df['block'] == r
            curr_df = df[in_run]
            create_16states_onsets_per_trial(curr_df, sub_onsets_dir, r, chk_md, i)

    print('### Finish all runs subj {sub} ###'.format(sub=i))

creating onsets for subj 103, sess 1, run 1:
no errors
creating onsets for subj 103, sess 1, run 2:
working on error trial: 54
this error is NOT a category switch
check that dur is larger than min time *2: 9.52
working on error trial: 97
this error is a category switch
creating onsets for subj 103, sess 1, run 3:
working on error trial: 79
this error is a category switch
creating onsets for subj 103, sess 1, run 4:
no errors
creating onsets for subj 103, sess 2, run 5:
working on error trial: 45
this error is NOT a category switch
check that dur is larger than min time *2: 7.43
creating onsets for subj 103, sess 2, run 6:
working on error trial: 93
this error is a category switch
creating onsets for subj 103, sess 2, run 7:
working on error trial: 37
this error is a category switch
creating onsets for subj 103, sess 2, run 8:
no errors
### Finish all runs subj 103 ###
creating onsets for subj 104, sess 1, run 1:
first of two consecuitive trials is: SCDA
second of two consecuitive trial

working on error trial: 4
the first of two consecuitive errors, time difference is:
17.883999
curr cond is: SCSA second is: SCSA
first of two consecuitive trials that are SCSA. prev is DC. mark previous errors
this error is NOT a category switch
check that dur is larger than min time *2: 14.64
working on error trial: 5
the first of two consecuitive errors, time difference is:
3.283001
curr cond is: SCSA second is: SCSA
Middle trial in a SEQUENCE OF THREE ERROR, CHECK CODE. marks changes
this error is NOT a category switch
check that dur is larger than min time *2: 7.33
working on error trial: 6
this error is NOT a category switch
check that dur is larger than min time *2: 14.63
working on error trial: 9
this error is NOT a category switch
check that dur is larger than min time *2: 7.33
working on error trial: 37
this error is NOT a category switch
check that dur is larger than min time *2: 7.33
working on error trial: 40
this error is a category switch
working on error trial: 71
the fi

working on error trial: 30
the first of two consecuitive errors, time difference is:
3.2839966
curr cond is: SCSA second is: SCSA
first of two consecuitive trials that are SCSA. prev is SCSA
NOT marking additional errors
working on error trial: 31
this error is NOT a category switch
check that dur is larger than min time *2: 7.43
working on error trial: 45
this error is NOT a category switch
check that dur is larger than min time *2: 7.85
working on error trial: 51
this error is a category switch
working on error trial: 54
this error is a category switch
working on error trial: 62
this error is NOT a category switch
check that dur is larger than min time *2: 7.87
working on error trial: 70
this error is a category switch
working on error trial: 85
this error is NOT a category switch
check that dur is larger than min time *2: 7.33
working on error trial: 91
this error is a category switch
working on error trial: 97
this error is a category switch
creating onsets for subj 107, sess 2, ru

working on error trial: 9
this error is NOT a category switch
check that dur is larger than min time *2: 8.09
working on error trial: 22
this error is a category switch
working on error trial: 26
this error is NOT a category switch
check that dur is larger than min time *2: 7.32
working on error trial: 45
this error is NOT a category switch
check that dur is larger than min time *2: 7.87
working on error trial: 51
this error is a category switch
working on error trial: 54
this error is NOT a category switch
check that dur is larger than min time *2: 7.39
working on error trial: 71
this error is a category switch
working on error trial: 89
this error is NOT a category switch
check that dur is larger than min time *2: 18.56
creating onsets for subj 108, sess 2, run 6:
working on error trial: 36
this error is a category switch
working on error trial: 44
this error is NOT a category switch
check that dur is larger than min time *2: 14.63
creating onsets for subj 108, sess 2, run 7:
working

working on error trial: 80
this error is a category switch
working on error trial: 88
this error is NOT a category switch
check that dur is larger than min time *2: 7.77
creating onsets for subj 111, sess 1, run 3:
working on error trial: 75
this error is NOT a category switch
check that dur is larger than min time *2: 10.29
creating onsets for subj 111, sess 1, run 4:
working on error trial: 8
this error is NOT a category switch
check that dur is larger than min time *2: 7.33
working on error trial: 63
this error is NOT a category switch
check that dur is larger than min time *2: 8.38
creating onsets for subj 111, sess 2, run 5:
working on error trial: 28
this error is a category switch
creating onsets for subj 111, sess 2, run 6:
working on error trial: 64
this error is NOT a category switch
check that dur is larger than min time *2: 8.36
creating onsets for subj 111, sess 2, run 7:
no errors
creating onsets for subj 111, sess 2, run 8:
no errors
### Finish all runs subj 111 ###
crea

working on error trial: 17
this error is a category switch
working on error trial: 27
this error is a category switch
working on error trial: 54
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
working on error trial: 68
this error is a category switch
working on error trial: 86
the first of two consecuitive errors, time difference is:
2.9169922
curr cond is: DCSA second is: SCDA
first trial of two consecuitive errors, not a special case
NOT marking additional errors
working on error trial: 87
this error is NOT a category switch
check that dur is larger than min time *2: 22.87
working on error trial: 95
this error is a category switch
*** CHECK FIRST TRIAL, dur is: 14.606 ***
creating onsets for subj 114, sess 2, run 6:
working on error trial: 30
this error is a category switch
working on error trial: 40
this error is NOT a category switch
check that dur is larger than min time *2: 17.01
working on error trial: 58
this error is NOT a category switch


working on error trial: 59
this error is a category switch
working on error trial: 74
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
working on error trial: 84
this error is a category switch
working on error trial: 89
this error is a category switch
creating onsets for subj 119, sess 1, run 3:
working on error trial: 36
this error is a category switch
working on error trial: 39
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
working on error trial: 68
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
working on error trial: 84
this error is a category switch
creating onsets for subj 119, sess 1, run 4:
no errors
creating onsets for subj 119, sess 2, run 5:
working on error trial: 64
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
*** CHECK FIRST TRIAL, dur is: 14.602 ***
creating onsets for subj 119, sess 2, run 6:
working on error trial: 1

working on error trial: 2
the first of two consecuitive errors, time difference is:
17.517998
curr cond is: SCSA second is: SCSA
might be a scenario I didn't check, check code
working on error trial: 3
the first of two consecuitive errors, time difference is:
2.9160004
curr cond is: SCSA second is: SCSA
Middle trial in a SEQUENCE OF THREE ERROR, CHECK CODE. marks changes
this error is NOT a category switch
check that dur is larger than min time *2: 31.63
working on error trial: 4
this error is NOT a category switch
check that dur is larger than min time *2: 14.10
working on error trial: 11
this error is NOT a category switch
check that dur is larger than min time *2: 8.27
working on error trial: 18
this error is NOT a category switch
check that dur is larger than min time *2: 14.10
working on error trial: 20
the first of two consecuitive errors, time difference is:
40.886993
curr cond is: SCSA second is: SCSA
first of two consecuitive trials that are SCSA. prev is DC. mark previous err

working on error trial: 3
this error is a category switch
working on error trial: 7
this error is NOT a category switch
check that dur is larger than min time *2: 14.10
working on error trial: 9
this error is a category switch
working on error trial: 24
this error is NOT a category switch
check that dur is larger than min time *2: 17.01
working on error trial: 46
this error is NOT a category switch
check that dur is larger than min time *2: 17.03
working on error trial: 66
this error is a category switch
working on error trial: 69
this error is NOT a category switch
check that dur is larger than min time *2: 17.01
working on error trial: 73
this error is a category switch
working on error trial: 78
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
working on error trial: 82
the first of two consecuitive errors, time difference is:
5.8519897
curr cond is: DCSA second is: SCSA
first trial of two consecuitive errors, not a special case
NOT marking additi

working on error trial: 1
first trial is an error
working on error trial: 4
this error is NOT a category switch
check that dur is larger than min time *2: 17.02
working on error trial: 31
this error is NOT a category switch
check that dur is larger than min time *2: 8.26
working on error trial: 51
the first of two consecuitive errors, time difference is:
2.9169922
curr cond is: DCSA second is: SCSA
first trial of two consecuitive errors, not a special case
NOT marking additional errors
working on error trial: 52
this error is NOT a category switch
check that dur is larger than min time *2: 22.87
creating onsets for subj 126, sess 1, run 2:
first of two consecuitive trials is: SCSA
second of two consecuitive trials is: SCDA
DO NOT mark the trial before the first error as error (exclude from er_tc)
working on error trial: 15
the first of two consecuitive errors, time difference is:
2.9179993
curr cond is: SCSA second is: SCDA
first trial of two consecuitive errors, not a special case
NOT

working on error trial: 1
first trial is an error
working on error trial: 8
this error is NOT a category switch
check that dur is larger than min time *2: 17.01
working on error trial: 38
this error is NOT a category switch
check that dur is larger than min time *2: 8.26
working on error trial: 46
this error is a category switch
working on error trial: 52
this error is NOT a category switch
check that dur is larger than min time *2: 14.12
working on error trial: 59
this error is a category switch
working on error trial: 69
this error is a category switch
working on error trial: 71
this error is a category switch
working on error trial: 77
this error is a category switch
working on error trial: 83
this error is NOT a category switch
check that dur is larger than min time *2: 14.10
working on error trial: 85
this error is NOT a category switch
check that dur is larger than min time *2: 17.01
working on error trial: 87
this error is a category switch
working on error trial: 91
this error 

working on error trial: 12
this error is a category switch
working on error trial: 25
this error is a category switch
working on error trial: 27
this error is NOT a category switch
check that dur is larger than min time *2: 14.10
working on error trial: 58
this error is a category switch
working on error trial: 65
this error is NOT a category switch
check that dur is larger than min time *2: 8.25
working on error trial: 67
the first of two consecuitive errors, time difference is:
5.833008
curr cond is: SCSA second is: SCDA
first trial of two consecuitive errors, not a special case
NOT marking additional errors
working on error trial: 68
this error is NOT a category switch
check that dur is larger than min time *2: 14.10
working on error trial: 79
the first of two consecuitive errors, time difference is:
8.75
curr cond is: SCDA second is: DCSA
the first of two consecuitive errors (not middle of three), second is a category switch, mark previous errors
this error is NOT a category switch

working on error trial: 2
this error is NOT a category switch
this error is the second trial
check that dur is larger than min time *2: 17.09
working on error trial: 8
this error is NOT a category switch
check that dur is larger than min time *2: 8.26
working on error trial: 25
this error is a category switch
working on error trial: 35
the first of two consecuitive errors, time difference is:
17.518005
curr cond is: SCDA second is: DCDA
the first of two consecuitive errors (not middle of three), second is a category switch, mark previous errors
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
working on error trial: 36
this error is a category switch
working on error trial: 41
this error is a category switch
working on error trial: 44
this error is NOT a category switch
check that dur is larger than min time *2: 14.10
working on error trial: 51
this error is a category switch
working on error trial: 65
this error is NOT a category switch
check that d

working on error trial: 3
this error is a category switch
working on error trial: 62
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
working on error trial: 65
the first of two consecuitive errors, time difference is:
2.9170227
curr cond is: DCSA second is: SCDA
first trial of two consecuitive errors, not a special case
NOT marking additional errors
working on error trial: 66
the first of two consecuitive errors, time difference is:
11.6839905
curr cond is: SCDA second is: DCDA
Middle trial in a SEQUENCE OF THREE ERROR, CHECK CODE. marks changes
this error is NOT a category switch
check that dur is larger than min time *2: 31.63
working on error trial: 67
this error is a category switch
working on error trial: 90
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
creating onsets for subj 135, sess 1, run 3:
working on error trial: 80
this error is a category switch
creating onsets for subj 135, sess 1, run 4:
workin

working on error trial: 25
this error is a category switch
working on error trial: 40
this error is NOT a category switch
check that dur is larger than min time *2: 11.16
working on error trial: 64
this error is a category switch
working on error trial: 78
this error is a category switch
working on error trial: 92
the first of two consecuitive errors, time difference is:
5.8320007
curr cond is: DCSA second is: SCDA
first trial of two consecuitive errors, not a special case
NOT marking additional errors
working on error trial: 93
this error is NOT a category switch
check that dur is larger than min time *2: 25.78
### Finish all runs subj 136 ###
creating onsets for subj 138, sess 1, run 1:
first of two consecuitive trials is: SCSA
second of two consecuitive trials is: SCSA
curr trial: 5 error: 2 congruency: incong
next trial: 6 error: 2 congruency: incong
first of two consecuitive trials that are SCSA. prev is SCSA
DO NOT mark the trial before the first error as error (exclude from er_t

### Finish all runs subj 139 ###
creating onsets for subj 140, sess 1, run 1:
first of two consecuitive trials is: SCDA
second of two consecuitive trials is: DCSA
first/second of two consecuitive trials is a category switch
Need to mark the trial before the first error as error (SO DO NOT exclude from er_tc)
working on error trial: 3
this error is NOT a category switch
check that dur is larger than min time *2: 14.11
working on error trial: 20
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
working on error trial: 25
this error is a category switch
working on error trial: 39
the first of two consecuitive errors, time difference is:
14.600998
curr cond is: SCDA second is: DCSA
the first of two consecuitive errors (not middle of three), second is a category switch, mark previous errors
this error is NOT a category switch
check that dur is larger than min time *2: 22.87
working on error trial: 40
this error is a category switch
working on error trial: 

working on error trial: 31
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
working on error trial: 34
this error is a category switch
working on error trial: 90
this error is a category switch
creating onsets for subj 142, sess 2, run 5:
working on error trial: 40
this error is NOT a category switch
check that dur is larger than min time *2: 14.11
working on error trial: 65
this error is a category switch
*** CHECK FIRST TRIAL, dur is: 14.602 ***
creating onsets for subj 142, sess 2, run 6:
working on error trial: 12
this error is NOT a category switch
check that dur is larger than min time *2: 8.26
working on error trial: 74
this error is NOT a category switch
check that dur is larger than min time *2: 17.02
working on error trial: 81
this error is NOT a category switch
check that dur is larger than min time *2: 8.26
creating onsets for subj 142, sess 2, run 7:
working on error trial: 40
this error is NOT a category switch
check that dur is larger 

no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other onsets stateFyHo
no other ons

working on error trial: 10
this error is NOT a category switch
check that dur is larger than min time *2: 8.26
working on error trial: 30
this error is NOT a category switch
check that dur is larger than min time *2: 11.18
working on error trial: 68
this error is a category switch
*** CHECK FIRST TRIAL, dur is: 14.609 ***
creating onsets for subj 143, sess 2, run 6:
working on error trial: 7
this error is NOT a category switch
check that dur is larger than min time *2: 8.25
working on error trial: 63
this error is a category switch
working on error trial: 65
this error is NOT a category switch
check that dur is larger than min time *2: 8.26
working on error trial: 73
this error is a category switch
creating onsets for subj 143, sess 2, run 7:
no errors
creating onsets for subj 143, sess 2, run 8:
working on error trial: 18
this error is a category switch
working on error trial: 44
this error is NOT a category switch
check that dur is larger than min time *2: 8.26
working on error trial

### Check that all subjects have all onset files
(there should be 136 files per subject)

In [5]:
# For every subject, print the subject label followed by the number of
# regular files (sub-folders are not counted) found in its onsets folder.
for i in subjects:
    sub_onsets_dir = '{a_dir}/sub-{sub}/onsets'.format(a_dir=analysis_dir, sub=i)
    print('subj {sub}'.format(sub=i))
    n_onset_files = sum(
        1
        for entry in os.listdir(sub_onsets_dir)
        if os.path.isfile(os.path.join(sub_onsets_dir, entry))
    )
    print(n_onset_files)


subj 103
136
subj 104
136
subj 105
136
subj 106
136
subj 107
136
subj 108
136
subj 109
136
subj 119
136
subj 127
136
subj 134
136
subj 135
136
subj 136
136
subj 138
136
subj 139
136
subj 140
136
subj 141
136
subj 142
136
subj 143
136


In [None]:
# Exploratory cell, kept for reference only: loading the MATLAB behavioral log.
# The 'edata' structure was eventually not used (see the github repo).
i = 110
sess = 1
mat_pattern = '{b_dir}/FHSS_Learn_{s}_{sub}*.mat'.format(b_dir=behav_dir, s=sess, sub=i)
matching_files = glob.glob(mat_pattern)
# take the first match; raises IndexError when no file matches the pattern
file_name = matching_files[0]
#print(file_name)
mat_temp = loadmat(file_name)
edata_rt = mat_temp['edata']['rt']
rt = edata_rt[0][0][0]
print(rt)

#### Trash:

In [None]:
#NOTE: scratch kept for reference only - this cell is NOT runnable as is.
#The snippets below are excerpts from function bodies: they use names that are
#not defined in this cell (curr_df, curr_er, prev_time, duration, min_time) and
#their indentation is not consistent.
#
#Fragment 1: chunk that calculated the error timing by looking at RTs and then adding.
#Eventually I decided to use the minimal gap between trials - more accurate.
if ((curr_df.loc[curr_er,'cond'] == 'DCSA') | (curr_df.loc[curr_er,'cond']=='DCDA')):
            #category switch: model the time between this error and the previous trial as error.
            #the onset is 500 ms after the response of the previous trial:
            prev_time.append(curr_df.loc[curr_er-1,'treact'] + .5)
            #the duration runs until .5 before the stimulus onset of the current trial
            dur=(float(curr_df.loc[curr_er,'tstimon']) - .5 - prev_time[-1])
            if (dur < min_time): #minimal duration of a trial
                raise Exception("something is wrong, DC error duration is too short")
            else:
                duration.append(dur)
                
        else: #this is not a category switch, we need to model the time between n-1 and n-2
            #the onset is 500 ms after the response of the n-2 trial:
            prev_time.append(curr_df.loc[curr_er-2,'treact'] + .5)
            #print(prev_time)
            #the duration runs until .5 before the stimulus onset of the n-1 trial
            dur=(float(curr_df.loc[curr_er-1,'tstimon']) - .5 - prev_time[-1])
            if (dur < (min_time*2)): #minimal duration of two trials - that's how much we miss
                raise Exception("something is wrong, error duration is too short")
            else:
                duration.append(dur)
                
                
 #Fragment 2: if the second trial is an error, mark the trial after it as an error too.
    #The software did not repeat the trial - so the trial after might not be judged
    #as what we think it is.
    #(this happens in subj 104, sess 1, block 1)
    if (curr_df.loc[curr_df['btrial'] == 2,'error'].all() != 0):
        print('second trial is an error')
        if ((curr_df.loc[curr_df['btrial'] == 3,'error'] == 0).bool()):
            curr_df.loc[curr_df['btrial'] == 3,'error'] = 1
            
    #Fragment 3: if the first trial is an error, mark the trial after it as an error too.
    #The software did not repeat the trial - so the trial after might not be judged
    #as what we think it is.
    if (curr_df.loc[curr_df['btrial'] == 1,'error'].all() != 0):
        print('first trial is an error, marking the second as an error as well - its a repeat')
        if ((curr_df.loc[curr_df['btrial'] == 2,'error'] == 0).bool()):
            curr_df.loc[curr_df['btrial'] == 2,'error'] = 1 #mark the next one as well, since this is a repeat trial
        #print(curr_df.loc[curr_df['btrial'].isin(list(range(1,4))),:])
    else:
        #if not already an error, include the first trial in a trash bin
        #by marking the first trial as an error
        curr_df.loc[curr_df['btrial'] == 1, 'error'] = 1 

In [None]:
#creates onsets per run:
def create_16states_onsets_split2(curr_df,sub_onsets_dir,r,chk_md,subj):
    """Create the onset files of one run for the 'all_16states_per_trial' model.

    Steps, in order:
      1. Reformat 'rt' (divided by 1000) and 'tstimon' as 3-decimal strings.
      2. Add the columns 'congruency', 'state' (previous dimA + current dimA)
         and 'state32' (current state + '_p' + previous state).
      3. Decide which trials are discarded: trials with error != 0, for some
         errors also the trial right before them, and the first trial (plus
         the second one when the first is not itself an error).
      4. For every 'state32' value, save (or only print, in check mode) the
         onsets of its non-discarded trials as tstimon / rt / modulation=1.
      5. Build the 'trash' regressor from all discarded trials plus the extra
         onsets/durations collected by add_error_onsets, and save it.

    Parameters
    ----------
    curr_df : pandas.DataFrame
        Trials of a single run. The columns 'rt', 'tstimon', 'dimA', 'dimB',
        'error', 'cond', 'btrial' and 'soa' are read. The frame is MODIFIED IN
        PLACE ('rt', 'tstimon' and 'error' are overwritten, new columns are
        added), so the function cannot be called twice on the same frame.
        The previous trial is addressed as "index label - 1", so consecutive
        integer index labels are assumed - TODO confirm against the caller.
    sub_onsets_dir : str
        Folder into which the onset files are written.
    r : run identifier; used in the file names and compared with integers in
        the subject-specific exceptions below.
    chk_md : int
        1 - check mode: print the trial count/trial numbers of each state and
        do not save the per-state files. 0 - save the per-state files and the
        trash file. (Any other value saves the per-state files but not the
        trash file.)
    subj : subject number; only used for the subject-specific exceptions.

    Returns
    -------
    None. Output is printed and/or written to tab-separated files without
    header: '{model}_run{r}_{state32}.txt' and '{model}_run{r}_trash.txt'.

    Notes
    -----
    Relies on round_down and add_error_onsets, defined earlier in the notebook.
    add_error_onsets raises an Exception when a computed duration is too short.
    """
    #define the name of the model:
    model = 'all_16states_per_trial'
    ### clean up ###
    #RTs need to be in seconds for the duration, and lets align all to have 3 digits.
    #No thousands separator in the format: a value such as '1,234.567' could not be
    #parsed back by pd.to_numeric below and would also corrupt the saved onsets.
    curr_df['rt'] = (curr_df['rt']/1000).map('{:.3f}'.format)
    #the timing of all should be aligned, so let's make it all up to 3 digits after the dot:
    curr_df['tstimon'] = curr_df['tstimon'].map('{:.3f}'.format)
    
    ##add cong/incong: a trial is 'cong' when the second character of dimA and dimB match
    dimA = curr_df['dimA'].values
    dimB = curr_df['dimB'].values
    curr_df['congruency'] = ['cong' if a[1] == b[1] else 'incong' for a, b in zip(dimA, dimB)]
    
    ### compute the state based on same and prior step, and attach to curr_df ####
    curr_t = np.array(curr_df['dimA'][1:])
    prev_t = np.array(curr_df['dimA'][0:-1])
    state = prev_t + curr_t #this is prev trial,curr trial
    all_states = np.unique(state)
    #it's parallel to how it is in the paper: e.g., (Ho)Fo, only I didn't include the parantheses
    
    #the first trial has no previous trial, so its state is nan:
    state = np.insert(state,0,np.nan)
    
    curr_df['state'] = state
    
    ### compute the 32state based on same and prior state, and attach to curr_df ####
    curr_t = np.array(curr_df['state'][1:])
    prev_t = np.array(curr_df['state'][0:-1])
    #placeholder string so the concatenation below works; this entry is set to nan right after
    prev_t[0]='nan'
    state2 = curr_t + '_p' + prev_t #this is curr state + '_p' + prev state
    
    #first two trials are nan:
    state2 = np.insert(state2,0,np.nan)
    state2[1] = np.nan
    all_states32 = np.unique(state2[2:])
    curr_df['state32'] = state2

    
    #get indices of where there are errors marked in the behavioral files:
    er_t=curr_df.index[curr_df['error'] != 0] 
    #print(er_t)
    if (er_t.empty):
        print('no errors')
    
    #mark one trial before the trial that is marked as an error as error:
    #this should not be done for the last error, in cases where there are two consecutive errors:
    #copy er_t (I don't want to use er_t for that, because I still want the loop below to go through it, for checking)
    print(er_t[:])
    #float copy, so that excluded entries can be flagged with nan:
    er_tc=np.array(er_t).astype("float")
    #go through er_tc and remove the first error from two consecuitive trials, or the second, if answers the condition below:
    #go through er_t, so that er_tc can be changed independently - no problems with marking nan in first/second
    for ier in range(len(er_t)):
        if ier < (len(er_t)-1): #check until the last before one (implemented like this and not in the loop bc of the part I commented out below)
            if er_t[ier+1] - er_t[ier]  == 1: #this is the first of two consecuitive
                print('first of two consecuitive trials is: ' + curr_df.loc[er_t[ier],'cond'])
                print('second of two consecuitive trials is: ' + curr_df.loc[er_t[ier+1],'cond'])
                #if the second one is DCSA/DCDA, or the first is DCSA:
                if ((curr_df.loc[er_t[ier],'cond'] == 'DCSA') | 
                    (curr_df.loc[er_t[ier+1],'cond'] == 'DCSA') | (curr_df.loc[er_t[ier+1],'cond']=='DCDA')):
                    print('first/second of two consecuitive trials is a category switch')
                    print("Need to mark the trial before the first error as error (SO DO NOT exclude from er_tc)")
                elif ((curr_df.loc[er_t[ier],'cond'] == 'DCDA') & (curr_df.loc[er_t[ier+1],'cond'] == 'SCDA') & (curr_df.loc[er_t[ier],'congruency'] == 'incong')):
                    print('first of two consecuitive trials is DCDA and incong, second is SCDA')
                    print("Need to mark the trial before the first error as error (SO DO NOT exclude from er_tc)")
                elif ((curr_df.loc[er_t[ier],'cond'] == 'SCSA') & (curr_df.loc[er_t[ier+1],'cond'] == 'SCSA')):
                    print('curr trial: ' + str(curr_df.loc[er_t[ier],'btrial']) + ' error: ' + str(curr_df.loc[er_t[ier],'error']) + ' congruency: ' + str(curr_df.loc[er_t[ier],'congruency']))
                    print('next trial: ' + str(curr_df.loc[er_t[ier+1],'btrial']) + ' error: ' + str(curr_df.loc[er_t[ier+1],'error']) + ' congruency: ' + str(curr_df.loc[er_t[ier +1],'congruency']))
                    if (curr_df.loc[er_t[ier],'congruency'] == 'cong'):
                        print('cong first of two consecuitive trials that are SCSA')
                        print("DO NOT mark the trial before the first error as error (exclude from er_tc)")
                        er_tc[ier] = np.nan
                    else: #an incong trial:
                        if (curr_df.loc[er_t[ier],'error'] == 2) & (curr_df.loc[er_t[ier+1],'error'] == 1):
                            print('first of two consecuitive trials that are SCSA. errors are 2 then 1')
                            print("Need to mark the trial before the first error as error (SO DO NOT exclude from er_tc)")
                        elif (curr_df.loc[er_t[ier]-1,'cond'] == 'SCSA'):
                            #not 2,1 - depends on the previous trial - if SCSA - no need to exclude
                            print('first of two consecuitive trials that are SCSA. prev is SCSA') #errors are 2 then 1
                            print("DO NOT mark the trial before the first error as error (exclude from er_tc)")
                            er_tc[ier] = np.nan
                        elif ((curr_df.loc[er_t[ier]-1,'cond'] == 'DCSA') | (curr_df.loc[er_t[ier]-1,'cond']=='DCDA')):
                            print('first of two consecuitive trials that are SCSA. prev is DCSA/DCDA')
                            print("Need to mark the trial before the first error as error (SO DO NOT exclude from er_tc)") 
                        else:
                            print("might be a scenario I didn't check, check code")

        else: #if none of the above, generally do not mark:
            #NOTE(review): this else pairs with the `ier < len(er_t)-1` test, so it runs
            #only for the LAST entry of er_t. The comment above reads as if it was meant
            #to be the fallback of the inner if/elif chain - confirm which one is intended.
            print("DO NOT mark the trial before the first error as error (exclude from er_tc)")
            er_tc[ier] = np.nan
            
        #if the second of two consecutive trials,which fullfile these conditions, exclude so that we do not change the error from 2 to 1.         
        if ((ier > 0) & ((er_t[ier] - er_t[ier-1])  == 1)): #this is the second of two consecuitive
            if ((curr_df.loc[er_t[ier-1],'cond'] == 'SCSA') & (curr_df.loc[er_t[ier],'cond'] == 'SCSA')):
                print('second of two error trials where the first and second are SCSA')
                print("need to exclude the second trial from er_tc, so that it'll not change the 2 to 1")
                er_tc[ier] = np.nan

    #if the first trial is an error:
    #(.item() needs exactly one row with btrial == 1 and raises ValueError otherwise,
    # like the deprecated Series.bool() that was used here before)
    if ((curr_df.loc[curr_df['btrial'] == 1, 'error'] != 0).item()):
        print('first trial is an error, remove from er_tc so it does not cause problems\n' + 
        'with marking er_tc-1 trials')
        er_tc=er_tc[1:]
        #if the first is not an error, mark as one:
    else:
        #if not already an error, include the first trial in a trash bin
        #by marking the first trial as an error:
        curr_df.loc[curr_df['btrial'] == 1, 'error'] = 1 
        #mark the second as an error - cannot detect a previous state
        curr_df.loc[curr_df['btrial'] == 2, 'error'] = 1 
        
    #keep only the errors that were not excluded above, and mark the trial before each of them:
    er_tc=er_tc[~np.isnan(er_tc)].astype(int)
    print(er_tc)
    curr_df.loc[er_tc-1,'error']=1 #if empty - won't do a thing :)
    
    
    #find the shortest time gap - we'll add that to calculate the timing of the missing trials.
    #used to define it by the minimal difference between consecutive 'tstimon' values - but that
    #didn't work well because it depends on RT - better taking it from what is pre-defined:
    min_time=round_down(min(curr_df['soa'][1:]),2) #that defines the minimal time to pass - no trial should be shorter than that
    print('min time is: ' + str(min_time))
    
    ### sanity check: the non-error trials of each state should all have the same 'cond':
    for st in all_states:
        curr_st = curr_df[(curr_df['state'] == st) & (curr_df['error'] == 0)] 
        cat_st = curr_st.cond.unique()
        if len(cat_st) > 1:
            print('WRONG - more than one category of state')
        
        ## mark different reg1/reg2 based on Enter/Exit/Internal:
        
    ### for each state32, create onsets file:
    for st in all_states32:       
        curr_st = curr_df[(curr_df['state32'] == st) & (curr_df['error'] == 0)] 
        #explicit copy, so that adding a column does not write into a slice of curr_df:
        onsets = curr_st[['tstimon','rt']].copy()
        onsets['modulation'] = 1
        if chk_md == 1:
            #print how many trials are in the current state: 
            print('#trials in state {st}: {n}'.format(st=st,n=curr_st.shape[0]))
            print(curr_st['btrial'].values)
        else:
            #save file:
            onsetsfile = '{o_dir}/{m}_run{r}_{st}.txt'.format(o_dir=sub_onsets_dir, m=model,r=r,st=st)
            onsets.to_csv(onsetsfile,sep='\t',index=False, header = False)
    
    ### create the trash regressor for the first trial and mistakes:
    curr_st = curr_df[(curr_df['error'] != 0)] 
    onsets = curr_st[['tstimon','rt']]
    
    #change 'tstimon' in curr_df to numeric so I can add and subtract to it:
    curr_df['tstimon'] = pd.to_numeric(curr_df['tstimon'],downcast="float") 
    
    #add the onsets for the missing trials (both lists are filled in place by add_error_onsets):
    prev_time=[]
    duration=[]
    
    for ier in range(len(er_t)):
        curr_er = er_t[ier]
        print('working on error trial: ' + str(curr_df.loc[curr_er,'btrial']))
        if ier == (len(er_t)-1): #this is the last error
            #this is the last error, cannot be the first of consecuitive, just add the onsets:
            add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)
        else: #this is not the last error, check if the first of consecuitive: 
            if er_t[ier+1] - curr_er  == 1: #if so: 
                print('the first of two consecuitive errors, time difference is:')
                print(curr_df.loc[er_t[ier+1],'tstimon'] - curr_df.loc[curr_er,'tstimon'])
                print('curr cond is: ' + curr_df.loc[er_t[ier],'cond'] + ' second is: ' + curr_df.loc[er_t[ier+1],'cond'])

                if ((ier > 0) & (curr_er - er_t[ier-1] == 1)): #this might be a middle one, check for middle of three consec errors - treat differently:
                    print('Middle trial in a SEQUENCE OF THREE ERROR, CHECK CODE. marks changes')
                    #if category switch - might be different, raise excpetion:
                    #I now checked it (subj 110) - need to mark previous changes anyway, left her for reference
                    #if ((curr_df.loc[curr_er,'cond'] == 'DCSA') | (curr_df.loc[curr_er,'cond']=='DCDA')):
                    #   raise Exception("Middle trial in a sequence of three errors is a category switch, check code")
                    #subject-specific exceptions, found by inspecting these runs:
                    if ((subj == 108) & (r == 1) & (curr_df.loc[curr_er,'btrial'] != 2)): #if it's the seconf one, need to mark the time before the first trial
                        print('sub 108, begining, no need to mark changes')
                    elif ((subj == 134) & (r == 2) & (curr_df.loc[er_t[ier],'cond'] == 'SCSA')): 
                        print('sub 134, 4 trials, no need to mark changes')
                    else:
                        #middle of three errors, need to mark previous changes:
                        add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)

                else: #ier == 0, so this cannot be the first of three errors, or ier >0, but only the first of two:
                    if ((curr_df.loc[er_t[ier+1],'cond'] == 'DCSA') | (curr_df.loc[er_t[ier+1],'cond']=='DCDA')):
                        print('the first of two consecuitive errors (not middle of three), second is a category switch, mark previous errors')
                        add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)
                        #if  (curr_df.loc[er_t[ier+1],'cond']=='DCDA'):
                        #    print('next error is: ' + curr_df.loc[er_t[ier+1],'cond'] + ' check that indeed needs to mark errors')
                    elif ((curr_df.loc[er_t[ier],'cond'] == 'SCSA') & (curr_df.loc[er_t[ier+1],'cond'] == 'SCSA')):
                        if (curr_df.loc[er_t[ier],'congruency'] == 'cong'):
                            print('cong first of two consecuitive trials that are SCSA')
                            print('NOT marking additional errors')
                        else: #an incong trial:
                            #(er_t[ier]+1 is the next error here, because the two errors are consecutive)
                            if (curr_df.loc[er_t[ier],'error'] == 2) & (curr_df.loc[er_t[ier]+1,'error'] == 1):
                                print('first of two consecuitive trials that are SCSA. errors are 2 then 1. mark previous errors')
                                add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)
                            elif (curr_df.loc[er_t[ier]-1,'cond'] == 'SCSA'):
                                #not 2,1 - depends on the previous trial - if SCSA - no need to exclude
                                print('first of two consecuitive trials that are SCSA. prev is SCSA') #errors are 2 then 1
                                print('NOT marking additional errors')
                            elif ((curr_df.loc[er_t[ier]-1,'cond'] == 'DCSA') | (curr_df.loc[er_t[ier]-1,'cond']=='DCDA')):
                                print('first of two consecuitive trials that are SCSA. prev is DC. mark previous errors')
                                add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)
                            else:
                                print("might be a scenario I didn't check, check code")

                    else: #not a special case of two consecuitive errors
                        print('first trial of two consecuitive errors, not a special case')
                        print('NOT marking additional errors')

            else: #if not the first of two consecuitive trials (i.e., the second, or a single one), add onsets:
                add_error_onsets(curr_df,curr_er,prev_time,duration,min_time)


    
    #the extra onsets/durations go into the same two columns as the discarded trials:
    temp_ons = pd.DataFrame({"tstimon": prev_time,'rt': duration})
    #pd.concat instead of DataFrame.append, which was removed in pandas 2.0:
    onsets = pd.concat([onsets, temp_ons])
    #change to numeric so I can sort:
    onsets['tstimon'] = pd.to_numeric(onsets['tstimon'],downcast="float") 
    onsets=onsets.sort_values(by=['tstimon'])
    
    ## check first trial:
    if (onsets.iloc[0,onsets.columns.get_loc('tstimon')] > 10):
        print('*** CHECK FIRST TRIAL, dur is: %.3f ***' % onsets.iloc[0,onsets.columns.get_loc('tstimon')])
    #ls = [type(item) for item in onsets['tstimon']]
    #print(ls)
    
    #format nicely (3 decimals, no thousands separator):
    onsets['rt'] = pd.to_numeric(onsets['rt'],downcast="float").map('{:.3f}'.format)
    onsets['tstimon'] = onsets['tstimon'].map('{:.3f}'.format)
    
    #if there are three consecuitive errors, hard to say in advance when to mark changes or not
    #becuase it depends on the specific sequence of trials. So I added this to exclude if marked twice:
    onsets=onsets.drop_duplicates()
    
    #add modulation
    onsets['modulation'] = 1
    print(onsets)
    #save to a file
    if chk_md == 0:
        #save file:
        onsetsfile = '{o_dir}/{m}_run{r}_trash.txt'.format(o_dir=sub_onsets_dir, m=model,r=r)
        onsets.to_csv(onsetsfile,sep='\t',index=False, header = False)