In [77]:
import sys
import pandas as pd
import os
import numpy as np
from scipy import stats
import re
import csv
import tabulate
import math as m
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import scipy.io

# Directory scanned below for files ending in 'FIXstruct.csv'.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
path_data = '/home/juank/repos/corregistro/Analysis_dac/data/input/'
# Directory prefix used for every CSV written by this notebook (same caveat).
path2save = '/home/juank/Desktop/EJN2022/python/'

In [78]:
# os.listdir returns entries in arbitrary order; sort the file names so the
# concatenated table (and every CSV exported from it) has the same row order
# on every run and on every machine.
fixs = sorted(f for f in os.listdir(path_data) if f.endswith('FIXstruct.csv'))

In [79]:
# Read every fixation table and concatenate them in a single call.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside the loop re-copied the accumulated frame on every iteration.
frames = [pd.read_csv(os.path.join(path_data, f)) for f in fixs]
# Row labels of each file are kept as they are (as repeated append calls did);
# an empty file list still yields an empty DataFrame instead of raising.
df = pd.concat(frames) if frames else pd.DataFrame()

In [80]:
# Subjects J05, J06, J07 and J10 are left out of the dataset.
excluded_subjects = ['J05', 'J06', 'J07', 'J10']
df = df[~df['trial_subject'].isin(excluded_subjects)]

In [81]:
# Number of rows left after excluding subjects.
df.shape[0]

22482

In [82]:
# Show the column names of the concatenated table.
df.columns

Index(['refix', 'pretarget', 'multifix', 'urevent', 'tonset', 'dur_added',
       'rank', 'first_rank', 'stposrel', 'stposabs', 'n', 'x', 'y', 'istarget',
       'isdistractor', 'distractor_cat', 'ditractor_atr', 'trial_targetseen',
       'trial_correct', 'trial_cat', 'trial_resp', 'trial_atr', 'trial_type',
       'trial_cattype', 'trial_number', 'trial_subject', 'trial_bgn_latency'],
      dtype='object')

# General filters:

- fix duration > 0.1
- fix duration < 1
- refix < 2
- correct
- rank > 1

In [83]:
# Filters shared by every analysis: 0.1 < dur_added < 1, refix < 2,
# trial_correct == 1 and rank > 1.
valid_duration = df['dur_added'].gt(0.1) & df['dur_added'].lt(1)
keep_row = valid_duration & df['refix'].lt(2) & df['trial_correct'].eq(1) & df['rank'].gt(1)
df = df[keep_row]
# Alternative selections kept for reference:
# df = df[(df['dur_added']>0.1) & (df['dur_added']<1) & (df['refix']<2)  & (df['trial_correct']==1)]
# df = df[(df['dur_added']>0.1) & (df['dur_added']<1) & (df['refix']<2) & (df['rank']>1)]

In [84]:
# Display only: the sorted copy is not assigned, so df keeps its row order.
sort_keys = ['trial_subject', 'trial_number']
df.sort_values(sort_keys)

Unnamed: 0,refix,pretarget,multifix,urevent,tonset,dur_added,rank,first_rank,stposrel,stposabs,...,trial_targetseen,trial_correct,trial_cat,trial_resp,trial_atr,trial_type,trial_cattype,trial_number,trial_subject,trial_bgn_latency
1,0,1,0,23205,0.728516,0.210938,2,1,10,37,...,0,1,faces,A,female,VS,I,1,E01,0.201172
2,0,1,0,23206,0.978516,0.283203,3,1,12,46,...,0,1,faces,A,female,VS,I,1,E01,0.201172
3,0,1,0,23207,1.328125,0.119141,4,1,8,29,...,0,1,faces,A,female,VS,I,1,E01,0.201172
4,0,1,1,23208,1.525391,0.125000,5,0,2,4,...,0,1,faces,A,female,VS,I,1,E01,0.201172
5,0,1,0,23210,1.849609,0.187500,6,0,3,6,...,0,1,faces,A,female,VS,I,1,E01,0.201172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1515,0,0,0,31632,2.455078,0.664062,4,1,8,42,...,1,1,objects,P,small,VS,O,215,J12,898.833984
1516,0,0,0,31634,3.927734,0.166016,5,0,6,34,...,1,1,objects,P,small,VS,O,215,J12,898.833984
1518,0,1,0,31647,0.552734,0.212891,2,1,1,4,...,0,1,faces,P,female,VS,F,216,J12,903.033203
1519,0,1,0,31648,0.783203,0.246094,3,1,2,5,...,0,1,faces,P,female,VS,F,216,J12,903.033203


In [85]:
# Number of rows left after the general filters.
df.shape[0]

12701

In [86]:
# Row counts per condition, built from named boolean masks.
is_ex = df['trial_type']=='EX'
is_vs = df['trial_type']=='VS'
vs_present = is_vs & (df['trial_resp']=='P')
vs_absent = is_vs & (df['trial_resp']=='A')
vs_present_seen = vs_present & (df['trial_targetseen']==1)

print('Fig. 3: Distractors + Target')
print('EX: %d'%is_ex.sum())
print('VS: %d'%is_vs.sum())
print('VS + Distractor: %d'%(is_vs & (df['istarget']==0)).sum())
print('VS-P: %d'%vs_present.sum())
print('VS-A: %d'%vs_absent.sum())
print('EX + VS-P: %d'%(is_ex.sum() + vs_present.sum()))
print('VS-P-seen: %d'%vs_present_seen.sum())
print('EX + VS-P-seen: %d'%(is_ex.sum() + vs_present_seen.sum()))

Fig. 3: Distractors + Target
EX: 3292
VS: 9409
VS + Distractor: 8653
VS-P: 4689
VS-A: 4720
EX + VS-P: 7981
VS-P-seen: 4057
EX + VS-P-seen: 7349


In [87]:
# Write the filtered table (all columns, no index column).
general_csv = path2save+'EJN2022_16subj_GeneralFilters.csv'
df.to_csv(general_csv, index=False, encoding='utf-8')

# VS(NT) & EX

In [88]:
# Check: istarget values that occur in EX rows (expected: only 0).
df.loc[df['trial_type']=='EX', 'istarget'].unique()

array([0])

In [89]:
# Select distractors: independent copy of the rows with istarget == 0.
df_type = df[df['istarget']==0].copy()

In [90]:
# Number of rows with istarget == 0.
df_type.shape[0]

11945

In [91]:
# Check: number of selected VS rows that are not flagged as distractor (expected: 0).
vs_not_distractor = (df_type['trial_type']=='VS') & (df_type['isdistractor']==0)
int(vs_not_distractor.sum())

0

In [92]:
# Per-trial normalized rank: (rank - min) / (max - min), with min and max taken
# over the rows of the same subject and trial.
# groupby/transform returns one value per row in the frame's own row order, so
# each value stays attached to its row. The previous loop built a flat list in
# (subject, trial) order and assigned it by position, which is only correct
# when the rows are already stored contiguously in exactly that order.
trial_rank = df_type.groupby(['trial_subject', 'trial_number'])['rank']
trial_min = trial_rank.transform('min')
trial_max = trial_rank.transform('max')
# to_numpy() makes the assignment positional, so repeated row labels (one set
# per input file) cannot trigger a reindex.
df_type['Nrank'] = ((df_type['rank'] - trial_min) / (trial_max - trial_min)).to_numpy()
# Trials whose rows all share one rank give 0/0 = NaN; count them, then drop them.
print('NAs in Nrank: %d'%df_type['Nrank'].isna().sum())

df_type = df_type[df_type['Nrank'].notna()]

NAs in Nrank: 221


In [93]:
# N_subj_rank: per-subject normalized rank, (rank - min) / (max - min) with min
# and max taken over all rows of the same subject.
# groupby/transform keeps every value on its own row; the previous loop
# assigned a flat list by position, which is only correct when each subject's
# rows are stored contiguously and in order of first appearance.
subj_rank = df_type.groupby('trial_subject')['rank']
subj_min = subj_rank.transform('min')
subj_max = subj_rank.transform('max')
# to_numpy() makes the assignment positional, so repeated row labels are harmless.
df_type['N_subj_rank'] = ((df_type['rank'] - subj_min) / (subj_max - subj_min)).to_numpy()
print('NAs in N_subj_rank: %d'%df_type['N_subj_rank'].isna().sum())

# Centered variants: both normalized ranks shifted by -0.5.
df_type['NC_subj_rank']  = df_type['N_subj_rank']-0.5
df_type['NC_rank']       = df_type['Nrank']-0.5

NAs in N_subj_rank: 0


In [94]:
# Number of rows left after dropping NaN Nrank values.
df_type.shape[0]

11724

## VS(NT) absent & EX

In [19]:
# Check: trial_resp values that occur in EX rows (expected: only 'A').
df_type.loc[df_type['trial_type']=='EX', 'trial_resp'].unique()

array(['A'], dtype=object)

In [20]:
# Independent copy of the rows whose trial_resp is 'A'.
df_absent = df_type[df_type['trial_resp']=='A'].copy()

# Condition flags: VSNT_EX is 1 on VS rows and isEX is 1 on EX rows;
# every other row gets 0 in that column.
df_absent['VSNT_EX'] = np.where(df_absent['trial_type']=='VS', 1, 0)
df_absent['isEX'] = np.where(df_absent['trial_type']=='EX', 1, 0)

# Sanity checks on the rank columns and the table size.
print('Maximum rank: %d'%df_absent['rank'].max())
print('Maximum normalized rank: %d'%df_absent['Nrank'].max())

print('Length = %d'%len(df_absent))

# Save all columns, without the index.
absent_csv = path2save+'EJN2022_16subj_VSNTabsent_EX_manyNranks.csv'
df_absent.to_csv(absent_csv, index=False, encoding='utf-8')

Maximum rank: 16
Maximum normalized rank: 1
Length = 7950


## VS(NT) present&targetseen & EX

In [21]:
# trial_targetseen values that occur in EX rows.
df_type.loc[df_type['trial_type']=='EX', 'trial_targetseen'].unique()

array([0])

In [22]:
# trial_targetseen values that occur in VS rows.
df_type.loc[df_type['trial_type']=='VS', 'trial_targetseen'].unique()

array([1, 0])

In [23]:
# Work on an independent copy so df_type is not modified below.
df_TS = df_type.copy(deep=True)

In [24]:
# Check: pretarget values that occur in VS rows (both 1 and 0 are expected).
df_TS.loc[df_TS['trial_type']=='VS', 'pretarget'].unique()

array([1, 0])

In [25]:
# TS column: 1 on VS rows with trial_targetseen == 1, 0 on EX rows and
# NaN on VS rows with trial_targetseen == 0 (those rows are dropped below).
vs_rows = df_TS['trial_type']=='VS'
ts_conditions = [
    vs_rows & (df_TS['trial_targetseen']==1),
    df_TS['trial_type']=='EX',
    vs_rows & (df_TS['trial_targetseen']==0),
]
ts_values = [1, 0, np.nan]
df_TS['TS'] = np.select(ts_conditions, ts_values)

# Report how many rows carry NaN in TS, then remove them.
print('VS-A o VS-P+unseen targets: %d'%df_TS['TS'].isna().sum())
df_TS = df_TS[df_TS['TS'].notna()]

# Sanity checks on the rank columns and the table size.
print('Maximum rank: %d'%df_TS['rank'].max())
print('Maximum normalized rank: %d'%df_TS['Nrank'].max())

print('Length = %d'%len(df_TS))

# Save all columns, without the index.
ts_csv = path2save+'EJN2022_16subj_VSNT_EX.csv'
df_TS.to_csv(ts_csv, index=False, encoding='utf-8')

VS-A o VS-P+unseen targets: 5303
Maximum rank: 14
Maximum normalized rank: 1
Length = 6421


In [26]:
# trial_targetseen values left in VS rows after dropping NaN TS (expected: only 1).
df_TS.loc[df_TS['trial_type']=='VS', 'trial_targetseen'].unique()

array([1])

### VS(NT) pretarget & EX

In [27]:
# pretarget values that occur in EX rows.
df_TS.loc[df_TS['trial_type']=='EX', 'pretarget'].unique()

array([1])

In [28]:
# Work on an independent copy so df_TS is not modified below.
df_pre = df_TS.copy(deep=True)

In [29]:
# VSNT_EX / isEX columns: VS rows with pretarget == 1 get (1, 0), EX rows get
# (0, 1), and VS rows with pretarget == 0 get NaN in both columns.
vs_rows = df_pre['trial_type']=='VS'
pre_conditions = [
    vs_rows & (df_pre['pretarget']==1),
    df_pre['trial_type']=='EX',
    vs_rows & (df_pre['pretarget']==0),
]

df_pre['VSNT_EX'] = np.select(pre_conditions, [1, 0, np.nan])
df_pre['isEX'] = np.select(pre_conditions, [0, 1, np.nan])

print(len(df_pre))


6421


In [30]:
# Check: pretarget values among rows flagged VSNT_EX == 1 (expected: only 1).
df_pre.loc[df_pre['VSNT_EX']==1, 'pretarget'].unique()

array([1])

In [31]:
# Check: pretarget values among rows whose VSNT_EX is NaN (expected: only 0).
df_pre.loc[df_pre['VSNT_EX'].isna(), 'pretarget'].unique()

array([0])

In [32]:
# Drop the rows whose VSNT_EX is NaN and report the new size.
df_pre = df_pre[df_pre['VSNT_EX'].notna()]
print(len(df_pre))

print('Maximum rank: %d'%df_pre['rank'].max())
print('Maximum normalized rank: %d'%df_pre['Nrank'].max())

# Save all columns, without the index.
pre_csv = path2save+'EJN2022_16subj_VSNTpre_EX.csv'
df_pre.to_csv(pre_csv, index=False, encoding='utf-8')

5578
Maximum rank: 14
Maximum normalized rank: 1


In [33]:
# We have to recalculate because now the maximum corresponds to the target fix and not to the end of the trial
# N_subj_rank: per-subject normalized rank, (rank - min) / (max - min) over the
# rows of each subject that remain in df_pre.
# groupby/transform keeps every value on its own row; the previous loop
# assigned a flat list by position, which is only correct when each subject's
# rows are stored contiguously and in order of first appearance.
subj_rank = df_pre.groupby('trial_subject')['rank']
subj_min = subj_rank.transform('min')
subj_max = subj_rank.transform('max')
# to_numpy() makes the assignment positional, so repeated row labels are harmless.
df_pre['N_subj_rank'] = ((df_pre['rank'] - subj_min) / (subj_max - subj_min)).to_numpy()
print('NAs in N_subj_rank: %d'%df_pre['N_subj_rank'].isna().sum())

df_pre['NC_subj_rank']  = df_pre['N_subj_rank']-0.5
# NOTE(review): only the per-subject rank is recomputed in this cell. Nrank (and
# therefore NC_rank) still carries the per-trial min/max computed before the
# pretarget == 0 rows were removed; confirm that this is intended.
df_pre['NC_rank']       = df_pre['Nrank']-0.5

# Save all columns, without the index.
df_pre.to_csv(path2save+'EJN2022_16subj_VSNTpre_EX_manyNranks.csv',index=False,encoding='utf-8')

NAs in N_subj_rank: 0


In [34]:
# Check: pretarget values among rows flagged VSNT_EX == 1 (expected: only 1).
df_pre.loc[df_pre['VSNT_EX']==1, 'pretarget'].unique()

array([1])

In [35]:
# Check that no NaN is left in isEX (expected values: 1.0 and 0.0).
pd.unique(df_pre['isEX'])

array([1., 0.])

In [36]:
# Check that no NaN is left in VSNT_EX (expected values: 0.0 and 1.0).
pd.unique(df_pre['VSNT_EX'])

array([0., 1.])

In [37]:
# Check that no row with NaN VSNT_EX remains (expected: empty array).
df_pre.loc[df_pre['VSNT_EX'].isna(), 'pretarget'].unique()

array([], dtype=int64)

In [38]:
# Number of distinct subjects in df_pre.
df_pre['trial_subject'].nunique(dropna=False)

16

In [39]:
# Number of distinct trial numbers for subject E01.
df_pre.loc[df_pre['trial_subject']=='E01', 'trial_number'].nunique(dropna=False)

102

In [40]:
# Rows (first 30 at most) whose Nrank is missing.
rank_columns = ['rank', 'Nrank', 'trial_number', 'trial_subject']
df_pre.loc[df_pre['Nrank'].isna(), rank_columns].head(30)

Unnamed: 0,rank,Nrank,trial_number,trial_subject


### Check one participant

In [41]:
# All df_pre rows belonging to subject E01.
subj1 = df_pre.loc[df_pre['trial_subject']=='E01']

In [42]:
# Distinct refix values for E01.
pd.unique(subj1['refix'])

array([0, 1])

In [43]:
# Distinct pretarget values for E01.
pd.unique(subj1['pretarget'])

array([1])

In [44]:
# Distinct pretarget values among E01 rows flagged isEX == 1.
subj1.loc[subj1['isEX']==1, 'pretarget'].unique()

array([1])

In [45]:
# Distinct multifix values for E01.
pd.unique(subj1['multifix'])

array([0, 1])

In [46]:
# Smallest dur_added for E01 (the general filter keeps values above 0.1).
subj1.loc[:, 'dur_added'].min()

0.1015625

In [47]:
# Largest dur_added for E01 (the general filter keeps values below 1).
subj1.loc[:, 'dur_added'].max()

0.56640625

In [48]:
# Number of distinct rank values for E01.
subj1['rank'].nunique(dropna=False)

12

In [49]:
# Number of rows for E01.
subj1.shape[0]

509

In [50]:
# Distinct first_rank values for E01.
pd.unique(subj1['first_rank'])

array([1, 0])

In [51]:
# Distinct istarget values for E01 (expected: only 0).
pd.unique(subj1['istarget'])

array([0])

In [52]:
# Distinct isdistractor values for E01.
pd.unique(subj1['isdistractor'])

array([1])

In [53]:
# Distinct distractor_cat values for E01.
pd.unique(subj1['distractor_cat'])

array(['faces', 'objects'], dtype=object)

In [54]:
# Distinct values of the 'ditractor_atr' column (spelled this way in the data) for E01.
pd.unique(subj1['ditractor_atr'])

array(['male', 'small', 'female', 'big'], dtype=object)

In [55]:
# Distinct trial_targetseen values for E01.
pd.unique(subj1['trial_targetseen'])

array([1, 0])

In [56]:
# Distinct trial_targetseen values among E01 rows flagged VSNT_EX == 1.
subj1.loc[subj1['VSNT_EX']==1, 'trial_targetseen'].unique()

array([1])

In [57]:
# Distinct trial_correct values for E01 (the general filter keeps only 1).
pd.unique(subj1['trial_correct'])

array([1])

In [58]:
# Distinct trial_cat values for E01.
pd.unique(subj1['trial_cat'])

array(['objects', 'faces'], dtype=object)

In [59]:
# Distinct trial_resp values for E01.
pd.unique(subj1['trial_resp'])

array(['P', 'A'], dtype=object)

In [60]:
# Distinct trial_resp values among E01 rows flagged VSNT_EX == 1.
subj1.loc[subj1['VSNT_EX']==1, 'trial_resp'].unique()

array(['P'], dtype=object)

In [61]:
# Distinct trial_atr values for E01.
pd.unique(subj1['trial_atr'])

array(['big', 'female', 'small', 'male'], dtype=object)

In [62]:
# Distinct trial_type values among E01 rows flagged VSNT_EX == 1 (expected: only 'VS').
subj1.loc[subj1['VSNT_EX']==1, 'trial_type'].unique()

array(['VS'], dtype=object)

In [63]:
# Distinct trial_cattype values for E01.
pd.unique(subj1['trial_cattype'])

array(['O', 'F', 'I'], dtype=object)

In [64]:
# Number of distinct trial numbers for E01.
subj1['trial_number'].nunique(dropna=False)

102

In [65]:
# Trial numbers present for E01, in order of first appearance.
pd.unique(subj1['trial_number'])

array([  2,   3,   4,   5,   9,  11,  13,  15,  16,  18,  21,  22,  23,
        25,  28,  29,  37,  38,  40,  44,  45,  47,  49,  51,  52,  54,
        55,  56,  63,  64,  66,  67,  71,  76,  78,  79,  80,  81,  84,
        85,  86,  87,  88,  94,  96,  98,  99, 101, 102, 103, 104, 112,
       116, 118, 119, 120, 121, 122, 127, 128, 133, 136, 139, 141, 147,
       148, 149, 151, 152, 153, 154, 155, 156, 161, 162, 163, 168, 169,
       170, 171, 173, 178, 180, 181, 184, 186, 187, 188, 189, 190, 191,
       192, 193, 197, 199, 200, 201, 202, 203, 207, 209, 213])

In [66]:
# Explore trials that are present here but not in 16subj_VSNTpostvsEX_for_unfold_GlobalStandarisedCenteredRank.csv

In [67]:
# E01 rows of trial 23.
subj1.query('trial_number == 23')

Unnamed: 0,refix,pretarget,multifix,urevent,tonset,dur_added,rank,first_rank,stposrel,stposabs,...,trial_number,trial_subject,trial_bgn_latency,Nrank,N_subj_rank,NC_subj_rank,NC_rank,TS,VSNT_EX,isEX
196,0,1,0,23759,0.875,0.201172,2,1,5,25,...,23,E01,92.583984,0.0,0.0,-0.5,-0.5,1.0,1.0,0.0
197,0,1,0,23760,1.140625,0.169922,3,1,3,14,...,23,E01,92.583984,0.2,0.090909,-0.409091,-0.3,1.0,1.0,0.0
198,0,1,0,23761,1.357422,0.140625,4,1,8,31,...,23,E01,92.583984,0.4,0.181818,-0.318182,-0.1,1.0,1.0,0.0
199,0,1,0,23762,1.564453,0.140625,5,0,1,4,...,23,E01,92.583984,0.6,0.272727,-0.227273,0.1,1.0,1.0,0.0
200,0,1,0,23764,2.050781,0.1875,6,0,11,46,...,23,E01,92.583984,0.8,0.363636,-0.136364,0.3,1.0,1.0,0.0
201,0,1,0,23765,2.287109,0.113281,7,0,10,38,...,23,E01,92.583984,1.0,0.454545,-0.045455,0.5,1.0,1.0,0.0


In [68]:
# E01 rows of trial 52.
subj1.query('trial_number == 52')

Unnamed: 0,refix,pretarget,multifix,urevent,tonset,dur_added,rank,first_rank,stposrel,stposabs,...,trial_number,trial_subject,trial_bgn_latency,Nrank,N_subj_rank,NC_subj_rank,NC_rank,TS,VSNT_EX,isEX
437,0,1,0,24500,0.691406,0.128906,2,1,9,49,...,52,E01,214.361328,0.0,0.0,-0.5,-0.5,1.0,1.0,0.0


In [69]:
# E01 rows of trial 54.
subj1.query('trial_number == 54')

Unnamed: 0,refix,pretarget,multifix,urevent,tonset,dur_added,rank,first_rank,stposrel,stposabs,...,trial_number,trial_subject,trial_bgn_latency,Nrank,N_subj_rank,NC_subj_rank,NC_rank,TS,VSNT_EX,isEX
455,0,1,0,24560,0.804688,0.248047,2,1,13,57,...,54,E01,222.759766,0.0,0.0,-0.5,-0.5,1.0,1.0,0.0
457,0,1,0,24562,1.324219,0.146484,4,1,3,12,...,54,E01,222.759766,1.0,0.181818,-0.318182,0.5,1.0,1.0,0.0


In [70]:
# E01 rows of trial 55.
subj1.query('trial_number == 55')

Unnamed: 0,refix,pretarget,multifix,urevent,tonset,dur_added,rank,first_rank,stposrel,stposabs,...,trial_number,trial_subject,trial_bgn_latency,Nrank,N_subj_rank,NC_subj_rank,NC_rank,TS,VSNT_EX,isEX
460,0,1,0,24577,0.539062,0.363281,2,1,13,56,...,55,E01,226.958984,0.0,0.0,-0.5,-0.5,1.0,1.0,0.0


In [71]:
# E01 rows of trial 86.
subj1.query('trial_number == 86')

Unnamed: 0,refix,pretarget,multifix,urevent,tonset,dur_added,rank,first_rank,stposrel,stposabs,...,trial_number,trial_subject,trial_bgn_latency,Nrank,N_subj_rank,NC_subj_rank,NC_rank,TS,VSNT_EX,isEX
675,0,1,1,25591,0.53125,0.314453,2,1,12,65,...,86,E01,357.134766,0.0,0.0,-0.5,-0.5,1.0,1.0,0.0
676,0,1,0,25594,1.564453,0.294922,3,1,13,67,...,86,E01,357.134766,0.333333,0.090909,-0.409091,-0.166667,1.0,1.0,0.0
677,0,1,0,25595,1.9375,0.140625,4,1,10,60,...,86,E01,357.134766,0.666667,0.181818,-0.318182,0.166667,1.0,1.0,0.0
678,0,1,0,25596,2.152344,0.126953,5,0,4,24,...,86,E01,357.134766,1.0,0.272727,-0.227273,0.5,1.0,1.0,0.0


In [72]:
# E01 rows of trial 190.
subj1.query('trial_number == 190')

Unnamed: 0,refix,pretarget,multifix,urevent,tonset,dur_added,rank,first_rank,stposrel,stposabs,...,trial_number,trial_subject,trial_bgn_latency,Nrank,N_subj_rank,NC_subj_rank,NC_rank,TS,VSNT_EX,isEX
1408,0,1,0,28532,0.814453,0.140625,2,1,3,10,...,190,E01,793.853516,0.0,0.0,-0.5,-0.5,1.0,1.0,0.0
1409,0,1,0,28533,1.015625,0.25,3,1,8,29,...,190,E01,793.853516,0.166667,0.090909,-0.409091,-0.333333,1.0,1.0,0.0
1410,0,1,1,28534,1.322266,0.316406,4,1,10,39,...,190,E01,793.853516,0.333333,0.181818,-0.318182,-0.166667,1.0,1.0,0.0
1411,0,1,0,28537,2.316406,0.302734,5,0,1,4,...,190,E01,793.853516,0.5,0.272727,-0.227273,0.0,1.0,1.0,0.0
1412,0,1,0,28539,2.890625,0.25,6,0,6,25,...,190,E01,793.853516,0.666667,0.363636,-0.136364,0.166667,1.0,1.0,0.0
1413,0,1,0,28540,3.212891,0.15625,7,0,13,61,...,190,E01,793.853516,0.833333,0.454545,-0.045455,0.333333,1.0,1.0,0.0
1414,0,1,0,28541,3.4375,0.101562,8,0,14,72,...,190,E01,793.853516,1.0,0.545455,0.045455,0.5,1.0,1.0,0.0


In [73]:
# E01 rows of trial 202.
subj1.query('trial_number == 202')

Unnamed: 0,refix,pretarget,multifix,urevent,tonset,dur_added,rank,first_rank,stposrel,stposabs,...,trial_number,trial_subject,trial_bgn_latency,Nrank,N_subj_rank,NC_subj_rank,NC_rank,TS,VSNT_EX,isEX
1496,0,1,0,28830,0.605469,0.185547,2,1,8,38,...,202,E01,844.244141,0.0,0.0,-0.5,-0.5,1.0,1.0,0.0


In [74]:
# E01 rows of trial 207.
subj1.query('trial_number == 207')

Unnamed: 0,refix,pretarget,multifix,urevent,tonset,dur_added,rank,first_rank,stposrel,stposabs,...,trial_number,trial_subject,trial_bgn_latency,Nrank,N_subj_rank,NC_subj_rank,NC_rank,TS,VSNT_EX,isEX
1519,0,1,1,28969,1.951172,0.166016,2,1,1,1,...,207,E01,865.240234,0.0,0.0,-0.5,-0.5,1.0,1.0,0.0
1520,0,1,0,28972,2.605469,0.300781,3,1,13,78,...,207,E01,865.240234,0.2,0.090909,-0.409091,-0.3,1.0,1.0,0.0
1522,0,1,0,28974,3.125,0.294922,5,0,14,80,...,207,E01,865.240234,0.6,0.272727,-0.227273,0.1,1.0,1.0,0.0
1523,0,1,0,28975,3.478516,0.109375,6,0,10,62,...,207,E01,865.240234,0.8,0.363636,-0.136364,0.3,1.0,1.0,0.0
1524,0,1,0,28977,3.798828,0.105469,7,0,3,8,...,207,E01,865.240234,1.0,0.454545,-0.045455,0.5,1.0,1.0,0.0


In [75]:
# Number of rows for E01.
subj1.shape[0]

509

In [76]:
#dforig = pd.read_csv('/home/juank/Desktop/EJN2022/python/16subj_VSNTpostvsEX_for_unfold_GlobalStandarisedCenteredRank.csv')
