In [6]:
import os
import pandas as pd
import numpy as np


%load_ext rpy2.ipython
%R library(IAT)

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


array(['IAT', 'tools', 'stats', 'graphics', 'grDevices', 'utils',
       'datasets', 'methods', 'base'], 
      dtype='<U9')

In [19]:
os.chdir('/Users/alexmillner/Google Drive/Work/GitHubArchive/addingBIATfunctionality')
from pyiat import *
#os.chdir('/Users/alexmillner/Google Drive/Work/GitHub/pyiat/example')

**Example data from the Death Implicit Association Test**

**Nock, M.K., Park, J.M., Finn, C.T., Deliberto, T.L., Dour, H.J., & Banaji, M.R. (2010). Measuring the suicidal mind: Implicit cognition predicts suicidal behavior. Psychological Science, 21(4), 511–517. https://doi.org/10.1177/0956797610364762**

**pyiat will work with any IAT data.**


## import data

In [8]:
d=pd.read_csv('iat_data.csv',index_col=0)

In [9]:
d.head()

Unnamed: 0,block,condition,trial_word,latency,errors,correct,response,subjnum
0,0,"Death,Life",Dead,809,0,1,Death,1
1,0,"Death,Life",Deceased,852,0,1,Death,1
2,0,"Death,Life",Alive,606,0,1,Life,1
3,0,"Death,Life",Living,420,0,1,Life,1
4,0,"Death,Life",Suicide,1347,0,1,Death,1


In [10]:
#Number of trials per subject
#Note that Subject 1 has too few trials
d.groupby('subjnum').subjnum.count().head()

subjnum
1     40
2    200
3    200
4    200
5    200
Name: subjnum, dtype: int64

In [11]:
#Number of subjects in this data set
d.subjnum.unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21])

In [12]:
#Conditions
d.condition.unique()

array(['Death,Life', 'Not Me,Me', 'Life,Death', 'Life/Not Me,Death/Me',
       'Death/Not Me,Life/Me'], dtype=object)

In [13]:
#Blocks
d.block.unique()

array([0, 1, 2, 3, 4, 5, 6])

In [14]:
#Correct coded as 1, errors coded as 0 in correct column
d.correct.unique()

array([1, 0])

<blockquote> 
Blocks 0,1 & 4 - which contain conditions 'Death,Life', 'Not Me,Me', 'Life,Death' are practice blocks, meaning they do not contain relevant data because they do not contrast the different categories.
</blockquote>
<blockquote> 
Therefore, we will enter blocks 2,3,5,6 and contditions 'Life/Not Me,Death/Me', 'Death/Not Me,Life/Me' into analyze_iat.
</blockquote>
<blockquote> 
We are entering the "correct" column, which contains 1 for correct and 0 for errors. We could enter the "errors" column and then just set the error_or_correct argument to 'error.' 
</blockquote>
<blockquote> 
Finally, we have the option to return the total number and percentage of trials that are removed because they are either too fast (default : 400ms) or too slow (default : 10000ms). This will return the number and percentage across  all subjects and across just subjects that do not receive a flag indicating they had poor performance on some metric. 
</blockquote>

## pyiat

###### Return a weighted d-scores. It will also return all error and too fast/too slow trial information and flags indicating poor performance as well as the number of blocks

In [15]:
d1,fs1=analyze_iat(d,subject='subjnum',rt='latency',condition='condition',correct='correct'\
      ,cond1='Death/Not Me,Life/Me',cond2='Life/Not Me,Death/Me'\
      ,block='block',blocks=[2,3,5,6],fastslow_stats=True)

## output

###### First 14 columns contain the number of trials, - overall, for each condition and for each block - both before and after excluding fast\slow trials


In [16]:
d1.iloc[:,0:14].head()

Unnamed: 0_level_0,overall_num_trls_incl_fastslow_rt,"Death/Not Me,Life/Me_num_trls_incl_fastslow_rt","Life/Not Me,Death/Me_num_trls_incl_fastslow_rt","Death/Not Me,Life/Me_bl1_num_trls_incl_fastslow_rt","Life/Not Me,Death/Me_bl1_num_trls_incl_fastslow_rt","Death/Not Me,Life/Me_bl2_num_trls_incl_fastslow_rt","Life/Not Me,Death/Me_bl2_num_trls_incl_fastslow_rt",overall_num_trls_excl_fastslow_rt,"Death/Not Me,Life/Me_num_trls_excl_fastslow_rt","Life/Not Me,Death/Me_num_trls_excl_fastslow_rt","Death/Not Me,Life/Me_bl1_num_trls_excl_fastslow_rt","Life/Not Me,Death/Me_bl1_num_trls_excl_fastslow_rt","Death/Not Me,Life/Me_bl2_num_trls_excl_fastslow_rt","Life/Not Me,Death/Me_bl2_num_trls_excl_fastslow_rt"
subjnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2,120,60,60,20,40,20,40,119,59,60,20,39,20,40
3,120,60,60,20,40,20,40,120,60,60,20,40,20,40
4,120,60,60,20,40,20,40,118,58,60,19,39,20,40
5,120,60,60,20,40,20,40,120,60,60,20,40,20,40
6,120,60,60,20,40,20,40,119,59,60,20,39,20,40


###### Next 7 columns contain error the number of trials - overall, within each condition and within each block
###### Error rates are calculated prior to excluding fast\slow trials but there is an option - errors_after_fastslow_rmvd - that if set to True will remove fast/slow trials prior to calculating error rates

In [17]:
d1.iloc[:,14:21].head()

Unnamed: 0_level_0,overall_error_rate,"Death/Not Me,Life/Me_error_rate","Life/Not Me,Death/Me_error_rate","Death/Not Me,Life/Me_bl1_error_rate","Life/Not Me,Death/Me_bl1_error_rate","Death/Not Me,Life/Me_bl2_error_rate","Life/Not Me,Death/Me_bl2_error_rate"
subjnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,0.083333,0.083333,0.083333,0.1,0.075,0.05,0.1
3,0.058333,0.033333,0.083333,0.05,0.025,0.05,0.1
4,0.041667,0.05,0.033333,0.0,0.075,0.0,0.05
5,0.125,0.066667,0.183333,0.05,0.075,0.15,0.2
6,0.15,0.1,0.2,0.05,0.125,0.2,0.2


###### Next 7 columns contain pct of too fast trials - overall, within each condition and within each block

In [14]:
d1.iloc[:,21:28].head()

Unnamed: 0_level_0,overall_fast_rt_rate_400ms,"Death/Not Me,Life/Me_fast_rt_rate_400ms","Life/Not Me,Death/Me_fast_rt_rate_400ms","Death/Not Me,Life/Me_bl1_fast_rt_rate_400ms","Death/Not Me,Life/Me_bl2_fast_rt_rate_400ms","Life/Not Me,Death/Me_bl1_fast_rt_rate_400ms","Life/Not Me,Death/Me_bl2_fast_rt_rate_400ms"
subjnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,0.008333,0.016667,0.0,0.0,0.025,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.016667,0.033333,0.0,0.05,0.025,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.008333,0.016667,0.0,0.0,0.025,0.0,0.0


###### Next 7 columns contain pct of too slow trials - overall, within each condition and within each block

In [15]:
d1.iloc[:,28:35].head()

Unnamed: 0_level_0,overall_slow_rt_rate_10000ms,"Death/Not Me,Life/Me_slow_rt_rate_10000ms","Life/Not Me,Death/Me_slow_rt_rate_10000ms","Death/Not Me,Life/Me_bl1_slow_rt_rate_10000ms","Death/Not Me,Life/Me_bl2_slow_rt_rate_10000ms","Life/Not Me,Death/Me_bl1_slow_rt_rate_10000ms","Life/Not Me,Death/Me_bl2_slow_rt_rate_10000ms"
subjnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0


###### Column 35 contains the number of blocks

In [16]:
d1.iloc[:,35].to_frame().head()

Unnamed: 0_level_0,num_blocks
subjnum,Unnamed: 1_level_1
2,4
3,4
4,4
5,4
6,4


###### Next 22 columns contain whether a poor performance criterion\cutoff was flagged - across error rates, too fast rates, too slow rates, and number of blocks

In [17]:
d1.iloc[:,36:58].head()

Unnamed: 0_level_0,overall_error_rate_flag,"Death/Not Me,Life/Me_error_rate_flag","Life/Not Me,Death/Me_error_rate_flag","Death/Not Me,Life/Me_bl1_error_rate_flag","Death/Not Me,Life/Me_bl2_error_rate_flag","Life/Not Me,Death/Me_bl1_error_rate_flag","Life/Not Me,Death/Me_bl2_error_rate_flag",overall_fast_rt_rate_400ms_flag,"Death/Not Me,Life/Me_fast_rt_rate_400ms_flag","Life/Not Me,Death/Me_fast_rt_rate_400ms_flag",...,"Life/Not Me,Death/Me_bl1_fast_rt_rate_400ms_flag","Life/Not Me,Death/Me_bl2_fast_rt_rate_400ms_flag",overall_slow_rt_rate_10000ms_flag,"Death/Not Me,Life/Me_slow_rt_rate_10000ms_flag","Life/Not Me,Death/Me_slow_rt_rate_10000ms_flag","Death/Not Me,Life/Me_bl1_slow_rt_rate_10000ms_flag","Death/Not Me,Life/Me_bl2_slow_rt_rate_10000ms_flag","Life/Not Me,Death/Me_bl1_slow_rt_rate_10000ms_flag","Life/Not Me,Death/Me_bl2_slow_rt_rate_10000ms_flag",num_blocks_flag
subjnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


###### Column 58 contains a 1 if subject passed any poor performance crierion\cuttoff

In [18]:
d1.iloc[:,58].to_frame().head()

Unnamed: 0_level_0,iat_flag
subjnum,Unnamed: 1_level_1
2,0
3,0
4,0
5,0
6,0


###### Columns 59-62 contain D scores for early and late trials and a final overall weighted D score

In [11]:
d1.iloc[:,59:62].head()

Unnamed: 0_level_0,dscore1,dscore2,dscore
subjnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,-0.664655,0.055078,-0.304788
3,0.535351,-0.305441,0.114955
4,-0.074985,0.05201,-0.011487
5,0.36188,0.03209,0.196985
6,-0.035555,-0.212647,-0.124101


#### Compare D scores with R package "iat"
##### https://cran.r-project.org/web/packages/IAT/

In [12]:
#Prepare data to enter into r package - need to have blocks be a string and need to divide data into 2 separate
#dataframes for people that received "Death,Me" first and for those that received "Life,Me" first. 
d['block_str']=d.block.astype(str)

d1_r_subn=d[(d.condition=='Death/Not Me,Life/Me')&(d.block>4)].subjnum.unique()
d1_r=d[d.subjnum.isin(d1_r_subn)]
d2_r_subn=d[(d.condition=='Life/Not Me,Death/Me')&(d.block>4)].subjnum.unique()
d2_r=d[d.subjnum.isin(d2_r_subn)]

%R -i d1_r
%R -i d2_r

In [13]:
%%R
dscore_first <- cleanIAT(my_data = d1_r,
                         block_name = "block_str",
                         trial_blocks = c("2","3", "5", "6"),
                         session_id = "subjnum",
                         trial_latency = "latency",
                         trial_error = "errors",
                         v_error = 1, v_extreme = 2, v_std = 1)

dscore_second <- cleanIAT(my_data = d2_r,
                         block_name = "block_str",
                         trial_blocks = c("2","3", "5", "6"),
                         session_id = "subjnum",
                         trial_latency = "latency",
                         trial_error = "errors",
                         v_error = 1, v_extreme = 2, v_std = 1)

r_dsc <- rbind(dscore_first, dscore_second)

In [14]:
%R -o dscore_first
%R -o dscore_second
#Then we need to combine the separate dataframes
#One of these the scores are flipped so need to flip back
dscore_second.IAT=dscore_second.IAT*-1
iat_r_dsc=pd.concat([dscore_first,dscore_second])
iat_r_dsc.index=iat_r_dsc.subjnum
iat_r_dsc=iat_r_dsc.sort_index()

In [15]:
py_r_iat=pd.concat([d1.dscore,iat_r_dsc.IAT],axis=1)
py_r_iat.head()

Unnamed: 0_level_0,dscore,IAT
subjnum,Unnamed: 1_level_1,Unnamed: 2_level_1
2,-0.304788,-0.304788
3,0.114955,0.114955
4,-0.011487,-0.011487
5,0.196985,0.196985
6,-0.124101,-0.1241


In [16]:
#Correlation between pyiat (dscore) and R package (IAT) = 1
py_r_iat.corr()

Unnamed: 0,dscore,IAT
dscore,1.0,1.0
IAT,1.0,1.0


### In the pyiat command above, we entered an argument to return fast-slow stats
### This returns total perecentage of too fast and too slow trials across all subjects and only unflagged, presumably included subjects

In [23]:
fs1

Unnamed: 0,fast_slow_rt
fast_rt_count_all_subs,28.0
fast_rt_pct_all_subs,0.012281
slow_rt_count_all_subs,0.0
slow_rt_pct_all_subs,0.0
fast_rt_count_included_subs,26.0
fast_rt_pct_included_subs,0.012037
slow_rt_count_included_subs,0.0
slow_rt_pct_included_subs,0.0


## Other options

### D scores for each stimulus (i.e. each word)
###### Requires each_stim=True and name of the column containing the stimuli in the stimulus column

In [24]:
d2,fs2=analyze_iat(d,subject='subjnum',rt='latency',condition='condition',correct='correct'\
      ,cond1='Death/Not Me,Life/Me',cond2='Life/Not Me,Death/Me'\
      ,block='block',blocks=[2,3,5,6],fastslow_stats=True,each_stim=True,stimulus='trial_word')

###### D scores for each word as well as all error and fast\slow trial output

In [25]:
d2.iloc[:,59:].head()

Unnamed: 0_level_0,Alive,Breathing,Dead,Deceased,Die,I,Living,Mine,Myself,Other,Self,Suicide,Their,Them,They,Thrive
subjnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2,-0.98536,0.888583,-0.928888,0.849257,-0.583566,0.095183,0.912821,-0.271138,-0.582636,-0.656637,-0.915718,1.180729,-0.260372,0.409289,0.346441,-0.886016
3,-0.228326,-1.09368,0.54677,-0.725671,-0.674954,0.234189,0.124746,0.601094,0.263791,0.626292,0.204474,1.102111,-0.066274,0.106796,0.528139,0.376461
4,1.287682,1.170938,-0.084933,0.705147,0.084087,-1.290731,-0.473819,,-1.231712,-0.134547,-0.027644,0.269901,0.797983,-0.61115,1.367861,0.225041
5,0.670642,-1.481677,-0.13138,0.218148,-0.08534,-0.711548,0.285979,0.043037,-0.705098,-0.186102,1.121443,1.547784,0.703224,-0.342463,-1.156475,-0.176667
6,0.331365,0.955485,1.175333,-1.099263,0.200367,0.176193,-1.65392,0.317128,-1.264377,-0.748313,-0.923371,1.169169,-1.157834,0.025647,-0.730981,1.373918


### Unweighted D scores

In [26]:
d3,fs3=analyze_iat(d,subject='subjnum',rt='latency',condition='condition',correct='correct'\
      ,cond1='Death/Not Me,Life/Me',cond2='Life/Not Me,Death/Me'\
      ,block='block',blocks=[2,3,5,6],fastslow_stats=True,weighted=False)

###### This produces less output as it does not report any information on a block basis

In [27]:
d3.iloc[:,24:].head()

Unnamed: 0_level_0,iat_flag,dscore
subjnum,Unnamed: 1_level_1,Unnamed: 2_level_1
2,0,-0.186741
3,0,-0.028275
4,0,0.014719
5,0,0.134814
6,0,-0.169741


### Unweighted D scores for each stimulus

In [20]:
d4,fs4=analyze_iat(d,subject='subjnum',rt='latency',condition='condition',correct='correct'\
      ,cond1='Death/Not Me,Life/Me',cond2='Life/Not Me,Death/Me'\
      ,block='block',blocks=[2,3,5,6],fastslow_stats=True,each_stim=True,stimulus='trial_word',weighted=False)

In [29]:
d4.iloc[:,26:].head()

Unnamed: 0_level_0,Breathing,Dead,Deceased,Die,I,Living,Mine,Myself,Other,Self,Suicide,Their,Them,They,Thrive
subjnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2,0.255883,-1.014222,0.509232,-0.390387,1.295332,0.737379,0.339328,-0.741623,-0.449167,-0.959577,0.780971,-0.360803,-0.347474,0.392907,-0.369755
3,-0.737611,0.064561,-1.067423,-0.224777,-0.587997,-0.678621,0.40211,-0.183775,0.587851,0.219246,0.949326,0.718642,1.212748,0.437229,0.442829
4,0.875262,-0.095717,0.501377,-0.825456,-1.028155,0.087735,0.979749,-0.947307,-0.509266,0.937908,-0.645969,0.368724,0.01788,0.718572,-0.223557
5,-1.503671,0.865254,-0.596999,0.793221,-0.866229,-0.337516,0.981948,0.018283,-0.019362,0.67921,1.026762,0.506938,0.47636,-0.821311,-0.582995
6,0.789436,0.954122,-1.141469,0.270374,-0.756038,-1.658917,0.891638,-0.968083,-0.980864,-0.731248,1.212799,-0.793774,-0.172779,-0.712242,1.247653


### There are a few more options, including (1) setting the too fast\too slow threshold, (2) setting the cutoffs for flags, (3) reporting errors and too fast\slow trial counts instead of percentages (4) printing the output to an excel spreadsheet. 

In [20]:
dbt=pd.read_csv('_orig_biat.csv',index_col=0)

In [21]:
dbt.head()

Unnamed: 0,block_number,block_name,block_trial_count,block_pairing_definition,study_name,task_number,task_name,trial_number,trial_name,trial_response,trial_latency,trial_error,session_id
0,1,BLOCK1,20,"(unnamed)/Death,Me/Life",MentalHealth.DeathlifewordBIAT,7,pimh_deathlifeword1biat,0,Suicide,(unnamed)/Death,1010,0,2615984175
1,0,BLOCK0,20,"(unnamed)/Life,Me/Death",MentalHealth.DeathlifewordBIAT,7,pimh_deathlifeword1biat,19,Other,(unnamed)/Life,777,0,2615984175
2,1,BLOCK1,20,"(unnamed)/Death,Me/Life",MentalHealth.DeathlifewordBIAT,7,pimh_deathlifeword1biat,2,Deceased,(unnamed)/Death,1457,0,2615984175
3,1,BLOCK1,20,"(unnamed)/Death,Me/Life",MentalHealth.DeathlifewordBIAT,7,pimh_deathlifeword1biat,3,Alive,Me/Life,1143,0,2615984175
4,1,BLOCK1,20,"(unnamed)/Death,Me/Life",MentalHealth.DeathlifewordBIAT,7,pimh_deathlifeword1biat,4,Alive,Me/Life,588,0,2615984175


In [22]:
db=pd.read_csv('_orig_biat_scored.csv',index_col=0)

biatd1=analyze_iat(biatd,subject='session_id',rt='trial_latency',condition='block_pairing_definition',\
                   correct='trial_error',error_or_correct='error'\
      ,cond1='(unnamed)/Death,Me/Life',cond2='(unnamed)/Life,Me/Death'\
      ,block='block_number',blocks=[0, 1, 2, 3,4,5],biat=True)

In [23]:
biatd1=analyze_iat(dbt,subject='session_id',rt='trial_latency',condition='block_pairing_definition',\
                   correct='trial_error',error_or_correct='error'\
      ,cond2='(unnamed)/Death,Me/Life',cond1='(unnamed)/Life,Me/Death'\
      ,block='block_number',blocks=[0, 1, 2, 3,4,5],biat=True,rmv_1st_4trls=True,trl_num='trial_number',\
       each_stim=True,stimulus='trial_name')

In [24]:
biatd1

Unnamed: 0_level_0,overall_num_trls_incl_fastslow_rt,"(unnamed)/Life,Me/Death_num_trls_incl_fastslow_rt","(unnamed)/Death,Me/Life_num_trls_incl_fastslow_rt","(unnamed)/Life,Me/Death_bl1_num_trls_incl_fastslow_rt","(unnamed)/Death,Me/Life_bl1_num_trls_incl_fastslow_rt","(unnamed)/Life,Me/Death_bl2_num_trls_incl_fastslow_rt","(unnamed)/Death,Me/Life_bl2_num_trls_incl_fastslow_rt","(unnamed)/Life,Me/Death_bl3_num_trls_incl_fastslow_rt","(unnamed)/Death,Me/Life_bl3_num_trls_incl_fastslow_rt",overall_num_trls_excl_fastslow_rt,...,Living,Mine,Myself,Other,Self,Suicide,Their,Them,They,Thrive
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2615984175,32,16.0,16.0,16.0,16.0,,,,,32,...,,,,,,,,,,
2615984709,96,48.0,48.0,16.0,16.0,16.0,16.0,16.0,16.0,96,...,2.121320,2.121320,0.707107,-0.707107,0.707107,0.707107,-2.121320,2.121320,-0.707107,0.707107
2615989949,96,48.0,48.0,16.0,16.0,16.0,16.0,16.0,16.0,96,...,2.121320,2.121320,2.121320,0.707107,2.121320,2.121320,,2.121320,,2.121320
2615990766,96,48.0,48.0,16.0,16.0,16.0,16.0,16.0,16.0,96,...,0.707107,0.707107,2.121320,2.121320,0.707107,-2.121320,-2.121320,2.121320,0.707107,
2615998161,80,48.0,32.0,16.0,16.0,16.0,16.0,16.0,,80,...,,,,,,,,,,
2615999629,96,48.0,48.0,16.0,16.0,16.0,16.0,16.0,16.0,96,...,0.707107,0.707107,2.121320,2.121320,0.707107,-0.707107,-0.707107,-0.707107,2.121320,-0.707107
2615999713,96,48.0,48.0,16.0,16.0,16.0,16.0,16.0,16.0,96,...,2.121320,0.707107,-0.707107,-0.707107,0.707107,-2.121320,-2.121320,0.707107,-0.707107,0.707107
2616001079,96,48.0,48.0,16.0,16.0,16.0,16.0,16.0,16.0,96,...,2.121320,0.707107,2.121320,-0.707107,-0.707107,0.707107,-2.121320,0.707107,-0.707107,2.121320
2616002366,96,48.0,48.0,16.0,16.0,16.0,16.0,16.0,16.0,96,...,0.707107,0.707107,0.707107,0.707107,0.707107,0.707107,2.121320,2.121320,0.707107,2.121320
2616003079,96,48.0,48.0,16.0,16.0,16.0,16.0,16.0,16.0,96,...,0.707107,2.121320,-0.707107,-0.707107,-0.707107,2.121320,-2.121320,-0.707107,-0.707107,-0.707107


In [7]:
s=['session_id']
s.extend(list(db.filter(like='DS').columns.values))

In [8]:
db[s].head(10)

Unnamed: 0,session_id,DScore1,DScore2,DScore3,DScores
0,2615984175,,,-0.167096,
1,2615984709,0.77158,-0.243694,-0.239998,0.095962
2,2615990766,0.812589,-0.109142,0.336536,0.346661
3,2615998161,0.653263,,1.193078,
4,2615999629,0.461957,1.206912,0.053796,0.574222
5,2616001079,0.06735,0.154879,0.075434,0.099221
6,2616002366,0.588244,0.815263,1.052795,0.818767
7,2616003193,0.761246,0.143279,0.246429,0.383651
8,2616004319,0.102014,0.626668,-0.300729,0.142651
9,2616005031,0.04292,0.229018,1.050477,0.440805


In [9]:
dscore2=biatd1[biatd1.dscore2.notnull()].index

In [10]:
incld=db[db.session_id.isin(dscore2)][db[db.session_id.isin(dscore2)].DScore1.notnull()].session_id

In [11]:
biatd1[biatd1.index.isin(incld)].dscore2.corr(db[db.session_id.isin(incld)].DScore1)

nan

In [12]:
db.index=db.session_id

In [13]:
t=pd.concat([biatd1.dscore2.apply(lambda x: np.round(x,4)),db.DScore1.apply(lambda x: np.round(x,4))],axis=1)

In [14]:
t.corr()

Unnamed: 0,dscore2,DScore1
dscore2,1.0,1.0
DScore1,1.0,1.0


In [15]:
t[((t.dscore2.notnull())&(t.DScore1.notnull()))].corr()

Unnamed: 0,dscore2,DScore1
dscore2,1.0,1.0
DScore1,1.0,1.0


In [65]:
t.head()

Unnamed: 0_level_0,dscore2,DScore1
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2615984175,,
2615984709,0.7716,0.7716
2615989949,1.137,1.137
2615990766,0.8126,0.8126
2615998161,0.6533,0.6533


In [102]:
df=dbt[dbt.session_id==2618881218].copy(deep=True)
rt='trial_latency'

In [103]:
df.loc[df[rt]>2000,rt]=2000

In [104]:
df[df[rt]>2000]

Unnamed: 0,block_number,block_name,block_trial_count,block_pairing_definition,study_name,task_number,task_name,trial_number,trial_name,trial_response,trial_latency,trial_error,session_id


In [101]:
df[df[rt]<400]

Unnamed: 0,block_number,block_name,block_trial_count,block_pairing_definition,study_name,task_number,task_name,trial_number,trial_name,trial_response,trial_latency,trial_error,session_id


In [97]:
t[(t.dscore2-t.DScore1)<0]

Unnamed: 0_level_0,dscore2,DScore1
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2620609337,0.0008,0.0759
2623749977,-0.0751,0.0689
2626351000,-0.4092,-0.2373
2627162722,0.1348,0.1601


In [70]:
t[(t.dscore2-t.DScore1)>0]

Unnamed: 0_level_0,dscore2,DScore1
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2616254705,0.2943,0.1607
2616494962,0.8798,0.8206
2616649449,0.8293,0.7357
2616708273,0.096,0.0119
2618881218,0.566,0.4723
2619198718,0.7345,0.6554
2619640268,0.092,-0.0499
2622104044,0.292,0.1291
2625543757,-0.0551,-0.3296
2626181303,-0.2464,-0.4389


In [18]:
biatd1.filter(like='dsc')

Unnamed: 0_level_0,dscore1,dscore2,dscore3,dscore
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2615984175,0.167096,,,
2615984709,0.239998,-0.771580,0.243694,-0.095962
2615989949,-1.051250,-1.137032,-0.755168,-0.981150
2615990766,-0.336536,-0.812589,0.109142,-0.346661
2615998161,-1.193078,-0.653263,,
2615999629,-0.053796,-0.461957,-1.206912,-0.574222
2615999713,0.018027,0.406340,0.252838,0.225735
2616001079,-0.075434,-0.067350,-0.154879,-0.099221
2616002366,-1.052795,-0.588244,-0.815263,-0.818767
2616003079,-0.529759,-0.869903,0.442017,-0.319215


In [4]:
biatd=pd.read_csv('biat.csv',index_col=0)

In [6]:
biatd1

Unnamed: 0_level_0,overall_num_trls_incl_fastslow_rt,"(unnamed)/Death,Me/Life_num_trls_incl_fastslow_rt","(unnamed)/Life,Me/Death_num_trls_incl_fastslow_rt","(unnamed)/Death,Me/Life_bl1_num_trls_incl_fastslow_rt","(unnamed)/Life,Me/Death_bl1_num_trls_incl_fastslow_rt","(unnamed)/Death,Me/Life_bl2_num_trls_incl_fastslow_rt","(unnamed)/Life,Me/Death_bl2_num_trls_incl_fastslow_rt","(unnamed)/Death,Me/Life_bl3_num_trls_incl_fastslow_rt","(unnamed)/Life,Me/Death_bl3_num_trls_incl_fastslow_rt",overall_num_trls_excl_fastslow_rt,...,"(unnamed)/Death,Me/Life_bl2_slow_rt_rate_10000ms_flag","(unnamed)/Life,Me/Death_bl2_slow_rt_rate_10000ms_flag","(unnamed)/Death,Me/Life_bl3_slow_rt_rate_10000ms_flag","(unnamed)/Life,Me/Death_bl3_slow_rt_rate_10000ms_flag",num_blocks_flag,iat_flag,dscore1,dscore2,dscore3,dscore
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2615984175,40,20,20,20,20,,,,,39,...,0,0,0,0,1,1,-0.066492,,,
2615984709,120,60,60,20,20,20.0,20.0,20.0,20.0,116,...,0,0,0,0,0,0,-0.205472,0.45848,0.320501,0.19117
2615989949,120,60,60,20,20,20.0,20.0,20.0,20.0,116,...,0,0,0,0,0,1,-0.287541,-0.203893,0.323653,-0.055927
2615990766,120,60,60,20,20,20.0,20.0,20.0,20.0,120,...,0,0,0,0,0,4,0.141317,-0.395249,0.445171,0.063746
2615998161,100,40,60,20,20,20.0,20.0,,20.0,98,...,0,0,0,0,0,0,0.710156,-0.480718,,
2615999629,120,60,60,20,20,20.0,20.0,20.0,20.0,120,...,0,0,0,0,0,0,0.31739,0.500385,0.055265,0.291013
2615999713,120,60,60,20,20,20.0,20.0,20.0,20.0,117,...,0,0,0,0,0,0,-0.302939,-0.047846,-0.803018,-0.384601
2616001079,120,60,60,20,20,20.0,20.0,20.0,20.0,116,...,0,0,0,0,0,0,-0.200965,-0.317912,-0.048262,-0.189046
2616002366,120,60,60,20,20,20.0,20.0,20.0,20.0,116,...,0,0,0,0,0,0,-0.538062,-0.010312,-0.178144,-0.242173
2616003079,120,60,60,20,20,20.0,20.0,20.0,20.0,116,...,0,0,0,0,0,0,-0.194654,0.435749,-0.093619,0.049159


In [14]:
overall_err_cut=.3
cond_err_cut=.4
block_err_cut=.4

In [26]:
cutoffs=[overall_err_cut,cond_err_cut,cond_err_cut]

In [27]:
cutoffs.extend(list(np.repeat(block_err_cut,len(blocks))))

In [28]:
cutoffs

[0.3,
 0.4,
 0.4,
 0.40000000000000002,
 0.40000000000000002,
 0.40000000000000002,
 0.40000000000000002]

In [10]:
biatd1

Unnamed: 0_level_0,overall_num_trls_incl_fastslow_rt,"(unnamed)/Death,Me/Life_num_trls_incl_fastslow_rt","(unnamed)/Life,Me/Death_num_trls_incl_fastslow_rt","(unnamed)/Death,Me/Life_bl1_num_trls_incl_fastslow_rt","(unnamed)/Death,Me/Life_bl2_num_trls_incl_fastslow_rt","(unnamed)/Life,Me/Death_bl1_num_trls_incl_fastslow_rt","(unnamed)/Life,Me/Death_bl2_num_trls_incl_fastslow_rt",overall_num_trls_excl_fastslow_rt,"(unnamed)/Death,Me/Life_num_trls_excl_fastslow_rt","(unnamed)/Life,Me/Death_num_trls_excl_fastslow_rt",...,"(unnamed)/Life,Me/Death_slow_rt_rate_10000ms_flag","(unnamed)/Death,Me/Life_bl1_slow_rt_rate_10000ms_flag","(unnamed)/Death,Me/Life_bl2_slow_rt_rate_10000ms_flag","(unnamed)/Life,Me/Death_bl1_slow_rt_rate_10000ms_flag","(unnamed)/Life,Me/Death_bl2_slow_rt_rate_10000ms_flag",num_blocks_flag,iat_flag,dscore1,dscore2,dscore
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2615984175,40,20,20,20,20,,,39,20,19,...,0,0,0,0,0,1,1,-0.066492,,
2615984709,120,60,60,20,20,20.0,20.0,116,59,57,...,0,0,0,0,0,0,0,-0.205472,0.45848,0.126504
2615989949,120,60,60,20,20,20.0,20.0,116,56,60,...,0,0,0,0,0,0,1,-0.287541,-0.203893,-0.245717
2615990766,120,60,60,20,20,20.0,20.0,120,60,60,...,0,0,0,0,0,0,4,0.141317,-0.395249,-0.126966
2615998161,100,40,60,20,20,20.0,20.0,98,39,59,...,0,0,0,0,0,0,0,0.710156,-0.480718,0.114719
2615999629,120,60,60,20,20,20.0,20.0,120,60,60,...,0,0,0,0,0,0,0,0.31739,0.500385,0.408888
2615999713,120,60,60,20,20,20.0,20.0,117,59,58,...,0,0,0,0,0,0,0,-0.302939,-0.047846,-0.175392
2616001079,120,60,60,20,20,20.0,20.0,116,57,59,...,0,0,0,0,0,0,0,-0.200965,-0.317912,-0.259438
2616002366,120,60,60,20,20,20.0,20.0,116,59,57,...,0,0,0,0,0,0,0,-0.538062,-0.010312,-0.274187
2616003079,120,60,60,20,20,20.0,20.0,116,59,57,...,0,0,0,0,0,0,0,-0.194654,0.435749,0.120547


In [45]:
biat

True

In [48]:
df=biatd
correct='trial_error'
subject='session_id'
condition='block_pairing_definition'
block='block_number'
cond1='(unnamed)/Death,Me/Life'
cond2='(unnamed)/Life,Me/Death'
blocks=[0, 1, 2, 3]
include_blocks=True
flag_outformat='pct'
rt='trial_latency'
fast_rt=400
slow_rt=10000
error_or_correct='error'
weighted=True
errors_after_fastslow_rmvd=False
df_fastslow_rts_rmvd=False
biat=True
var='trial_error'

In [54]:
idx=pd.IndexSlice

In [49]:
    outcms=get_error_fastslow_rates(df,correct,subject,condition,block,cond1,cond2,blocks,flag_outformat,include_blocks,\
    rt,fast_rt,slow_rt,error_or_correct,weighted,errors_after_fastslow_rmvd,df_fastslow_rts_rmvd,biat)


In [55]:
    if flag_outformat=='pct':
        all_df=df.groupby(subject)[var].mean()
        ##By condition
        cond1_df=df[(df[condition]==cond1)].groupby(subject)[var].mean()
        cond2_df=df[(df[condition]==cond2)].groupby(subject)[var].mean()
        ##By condition and block
        if include_blocks == True:
            blcnd=df.groupby([subject,condition,block])[var].mean()
    elif flag_outformat=='sum':
        all_df=df.groupby(subject)[var].sum()
        ##By condition
        cond1_df=df[(df[condition]==cond1)].groupby(subject)[var].sum()
        cond2_df=df[(df[condition]==cond2)].groupby(subject)[var].sum()
        ##By condition and block
        if include_blocks == True:
            blcnd=df.groupby([subject,condition,block])[var].sum()
    elif flag_outformat=='count':
        all_df=df.groupby(subject)[var].count()
        ##By condition
        cond1_df=df[(df[condition]==cond1)].groupby(subject)[var].count()
        cond2_df=df[(df[condition]==cond2)].groupby(subject)[var].count()
        ##By condition and block
        if include_blocks == True:
            blcnd=df.groupby([subject,condition,block])[var].count()

In [56]:
    if (include_blocks == True) and (biat==False):
        cond1_bl1=blcnd.loc[idx[:,cond1,[blocks[0],blocks[2]]]]
        cond1_bl2=blcnd.loc[idx[:,cond1,[blocks[1],blocks[3]]]]
        cond2_bl1=blcnd.loc[idx[:,cond2,[blocks[0],blocks[2]]]]
        cond2_bl2=blcnd.loc[idx[:,cond2,[blocks[1],blocks[3]]]]
        #Drop block and condidition levels to subtract means
        for df_tmp in [cond1_bl1,cond1_bl2,cond2_bl1,cond2_bl2]:
            df_tmp.index=df_tmp.index.droplevel([1,2])
        out=pd.concat([all_df,cond1_df,cond2_df,cond1_bl1,cond1_bl2,cond2_bl1,cond2_bl2],axis=1)

    elif (include_blocks == True) and (biat==True):
        if len(blocks)>=2:
            cond1_bl1=blcnd.loc[idx[:,cond1,[blocks[0],blocks[1]]]]
            cond2_bl1=blcnd.loc[idx[:,cond2,[blocks[0],blocks[1]]]]
            for df_tmp in [cond1_bl1,cond2_bl1]:
                df_tmp.index=df_tmp.index.droplevel([1,2])
            out=pd.concat([all_df,cond1_df,cond2_df,cond1_bl1,cond2_bl1],axis=1)
        if len(blocks)>=4:
            cond1_bl2=blcnd.loc[idx[:,cond1,[blocks[2],blocks[3]]]]
            cond2_bl2=blcnd.loc[idx[:,cond2,[blocks[2],blocks[3]]]]
            for df_tmp in [cond1_bl2,cond2_bl2]:
                df_tmp.index=df_tmp.index.droplevel([1,2])
            out=pd.concat([out,cond1_bl2,cond2_bl2],axis=1)
        if len(blocks)==6:
            cond1_bl3=blcnd.loc[idx[:,cond1,[blocks[4],blocks[5]]]]
            cond2_bl3=blcnd.loc[idx[:,cond2,[blocks[4],blocks[5]]]]
            for df_tmp in [cond1_bl3,cond2_bl3]:
                df_tmp.index=df_tmp.index.droplevel([1,2])
            out=pd.concat([out,cond1_bl3,cond2_bl3],axis=1)
    elif include_blocks == False:
        out=pd.concat([all_df,cond1_df,cond2_df],axis=1)

In [57]:
out

session_id
2615984175    0.050000
2615984709    0.058333
2615989949    0.208333
2615990766    0.308333
2615998161    0.080000
2615999629    0.091667
2615999713    0.050000
2616001079    0.191667
2616002366    0.083333
2616003079    0.183333
2616003193    0.066667
2616004319    0.033333
2616005031    0.100000
2616005469    0.091667
2616006197    0.025000
2616006499    0.091667
2616007692    0.116667
2616007970    0.316667
2616008036    0.150000
2616008297    0.175000
2615984175    0.100000
2615984709    0.033333
2615989949    0.100000
2615990766    0.183333
2615998161    0.000000
2615999629    0.066667
2615999713    0.050000
2616001079    0.233333
2616002366    0.050000
2616003079    0.133333
                ...   
2616003079    0.150000
2616003193    0.000000
2616004319    0.050000
2616005031    0.100000
2616005469    0.000000
2616006197    0.050000
2616006499    0.100000
2616007692    0.050000
2616007970    0.200000
2616008036    0.100000
2616008297    0.150000
2615984709    0.100000


In [30]:
pd.concat(outcms,axis=1)

InvalidIndexError: Reindexing only valid with uniquely valued Index objects

In [24]:
biat['correct']=np.abs(1-biat.trial_error)

In [35]:
blcnd=biat.groupby(['session_id','block_pairing_definition','block_number'])['trial_error'].count()

In [41]:

blocks=[0, 1, 2, 3]
cond1='(unnamed)/Death,Me/Life'
cond2='(unnamed)/Life,Me/Death'

In [44]:
        cond1_bl1=blcnd.loc[idx[:,cond1,[blocks[0],blocks[2]]]]
        cond1_bl2=blcnd.loc[idx[:,cond1,[blocks[1],blocks[3]]]]
        cond2_bl1=blcnd.loc[idx[:,cond2,[blocks[0],blocks[2]]]]
        cond2_bl2=blcnd.loc[idx[:,cond2,[blocks[1],blocks[3]]]]

In [45]:
cond2_bl2

session_id  block_pairing_definition  block_number
2615989949  (unnamed)/Life,Me/Death   1               20
                                      3               20
2615999713  (unnamed)/Life,Me/Death   1               20
                                      3               20
2616003079  (unnamed)/Life,Me/Death   1               20
                                      3               20
2616005469  (unnamed)/Life,Me/Death   1               20
                                      3               20
2616007692  (unnamed)/Life,Me/Death   1               20
                                      3               20
Name: trial_error, dtype: int64

In [46]:
        for df_tmp in [cond1_bl1,cond1_bl2,cond2_bl1,cond2_bl2]:
            df_tmp.index=df_tmp.index.droplevel([1,2])


In [47]:
all_df=biat.groupby('session_id')['trial_error'].mean()