# Tasks completed per wave

In [None]:
import os
from pathlib import Path
import pandas as pd
import numpy as np

In [None]:
# Locate the task-data folder: <parent of the working directory>/data/task_data.
path = os.getcwd()
parent_dir = Path('.').resolve().parents[0]
data_dir = os.path.join(os.fspath(parent_dir), 'data', 'task_data')
    

In [None]:
def longString(ser, longThreshold=5):
    """Summarise runs of identical consecutive values in a Series.

    A "run" is a maximal stretch of consecutive entries that compare equal
    (``==``) to the first entry of that stretch. Entries that never compare
    equal to themselves (e.g. NaN) therefore each form a run of length 1.

    Parameters
    ----------
    ser : pandas.Series
        Ordered responses (e.g. key presses), accessed positionally.
    longThreshold : int, optional
        A run counts as "long" when its length is strictly greater than
        this value. Defaults to 5.

    Returns
    -------
    dict
        ``maxLongStr``     -- length of the longest run,
        ``meanLongString`` -- mean run length rounded to 2 decimals,
        ``countLongStr``   -- number of runs longer than ``longThreshold``.
        For an empty Series the result is 0 / NaN / 0 respectively.
    """
    if ser.shape[0] == 0:
        # Nothing was recorded, so there are no runs to describe.
        return {'maxLongStr': 0,
                'meanLongString': np.nan,
                'countLongStr': 0,
                }

    runLengths = []
    runValue = ser.iloc[0]
    runLength = 1
    for ind in range(1, ser.shape[0]):
        value = ser.iloc[ind]
        if value == runValue:
            runLength += 1
        else:
            runLengths.append(runLength)
            runValue = value
            runLength = 1

    # The last run is never followed by a change, so it has to be recorded
    # explicitly; otherwise a streak at the end of the task is lost.
    runLengths.append(runLength)

    runLengths = np.array(runLengths)
    out = {'maxLongStr': np.max(runLengths),
           'meanLongString': np.round(np.mean(runLengths), 2),
           'countLongStr': np.sum(runLengths > longThreshold),
           }

    return out

In [None]:
# CONSP TASK
# Build one quality-control row per participant (CVDID) and wave from the
# trial-level CONSP file.
consp = pd.read_csv(os.path.join(data_dir, 'CONSP_P_CVDID.csv'))
consp_qual = pd.DataFrame()
consp.CVDID = consp.CVDID.astype('float')

# One-row frames are collected here and concatenated once after the loop;
# concatenating inside the loop re-copies the accumulated frame every time.
consp_rows = []

for cvdid in consp.CVDID.unique():
    for w in consp.loc[consp.CVDID == cvdid, 'cvd_consp_wave'].unique():

        tmp_dat = consp.loc[(consp.CVDID == cvdid) & (consp.cvd_consp_wave == w), :].copy()
        # Trials without a recorded key press are excluded from every metric below.
        tmp_dat = tmp_dat.dropna(subset='cvd_consp_key_press').reset_index(drop=True)

        sub_dict = {}
        sub_dict['CVDID'] = cvdid
        sub_dict['wave'] = w

        # response button press variance
        # NOTE(review): the column is named "meanLongString" but stores the
        # longest run ('maxLongStr') -- confirm which statistic is intended.
        consp_long_string = longString(tmp_dat.cvd_consp_key_press)
        sub_dict['cvd_consp_meanLongString'] = consp_long_string['maxLongStr']

        # reaction time: share of remaining trials with rt below 300, stored
        # as a proportion (0-1). Presumably rt is in milliseconds -- TODO confirm.
        sub_dict['cvd_consp_rt_pctlt_300'] = np.sum(tmp_dat.cvd_consp_rt < 300) / tmp_dat.shape[0]

        sub_dict['cvd_consp_administered'] = True
        consp_rows.append(pd.DataFrame.from_dict(sub_dict, orient='index').T)

# Seeding with the empty frame keeps the result an empty DataFrame when the
# file yields no rows.
consp_qual = pd.concat([consp_qual] + consp_rows)

del consp
consp_qual

In [None]:
# TRUST TASK
# Build one quality-control row per participant (CVDID) and wave from the
# trial-level trust-task file.
tr1 = pd.read_csv(os.path.join(data_dir, 'TR1_P_CVDID.csv'))
tr1_qual = pd.DataFrame()
tr1.CVDID = tr1.CVDID.astype('float')

# One-row frames are collected here and concatenated once after the loop;
# concatenating inside the loop re-copies the accumulated frame every time.
tr1_rows = []

for cvdid in tr1.CVDID.unique():
    for w in tr1.loc[tr1.CVDID == cvdid, 'tr_1s_wave'].unique():

        tmp_dat = tr1.loc[(tr1.CVDID == cvdid) & (tr1.tr_1s_wave == w), :].copy().reset_index(drop=True)

        sub_dict = {}
        sub_dict['CVDID'] = cvdid
        sub_dict['wave'] = w

        # response button press variance
        # NOTE(review): the column is named "meanLongString" but stores the
        # longest run ('maxLongStr') -- confirm which statistic is intended.
        tr1_long_string = longString(tmp_dat.tr_1s_key_press)
        sub_dict['tr_1s_meanLongString'] = tr1_long_string['maxLongStr']
        # Flag sessions where the key-press variance is exactly zero. A NaN
        # variance does not compare equal to 0 and is flagged as 0.
        sub_dict['tr_1s_noVar'] = 1 if tmp_dat.tr_1s_key_press.var() == 0 else 0

        # reaction time/ task duration
        # totalTime is read from the first row of the session only.
        sub_dict['tr_1s_totalTime'] = tmp_dat.tr_1s_totalTime[0]
        sub_dict['tr_1s_medianRT'] = tmp_dat.tr_1s_rt.median()
        # Share of trials with rt below 300, stored as a proportion (0-1).
        sub_dict['tr_1s_rt_pctlt_300'] = np.sum(tmp_dat.tr_1s_rt < 300) / tmp_dat.shape[0]

        sub_dict['tr_1s_administered'] = True
        tr1_rows.append(pd.DataFrame.from_dict(sub_dict, orient='index').T)

# Seeding with the empty frame keeps the result an empty DataFrame when the
# file yields no rows.
tr1_qual = pd.concat([tr1_qual] + tr1_rows)

del tr1
tr1_qual

In [None]:
# IAT TASK
# Build one quality-control row per participant (CVDID) and wave from the IAT file.
iat = pd.read_csv(os.path.join(data_dir, 'IAT_P_CVDID.csv'))
iat_qual = pd.DataFrame()
iat.CVDID = iat.CVDID.astype('float')

# One-row frames are collected here and concatenated once after the loop;
# concatenating inside the loop re-copies the accumulated frame every time.
iat_rows = []

for cvdid in iat.CVDID.unique():
    for w in iat.loc[iat.CVDID == cvdid, 'iat_wave'].unique():

        tmp_dat = iat.loc[(iat.CVDID == cvdid) & (iat.iat_wave == w), :].copy().reset_index(drop=True)

        sub_dict = {}
        sub_dict['CVDID'] = cvdid
        sub_dict['wave'] = w

        # reaction time/ task duration
        # iat_include is 1 when the first row's iat_iatd is not NaN, else 0.
        sub_dict['iat_include'] = (~np.isnan(tmp_dat.iat_iatd[0])).astype(int)
        # totalTime is read from the first row of the session only.
        sub_dict['iat_totalTime'] = tmp_dat.iat_totalTime[0]
        sub_dict['iat_administered'] = True
        iat_rows.append(pd.DataFrame.from_dict(sub_dict, orient='index').T)

# Seeding with the empty frame keeps the result an empty DataFrame when the
# file yields no rows.
iat_qual = pd.concat([iat_qual] + iat_rows)

del iat
iat_qual

In [None]:
# AMP TASK
# Build one quality-control row per participant (CVDID) and wave from the
# trial-level AMP file.
amp = pd.read_csv(os.path.join(data_dir, 'AMP_P_CVDID.csv'))
amp_qual = pd.DataFrame()
amp.CVDID = amp.CVDID.astype('float')

# One-row frames are collected here and concatenated once after the loop;
# concatenating inside the loop re-copies the accumulated frame every time.
amp_rows = []

for cvdid in amp.CVDID.unique():
    for w in amp.loc[amp.CVDID == cvdid, 'amp_wave'].unique():
        tmp_dat = amp.loc[(amp.CVDID == cvdid) & (amp.amp_wave == w), :].copy().reset_index(drop=True)

        sub_dict = {}
        sub_dict['CVDID'] = cvdid
        sub_dict['wave'] = w

        # response button press variance
        # NOTE(review): the column is named "meanLongString" but stores the
        # longest run ('maxLongStr') -- confirm which statistic is intended.
        amp_long_string = longString(tmp_dat.amp_key_press)
        sub_dict['amp_meanLongString'] = amp_long_string['maxLongStr']

        # reaction time/ task duration
        # totalTime and the usable-trial value are read from the first row only.
        sub_dict['amp_totalTime'] = tmp_dat.amp_totalTime[0]
        sub_dict['amp_medianRT'] = tmp_dat.amp_rt.median()
        # Presumably amp_prct_usable_trials is a 0-1 proportion -- TODO confirm.
        sub_dict['amp_pct_bad_rts'] = 1 - tmp_dat.amp_prct_usable_trials[0]

        sub_dict['amp_administered'] = True
        amp_rows.append(pd.DataFrame.from_dict(sub_dict, orient='index').T)

# Seeding with the empty frame keeps the result an empty DataFrame when the
# file yields no rows.
amp_qual = pd.concat([amp_qual] + amp_rows)

del amp
amp_qual

In [None]:
# ALTT TASK
# Build one quality-control row per participant (CVDID) and wave from the
# trial-level file 'ATT_P_CVDID.csv'.
altt = pd.read_csv(os.path.join(data_dir, 'ATT_P_CVDID.csv'))
altt_qual = pd.DataFrame()
altt.CVDID = altt.CVDID.astype('float')

# One-row frames are collected here and concatenated once after the loop;
# concatenating inside the loop re-copies the accumulated frame every time.
altt_rows = []

for cvdid in altt.CVDID.unique():
    for w in altt.loc[altt.CVDID == cvdid, 'cvd_altt_wave'].unique():
        tmp_dat = altt.loc[(altt.CVDID == cvdid) & (altt.cvd_altt_wave == w), :].copy().reset_index(drop=True)

        sub_dict = {}
        sub_dict['CVDID'] = cvdid
        sub_dict['wave'] = w

        # response button press variance
        # NOTE(review): the column is named "meanLongString" but stores the
        # longest run ('maxLongStr') -- confirm which statistic is intended.
        altt_long_string = longString(tmp_dat.cvd_altt_key_press)
        sub_dict['altt_meanLongString'] = altt_long_string['maxLongStr']

        # reaction time/ task duration
        # totalTime is read from the first row of the session only.
        sub_dict['altt_totalTime'] = tmp_dat.cvd_altt_totalTime[0]
        sub_dict['altt_medianRT'] = tmp_dat.cvd_altt_rt.median()
        # Share of trials with rt below 300, stored as a proportion (0-1).
        sub_dict['altt_rt_pctlt_300'] = np.sum(tmp_dat.cvd_altt_rt < 300) / tmp_dat.shape[0]

        sub_dict['altt_administered'] = True
        altt_rows.append(pd.DataFrame.from_dict(sub_dict, orient='index').T)

# Seeding with the empty frame keeps the result an empty DataFrame when the
# file yields no rows.
altt_qual = pd.concat([altt_qual] + altt_rows)

del altt
altt_qual

In [None]:
# BIAT TASK
# Build one quality-control row per participant (CVDID) and wave. Timing comes
# from the raw file; the include flag is looked up in the summary file.
biat_raw = pd.read_csv(os.path.join(data_dir, 'BIAT_P_CVDID.csv'))
biat_summary = pd.read_csv(os.path.join(data_dir, 'BIAT_summary_P_CVDID.csv'))
biat_qual = pd.DataFrame()
biat_raw.CVDID = biat_raw.CVDID.astype('float')
biat_summary.CVDID = biat_summary.CVDID.astype('float')

# One-row frames are collected here and concatenated once after the loop;
# concatenating inside the loop re-copies the accumulated frame every time.
biat_rows = []

for cvdid in biat_raw.CVDID.unique():
    for w in biat_raw.loc[biat_raw.CVDID == cvdid, 'biat_wave'].unique():
        tmp_dat = biat_raw.loc[(biat_raw.CVDID == cvdid) & (biat_raw.biat_wave == w), :].copy().reset_index(drop=True)

        sub_dict = {}
        sub_dict['CVDID'] = cvdid
        sub_dict['wave'] = w

        # reaction time/ task duration
        # totalTime is read from the first row of the session only.
        sub_dict['biat_totalTime'] = tmp_dat.biat_totalTime[0]
        # First matching summary row; raises IndexError when the summary file
        # has no row for this participant/wave.
        sub_dict['biat_include'] = biat_summary.loc[(biat_summary.CVDID == cvdid) & (biat_summary.biat_wave == w), 'biat_include'].values[0]
        sub_dict['biat_administered'] = True
        biat_rows.append(pd.DataFrame.from_dict(sub_dict, orient='index').T)

# Seeding with the empty frame keeps the result an empty DataFrame when the
# file yields no rows.
biat_qual = pd.concat([biat_qual] + biat_rows)

del biat_raw, biat_summary

biat_qual

In [None]:
# PGG TASK
# Only an "administered" flag is derived for this task; no trial metrics.
pgg = pd.read_csv(os.path.join(data_dir, 'PGG_P_CVDID.csv'))
# Cast the participant ID to float, as is done for every other task table,
# so the CVDID merge key has one consistent representation across tables.
pgg.CVDID = pgg.CVDID.astype('float')
pgg['pgg_administered'] = True
pgg = pgg.rename(columns={'pgg_wave': 'wave'})
# NOTE(review): rows are taken as-is; if the file holds several rows per
# CVDID/wave they will be repeated in any later merge -- confirm the file is
# one row per participant and wave.
pgg_qual = pgg[['CVDID', 'wave', 'pgg_administered']].copy()
pgg_qual

In [None]:
# Combine every per-task quality table into one wide table keyed on
# participant and wave. Outer joins keep participant/wave pairs that appear
# in only some of the tasks.
task_qual = altt_qual
for qual_frame in (tr1_qual, amp_qual, iat_qual, biat_qual, consp_qual, pgg_qual):
    merged = task_qual.merge(qual_frame, on=['CVDID', 'wave'], how='outer')
    task_qual = merged.reset_index(drop=True)

# Write the combined table next to the task data, without the row index.
out_file = os.path.join(data_dir, 'task_qual.csv')
task_qual.to_csv(out_file, index=False)