In [1]:
import os
import pandas as pd
import numpy as np

In [4]:
# Directory layout, all relative to the notebook's working directory:
#   derivs_dir -> parent-level 'derivatives' folder
#   scales_dir -> questionnaire subscale scores read below
#   output_dir -> where the subject-level table is written at the end
derivs_dir = os.path.join(os.pardir, 'derivatives')
scales_dir = os.path.join(derivs_dir, 'qualtrics', '2.subscaled')
output_dir = os.path.join(derivs_dir, '05.subject-level')

In [5]:
# Make sure the output directory exists before anything is written to it.
# - Skip creation when it is already there, so re-running the notebook does
#   not report a spurious "already exists" error.
# - Use makedirs so missing parent directories are created as well.
# - Catch OSError rather than WindowsError: WindowsError only exists on
#   Windows (where it is an OSError subclass/alias), so the old clause raised
#   NameError on any other platform as soon as mkdir failed.
if not os.path.isdir(output_dir):
    try:
        os.makedirs(output_dir)
    except OSError as e:
        # Best-effort, as before: report the problem and keep going.
        print(e)

# Import data

In [31]:
# Read the behavioral values workbook that sits directly in the derivatives
# directory.
fname = os.path.join(derivs_dir, 'sub-all_task-all_VALUES.xlsx')
behav_data = pd.read_excel(fname)

In [32]:
# Read the questionnaire subscale scores (one row per ssid) from scales_dir.
fname = os.path.join(scales_dir, 'all_subscales.csv')
scale_data = pd.read_csv(fname)

In [44]:
# Preview the first rows of the behavioral data to sanity-check the load.
behav_data.head()

Unnamed: 0,study,subjnum,block,trial,domain,binary-domain,estimation,trueprob,val-estdiff,val-estdiff-valid,...,facemem-split-123,facemem-median-123,choice-split-12,outcome-split-12,choice-split-123,outcome-split-123,choice-median-12,outcome-median-12,choice-median-123,outcome-median-123
0,1,100,1,1,LOSS,-1,50,0.3,0.2000001,0.2000001,...,-1.0,2,1.0,1.0,1.0,1.0,2.899986,2.378043,2.798163,2.296676
1,1,100,1,2,LOSS,-1,30,0.155172,0.1448277,0.1448277,...,1.0,2,,1.0,,1.0,,,2.798163,2.296676
2,1,100,1,3,LOSS,-1,30,0.3,1.038193e-07,1.038193e-07,...,,2,1.0,1.0,1.0,1.0,,,2.798163,2.296676
3,1,100,1,4,LOSS,-1,40,0.5,-0.1,-0.1,...,,2,1.0,1.0,1.0,1.0,,,2.798163,2.296676
4,1,100,1,5,LOSS,-1,60,0.7,-0.1000001,-0.1000001,...,-1.0,2,-1.0,1.0,-1.0,1.0,,,2.798163,2.296676


In [33]:
# Preview the first rows of the subscale scores to sanity-check the load.
scale_data.head()

Unnamed: 0,ssid,ZIP,Relative Sleep,Relative Stress,Financial Difficulty,PAS,NAS,BAS Drive,BAS Fun Seeking,BAS Reward Responsiveness,BIS,Intuitive DMS,Rational DMS,Dependent DMS,Spontaneous DMS,Avoidant DMS,Financial Literacy
0,201,233,-1.0,1.0,1,32.0,17.0,12.0,8.0,20.0,20.0,18.0,22.0,21.0,11.0,16.0,1.0
1,202,34109,0.0,-1.0,0,39.0,10.0,13.0,15.0,20.0,16.0,24.0,25.0,8.0,15.0,9.0,1.0
2,203,33071,-1.0,0.0,0,33.0,13.0,13.0,13.0,20.0,25.0,18.0,20.0,25.0,15.0,23.0,1.0
3,146,32828,0.0,0.0,0,36.0,15.0,12.0,11.0,17.0,21.0,16.0,22.0,17.0,7.0,13.0,0.0
4,205,33027,-1.0,-1.0,1,26.0,25.0,14.0,12.0,19.0,24.0,23.0,23.0,22.0,14.0,14.0,2.0


# Normalize PANAS, BIS/BAS, and DMS subscales (z-scored within study)

In [34]:
# Subscale columns (short names) that get a within-study z-score, grouped by
# questionnaire. Order matters only for the column order of the z_* outputs.
keys_to_normalize = (
    ['PAS', 'NAS']
    + ['BIS', 'BAS_fs', 'BAS_rr', 'BAS_dr']
    + ['DMS_i', 'DMS_r', 'DMS_d', 'DMS_s', 'DMS_a']
)

In [35]:
# Give the questionnaire columns short analysis names.
# The mapping is explicit (raw header -> short name) instead of assigning a
# list to df.columns by position: a positional assignment silently mislabels
# scales if the column order of the CSV ever changes.
scale_column_names = {
    'ssid': 'ssid',
    'ZIP': 'zip',
    'Relative Sleep': 'sleep',
    'Relative Stress': 'stress',
    'Financial Difficulty': 'fin_dif',
    'PAS': 'PAS',
    'NAS': 'NAS',
    'BAS Drive': 'BAS_dr',
    'BAS Fun Seeking': 'BAS_fs',
    'BAS Reward Responsiveness': 'BAS_rr',
    'BIS': 'BIS',
    'Intuitive DMS': 'DMS_i',
    'Rational DMS': 'DMS_r',
    'Dependent DMS': 'DMS_d',
    'Spontaneous DMS': 'DMS_s',
    'Avoidant DMS': 'DMS_a',
    'Financial Literacy': 'fin_lit',
}

# Fail loudly if the input no longer has the headers this notebook expects.
missing_columns = [name for name in scale_column_names
                   if name not in scale_data.columns]
if missing_columns:
    raise KeyError('scale data is missing expected columns: %s' % missing_columns)

# rename() returns a new frame, so scale_data itself is left untouched.
df = scale_data.rename(columns=scale_column_names)

In [36]:
# The study label is the first character of the subject id written as text
# (e.g. ssid 201 -> '2'); it is the grouping key for the z-scores below.
ssid_text = df['ssid'].astype(str)
df['study'] = ssid_text.str.get(0)

In [37]:
def normalize(row, column=None, frame=None):
    """Z-score one value of ``row`` against the other rows of the same study.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``'study'`` and the column being normalized.
    column : str, optional
        Column to normalize. When omitted, the module-level ``key`` is used,
        which keeps ``df.apply(normalize, axis=1)`` inside a
        ``for key in ...`` loop working as before.
    frame : pandas.DataFrame, optional
        Frame that supplies the group statistics. When omitted, the
        module-level ``df`` is used.

    Returns
    -------
    float
        ``(value - study mean) / study standard deviation``; the standard
        deviation is pandas' default sample estimate, so a study with a
        single row yields NaN.
    """
    col = key if column is None else column
    data = df if frame is None else frame

    # Only this row's study, and only the one column, are needed. The earlier
    # version aggregated every column of every study twice per row.
    same_study = data.loc[data['study'] == row['study'], col]
    return (row[col] - same_study.mean()) / same_study.std()

In [38]:
# Add a within-study z-score column ('z_' + name) for every subscale in
# keys_to_normalize and remember the new column names in zkeys.
#
# The statistics are computed once per subscale with groupby/transform, which
# broadcasts each study's mean and (sample) standard deviation back onto its
# rows. This gives the same numbers as applying a per-row function, without
# recomputing the group statistics for every single row.
zkeys = []
for key in keys_to_normalize:
    zkey = 'z_' + key
    zkeys.append(zkey)
    by_study = df.groupby('study')[key]
    df[zkey] = (df[key] - by_study.transform('mean')) / by_study.transform('std')

In [39]:
# Preview the frame with the new z_* columns appended.
df.head()

Unnamed: 0,ssid,zip,sleep,stress,fin_dif,PAS,NAS,BAS_dr,BAS_fs,BAS_rr,...,z_NAS,z_BIS,z_BAS_fs,z_BAS_rr,z_BAS_dr,z_DMS_i,z_DMS_r,z_DMS_d,z_DMS_s,z_DMS_a
0,201,233,-1.0,1.0,1,32.0,17.0,12.0,8.0,20.0,...,0.719199,-0.328343,-1.917315,0.954217,0.208641,-0.033542,0.434326,0.537998,-0.586269,0.494748
1,202,34109,0.0,-1.0,0,39.0,10.0,13.0,15.0,20.0,...,-0.830939,-1.429557,1.24916,0.954217,0.654561,1.653886,1.494209,-2.719881,0.396859,-0.839148
2,203,33071,-1.0,0.0,0,33.0,13.0,13.0,13.0,20.0,...,-0.166594,1.048173,0.344453,0.954217,0.654561,-0.033542,-0.272264,1.540423,0.396859,1.828644
3,146,32828,0.0,0.0,0,36.0,15.0,12.0,11.0,17.0,...,0.778937,0.103462,-0.924831,-0.601536,-0.004402,-0.521983,0.606033,-0.49336,-1.632707,-0.128144
4,205,33027,-1.0,-1.0,1,26.0,25.0,14.0,12.0,19.0,...,2.490786,0.77287,-0.107901,0.316121,1.100481,1.372648,0.78762,0.788605,0.151077,0.113635


In [40]:
# Keep just the z-scored subscales plus the subject id; ssid is the column
# the behavioral summary is merged on further down.
columns = list(zkeys)
columns.append('ssid')
norms = df.loc[:, columns]
norms.head()

Unnamed: 0,z_PAS,z_NAS,z_BIS,z_BAS_fs,z_BAS_rr,z_BAS_dr,z_DMS_i,z_DMS_r,z_DMS_d,z_DMS_s,z_DMS_a,ssid
0,-0.060217,0.719199,-0.328343,-1.917315,0.954217,0.208641,-0.033542,0.434326,0.537998,-0.586269,0.494748,201
1,0.87745,-0.830939,-1.429557,1.24916,0.954217,0.654561,1.653886,1.494209,-2.719881,0.396859,-0.839148,202
2,0.073735,-0.166594,1.048173,0.344453,0.954217,0.654561,-0.033542,-0.272264,1.540423,0.396859,1.828644,203
3,0.64158,0.778937,0.103462,-0.924831,-0.601536,-0.004402,-0.521983,0.606033,-0.49336,-1.632707,-0.128144,146
4,-0.863932,2.490786,0.77287,-0.107901,0.316121,1.100481,1.372648,0.78762,0.788605,0.151077,0.113635,205


# 3 SD trial exclusions (drop trials more than 3 standard deviations from the subject's own mean)

In [55]:
# List the available behavioral columns before picking the ones used below.
behav_data.columns

Index([u'study', u'subjnum', u'block', u'trial', u'domain', u'binary-domain',
       u'estimation', u'trueprob', u'val-estdiff', u'val-estdiff-valid',
       u'val-estdiff-valid-mc-12', u'val-estdiff-valid-mc-123', u'abs-estdiff',
       u'abs-estdiff-valid', u'abs-estdiffvalid-mc-12',
       u'abs-estdiff-valid-mc-123', u'choicert', u'choicert-3sd-12',
       u'choicert-3sd-123', u'choicert-mc-12', u'choicert-mc-123',
       u'esttaskrt', u'esttaskrt-3sd-12', u'esttaskrt-3sd-123', u'outcomert',
       u'outcomert-3sd-12', u'outcomert-3sd-123', u'outcomert-mc-12',
       u'outcomert-mc-123', u'stockchosen', u'waschoiceoptimal',
       u'optimalchoicewas', u'stockvalue', u'abs-stockvalue',
       u'bin-abs-stockvalue', u'bin-abs-stockvalue-mc-12',
       u'bin-abs-stockvalue-mc-123', u'b4choiceprob', u'stockpic', u'bondpic',
       u'facepic', u'bondmem', u'stockmem', u'facemem',
       u'facemem-clean-reverse', u'facemem-mc-123', u'facemem-split-123',
       u'facemem-median-123', u'ch

In [59]:
# Trial-level working frame: only the columns needed for the exclusion step
# and the subject summaries. The hyphenated error column is renamed to a
# snake_case name.
# NOTE: this rebinds `df`, which until now held the questionnaire data.
trial_columns = [
    'subjnum', 'block', 'trial', 'domain',
    'estimation', 'trueprob', 'val-estdiff-valid',
]
df = behav_data.loc[:, trial_columns].rename(
    columns={'val-estdiff-valid': 'val_estdiff_valid'}
)

In [60]:
# Per-subject exclusion bound: three sample standard deviations of the
# subject's own val_estdiff_valid values.
# The column is selected *before* aggregating so that only this one column is
# reduced; aggregating the whole frame first also tries to reduce text
# columns such as 'domain', which newer pandas versions refuse to do.
subj_3sd = df.groupby('subjnum')['val_estdiff_valid'].std() * 3
subj_3sd.head()

subjnum
100    0.548469
101    0.262039
102    0.358415
103    0.620042
104    0.809025
Name: val_estdiff_valid, dtype: float64

In [61]:
# Per-subject mean of val_estdiff_valid (computed over all trials, i.e.
# before any 3 SD exclusion is applied).
# The column is selected *before* aggregating so that only this one column is
# reduced; aggregating the whole frame first also tries to average text
# columns such as 'domain', which newer pandas versions refuse to do.
subj_means = df.groupby('subjnum')['val_estdiff_valid'].mean()
subj_means.head()

subjnum
100    0.046724
101   -0.014427
102    0.014467
103    0.036173
104    0.070082
Name: val_estdiff_valid, dtype: float64

In [62]:
def exclude_3sd(row):
    """Return the row's val_estdiff_valid, or NaN if it is an outlier.

    A value is kept only when its absolute distance from the subject's mean
    (``subj_means``) is strictly smaller than the subject's bound
    (``subj_3sd``); both lookups are keyed by ``row['subjnum']``. Values at or
    beyond the bound, and values that are already NaN, come back as NaN.
    """
    subject = row['subjnum']
    value = row['val_estdiff_valid']
    center = subj_means[subject]
    limit = subj_3sd[subject]
    deviation = abs(value - center)
    return value if deviation < limit else np.nan

In [63]:
# Run the per-trial exclusion: valError_3sd repeats val_estdiff_valid for
# trials inside the subject's bound and holds NaN for the excluded ones.
trimmed_errors = df.apply(exclude_3sd, axis=1)
df['valError_3sd'] = trimmed_errors
df.head()

Unnamed: 0,subjnum,block,trial,domain,estimation,trueprob,val_estdiff_valid,valError_3sd
0,100,1,1,LOSS,50,0.3,0.2000001,0.2000001
1,100,1,2,LOSS,30,0.155172,0.1448277,0.1448277
2,100,1,3,LOSS,30,0.3,1.038193e-07,1.038193e-07
3,100,1,4,LOSS,40,0.5,-0.1,-0.1
4,100,1,5,LOSS,60,0.7,-0.1000001,-0.1000001


# gender-judgment trial exclusions (TODO: no exclusion code in this section yet)

# subject-level means

In [64]:
# Average every column within each (subject, domain) pair, then turn the
# group keys back into ordinary columns.
# NOTE(review): the rendered result has no 'estimation' column, so this seems
# to rely on pandas silently dropping columns it cannot average; newer pandas
# versions raise instead - confirm against the installed version.
grouping_keys = ['subjnum', 'domain']
per_domain = df.groupby(grouping_keys).mean()
domain_means = per_domain.reset_index()
domain_means.head()

Unnamed: 0,subjnum,domain,block,trial,trueprob,val_estdiff_valid,valError_3sd
0,100,GAIN,6.666667,37.5,0.333548,0.102563,0.102563
1,100,LOSS,6.333333,35.5,0.473004,-0.009115,-0.009115
2,101,GAIN,7.5,42.5,0.552187,0.003091,0.003091
3,101,LOSS,5.5,30.5,0.5,-0.031944,-0.031944
4,102,GAIN,8.0,45.5,0.420043,0.034123,0.034123


In [65]:
# Mean 3 SD-trimmed error per subject in the GAIN domain (keeps the row index
# of domain_means).
is_gain = domain_means['domain'] == 'GAIN'
gain_ave_val_error = domain_means.loc[is_gain, 'valError_3sd']

In [66]:
# Mean 3 SD-trimmed error per subject in the LOSS domain (keeps the row index
# of domain_means).
is_loss = domain_means['domain'] == 'LOSS'
loss_ave_val_error = domain_means.loc[is_loss, 'valError_3sd']

# output

In [69]:
def _index_by_subject(domain_values):
    """Re-key a column slice of domain_means by subject number.

    The GAIN/LOSS slices carry the row positions of domain_means as their
    index; look up the matching 'subjnum' so they can be aligned by subject.
    """
    subjects = domain_means.loc[domain_values.index, 'subjnum']
    return pd.Series(domain_values.values, index=subjects.values)


# Assemble one row per subject. The pieces are aligned on the subject id
# rather than glued together as positional lists: a subject with no GAIN (or
# no LOSS) trials would otherwise either break the construction (unequal
# lengths) or silently shift values onto the wrong subject. Missing pieces
# now simply show up as NaN for that subject.
#
# NOTE(review): 'valError' comes from subj_means, i.e. the mean of
# val_estdiff_valid over *all* trials, whereas the gain/loss columns average
# the 3 SD-trimmed valError_3sd values - confirm this difference is intended.
output = pd.DataFrame({
    'valError': subj_means,
    'gainValError': _index_by_subject(gain_ave_val_error),
    'lossValError': _index_by_subject(loss_ave_val_error),
})
output.index.name = 'ssid'
output = output.reset_index()

# Absolute gap between the gain-domain and loss-domain mean errors.
output['valWedge'] = abs(output['gainValError'] - output['lossValError'])

In [70]:
# Attach the z-scored questionnaire measures. With no key given, pandas joins
# on the columns the two frames share and keeps only the matching rows, so
# subjects without questionnaire data drop out here.
output = pd.merge(output, norms)
output.head()

Unnamed: 0,gainValError,lossValError,ssid,valError,valWedge,z_PAS,z_NAS,z_BIS,z_BAS_fs,z_BAS_rr,z_BAS_dr,z_DMS_i,z_DMS_r,z_DMS_d,z_DMS_s,z_DMS_a
0,0.102563,-0.009115,100,0.046724,0.111678,1.197616,-0.617778,1.375766,0.987888,1.074172,-0.004402,1.346013,0.907394,1.091196,1.334255,-1.20788
1,0.003091,-0.031944,101,-0.014427,0.035036,0.502571,-0.966957,-0.914381,0.987888,0.515603,0.796675,0.545443,-1.503493,-0.229267,1.562483,1.167539
2,0.034123,-0.010058,102,0.014467,0.044181,-1.026528,-0.966957,0.612383,0.987888,-0.042967,-0.805478,-1.322553,0.003312,-0.229267,0.421344,-0.991933
3,0.097953,-0.025607,103,0.036173,0.12356,0.780589,0.08058,0.612383,0.987888,1.074172,1.197213,1.612869,1.208755,0.56301,0.649572,0.735644
4,0.133293,0.006871,104,0.070082,0.126422,-0.192474,0.778937,0.103462,1.466068,0.515603,0.396137,0.278587,-1.503493,-0.49336,1.334255,-0.344092


In [72]:
# Write the subject-level table: id and behavioral summaries first, then the
# z-scored subscales, without the row index.
columns = ['ssid', 'valError', 'gainValError', 'lossValError', 'valWedge']
columns.extend(zkeys)
fname = os.path.join(output_dir, 'subject-level.csv')
output[columns].to_csv(fname, index=False)