# Realign & Homogenize ED-1, ED-2, ED-3

`2.realign_homogenize_all`

Realign and merge converted data from ED-3 into the concatenated data from ED-1 and ED-2.

Differentiate, import, and reassociate memory data into the main-task trialwise dataset.

In [1]:
import os
import pandas as pd
from datetime import datetime

from _utils.clean import smooth_columns, eye_cleanup, label_study
from _utils.clean import clean_selection, clean_stockpic, clean_bondpic, clean_stockchosen

In [2]:
from _utils.clean import clean_paths

In [3]:
date = datetime.today().strftime('%y%m%d')

In [4]:
derivs_dir = os.path.join('..','derivatives')
allsub_dir = os.path.join(derivs_dir,'00.allsub')

In [5]:
eye_fpath = os.path.join(allsub_dir,'econdec-3_task-all_beh_' + date + '.csv')
eye_frame = pd.read_csv(eye_fpath)
main_eye_frame = smooth_columns(eye_frame[eye_frame['Phase'] == 'Main Task'])
frac_eye_frame = smooth_columns(eye_frame[eye_frame['Phase'] == 'Fract'])
face_eye_frame = smooth_columns(eye_frame[eye_frame['Phase'] == 'Face'])

In [6]:
main_fpath = os.path.join(allsub_dir,'econdec-12_task-main_beh_' + date + '.csv')
frac_fpath = os.path.join(allsub_dir,'econdec-12_task-frac_beh_' + date + '.csv')
face_fpath = os.path.join(allsub_dir,'econdec-12_task-face_beh_' + date + '.csv')

In [7]:
main_frame = smooth_columns(pd.read_csv(main_fpath))
frac_frame = smooth_columns(pd.read_csv(frac_fpath))
face_frame = smooth_columns(pd.read_csv(face_fpath))

# Exclude bad subjects

In [8]:
from config import exclusions

#### Study-one

In [9]:
main_frame = main_frame[~main_frame['subjnum'].isin(exclusions)]

In [10]:
len(main_frame.subjnum.unique())

189

#### Study-eye

In [11]:
main_eye_frame = main_eye_frame[~main_eye_frame['originalparticipant'].isin(exclusions)]

In [12]:
len(main_eye_frame.originalparticipant.unique())

69

#### Output

In [13]:
exclusions_dir = os.path.join(derivs_dir,'01.exclusions')
if not os.path.isdir(exclusions_dir): os.mkdir(exclusions_dir)

In [14]:
main_frame.to_csv(os.path.join(exclusions_dir,'econdec-12_task-main_beh_'+date+'.csv'))
main_eye_frame.to_csv(os.path.join(exclusions_dir,'econdec-3_task-all_beh_'+date+'.csv'))

# Main task

## ED-1 & ED-2

In [15]:
main_frame.head()

Unnamed: 0,subjnum,agegroup,experimentername,runnum,date,time,trialnum,trialnumbydomdist,domain,magnitude,...,probrt,confidence,confidencest,confidencert,stocknumber,bondnumber,genderjudgment,totalpayout,trueprobgood,estwithinrange?
0,100,1,kf,1,10_12,11:31:01.963000,1,1,LOSS,low,...,6.077591,8,2141471.0,3.022637,16,9,1,-6,0.3,0
1,100,1,kf,1,10_12,11:31:01.963000,2,2,LOSS,low,...,7.294263,8,2141525.0,3.695852,16,9,1,-12,0.155172,0
2,100,1,kf,1,10_12,11:31:01.963000,3,3,LOSS,low,...,7.635041,8,2141546.0,3.121775,16,9,1,-18,0.3,1
3,100,1,kf,1,10_12,11:31:01.963000,4,4,LOSS,low,...,10.879553,7,2141574.0,3.406241,16,9,1,-24,0.5,0
4,100,1,kf,1,10_12,11:31:01.963000,5,5,LOSS,low,...,16.525458,8,2141602.0,4.553061,16,9,1,-26,0.7,0


## Eye study

In [16]:
main_eye_frame = eye_cleanup(main_eye_frame)

In [17]:
main_eye_frame.head()

Unnamed: 0,agegroup,accuracy,bankaccount,bypassed,confidencevalue,date,emotionresponse,estimationvalue,experimenter,facert,...,originaltrialnumber,originaltrialorder,practice,stockfractallocation,stockfractallocationtype,stockimagename,stocktext,stocktextlocation,stockvalue,trueprobability
9,1.0,1.0,-6.0,0.0,8.0,11041300,58.0,70.0,mm,2811.0,...,1.0,1.0,3,"(565, 540)",L,fractal12b.jpg,-$2 or -$10,"(640, 510)",-2.0,0.7
10,1.0,1.0,-12.0,0.0,8.0,11041300,58.0,50.0,mm,3289.0,...,1.0,2.0,3,"(1355, 540)",R,fractal12b.jpg,-$2 or -$10,"(1280, 510)",-10.0,0.5
11,1.0,1.0,-18.0,0.0,7.0,11041300,58.0,30.0,mm,3242.0,...,1.0,3.0,3,"(1355, 540)",R,fractal12b.jpg,-$2 or -$10,"(1280, 510)",-10.0,0.3
12,1.0,1.0,-20.0,0.0,6.0,11041300,58.0,50.0,mm,3904.0,...,1.0,4.0,3,"(1355, 540)",R,fractal12b.jpg,-$2 or -$10,"(1280, 510)",-2.0,0.5
13,1.0,1.0,-30.0,0.0,7.0,11041300,58.0,30.0,mm,1997.0,...,1.0,5.0,3,"(1355, 540)",R,fractal12b.jpg,-$2 or -$10,"(1280, 510)",-10.0,0.3


## Main task columns

### Prime study

In [18]:
main_frame.columns

Index(['subjnum', 'agegroup', 'experimentername', 'runnum', 'date', 'time',
       'trialnum', 'trialnumbydomdist', 'domain', 'magnitude', 'cueonleft',
       'cueonright', 'stockpic', 'bondpic', 'optionchosen', 'fractalchosen',
       'fracst', 'fracrt', 'stockvalue', 'face', 'facest', 'facert',
       'probgood', 'probst', 'probrt', 'confidence', 'confidencest',
       'confidencert', 'stocknumber', 'bondnumber', 'genderjudgment',
       'totalpayout', 'trueprobgood', 'estwithinrange?'],
      dtype='object')

In [19]:
new_main_columns = {'face':'facepic','runnum':'block','trialnum':'trial',
               'fracrt':'choicert','fracst':'choicest',
               'facert':'outcomert','facest':'outcomest',
               'probrt':'esttaskrt','probst':'esttaskst',
               'optionchosen':'stockchosen',
               'probgood':'estimation','trueprobgood':'trueprob'}

In [20]:
main_frame = main_frame.rename(columns=new_main_columns)

In [21]:
new_main_frame = main_frame.drop(['agegroup','experimentername','date','time','trialnumbydomdist',
                                  'choicest','outcomest','esttaskst',
                                  'confidencest','stocknumber','bondnumber','genderjudgment',
                                  'totalpayout','fractalchosen','estwithinrange?','confidencert'], axis=1)

In [22]:
new_main_frame['study'] = new_main_frame.apply(label_study, axis=1)
new_main_frame.columns

Index(['subjnum', 'block', 'trial', 'domain', 'magnitude', 'cueonleft',
       'cueonright', 'stockpic', 'bondpic', 'stockchosen', 'choicert',
       'stockvalue', 'facepic', 'outcomert', 'estimation', 'esttaskrt',
       'confidence', 'trueprob', 'study'],
      dtype='object')

### Eye study

In [23]:
main_eye_frame.columns

Index(['agegroup', 'accuracy', 'bankaccount', 'bypassed', 'confidencevalue',
       'date', 'emotionresponse', 'estimationvalue', 'experimenter', 'facert',
       'facekeypressed', 'paymentaccuracy', 'phase', 'rt', 'selection',
       'showinstruction', 'bondfractallocation', 'bondfractallocationtype',
       'bondimagename', 'bondtext', 'bondtextlocation', 'bondvalue',
       'bubblefile', 'correctfractal', 'correctfractallocation', 'domain',
       'facedomain', 'facefile', 'faceimage', 'facestockvalue', 'fracdomain',
       'fracmagnitude', 'gender', 'incorectfractal',
       'incorrectfractallocation', 'magnitude', 'oldfaceequalstrue',
       'originalblock', 'originalparticipant', 'originalparticipantnumber',
       'originalsubjectnumber', 'originaltrailnumber', 'originaltrialnumber',
       'originaltrialorder', 'practice', 'stockfractallocation',
       'stockfractallocationtype', 'stockimagename', 'stocktext',
       'stocktextlocation', 'stockvalue', 'trueprobability'],
     

In [24]:
new_eye_columns = {
    'originalparticipant':'subjnum',
    'experimenter':'experimentername',
    'stockimagename':'stockpic',
    'bondimagename':'bondpic',
    'originalblock':'block',
    'originaltrialorder':'trial',
    #'stockfractallocationtype':'cueonleft',
    #'bondfractallocationtype':'cueonright',
    'faceimage':'facepic',
    #'selection':'stockchosen',
    'rt':'choicert',
    'estimationvalue':'estimation',
    'trueprobability':'trueprob',
    'accuracy':'genderjudgment',
    'confidencevalue':'confidence',
    'facert':'outcomert'
    }

In [25]:
main_eye_frame = main_eye_frame.rename(columns=new_eye_columns)

In [26]:
new_main_eye_frame = main_eye_frame.drop([
    'practice','bankaccount','bubblefile','bondvalue','stocktext','bondtext',
    'stocktextlocation','bondtextlocation','emotionresponse','bypassed','agegroup','experimentername',
    'date','correctfractallocation','incorrectfractallocation','paymentaccuracy','phase',
    'stockfractallocation','bondfractallocation','showinstruction','gender',
    'correctfractal','incorectfractal','oldfaceequalstrue','facefile','facekeypressed',
    'originalsubjectnumber','originalparticipantnumber','originaltrialnumber','originaltrailnumber',
    'fracdomain','facedomain','fracmagnitude','facestockvalue',
    'genderjudgment'], axis=1)

In [27]:
new_main_eye_frame['study'] = new_main_eye_frame.apply(label_study, axis=1)

In [28]:
for col in ('choicert','outcomert'):
    new_main_eye_frame[col] = new_main_eye_frame[col].astype(float) *.001

### Unified columns

#### final cleaning to put values in the same units, etc...

In [29]:
unified_main_frame = pd.concat([new_main_frame,new_main_eye_frame])

In [30]:
unified_main_frame['stockchosen'] = unified_main_frame.apply(clean_stockchosen, axis=1)
unified_main_frame['bondpic'] = unified_main_frame.apply(clean_bondpic, axis=1)
unified_main_frame['stockpic'] = unified_main_frame.apply(clean_stockpic, axis=1)
len(unified_main_frame)

18570

# Fractal Memory

## Prime study

In [31]:
frac_frame['oldfractal'] = frac_frame.apply(clean_paths, axis=1)

In [32]:
frac_lil_frame = frac_frame[['subjectid','oldfractal','judgment']].sort_values(['subjectid','oldfractal'])

In [33]:
frac_lil_bond_frame = frac_lil_frame.rename(columns={
    'subjectid':'subjnum','oldfractal':'bondpic','judgment':'bondmem'
})

In [34]:
frac_lil_stock_frame = frac_lil_frame.rename(columns={
    'subjectid':'subjnum','oldfractal':'stockpic','judgment':'stockmem'
})

# Eye study

In [35]:
frac_eye_lil_frame = frac_eye_frame[['originalparticipant','correctfractal','selection','correctfractallocation']]
frac_eye_lil_frame['selection'] = frac_eye_lil_frame.apply(clean_selection, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [36]:
frac_eye_lil_bond_frame = frac_eye_lil_frame.rename(columns={
    'originalparticipant':'subjnum',
    'correctfractal':'bondpic',
    'selection':'bondmem'
}).drop(columns='correctfractallocation')

frac_eye_lil_stock_frame = frac_eye_lil_frame.rename(columns={
    'originalparticipant':'subjnum',
    'correctfractal':'stockpic',
    'selection':'stockmem'
}).drop(columns='correctfractallocation')

In [37]:
frac_lil_bond_frame = pd.concat([frac_lil_bond_frame,frac_eye_lil_bond_frame])
frac_lil_stock_frame = pd.concat([frac_lil_stock_frame,frac_eye_lil_stock_frame])

# Face Memory

## Prime study

In [38]:
face_lil_frame=face_frame[['subjectid','face','subjresp']]
face_lil_frame=face_lil_frame.rename(columns={
    'subjectid':'subjnum','face':'facepic','subjresp':'facemem'
})

## Eye study

In [40]:
face_eye_lil_frame=face_eye_frame[
    ['originalparticipant','facefile','selection']
].rename(columns={
    'originalparticipant':'subjnum',
    'facefile':'facepic',
    'selection':'facemem'
})

In [41]:
face_lil_frame=pd.concat([face_lil_frame,face_eye_lil_frame])

#### output

In [42]:
unified_main_frame = unified_main_frame.merge(frac_lil_bond_frame,how='left')
unified_main_frame = unified_main_frame.merge(frac_lil_stock_frame,how='left')
unified_main_frame = unified_main_frame.merge(face_lil_frame,how='left')
# unified_main_frame[['subjnum','stockpic','bondpic','stockmem','bondmem']]

In [43]:
trials=[]
for s in range(len(unified_main_frame.subjnum.unique())):
    for t in range(1,73):
        trials.append(t)

In [44]:
blocks=[]
for s in range(len(unified_main_frame.subjnum.unique())):
    for b in range(1,13):
        for x in range(6):
            blocks.append(b)

# Check size
Final merged DataFrame compared to expected number of blocks & trials:

In [65]:
print(len(blocks))
print(len(trials))
print(len(unified_main_frame))

18576
18576
18570


In [66]:
assert len(blocks) == len(trials)
assert len(trials) == len(unified_main_frame)

AssertionError: 

6 ED3 subjects are missing a trial so the trial and block numbers won't match up if they are added here.

Should move this step to early cleaning immediately after extraction.

In [67]:
counts = unified_main_frame.groupby('subjnum').count()['block']
counts[counts < 72]

subjnum
339.0    71
340.0    71
346.0    71
347.0    71
363.0    71
372.0    71
Name: block, dtype: int64

In [49]:
unified_main_frame['trial'] = pd.Series(trials)
unified_main_frame['block'] = pd.Series(blocks)

# Output

ONly when all data is fully aligned and homogenized.

**ALL** cleaning steps should be done before this point.

In [70]:
homog_dir=os.path.join(derivs_dir,'02.homogenized')
if not os.path.isdir(homog_dir):
    os.mkdir(homog_dir)

In [71]:
fpath = os.path.join(homog_dir,'econdec-full_task-main_beh_' +date+ '.csv')    
unified_main_frame.to_csv(fpath, index=False)

In [72]:
len(unified_main_frame.subjnum.unique())

258

For reference:

```
final_columns=['study','subjnum','trial','block','domain','dom',
               'estimation','trueprob','estdiff','valestdiff','valestdiffvalid',
               'choicert','choicerta3sd','choicerti3sd','choicemed12v3','choicemed123'
               'esttaskrt','esttaskrta3sd','esttaskrti3sd',
               'outcomert','outcomerta3sd','outcomerti3sd','outcomemed12','outcomemed123'
               'stockchosen','waschoiceoptimal','optimalchoiceshouldhavebeen',
               'magnitude','stockvalue','absstockval','b4choiceprobability',
               'stockpic','bondpic','facepic','stockmemresp','bondmemresp',
               'studymedchoice','studysplitchoice','studymedoutcome','studysplitoutcome',
               'primemedchoice','primesplitchoice','primemedoutcome','primesplitoutcome']
               ```