### Configuration

In [1]:
from pathlib import Path

import pandas as pd
from datetime import datetime

In [2]:
from _utils import extract

In [3]:
# Run-date stamp in YYMMDD form; it is appended to every output CSV filename in this notebook.
date = format(datetime.today(), '%y%m%d')

In [4]:
# Debug switch: when True, the EconDec-3 loading loop prints each results-file path as it is read.
verbose = False

# ED-1 & ED-2 Merge
`1.extract_econdec-12_merge`

Combine the task-specific datasets from EconDec-1 and EconDec-2 and prepare them for homogenization and merging with the EconDec-3 dataset.

In [5]:
# Per-dataset inputs are read from ../sourcedata; combined tables are written to
# ../derivatives/00.allsub.
source_dir = Path('..') / 'sourcedata'
output_dir = Path('..') / 'derivatives' / '00.allsub'
# parents=True also creates a missing 'derivatives' folder (a plain mkdir would raise
# FileNotFoundError); exist_ok=True makes the cell safe to re-run.
output_dir.mkdir(parents=True, exist_ok=True)

In [6]:
# Path.glob yields entries in arbitrary order, but the cells below address the datasets by
# position (datasets[0], datasets[1], datasets[2]), so sort for a stable order and keep
# only directories so a stray file matching 'ds*' cannot shift the indices.
datasets = sorted(d for d in source_dir.glob('ds*') if d.is_dir())
datasets

[WindowsPath('../sourcedata/ds1'),
 WindowsPath('../sourcedata/ds2'),
 WindowsPath('../sourcedata/ds3')]

## Extract and concatenate Task-wise data from each dataset

#### Dataset 1:

In [7]:
# Extract the first dataset once per task, in the order main, frac, face.
# NOTE(review): extract.concat is a project helper; presumably it returns one DataFrame per call.
ds1_main, ds1_frac, ds1_face = (
    extract.concat(datasets[0], task) for task in ('main', 'frac', 'face')
)

[Errno 2] No such file or directory: '..\\sourcedata\\ds1\\sub-165\\sub-165_task-main_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds1\\sub-183\\sub-183_task-main_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds1\\sub-144\\sub-144_task-frac_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds1\\sub-165\\sub-165_task-frac_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds1\\sub-183\\sub-183_task-frac_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds1\\sub-144\\sub-144_task-face_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds1\\sub-165\\sub-165_task-face_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds1\\sub-183\\sub-183_task-face_beh.xlsx'


#### Dataset 2:

In [8]:
# Extract the second dataset once per task, in the order main, frac, face.
# NOTE(review): extract.concat is a project helper; presumably it returns one DataFrame per call.
ds2_main, ds2_frac, ds2_face = (
    extract.concat(datasets[1], task) for task in ('main', 'frac', 'face')
)

[Errno 2] No such file or directory: '..\\sourcedata\\ds2\\sub-217\\sub-217_task-main_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds2\\sub-265\\sub-265_task-main_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds2\\sub-270\\sub-270_task-main_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds2\\sub-278\\sub-278_task-main_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds2\\sub-283\\sub-283_task-main_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds2\\sub-284\\sub-284_task-main_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds2\\sub-201\\sub-201_task-frac_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds2\\sub-2010\\sub-2010_task-frac_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds2\\sub-221\\sub-221_task-frac_beh.xlsx'
[Errno 2] No such file or directory: '..\\sourcedata\\ds2\\sub-253\\sub-253_task-frac_beh.xlsx'
[Errno 2] No such file or directory: '

#### Dataset 3:

In [9]:
# The third dataset is extracted in a single call ('all') and then split into
# per-task tables by the value of its 'Phase' column.
ds3 = extract.concat(datasets[2], 'all')

ds3_main = ds3.loc[ds3['Phase'].eq('Main Task')]
ds3_frac = ds3.loc[ds3['Phase'].eq('Fract')]
ds3_face = ds3.loc[ds3['Phase'].eq('Face')]

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  dataframe = pd.concat(dataframes_list)


## Output concatenated datasets

In [10]:
# Write the three EconDec-1 task tables as date-stamped CSVs, without the row index.
ds1_main.to_csv(output_dir / 'econdec-1_task-main_beh_{}.csv'.format(date), index=False)
ds1_frac.to_csv(output_dir / 'econdec-1_task-frac_beh_{}.csv'.format(date), index=False)
ds1_face.to_csv(output_dir / 'econdec-1_task-face_beh_{}.csv'.format(date), index=False)

In [11]:
# Write the three EconDec-2 task tables as date-stamped CSVs, without the row index.
ds2_main.to_csv(output_dir / 'econdec-2_task-main_beh_{}.csv'.format(date), index=False)
ds2_frac.to_csv(output_dir / 'econdec-2_task-frac_beh_{}.csv'.format(date), index=False)
ds2_face.to_csv(output_dir / 'econdec-2_task-face_beh_{}.csv'.format(date), index=False)

In [12]:
# Write the three EconDec-3 task tables as date-stamped CSVs, without the row index.
ds3_main.to_csv(output_dir / 'econdec-3_task-main_beh_{}.csv'.format(date), index=False)
ds3_frac.to_csv(output_dir / 'econdec-3_task-frac_beh_{}.csv'.format(date), index=False)
ds3_face.to_csv(output_dir / 'econdec-3_task-face_beh_{}.csv'.format(date), index=False)

# EconDec-3 Conversion

Concatenate the converted Eye-study files and prepare them for downstream homogenization with Study-1 and Study-2.

In [51]:
# Re-point the working directories for this section: inputs now come from the ds3 folder,
# outputs go to the 00.allsub derivatives folder.
source_dir = Path('..', 'sourcedata', 'ds3')
output_dir = Path('..', 'derivatives', '00.allsub')

In [5]:
# Read every subject's tab-delimited RESULTS_FILE.txt from the source folder.
# pathlib is used for directory listing and path building because `os` is not imported
# by this notebook's import cell, so os.listdir/os.path.join raise NameError on a fresh kernel.
Frames = []  # one DataFrame per subject, in the same order as `subs`
subs = []    # names of the 'sub-*' entries that were read
# Sorted iteration keeps the subject order (and so the row order of the concatenated
# output) reproducible regardless of how the filesystem lists the folder.
for sub_dir in sorted(source_dir.iterdir()):
    if not sub_dir.name.startswith('sub-'):
        continue
    subs.append(sub_dir.name)
    file_path = sub_dir / 'RESULTS_FILE.txt'
    if verbose: print('Reading: ' + str(file_path))
    Frames.append(pd.read_csv(file_path, delimiter='\t'))

In [6]:
# Report which subject folders were read, as one comma-separated line.
print('[Collected]: ' + ','.join(subs))

[Collected]: sub-300,sub-301,sub-302,sub-303,sub-304,sub-305,sub-306,sub-308,sub-309,sub-310,sub-311,sub-312,sub-313,sub-314,sub-315,sub-316,sub-317,sub-318,sub-319,sub-320,sub-321,sub-322,sub-323,sub-324,sub-325,sub-326,sub-327,sub-328,sub-329,sub-330,sub-331,sub-332,sub-333,sub-334,sub-335,sub-336,sub-337,sub-338,sub-339,sub-340,sub-341,sub-342,sub-343,sub-344,sub-345,sub-346,sub-347,sub-348,sub-349,sub-350,sub-351,sub-352,sub-353,sub-354,sub-355,sub-356,sub-357,sub-358,sub-359,sub-360,sub-361,sub-362,sub-363,sub-364,sub-365,sub-366,sub-367,sub-368,sub-369,sub-370,sub-371,sub-372,sub-373,sub-374


In [7]:
# Stack all per-subject frames and write them as one date-stamped CSV, without the row index.
output_frame = pd.concat(Frames)
# The path is built with pathlib's '/' operator because `os` is not imported by this
# notebook's import cell (os.path.join raises NameError on a fresh kernel). It is converted
# back to str so `output_path` keeps the type it had before.
output_path = str(output_dir / ('econdec-3_task-all_beh_' + date + '.csv'))
output_frame.to_csv(output_path, index=False)