In [1]:
import os
import re
import pandas as pd
import numpy as np


In [2]:
# Root directory whose entries are scanned for session folders.
hcpd_dir = '/ncf/hcp/data/HCD-tfMRI-MultiRunFix'

# Name patterns, each applied with .match() (i.e. anchored at the start of the
# name being tested):
#   PID_re  - session folder names
#   scan_re - scan folder names
#   feat_re - names that end in ".feat"
PID_re = re.compile(r'^HCD\d{7}_V[123]_MR')
scan_re = re.compile(r'tfMRI_(?:CARIT|GUESSING)_(?:AP|PA)')
feat_re = re.compile(r'.*\.feat$')

# Entries of hcpd_dir whose names begin with a session-style identifier.
session_ids = list(filter(PID_re.match, os.listdir(hcpd_dir)))

In [3]:
# Column-oriented accumulator: one independent, initially empty list per column.
all_dirs = {column: [] for column in ('session_id', 'scan', 'feat')}

In [4]:
# Walk <hcpd_dir>/<session>/MNINonLinear/Results/<scan>/<*.feat> and record one
# row per .feat entry found. A session with no matching scan folders, or a scan
# with no .feat entries, still gets a single row with NaN in the missing
# column(s), so gaps stay visible in the resulting table.
#
# Rows are collected locally and all_dirs is rebuilt from them at the end, so
# re-running this cell replaces the previous result instead of appending a
# second copy of every row to lists that were created in an earlier cell.
# Storing each row as one tuple also keeps the three columns the same length.
rows = []
for session_id in session_ids:
    pid_dir = os.path.join(hcpd_dir, session_id, 'MNINonLinear', 'Results')
    tasks = []
    if os.path.isdir(pid_dir):
        tasks = [f for f in os.listdir(pid_dir) if scan_re.match(f)]
    if not tasks:
        # No Results directory, or no scan folder matching scan_re.
        rows.append((session_id, np.nan, np.nan))
        continue
    for task in tasks:
        task_dir = os.path.join(pid_dir, task)
        feats = []
        if os.path.isdir(task_dir):
            feats = [f for f in os.listdir(task_dir) if feat_re.match(f)]
        if not feats:
            # Scan entry exists but holds nothing ending in ".feat".
            rows.append((session_id, task, np.nan))
            continue
        for feat in feats:
            rows.append((session_id, task, feat))

# Column-oriented view of the collected rows, in the layout the next cell
# passes to pd.DataFrame.from_dict.
all_dirs = {
    'session_id': [row[0] for row in rows],
    'scan': [row[1] for row in rows],
    'feat': [row[2] for row in rows],
}

In [37]:
# Build the table of session / scan / .feat entries and derive label columns
# from the scan and .feat names.
targets = pd.DataFrame.from_dict(all_dirs)

# expand=False makes str.extract return a Series for a single capture group,
# so each assignment below sets a column from a Series rather than from a
# one-column DataFrame. Rows whose source value is NaN stay NaN.
targets['task'] = targets['feat'].str.extract(
    r'(CARIT(?:_PREPOT|_PREVCOND)*|GUESSING)', expand=False)
# The "_ColeAnticevic" suffix is optional as a whole; this avoids capturing a
# dangling "clean_" when "clean_" is followed by something else.
targets['type'] = targets['feat'].str.extract(
    r'(clean(?:_ColeAnticevic)?)', expand=False)
targets['direction'] = targets['scan'].str.extract(r'_(PA|AP)', expand=False)

# Show rows without a .feat entry first, then rows that have one.
missing_feat = targets['feat'].isna()
display(targets[missing_feat])
display(targets[~missing_feat])

Unnamed: 0,session_id,scan,feat,task,type,direction
189,HCD0921347_V1_MR,tfMRI_CARIT_AP,,,,AP
499,HCD0679770_V1_MR,tfMRI_CARIT_AP,,,,AP
833,HCD1945164_V1_MR,tfMRI_CARIT_AP,,,,AP
866,HCD0039128_V1_MR,tfMRI_GUESSING_AP,,,,AP
1039,HCD1857066_V1_MR,tfMRI_CARIT_AP,,,,AP
...,...,...,...,...,...,...
11776,HCD2711649_V3_MR,tfMRI_CARIT_AP,,,,AP
12128,HCD2836164_V1_MR,tfMRI_CARIT_AP,,,,AP
12251,HCD2891273_V1_MR,tfMRI_CARIT_PA,,,,PA
12252,HCD2891273_V1_MR,tfMRI_GUESSING_PA,,,,PA


Unnamed: 0,session_id,scan,feat,task,type,direction
0,HCD1389665_V1_MR,tfMRI_GUESSING_PA,tfMRI_GUESSING_hp200_s4_level1_hp0_clean.feat,GUESSING,clean,PA
1,HCD1389665_V1_MR,tfMRI_GUESSING_PA,tfMRI_GUESSING_hp200_s4_level1_hp0_clean_ColeA...,GUESSING,clean_ColeAnticevic,PA
2,HCD1389665_V1_MR,tfMRI_CARIT_PA,tfMRI_CARIT_PREPOT_hp200_s4_level1_hp0_clean.feat,CARIT_PREPOT,clean,PA
3,HCD1389665_V1_MR,tfMRI_CARIT_PA,tfMRI_CARIT_PREPOT_PA_hp200_s4_level1_hp0_clea...,CARIT_PREPOT,clean_ColeAnticevic,PA
4,HCD1389665_V1_MR,tfMRI_GUESSING_AP,tfMRI_GUESSING_hp200_s4_level1_hp0_clean.feat,GUESSING,clean,AP
...,...,...,...,...,...,...
12490,HCD2992784_V1_MR,tfMRI_CARIT_PA,tfMRI_CARIT_PREPOT_PA_hp200_s4_level1_hp0_clea...,CARIT_PREPOT,clean_ColeAnticevic,PA
12491,HCD2992784_V1_MR,tfMRI_GUESSING_AP,tfMRI_GUESSING_hp200_s4_level1_hp0_clean.feat,GUESSING,clean,AP
12492,HCD2992784_V1_MR,tfMRI_GUESSING_AP,tfMRI_GUESSING_hp200_s4_level1_hp0_clean_ColeA...,GUESSING,clean_ColeAnticevic,AP
12493,HCD2992784_V1_MR,tfMRI_GUESSING_PA,tfMRI_GUESSING_hp200_s4_level1_hp0_clean.feat,GUESSING,clean,PA


In [18]:
# Print, one per line: the number of distinct sessions, the number of distinct
# sessions with at least one non-missing feat value, and the difference.
n_sessions = len(targets['session_id'].unique())
has_feat = targets['feat'].notna()
n_sessions_with_feat = len(targets.loc[has_feat, 'session_id'].unique())
for count in (n_sessions, n_sessions_with_feat, n_sessions - n_sessions_with_feat):
    print(count)

1661
1649
12
