In [30]:
from os import walk, listdir, path
import re 
import pandas as pd
import numpy as np
# Root directory that is scanned for per-subject ID directories.
hcpdir='/ncf/hcp/data/HCD-tfMRI-MultiRunFix'
print('Getting ID dirs')
# Keep only entries whose name starts with HCD<7 digits>_V1_MR.
# The matcher is reached through the `re` module: this notebook imports `re`
# but never a bare `match`, so calling `match(...)` fails on a fresh kernel.
id_dirs=[adir for adir in listdir(hcpdir) if re.match(r"HCD[0-9]{7}_V1_MR", adir)]

def check_dtseries(id_dir, basedir):
    """Count cleaned dtseries files for each CARIT/GUESSING task run of one subject.

    Parameters
    ----------
    id_dir : str
        Name of the subject directory located inside ``basedir``.
    basedir : str
        Directory that contains ``id_dir``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Subject``, ``task``, ``dtseries`` and ``N``. There is one row
        per task directory found under ``<basedir>/<id_dir>/MNINonLinear/Results``;
        ``dtseries`` holds the list of matching file names and ``N`` its length.
        When that Results directory does not exist, a single row is returned
        with ``task`` and ``dtseries`` set to None and ``N`` set to NaN.
    """
    print('Checking for ' + id_dir)
    # Dots are escaped so they only match a literal '.' in the file name.
    pattern = re.compile(r'tfMRI_.*_Atlas_hp0_clean\.dtseries\.nii')
    task_pattern = re.compile(r"tfMRI_(CARIT|GUESSING)_(AP|PA)")
    # Build the path from the `basedir` argument rather than a notebook global,
    # so the function works for whichever root the caller passes in.
    full_id_dir = path.join(basedir, id_dir, 'MNINonLinear', 'Results')

    if not path.isdir(full_id_dir):
        print("Warning, directory does not exist: " + full_id_dir)
        return pd.DataFrame(
            {'Subject' : id_dir, 'task' : None, 'dtseries' : None, 'N' : np.nan},
            index=[0],
        )

    task_dirs = [task for task in listdir(full_id_dir) if task_pattern.match(task)]
    # One list of matching file names per task directory, in task_dirs order.
    dtseries_fns = [
        [nii for nii in listdir(path.join(full_id_dir, task_dir)) if pattern.match(nii)]
        for task_dir in task_dirs
    ]
    dtseries_len = [len(fns) for fns in dtseries_fns]
    # NOTE(review): if no task directory matches, all three lists are empty and
    # the subject contributes no task rows -- confirm that this is intended.
    return pd.DataFrame(
        {'Subject' : id_dir, 'task' : task_dirs, 'dtseries' : dtseries_fns, 'N' : dtseries_len}
    )
# Run the per-subject check for every ID directory, stack the resulting
# frames, and write out the rows whose dtseries file count is zero.
print('Checking dtseries for each CARIT and GUESSING task in each ID dir')
dtseries_df_list = [
    check_dtseries(id_dir=subject_dir, basedir=hcpdir)
    for subject_dir in id_dirs
]

dtseries_df = pd.concat(dtseries_df_list)
print('Exporting list of missing DTSeries...')
dtseries_df.loc[dtseries_df['N'].eq(0)].to_csv(
    'missing_dtseries.txt',
    index=False,
    header=True,
    sep=' ',
)


Exporting list of missing DTSeries...


In [54]:
# Subjects with at least one task run that has no dtseries file, reduced to
# the part of the directory name before the first underscore-delimited suffix.
# check_dtseries stores the directory name in the 'Subject' column, so that is
# the column selected here. regex=True is passed explicitly because newer
# pandas versions treat the pattern as a literal string by default.
missing_subs = (
    dtseries_df.loc[dtseries_df['N'] == 0, 'Subject']
    .drop_duplicates()
    .str.replace('_.*_.*$', '', regex=True)
)
missing_subs.to_csv('missing_dtseries_subs.txt', sep = ' ', header = True, index = False)

In [55]:
# Display the de-duplicated subject IDs that had at least one task with N == 0.
missing_subs

2    HCD1042223
2    HCD0574756
0    HCD2217742
2    HCD0336841
2    HCD0921347
0    HCD1703140
0    HCD2669575
1    HCD1527045
0    HCD0225529
2    HCD0679770
0    HCD0363743
2    HCD1945164
2    HCD1857066
3    HCD2679073
0    HCD0295954
0    HCD0714443
0    HCD0719352
0    HCD0949874
1    HCD0957368
0    HCD0980262
0    HCD1022722
0    HCD1395458
0    HCD1444142
0    HCD1546857
0    HCD1658565
0    HCD1734858
0    HCD1798985
1    HCD1959680
0    HCD2322335
2    HCD2332136
0    HCD2600034
0    HCD2617657
0    HCD2635659
0    HCD2662763
0    HCD2748672
1    HCD2995083
Name: id_dir, dtype: object