In [None]:
import os
import sys
from os.path import join, pardir
sys.path.append(pardir)
from bids import BIDSLayout
from itertools import product, chain
from nipype.pipeline.engine import Workflow
from ica_wf import make_subject_ica_wf
from tqdm import tqdm
from nipype import config

In [None]:
# Switch nipype's global configuration into debug mode. This cell runs before
# the cells below that build and run the workflows.
# NOTE(review): the exact execution/logging options this toggles are defined by
# nipype, not in this notebook — check the nipype configuration docs.
config.enable_debug_mode()

In [None]:
# OUTDATED
def get_datapaths(bids_layout, subject, session, run, task, space, outdir):
    """
    Look up the bold and mask files for a single entity combination.

    Input:
        bids_layout: object exposing a pybids-style ``get(**filters)`` method
        subject, session, run, task, space: entity values to filter on;
            a session/run of '0', '00' or '' is treated as "not present"
            and is queried as None
        outdir: root directory below which the output folder name is built
    Output (tuple):
        bold files: list of file names, entries containing "AROMA" removed
        mask files: list of file names, unfiltered
        output directory: string
            <outdir>/sub-<subject>/sub-.._ses-.._task-.._run-.._space-..-melodic,
            or an empty list when no bold file was found. A dropped
            session/run shows up literally as 'None' in that folder name.
    """
    # Placeholder values that stand for "entity not present in the dataset".
    placeholders = ('0', '00', '')
    if run in placeholders:
        run = None
    if session in placeholders:
        session = None

    def _query(suffix):
        # Both look-ups share every filter except the file suffix.
        return bids_layout.get(
            subject=subject,
            run=run,
            session=session,
            task=task,
            space=space,
            extension='nii.gz',
            suffix=suffix,
            return_type='filename',
        )

    # Bold files first; anything produced by AROMA is dropped.
    bold_file = [path for path in _query('bold') if "AROMA" not in path]
    mask_file = _query('mask')

    # Only build an output folder name when there is something to process.
    if bold_file:
        run_label = f'sub-{subject}_ses-{session}_task-{task}_run-{run}_space-{space}-melodic'
        out_dir = join(outdir, f'sub-{subject}', run_label)
    else:
        out_dir = []

    for label, value in (("bold_file", bold_file),
                         ("mask_file", mask_file),
                         ("out_dir", out_dir)):
        print(label, value)
    print("------------------------------")
    return bold_file, mask_file, out_dir

In [None]:
# OUTDATED
def return_datapaths(bids_layout, outdir, subject="all", session="all", run="all",
               task="all", space="all"):
    """
    Collect bold files, mask files and output directories for every requested
    combination of subject, session, run, task and space.

    Input:
        bids_layout: object exposing a pybids-style ``get(**filters)`` method
        outdir: root directory below which the output folders are created
    Optional input (each resolved independently):
        subject, session, run, task, space:
            "all"          -> every id the layout reports for that entity
            a single value -> used as a one-element selection
            a list/tuple   -> used as given
    Output (tuple of three lists):
        bold files: flat list over all combinations
        mask files: flat list over all combinations
        output directories: one per combination for which a bold file was
            found; these directories are created on disk if missing.
        All three lists are empty when there is no combination to look up.
    """
    def _resolve(entity, requested):
        # A bare string must not be handed to product(): it would be iterated
        # character by character ("all" -> 'a', 'l', 'l').
        if isinstance(requested, str):
            if requested == "all":
                return bids_layout.get(return_type='id', target=entity, desc='preproc')
            return [requested]
        try:
            return list(requested)
        except TypeError:
            # Non-iterable scalar (e.g. an int run number).
            return [requested]

    subject = _resolve('subject', subject)
    session = _resolve('session', session)
    run = _resolve('run', run)
    task = _resolve('task', task)
    space = _resolve('space', space)
    print("subject", subject)
    print("session", session)
    print("run", run)
    print("task", task)
    print("space", space)

    # Datasets without sessions/runs yield empty lists; substitute the '0'
    # placeholder (which get_datapaths maps to None) so that the product
    # below is not empty.
    session = session or ['0']
    run = run or ['0']

    # create all parameter combinations and get their paths
    combinations = list(product(subject, session, run, task, space))
    if not combinations:
        # zip(*[]) cannot be unpacked into three names; nothing to do.
        return [], [], []

    boldfiles_nested, maskfiles_nested, outdirs_nested = zip(*[
        get_datapaths(bids_layout, *params, outdir) for params in combinations
    ])
    # get_datapaths returns an empty list instead of a path when a combination
    # has no bold file; such entries are not directories and are dropped.
    outdirs = [d for d in outdirs_nested if d]
    boldfiles = [val for sublist in boldfiles_nested for val in sublist]
    maskfiles = [val for sublist in maskfiles_nested for val in sublist]

    # create output folders
    for d in outdirs:
        os.makedirs(d, exist_ok=True)

    return boldfiles, maskfiles, outdirs

In [None]:
def get_paths(bids_layout, subject, out_dir):
    """
    Return all datapaths for brain mask files, bold files, and output directories as lists.

    Input:
        bids_layout: object exposing a pybids-style ``get(**filters)`` method;
            queried with scope='derivatives'
        subject: subject filter passed through to ``bids_layout.get``
        out_dir: root directory below which the output directory names are built
    Output (tuple of three equally long lists, paired by position):
        bold files: paths, entries containing "AROMA" removed
        mask files: paths, only entries containing "func"
        output directories: <out_dir>/<sub-XX>/<common file stem>_melodic
            (names only; nothing is created on disk here)
    Exits (SystemExit, status 1) after printing an error when the number of
    bold and mask files differs, or when a bold/mask pair does not follow the
        </path/filename>_desc-preproc_bold.nii.gz
        </path/filename>_desc-brain_mask.nii.gz
    naming convention with an identical </path/filename>.
    """
    # File name endings that are stripped to obtain the common </path/filename>.
    bold_suffix = '_desc-preproc_bold.nii.gz'
    mask_suffix = '_desc-brain_mask.nii.gz'

    def _stem(path, suffix):
        # Path without its expected ending, or None if the ending is missing.
        return path[:-len(suffix)] if path.endswith(suffix) else None

    # List with boldfile paths
    boldfile = bids_layout.get(
        scope='derivatives',
        subject=subject,
        extension='nii.gz',
        suffix='bold',
        return_type='filename'
    )
    boldfile = [i for i in boldfile if "AROMA" not in i] # exclude AROMA files
    # List with maskfile paths
    maskfile = bids_layout.get(
        scope='derivatives',
        dirname='func',
        subject=subject,
        extension='nii.gz',
        suffix='mask',
        return_type='filename'
    )
    maskfile = [i for i in maskfile if "func" in i] # keep files in 'func' folder

    # ERROR catching: bold and mask files are paired by position below, so the
    # two lists must have the same length.
    if len(boldfile) != len(maskfile):
        print("[ERROR] The number of bold files does not match the number of brain mask files.")
        sys.exit(1)

    # List with output paths, built while verifying that each pair matches.
    outdirs = []
    for bold_elem, mask_elem in zip(boldfile, maskfile):
        boldpath = _stem(bold_elem, bold_suffix)
        maskpath = _stem(mask_elem, mask_suffix)
        if boldpath is None or boldpath != maskpath:
            print("[ERROR] Bold and brain mask files have a different naming convention.")
            print("</path/filename>_desc-preproc_bold.nii.gz")
            print("</path/filename>_desc-brain_mask.nii.gz")
            print("should have same </path/filename>")
            sys.exit(1)
        run_name = os.path.basename(maskpath)
        subj = run_name.split('_')[0] # first entity of the file name, e.g. 'sub-01'
        outdirs.append(join(out_dir, subj, f'{run_name}_melodic'))

    return boldfile, maskfile, outdirs

In [None]:
def make_dataset_ica(bidsdata_dir, base_dir, subjects='all', tr=1.5, hpf=80., fwhm=4., n_procs=30):
    """
    From a BIDS dataset, search for all bold and mask files and
    then calculate ICs.

    Input:
        bidsdata_dir: root of the BIDS dataset,
            e.g. '/LOCAL/jzerbe/faces_vs_houses/ds002938'
        base_dir: nipype working directory, e.g. '/LOCAL/jzerbe/temp_results'
    Optional input:
        subjects: 'all' (every subject found in the derivatives) or a list of
            subject ids, e.g. ['01', '02', '15']
        tr: value handed to the per-run workflow's ``inputspec.tr`` (default 1.5)
        hpf: value handed to ``inputspec.hpf`` (default 80.; originally noted
            as 120./TR)
        fwhm: value handed to ``inputspec.fwhm`` (default 4.0)
        n_procs: number of processes for the 'MultiProc' plugin (default 30)
    Output:
        None. One output directory per run is created below
        <bidsdata_dir>/derivatives/melodic and handed to the per-run workflow
        as ``inputspec.out_dir``.
    """
    # The 'layout' function can throw error if derivatives are not saved inside
    # the BIDS dataset as a folder called 'derivatives', and also if the
    # json description file does not include the field 'DataDescription'
    print("ICA Melodic pipeline has started!")
    out_dir = join(bidsdata_dir, 'derivatives', 'melodic')
    print("\n creating BIDS layout ...")
    layout = BIDSLayout(bidsdata_dir, derivatives=True)
    print(" DONE \n get subjects ...")
    if isinstance(subjects, str) and subjects == 'all':
        subjects = layout.get_subjects(scope='derivatives', return_type='id')
    print(" DONE \n get datapaths ...")
    boldlist, masklist, outdirlist = get_paths(layout, subjects, out_dir)
    # create output folders
    for d in outdirlist:
        os.makedirs(d, exist_ok=True)
    print(" DONE")

    # Template workflow: the parameters shared by all runs are set once, the
    # per-run inputs are filled in before each clone is taken.
    runwftest = make_subject_ica_wf()
    runwftest.inputs.inputspec.hpf = hpf
    runwftest.inputs.inputspec.tr = tr
    runwftest.inputs.inputspec.fwhm = fwhm
    runwftest.base_dir = base_dir

    runwfs = []
    # i numbers the runs (starting at 1) and is used to rename the workflows
    for i, (boldfile, maskfile, outdir) in enumerate(zip(boldlist, masklist, outdirlist), start=1):
        runwftest.inputs.inputspec.bold_file = boldfile
        runwftest.inputs.inputspec.mask_file = maskfile
        runwftest.inputs.inputspec.out_dir = outdir
        # NOTE(review): presumably keeps the template's name different from
        # the clone's name — confirm against nipype's clone() requirements.
        runwftest.name = f'node_{i}'

        # clone workflow with new name
        runwfs.append(runwftest.clone(f'melodicwf_{i}'))

    dataset_wf = Workflow(name='dataset_wf')
    dataset_wf.base_dir = base_dir
    dataset_wf.add_nodes(runwfs)

    dataset_wf.run('MultiProc', plugin_args={'n_procs': n_procs})

In [None]:
# Define dataset and base path
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
test_bidsdata_dir = '/LOCAL/jzerbe/emotion_category/ds003548' # other dataset used before: '/LOCAL/jzerbe/faces_vs_houses/ds002938'
test_base_dir = '/LOCAL/jzerbe/emotion_category' # old value: temp_results (presumably) — TODO confirm
# Start calculating ICs for all subjects (subjects defaults to 'all')
make_dataset_ica(test_bidsdata_dir, test_base_dir)

print("\nICA Melodic pipeline finished.")