In [1]:
def bids_scan_file_walker(dataset=".", include_types=None, warn_no_files=False):
    '''
    Traverse a BIDS dataset and lazily yield a description of each
    imaging file contained within.

    Arguments:
    dataset -- path to the BIDS dataset folder.

    Keyword Arguments:
    include_types -- a list of the scan types (i.e. subfolder names)
    to include in the results. Can be any combination of "func",
    "anat", "fmap", "dwi". Defaults to all four.
    warn_no_files -- issue a RuntimeWarning whenever a subject or
    session folder holds no imaging files for a requested scan type.

    Yields:
    One dict per file matched by "*.nii*" inside a scan type folder:
    'scanfile' -- path to the file, as returned by glob.
    'sub', 'acq', 'rec', 'run', 'task' -- the complete "key-label"
    token taken from the file name (e.g. "task-rest"), or None when
    the file name carries no such token.
    'ses' -- the "ses-label" token, or "single_session" when the file
    name has no session token.
    'modality' -- the last underscore-separated piece of the file name.

    Subjects, sessions and files are visited in sorted order so the
    output does not depend on filesystem listing order.

    Raises:
    GeneratorExit -- on first iteration, when the dataset folder holds
    no "sub-*" folders (this includes a missing dataset folder).
    '''
    import os
    import os.path as op
    from glob import glob
    from warnings import warn

    # Only these are "key-label" entities; 'scanfile' and 'modality'
    # are filled in separately and must never be matched against
    # file name pieces.
    entity_keys = ('sub', 'ses', 'acq', 'rec', 'run', 'task')
    # Longest suffix first so ".nii.gz" is not mistaken for ".gz" only.
    nifti_suffixes = ('.nii.gz', '.nii')

    def _no_files_warning(folder):
        # Warn only when the caller opted in via warn_no_files.
        if not warn_no_files:
            return
        warn("No files of requested type(s) found in scan folder: %s"
             % folder, RuntimeWarning, stacklevel=1)

    def _walk_dir_for_prefix(target_dir, prefix):
        # Names of the immediate sub-folders of target_dir starting with
        # prefix. os.walk yields nothing for a missing or unreadable
        # folder; the default keeps next() from raising StopIteration,
        # which inside a generator would surface as a RuntimeError.
        _, subdirs, _ = next(os.walk(target_dir), (target_dir, [], []))
        return sorted(x for x in subdirs if x.startswith(prefix))

    def _tokenize_bids_scan_name(scanfile):
        # Split a scan file name into its BIDS tokens.
        scan_basename = op.basename(scanfile)
        for suffix in nifti_suffixes:
            if scan_basename.endswith(suffix):
                scan_basename = scan_basename[:-len(suffix)]
                break
        else:
            # Matched by "*.nii*" but ends in some other extension:
            # drop the last extension and any ".nii" left behind.
            scan_basename = op.splitext(scan_basename)[0].replace(".nii", "")
        file_bits = scan_basename.split('_')

        # BIDS with non ses-* subfolders given default
        # "single_session" ses.
        file_tokens = {'scanfile': scanfile,
                       'sub': None, 'ses': "single_session",
                       'acq': None, 'rec': None,
                       'run': None, 'task': None,
                       'modality': file_bits[-1]}
        for bit in file_bits:
            # Require the exact "key-" form so that a piece such as
            # "recon" is not taken for the "rec" entity.
            key, hyphen, _ = bit.partition('-')
            if hyphen and key in entity_keys:
                file_tokens[key] = bit

        return file_tokens

    #########

    if include_types is None:
        # include all scan types by default
        include_types = ['func', 'anat', 'fmap', 'dwi']

    subjects = _walk_dir_for_prefix(dataset, 'sub-')
    if not subjects:
        # NOTE(review): GeneratorExit is an unusual choice (it derives
        # from BaseException, so "except Exception" will not catch it);
        # kept unchanged so existing callers that catch it still work.
        raise GeneratorExit("No BIDS subjects found to examine.")

    # for each subject folder, look for scans considering explicitly
    # defined sessions or the implicit "single_session" case.
    for subject in subjects:
        subj_dir = op.join(dataset, subject)

        sessions = _walk_dir_for_prefix(subj_dir, 'ses-')

        # The folders to search do not depend on the scan type, so
        # work them out once per subject.
        if sessions:
            subject_sessions = [op.join(subject, x) for x in sessions]
        else:
            subject_sessions = [subject]

        for scan_type in include_types:
            for session in subject_sessions:

                scan_files = sorted(glob(op.join(
                    dataset, session, scan_type,
                    '*.nii*')))

                if not scan_files:
                    _no_files_warning(session)

                for scan_file in scan_files:
                    yield _tokenize_bids_scan_name(scan_file)


In [11]:
# Lazily walk the dataset one folder up, keeping only the
# anatomical and functional scans.
bids_imgs = bids_scan_file_walker(
    dataset="..",
    include_types=["anat", "func"],
)

In [12]:
# sorted() drains the generator and already returns a new list, so no
# append loop is needed; entries are ordered by scan file path.
file_info = sorted(bids_imgs, key=lambda tokens: tokens['scanfile'])

In [14]:
# Display the collected token dictionaries, one per scan file.
file_info

[{'acq': None,
  'modality': 'T1w',
  'rec': None,
  'run': None,
  'scanfile': '../sub-9001/ses-1/anat/sub-9001_ses-1_T1w.nii.gz',
  'ses': 'ses-1',
  'sub': 'sub-9001',
  'task': None},
 {'acq': None,
  'modality': 'T2w',
  'rec': None,
  'run': None,
  'scanfile': '../sub-9001/ses-1/anat/sub-9001_ses-1_T2w.nii.gz',
  'ses': 'ses-1',
  'sub': 'sub-9001',
  'task': None},
 {'acq': None,
  'modality': 'bold',
  'rec': None,
  'run': None,
  'scanfile': '../sub-9001/ses-1/func/sub-9001_ses-1_task-arrows_bold.nii.gz',
  'ses': 'ses-1',
  'sub': 'sub-9001',
  'task': 'task-arrows'},
 {'acq': None,
  'modality': 'bold',
  'rec': None,
  'run': None,
  'scanfile': '../sub-9001/ses-1/func/sub-9001_ses-1_task-faces_bold.nii.gz',
  'ses': 'ses-1',
  'sub': 'sub-9001',
  'task': 'task-faces'},
 {'acq': None,
  'modality': 'bold',
  'rec': None,
  'run': None,
  'scanfile': '../sub-9001/ses-1/func/sub-9001_ses-1_task-hands_bold.nii.gz',
  'ses': 'ses-1',
  'sub': 'sub-9001',
  'task': 'task-hand