In [1]:
def search(base_dir, wildcard, error=True):
    """Glob for `wildcard` inside `base_dir` and return the matching paths.

    Parameters
    ----------
    base_dir : str or path-like
        Directory the pattern is joined onto.
    wildcard : str
        Glob pattern (e.g. ``'sub-*'``) appended to `base_dir`.
    error : bool, default True
        When truthy, an empty result raises instead of being returned.

    Returns
    -------
    list of str
        Matching paths in the order `glob.glob` yields them (not sorted),
        or an empty list when nothing matches and `error` is falsy.

    Raises
    ------
    FileNotFoundError
        If nothing matches and `error` is truthy.
    """
    pattern = Path(base_dir) / wildcard
    matches = glob.glob(str(pattern))

    # Happy path first: at least one hit.
    if matches:
        return matches

    # Nothing matched; the caller chooses between a hard failure and an empty list.
    if not error:
        return []
    raise FileNotFoundError(f"No files were found in: {pattern}")

In [3]:
from pathlib import Path
import itertools
import glob
import os
import nibabel as nib

from collections import defaultdict, Counter

# Audit every functional run of each experiment/scanner pair:
#   1. flag runs whose number of volumes differs from the most common count
#      for that task within the same session, and
#   2. flag runs whose oscprep outputs are incomplete, delete whatever partial
#      outputs exist for them, and print the sbatch command that re-runs them.

# Entry i of `experiment_ids` is paired with entry i of `mri_ids` (see the zip below).
experiment_ids = [
    '1_frequency_tagging',
    '1_frequency_tagging',
    '1_attention',
]
mri_ids = [
    '3T',
    '7T',
    '7T',
]
# Derivatives sub-directory to check; also forwarded to the re-run script.
oscprep_dir = 'oscprep_grayords_fmapless'

# File-name endings that must each match exactly one file for a run to count
# as fully preprocessed.
oscprep_output_suffixes = (
    "space-T1w_boldref.nii.gz",
    "space-T1w_desc-boldref_brainmask.nii.gz",
    "space-T1w_desc-preproc_bold.nii.gz",
    "desc-preproc_bold.dtseries.nii",
    "desc-preproc_bold.json",
    "desc-confounds_timeseries.tsv",
    "desc-confounds_timeseries.json",
)

for experiment_id, mri_id in zip(experiment_ids, mri_ids):
    base_bids_dir = f'/data/{experiment_id}/{mri_id}/bids'
    sub_ids = sorted(Path(i).stem for i in search(base_bids_dir, 'sub-*'))

    # Built from `oscprep_dir` so the directory checked here cannot drift from
    # the one handed to the sbatch command further down.
    oscprep_deriv_dir = f"{base_bids_dir}/derivatives/{oscprep_dir}"
    assert Path(oscprep_deriv_dir).exists(), f"Missing derivatives directory: {oscprep_deriv_dir}"

    for sub_id in sub_ids:
        sub_dir = f'{base_bids_dir}/{sub_id}'
        ses_ids = sorted(Path(i).stem for i in search(sub_dir, 'ses-*'))

        for ses_id in ses_ids:
            ses_func_dir = f'{sub_dir}/{ses_id}/func'
            # Path.stem drops only the trailing '.gz', so each entry still ends
            # in '.nii'; the '.gz' is appended again when the file is opened.
            funcs = sorted(Path(i).stem for i in search(ses_func_dir, '*part-mag_bold.nii.gz'))

            # Pass 1: read each run's volume count once and group counts by task.
            n_vols_by_func = {}
            vols_per_task_list = defaultdict(list)
            for func in funcs:
                task_id = func.split('task-')[1].split('_')[0]
                if task_id == 'wholebrain':
                    continue
                nifti = Path(f"{ses_func_dir}/{func}.gz")
                n_vols = nib.load(nifti).shape[-1]
                n_vols_by_func[func] = n_vols
                vols_per_task_list[task_id].append(n_vols)

            # The most frequent volume count of a task is treated as the expected one.
            vols_per_task = {
                task: Counter(counts).most_common(1)[0][0]
                for task, counts in vols_per_task_list.items()
            }

            # Pass 2: compare each run against the expected count and check its
            # preprocessing outputs.
            preproc_func_dir = f"{oscprep_deriv_dir}/bold_preproc/{sub_id}/{ses_id}/func"
            for func in funcs:
                task_id = func.split('task-')[1].split('_')[0]
                if task_id == 'wholebrain':
                    continue
                run_id = func.split('run-')[1].split('_')[0]
                n_vols = n_vols_by_func[func]  # cached in pass 1; no second nib.load
                inconsistent_vols = n_vols != vols_per_task[task_id]

                # Check if oscprep ran correctly
                run_prefix = f"{sub_id}_{ses_id}_task-{task_id}*run-{run_id}*"
                oscprep_check_wildcards = [run_prefix + suffix for suffix in oscprep_output_suffixes]

                preproc_flag = True
                preproc_paths = []
                for wildcard in oscprep_check_wildcards:
                    _path = search(preproc_func_dir, wildcard, error=False)
                    assert len(_path) in [0,1], f"Ambiguous match for {wildcard} in {preproc_func_dir}: {_path}"
                    if len(_path) == 0:
                        preproc_flag = False
                    else:
                        preproc_paths += _path

                if not preproc_flag or inconsistent_vols:
                    print(
                        '\n',
                        experiment_id,
                        mri_id,
                        sub_id,
                        ses_id,
                        task_id,
                        run_id,
                        f'\n[n_vols = {n_vols}, Expected: {vols_per_task[task_id]}]' if inconsistent_vols else '',
                        '\n[oscprep incomplete]' if not preproc_flag else '',
                    )
                if not preproc_flag:
                    # Destructive: the partial outputs found above are deleted
                    # (presumably so the re-run starts clean -- confirm).
                    for _p in preproc_paths:
                        print(f"Removing {_p}")
                        os.remove(_p)
                    # NOTE(review): the job name uses '_{run_id}' while the log file
                    # name uses '_run-{run_id}'; left as-is, confirm whether intended.
                    sbatch_cmd = f"sbatch -J {experiment_id}.{mri_id}.{sub_id}_{ses_id}_task-{task_id}_{run_id}_grayords -D /project/def-mmur/gngo4/data/fastfmri/scripts/02_minimal_preproc --output=/project/def-mmur/gngo4/data/fastfmri/scripts/02_minimal_preproc/logs/step_2/{experiment_id}.{mri_id}.{sub_id}_{ses_id}_task-{task_id}_run-{run_id}_grayords_slurm-%j.out --time=10:00:00 --cpus-per-task=1 --mem-per-cpu=48G --account=def-mmur ./run_minimal_preproc.grayords.sh {sub_id.split('-')[1]} {ses_id.split('-')[1]} {task_id} {run_id} /project/def-mmur/gngo4/data/fastfmri/{experiment_id}/{mri_id}/bids {oscprep_dir}"
                    print(sbatch_cmd)


 1_frequency_tagging 3T sub-000 ses-03 controlQ1 08 
[n_vols = 123, Expected: 800] 

 1_frequency_tagging 3T sub-005 ses-02 controlQ2 03 
[n_vols = 13, Expected: 800] 
[oscprep incomplete]
sbatch -J 1_frequency_tagging.3T.sub-005_ses-02_task-controlQ2_03_grayords -D /project/def-mmur/gngo4/data/fastfmri/scripts/02_minimal_preproc --output=/project/def-mmur/gngo4/data/fastfmri/scripts/02_minimal_preproc/logs/step_2/1_frequency_tagging.3T.sub-005_ses-02_task-controlQ2_run-03_grayords_slurm-%j.out --time=10:00:00 --cpus-per-task=1 --mem-per-cpu=48G --account=def-mmur ./run_minimal_preproc.grayords.sh 005 02 controlQ2 03 /project/def-mmur/gngo4/data/fastfmri/1_frequency_tagging/3T/bids oscprep_grayords_fmapless

 1_frequency_tagging 3T sub-005 ses-02 entrainQ2 01 
[n_vols = 22, Expected: 800] 

 1_frequency_tagging 3T sub-007 ses-02 entrainQ2 01 
[n_vols = 7, Expected: 800] 
[oscprep incomplete]
sbatch -J 1_frequency_tagging.3T.sub-007_ses-02_task-entrainQ2_01_grayords -D /project/def-mmu