### Running run-level frequency-based modelling on ComputeCanada
##### Inputs
 - `scripts_dir`: location of outputted sbatch scripts. 
 - `experiment_ids`: *experiment_id* directory found under
     - **/project/def-mmur/gngo4/data/fastfmri**
 - `mri_ids`: *mri_id* directory found under,
     - **/project/def-mmur/gngo4/data/fastfmri/{experiment_id}**
 - `oscprep_dir`: minimal preprocessed directory (using *https://github.com/gnngo4/oscprep*) found under,
     -  **/project/def-mmur/gngo4/data/fastfmri/{experiment_id}/{mri_id}/bids/derivatives**
 - `smooth_mm`: surface smoothing parameter (*mm*); only takes effect when `fla.py` is run with `--image-type CIFTI`
 - `relevant_task_ids`: tuple containing all frequency-based task paradigms of interest
 - `experiment_search_frequencies`: search frequencies (*Hz*) associated to each `task_id` and `mri_id`
 - `stim_start_time`: stimulus start time (*s*)
 - `stim_rampup_time`: ramp-up time (*s*) added to `stim_start_time` to set the start of the truncation window of the BOLD data
 - `experiment_stim_end`: stimulus stop time (*s*) associated to each `task_id` and `mri_id`
 - `CONTAINER`: Set the container path used to run the sbatch script
 - `image_type`: Set processing of either CIFTI or NIFTI
##### Outputs
 - Denoising outputs found under,
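     - **/scratch/fastfmri/experiment-{experiment_id}_mri-{mri_id}_smooth-{smooth_mm}_truncate-{time_window[0]}-{time_window[1]}_desc-denoised_bold** (path as seen inside the container, where `/scratch` is bound to `/scratch/gngo4`)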
     - `time_window` = (`stim_start_time`+`stim_rampup_time`, **stim_end_time**)
         - **stim_end_time**: taken from `experiment_stim_end`
##### sbatch example settings
sbatch --time=00:10:00 --cpus-per-task=1 --mem-per-cpu=4GB --account=def-mmur {script.sh}

In [None]:
from pathlib import Path
import itertools
import glob
import os
import nibabel as nib

from collections import defaultdict, Counter

from ComputeCanada.frequency_tagging.study_info import (
    frequency_tagging_settings,
    attention_settings,
    setting_exceptions,
)
# When not None, this value replaces the `smooth_mm` read from SETTINGS.
SMOOTH_OVERRIDE = 0
# Study configuration driving this notebook run.
SETTINGS = attention_settings

# NOTE: this unpacking is positional, so it relies on the insertion order of
# the SETTINGS dict matching the order of the names below exactly.
(
    experiment_ids,
    mri_ids,
    oscprep_dir,
    smooth_mm,
    relevant_task_ids,
    experiment_search_frequencies,
    TR,
    stim_start_time,
    stim_rampup_time,
    experiment_stim_end,
    CONTAINER,
    image_type,
) = SETTINGS.values()
if SMOOTH_OVERRIDE is not None:
    smooth_mm = SMOOTH_OVERRIDE

# Fail early, with a readable message, on an unsupported image type.
if image_type not in ('CIFTI', 'NIFTI'):
    raise ValueError(f"`image_type` must be 'CIFTI' or 'NIFTI', got {image_type!r}")

# Output sbatch scripts here
scripts_dir = Path("/data/scripts/03a_run_level")
# `exist_ok=True` avoids the check-then-create race of a separate `exists()` test.
scripts_dir.mkdir(parents=True, exist_ok=True)

In [None]:
"""
Functions
"""

def search(base_dir, wildcard, error=True):
    """Glob for `wildcard` inside `base_dir` and return the matching paths.

    Args:
        base_dir: directory (str or Path) the pattern is joined onto.
        wildcard: glob pattern, relative to `base_dir`.
        error: when True, an empty result raises instead of being returned.

    Returns:
        List of matching paths as strings, in the order `glob.glob` yields
        them (not sorted); an empty list if nothing matched and `error` is
        False.

    Raises:
        FileNotFoundError: if nothing matched and `error` is True.
    """
    pattern = Path(base_dir) / wildcard
    matches = glob.glob(str(pattern))

    if matches:
        return matches
    if not error:
        return []
    raise FileNotFoundError(f"No files were found in: {pattern}")

# Filename endings of the oscprep outputs that must all be present for a run
# to be considered minimally preprocessed.
_OSCPREP_OUTPUT_SUFFIXES = (
    "space-T1w_boldref.nii.gz",
    "space-T1w_desc-boldref_brainmask.nii.gz",
    "space-T1w_desc-preproc_bold.nii.gz",
    "desc-preproc_bold.dtseries.nii",
    "desc-preproc_bold.json",
    "desc-confounds_timeseries.tsv",
    "desc-confounds_timeseries.json",
)


def _bids_entity(filename, entity):
    """Return the text following `{entity}-` in `filename`, up to the next underscore.

    Raises:
        ValueError: if `filename` does not contain `{entity}-`.
    """
    _, marker, tail = filename.partition(f"{entity}-")
    if not marker:
        raise ValueError(f"No `{entity}-` entity found in: {filename}")
    return tail.split('_')[0]


# Iterate over `experiment_ids` and `mri_ids`
for experiment_id, mri_id in itertools.product(experiment_ids, mri_ids):
    # Set base directory
    base_bids_dir = f'/data/{experiment_id}/{mri_id}/bids'
    # Set minimal preprocessed directory
    oscprep_deriv_dir = f"{base_bids_dir}/derivatives/{oscprep_dir}"
    if not Path(oscprep_deriv_dir).exists():
        raise FileNotFoundError(
            f"oscprep derivatives directory not found: {oscprep_deriv_dir}"
        )
    # Find all subjects
    sub_ids = sorted(Path(i).stem for i in search(base_bids_dir, 'sub-*'))

    # Iterate over subjects
    for sub_id in sub_ids:
        # change settings based on exceptions
        settings = setting_exceptions(experiment_id, mri_id, sub_id, SETTINGS.copy())
        # The first three values are unpacked into throwaway names on purpose:
        # rebinding `experiment_ids`, `mri_ids` or `oscprep_dir` here would leak
        # one subject's exceptions into the next experiment/MRI pair and into
        # any later re-run of this cell.
        (
            _experiment_ids,
            _mri_ids,
            _oscprep_dir,
            smooth_mm,
            relevant_task_ids,
            experiment_search_frequencies,
            TR,
            stim_start_time,
            stim_rampup_time,
            experiment_stim_end,
            CONTAINER,
            image_type,
        ) = settings.values()
        if SMOOTH_OVERRIDE is not None:
            smooth_mm = SMOOTH_OVERRIDE
        # Set subject directory
        sub_dir = f'{base_bids_dir}/{sub_id}'
        # Find all sessions
        ses_ids = sorted(Path(i).stem for i in search(sub_dir, 'ses-*'))

        # Iterate over sessions
        for ses_id in ses_ids:

            # Set subject and session bold directory
            ses_func_dir = f'{sub_dir}/{ses_id}/func'
            # Find all bold runs (file names, including the `.nii.gz` extension)
            funcs = sorted(
                Path(i).name for i in search(ses_func_dir, '*part-mag_bold.nii.gz')
            )

            # Read the number of volumes of every non-wholebrain run exactly once.
            # `runs`: List[(func, task_id, n_vols)]
            # `vols_per_task_list`: Dict[task_id, List[n_vols]]
            runs = []
            vols_per_task_list = defaultdict(list)
            for func in funcs:
                task_id = _bids_entity(func, 'task')
                # Skip if task_id == 'wholebrain'
                if task_id == 'wholebrain':
                    continue
                n_vols = nib.load(Path(ses_func_dir) / func).shape[-1]
                runs.append((func, task_id, n_vols))
                vols_per_task_list[task_id].append(n_vols)
            # `vols_per_task`: Dict[task_id, n_vols], the most common volume
            # count observed across the runs of each task
            vols_per_task = {
                task: Counter(counts).most_common(1)[0][0]
                for task, counts in vols_per_task_list.items()
            }

            # Iterate over all (non-wholebrain) bold runs
            for func, task_id, n_vols in runs:
                """
                Skipping heuristics (cheapest checks first)
                """
                # Skip if the run's volume count differs from the task's usual count
                if n_vols != vols_per_task[task_id]:
                    continue
                # Run only if task_id is found in `relevant_task_ids`
                if not task_id.startswith(relevant_task_ids):
                    continue

                run_id = _bids_entity(func, 'run')

                # Skip if minimal preprocessing (via oscprep) did not run
                preproc_dir = f"{oscprep_deriv_dir}/bold_preproc/{sub_id}/{ses_id}/func"
                run_wildcard = f"{sub_id}_{ses_id}_task-{task_id}*run-{run_id}*"
                preproc_flag = True
                for suffix in _OSCPREP_OUTPUT_SUFFIXES:
                    wildcard = f"{run_wildcard}{suffix}"
                    _path = search(preproc_dir, wildcard, error=False)
                    # Each expected output must match at most one file
                    assert len(_path) in [0, 1], f"{wildcard}\n{_path}"
                    if not _path:
                        preproc_flag = False
                if not preproc_flag:
                    continue

                # Set key to parse input metadata
                k = f"{task_id.split('Q')[0]}_{mri_id}"
                assert k in experiment_search_frequencies, f"{k} is not a key in `experiment_search_frequencies`"
                assert k in experiment_stim_end, f"{k} is not a key in `experiment_stim_end`"
                # Parse experiment relevant info
                search_frequencies = experiment_search_frequencies[k]
                stim_end_time = experiment_stim_end[k]

                # Set truncation time window in seconds
                time_window = (stim_start_time + stim_rampup_time, stim_end_time)
                # TODO: add assertion statement here to ensure time_window is bounded by the bold data
                # Set output directory
                out_dir = f"/scratch/fastfmri/experiment-{experiment_id}_mri-{mri_id}_smooth-{smooth_mm}_truncate-{time_window[0]}-{time_window[1]}_desc-denoised_bold"
                # Skip runs whose denoising outputs already exist.
                # NOTE(review): presumably a finished run leaves exactly two
                # entries under GLM/ - confirm against fla.py.
                n_out = len(search(out_dir, f"*_experiment-*/{sub_id}/{ses_id}/task-{task_id}/run-{run_id}/GLM/*", error=False))
                if n_out == 2:
                    continue

                # Command-line friendly pieces of the script
                sub_label = sub_id.split('-')[-1]
                ses_label = ses_id.split('-')[-1]
                frequency_args = ' '.join(str(i) for i in search_frequencies)
                time_window_args = ' '.join(str(i) for i in time_window)

                # Verbose, print some stuff
                print(
                    experiment_id,
                    mri_id,
                    sub_label,
                    ses_label,
                    task_id,
                    run_id,
                    search_frequencies,
                    time_window,
                )
                # Print sbatch scripts to `scripts_dir`
                txt = f"""#!/bin/bash
module load apptainer/1.2.4

singularity run \\
--bind /project/def-mmur/gngo4/data/fastfmri:/data \\
--bind /scratch/gngo4:/scratch \\
--bind /project/def-mmur/gngo4/projects/fastfmri_toolbox:/opt/app \\
{CONTAINER} \\
python3 /opt/app/scripts/fla.py \\
{experiment_id} \\
{mri_id} \\
{oscprep_deriv_dir} \\
{out_dir} \\
{sub_label} \\
{ses_label} \\
{task_id} \\
{run_id} \\
{frequency_args} \\
{time_window_args} \\
--smooth-mm {smooth_mm} \\
--image-type {image_type} \\
--denoise-only
"""
                cmd_path = f"{scripts_dir}/experiment-{experiment_id}_mri-{mri_id}_smooth-{smooth_mm}_truncate-{time_window[0]}-{time_window[1]}.{sub_id}_{ses_id}_task-{task_id}_run-{run_id}_fla.sh"
                with open(cmd_path, 'w') as f:
                    f.write(txt)