Functions

In [None]:
import glob
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from pathlib import Path
import itertools

def search(base_dir, wildcard):
    """Return every path below ``base_dir`` that matches a glob pattern.

    Parameters
    ----------
    base_dir : str or os.PathLike
        Directory the pattern is joined onto.
    wildcard : str
        Glob pattern, relative to ``base_dir``.

    Returns
    -------
    list of str
        The matching paths, sorted so that repeated calls return the same
        order.

    Raises
    ------
    FileNotFoundError
        If the pattern matches nothing.
    """
    search_path = Path(base_dir) / wildcard
    # glob.glob makes no promise about ordering; sort for reproducibility.
    files = sorted(glob.glob(str(search_path)))

    if not files:
        raise FileNotFoundError(f"No files were found in: {search_path}")

    return files

def search_denoise_dir(
    experiment_id, 
    mri_id, 
    smooth_id, 
    denoise_id, 
    sub_id, 
    ses_id, 
    task_id, 
    run_id,
    base_dir="/scratch",
):
    """Locate the windowed and denoised dtseries of a single run.

    For each of the descriptors ``"windowed"`` and ``"denoised"`` a glob
    pattern is built from the identifiers and resolved with ``search``
    relative to ``base_dir``.

    Parameters
    ----------
    experiment_id, mri_id, smooth_id, denoise_id, sub_id, ses_id, task_id, run_id
        Identifiers interpolated into the directory and file pattern.
    base_dir : str or os.PathLike, optional
        Root directory the pattern is resolved against. Defaults to
        ``"/scratch"``.

    Returns
    -------
    dict
        Maps ``"windowed"`` and ``"denoised"`` to the single matching path.

    Raises
    ------
    FileNotFoundError
        Propagated from ``search`` when a pattern matches nothing.
    AssertionError
        If a pattern matches more than one file.
    """
    dtseries = {}
    for descriptor in ["windowed", "denoised"]:
        wildcard = f"{experiment_id}/{mri_id}/derivatives/run_level_s{smooth_id}/{denoise_id}/sub-{sub_id}/ses-{ses_id}/task-{task_id}/run-{run_id}/GLM/*desc-{descriptor}*bold.dtseries.nii"
        _dtseries = search(base_dir, wildcard)
        # Exactly one file is expected; name the offenders if that is violated.
        assert len(_dtseries) == 1, (
            f"Expected exactly one '{descriptor}' dtseries for pattern "
            f"{wildcard!r}, found {len(_dtseries)}: {_dtseries}"
        )
        dtseries[descriptor] = _dtseries[0]

    return dtseries

def rename_dtseries_to_metric(dtseries_name, metric_name):
    """Derive the metric file path that belongs to a dtseries path.

    Every occurrence of ``"bold."`` in the *file name* is replaced by
    ``"metric-<metric_name>."``. Directory components are left untouched, so
    a parent folder that happens to contain ``"bold."`` does not redirect the
    output somewhere else.

    Parameters
    ----------
    dtseries_name : str or os.PathLike
        Path of the source dtseries file.
    metric_name : str
        Label inserted after ``"metric-"``.

    Returns
    -------
    pathlib.Path
        Path of the metric file. If the file name does not contain
        ``"bold."`` the returned path equals the input path.
    """
    # Path() accepts both strings and path objects; calling .replace() on a
    # Path directly would invoke the rename-on-disk method instead.
    source = Path(dtseries_name)
    metric_file_name = source.name.replace('bold.', f"metric-{metric_name}.")

    return source.parent / metric_file_name

def save_single_metric_as_dtseries(metric_data, base_dtseries, metric_name = 'r2'):
    """Store a metric array next to the dtseries it was computed from.

    The file at ``base_dtseries`` is loaded with nibabel and its header is
    reused for a new ``Cifti2Image`` that holds ``metric_data`` with one extra
    leading axis. The output path comes from ``rename_dtseries_to_metric``.

    Parameters
    ----------
    metric_data : array-like
        Values to store. Presumably 1-D with one entry per column of the
        base series -- TODO confirm against callers.
    base_dtseries : str
        Path of the dtseries that supplies the header and the output name.
    metric_name : str, optional
        Label used in the output file name. Defaults to ``'r2'``.
    """
    template = nib.load(base_dtseries)

    # Give the metric a leading axis of length one before wrapping it.
    single_frame = np.expand_dims(metric_data, axis=0)
    metric_img = nib.Cifti2Image(single_frame, header=template.header)

    # The reused header still carries the series length of the base file.
    metric_img.header.matrix[0].number_of_series_points = 1

    nib.save(metric_img, rename_dtseries_to_metric(base_dtseries, metric_name))


def create_directory(directory_path):
    """Create a directory, including missing parents, if it is not there yet.

    Parameters
    ----------
    directory_path : str or os.PathLike
        Directory to create.

    Raises
    ------
    FileExistsError
        If ``directory_path`` already exists but is not a directory.
    """
    # exist_ok avoids the gap between a separate exists() check and mkdir(),
    # and still raises when a regular file is in the way.
    Path(directory_path).mkdir(parents=True, exist_ok=True)

Compute run average

In [None]:
import nibabel as nib
import numpy as np

import subprocess

smooth_ids = [0, 4]
dm_ids = [
    '00_experiment-min+motion24+wmcsf_mean+scrub',
    '01_experiment-min+motion24+wmcsf_compcor+scrub', 
]
descs = ['denoised','windowed']

# Identifiers that stay fixed for every combination below.
experiment_id = "1_attention"
mri_id = "7T"
sub_id = "000"
ses_id = "20230623d"
task_id = "wbpilot"

for smooth_id, dm_id, desc in itertools.product(smooth_ids, dm_ids, descs):

    print(smooth_id, dm_id, desc)

    # Task-level directory, relative to /scratch, shared by inputs and output
    task_dir = f"{experiment_id}/{mri_id}/derivatives/run_level_s{smooth_id}/{dm_id}/sub-{sub_id}/ses-{ses_id}/task-{task_id}"

    # Get the dtseries of every run-0? directory. search() raises
    # FileNotFoundError when nothing matches, so shell error text can never
    # end up in the wb_command argument list.
    dtseries = sorted(
        search("/scratch", f"{task_dir}/run-0?/GLM/*desc-{desc}*bold.dtseries.nii")
    )

    run_avg_dir = Path(f"/scratch/{task_dir}/run-avg/GLM")
    create_directory(run_avg_dir)

    # Average all runs
    bold_str_base = f"sub-{sub_id}_ses-{ses_id}_task-{task_id}_run-avg_desc-{desc}"
    out_dtseries = f"{run_avg_dir}/{bold_str_base}_bold.dtseries.nii"
    average_cmd = ["wb_command", "-cifti-average", out_dtseries]
    for run_dtseries in dtseries:
        average_cmd += ["-cifti", run_dtseries]
    # check=True stops the notebook if wb_command exits with an error
    # instead of silently moving on to the next combination.
    subprocess.run(average_cmd, check=True)


Fit frequencies onto collapsed whole-brain data and compute R² model fit

In [None]:
METRIC_NAME = 'r2'
TR = 1.64  # sampling interval between timepoints (presumably seconds -- confirm)
search_frequency = .2  # frequency of the fitted sinusoid (presumably Hz -- confirm)
smooth_ids = [0, 4]
dm_ids = [
    '00_experiment-min+motion24+wmcsf_mean+scrub',
    '01_experiment-min+motion24+wmcsf_compcor+scrub', 
]
run_ids = ['02', '03', '04', '05', 'avg']


def _sinusoid_fit_r2(ts_data, tr, frequency):
    """Return the R2 of a sine + cosine + intercept fit for every column.

    Parameters
    ----------
    ts_data : array-like, shape (n_tps, n_vertices)
        One time series per column.
    tr : float
        Spacing between consecutive timepoints.
    frequency : float
        Frequency of the sinusoidal regressors, in cycles per unit of ``tr``.

    Returns
    -------
    numpy.ndarray, shape (n_vertices,)
        ``1 - SS_residual / SS_total`` per column. Columns with zero
        variance yield ``nan``.
    """
    ts_data = np.asarray(ts_data, dtype=float)
    n_tps = ts_data.shape[0]

    # The design matrix depends only on the sampling grid, so it is built
    # once and shared by all columns.
    t = np.linspace(0, tr*n_tps, n_tps+1)[:-1] # Non-phased timepoints
    t = np.fmod(t, 1/frequency) # Phased timepoints
    X = np.column_stack((
        np.ones(n_tps),
        np.sin(2*np.pi*t*frequency),
        np.cos(2*np.pi*t*frequency),
    ))

    # A Gaussian GLM with identity link is ordinary least squares, so one
    # lstsq call solves the fit for every column simultaneously.
    beta, *_ = np.linalg.lstsq(X, ts_data, rcond=None)

    ss_residual = np.sum((ts_data - X @ beta) ** 2, axis=0)  # Residual sum of squares
    ss_total = np.sum((ts_data - ts_data.mean(axis=0)) ** 2, axis=0)  # Total sum of squares
    with np.errstate(divide="ignore", invalid="ignore"):
        return 1 - (ss_residual / ss_total)  # R-squared


for smooth_id, dm_id, run_id in itertools.product(smooth_ids, dm_ids, run_ids):

    # Get dtseries
    inputs = {
        "experiment_id": "1_attention",
        "mri_id": "7T",
        "smooth_id": smooth_id,
        "denoise_id": dm_id,
        "sub_id": "000",
        "ses_id": "20230623d", 
        "task_id": "wbpilot",
        "run_id": run_id,
    }
    dtseries = search_denoise_dir(**inputs)

    # Run on windowed and denoised data
    for descriptor in ["windowed", "denoised"]:
        _dtseries = dtseries[descriptor]
        metric_out = rename_dtseries_to_metric(_dtseries, METRIC_NAME)
        # Existing outputs act as a cache: never recompute or overwrite them.
        if metric_out.exists():
            print(f"{metric_out.stem} already generated.\nSkipping.")
            continue
        print(f"Generating metric: {metric_out.stem}")

        # Load dtseries
        img = nib.load(_dtseries)
        ts_data = img.get_fdata()

        # Fit every vertex at once
        r2_data = _sinusoid_fit_r2(ts_data, TR, search_frequency)

        # Save next to the source dtseries
        save_single_metric_as_dtseries(
            r2_data, 
            _dtseries, 
            metric_name = METRIC_NAME
        )