In [None]:
import nibabel as nib
import numpy as np

import sys
sys.path.append("/opt/wbplot")

from wbplot import dscalar

from IPython.display import Image
import matplotlib.pyplot as plt

from pathlib import Path
import pandas as pd
from collections import defaultdict
from functools import lru_cache
from scipy.stats import wilcoxon
from IPython.display import clear_output

sys.path.append("ComputeCanada/frequency_tagging")
from dfm import (
    get_frequency_text_codes,
    get_roi_colour_codes,
    change_font,
)
change_font()

Get HCP info
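- `hcp_mapping`: dict mapping each HCP ROI label (e.g. `L_V1_ROI`) to the path of its ROI dscalar file
- `hcp_rois`: list of the unique HCP ROI names with the hemisphere prefix and `_ROI` suffix removed (e.g. `V1`)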

In [None]:

"""Get HCP labels
"""
# Location of the HCP multimodal parcellation dlabel file.
dlabel_dir = Path("/opt/app/notebooks/data/dlabels")
hcp_label = dlabel_dir / "Q1-Q6_RelatedValidation210.CorticalAreas_dil_Final_Final_Areas_Group_Colors.32k_fs_LR.dlabel.nii"

# Capture the text output of `wb_command -file-information` as a list of lines (IPython shell escape).
_HCP_INFO = !wb_command -file-information {hcp_label}
HCP_LABELS = []
HCP_COUNTER = 0
for i in _HCP_INFO:
    # Keep only lines that are exactly 60 characters long and mention an "L_" or "R_" label.
    # NOTE(review): presumably these are the rows of the label table; the fixed width of 60
    # depends on the wb_command output format -- confirm when upgrading Workbench.
    if len(i) == 60 and any(["L_" in i, "R_" in i]):
        # Rebuild the last three "0.xxx" fields of the line as floats and append alpha=1.
        # `hcp_colors` is not read again anywhere in the visible part of the notebook.
        hcp_colors = tuple([float(f"0.{k}") for k in [j.split(' ') [0] for j in i.split('0.')][-3:]] + [1])
        # Extract the ROI name between the hemisphere prefix and "_ROI", then store the full label.
        if ' R_' in i:
            roi = i.split("_ROI")[0].split(' R_')[1]
            HCP_LABELS.append(f"R_{roi}_ROI")
        if ' L_' in i:
            roi = i.split("_ROI")[0].split(' L_')[1]
            HCP_LABELS.append(f"L_{roi}_ROI")
        # Counts the matched lines (one per accepted line, regardless of hemisphere).
        HCP_COUNTER += 1

"""Get HCP label coordinates
"""
dscalar_dir = Path("/opt/app/notebooks/data/dscalars")
tmpdir = Path("/tmp")

# Map every HCP label to a per-ROI dscalar file under `tmpdir`, creating the file on first use.
hcp_mapping = {}
for roi_label in HCP_LABELS:
    out_dscalar = tmpdir / f"{roi_label}.dscalar.nii"
    # Reuse a file left by a previous run instead of calling wb_command again.
    if out_dscalar.exists():
        hcp_mapping[roi_label] = out_dscalar
        continue
    # Extract the single label `roi_label` from the dlabel file into its own ROI file.
    !wb_command -cifti-label-to-roi {hcp_label} {out_dscalar} -name {roi_label}
    assert out_dscalar.exists(), f"{out_dscalar.stem} does not exist."
    hcp_mapping[roi_label] = out_dscalar
# Unique ROI names: the second "_"-separated field of each label (e.g. "V1" from "L_V1_ROI").
# Built through a set, so the order of `hcp_rois` is not guaranteed.
hcp_rois = list(set([k.split('_')[1] for k in hcp_mapping.keys()]))

In [3]:
# Build a dscalar holding, for every vertex, the mean geodesic distance to the V1 ROI vertices
# of the same hemisphere.
surface_dir = Path("/opt/app/notebooks/data/surfaces")
tmpdir = Path("/tmp")
L_mid = surface_dir / "S1200.L.midthickness_MSMAll.32k_fs_LR.surf.gii"
R_mid = surface_dir / "S1200.R.midthickness_MSMAll.32k_fs_LR.surf.gii"
L_geo = tmpdir / "L.dconn.nii"
R_geo = tmpdir / "R.dconn.nii"
# All-to-all geodesic distances per hemisphere. These commands run on every execution of the
# cell (there is no existence check on the output files).
!wb_command -surface-geodesic-distance-all-to-all {L_mid} {L_geo}
!wb_command -surface-geodesic-distance-all-to-all {R_mid} {R_geo}

# `dscalar_dir` is defined in the HCP-label cell above; that cell must have been run first.
template_dscalar = dscalar_dir / "S1200.MyelinMap_BC_MSMAll.32k_fs_LR.dscalar.nii"
L_V1_ROI = tmpdir / "L_V1_ROI.dscalar.nii"
R_V1_ROI = tmpdir / "R_V1_ROI.dscalar.nii"
L_V1_GD = tmpdir / "L_V1_ROI.gd.dscalar.nii"
R_V1_GD = tmpdir / "R_V1_ROI.gd.dscalar.nii"
    
# Re-express the V1 ROI files on the template's dense mapping.
!wb_command -cifti-create-dense-from-template {template_dscalar} {L_V1_GD} -cifti {L_V1_ROI}
!wb_command -cifti-create-dense-from-template {template_dscalar} {R_V1_GD} -cifti {R_V1_ROI}

# Boolean V1 masks: the first 32492 columns are used for the left hemisphere, the rest for the right.
# NOTE(review): this assumes the template's columns are full 32k surfaces (no medial-wall
# removal, no subcortical structures) so that column index == vertex index -- confirm.
L_V1_coords = nib.load(L_V1_GD).get_fdata()[0,:32492]==1
_L_geo = nib.load(L_geo).get_fdata()
# Mean distance from the V1 vertices (rows) to every vertex (columns).
L_geo_arr = _L_geo[L_V1_coords,:].mean(0)
# Free the dense distance matrix before loading the other hemisphere.
del _L_geo
R_V1_coords = nib.load(R_V1_GD).get_fdata()[0,32492:]==1
_R_geo = nib.load(R_geo).get_fdata()
R_geo_arr = _R_geo[R_V1_coords,:].mean(0)
del _R_geo

geo_arr = np.concatenate((L_geo_arr, R_geo_arr))

# Write the distances into a single-row CIFTI that reuses the header of the left V1 file.
# The assignment below requires len(geo_arr) to equal the number of columns of that file.
geodesic_dscalar = tmpdir / "geodesic_V1.dscalar.nii"
img = nib.load(L_V1_GD)
data = np.zeros(img.shape)
data[0,:] = geo_arr
geo_img = nib.Cifti2Image(data, header=img.header)
nib.save(geo_img, geodesic_dscalar)
# Resample onto the mapping of the per-ROI files; input and output are the same path (overwritten).
!wb_command -cifti-create-dense-from-template {L_V1_ROI} {geodesic_dscalar} -cifti {geodesic_dscalar}

Functions

In [4]:
def convert_f_im_with_f1_f2(f_im,f_1,f_2,fo=1.,mask=None,f_1_c=-.1,f_2_c=.82,f1f2_c=.1,f_im_c=.9,mask_c=.4,):
    """Build two colour-coded maps of an intermodulation (IM) ROI in the context of f1 and f2.

    Every input file is loaded with nibabel, passed through
    `convert_to_fractional_overlap` and binarised (``>= fo`` for f1/f2/f_im,
    ``>= 1.`` for the mask).  f1 and f2 are split into three disjoint
    components: f1-only, f2-only and their intersection.

    Args:
        f_im, f_1, f_2: paths of the IM, f1 and f2 activation files.
        fo: fractional-overlap threshold used to binarise f1, f2 and f_im.
        mask: optional path of a mask file; skipped when falsy.
        f_1_c, f_2_c, f1f2_c, f_im_c, mask_c: values paired with each layer
            before the layers are handed to `map_data_to_value`.

    Returns:
        (X, Y), both produced by `map_data_to_value`:
        X - inside the IM ROI, vertices shared with an f1/f2 component take
            that component's value; the remaining IM vertices take `f_im_c`.
        Y - f1/f2 component vertices *outside* the IM ROI take the component
            value; the whole IM ROI takes `f_im_c`.
        When a mask is given, a final `mask_c` layer holds the mask with all
        previously layered vertices subtracted.
    """
    def _load_binary(path, threshold):
        # Load -> fractional overlap -> 0/1 integer array.
        overlap = convert_to_fractional_overlap(nib.load(path).get_fdata())
        return (overlap >= threshold).astype(int)

    def _both(a, b):
        # 1 where both binary arrays are 1, else 0.
        return ((a + b) == 2).astype(int)

    only_f1 = _load_binary(f_1, fo)
    only_f2 = _load_binary(f_2, fo)
    shared = _both(only_f1, only_f2)  # intersection of f1 & f2
    only_f1 -= shared
    only_f2 -= shared
    components = [(only_f1, f_1_c), (only_f2, f_2_c), (shared, f1f2_c)]

    # Pass 1: f1 / f2 / f1&f2 vertices that fall inside the IM ROI.
    im_left = _load_binary(f_im, fo)
    if mask:
        slab = _load_binary(mask, 1.)
    layers = []
    for component, colour in components:
        hit = _both(component, im_left)
        im_left -= hit  # what remains is IM-only
        if mask:
            slab -= hit
        layers.append((hit, colour))
    layers.append((im_left, f_im_c))
    if mask:
        slab -= im_left
        layers.append((slab, mask_c))
    X = map_data_to_value(layers)

    # Pass 2: f1 / f2 / f1&f2 vertices that fall outside the IM ROI.
    im_all = _load_binary(f_im, fo)
    if mask:
        slab = _load_binary(mask, 1.)
    layers = []
    for component, colour in components:
        outside_im = component.copy()
        outside_im -= _both(component, im_all)
        if mask:
            slab -= outside_im
        layers.append((outside_im, colour))
    layers.append((im_all, f_im_c))
    if mask:
        slab -= im_all
        layers.append((slab, mask_c))
    Y = map_data_to_value(layers)

    return X, Y

def binarize_mask(data, f_im_c, mask_c, im_key):
    """Return ``{im_key: array}`` where the array is a recoded copy of `data`.

    In the copy, entries equal to `f_im_c` become 1 and then entries equal to
    `mask_c` become 0 (in that order); all other entries are left untouched.
    `data` itself is not modified.
    """
    recoded = data.copy()
    recoded[recoded == f_im_c] = 1
    # Applied second, so a `mask_c` of 1 would also clear the entries set above.
    recoded[recoded == mask_c] = 0
    return {im_key: recoded}

def append_data(
    df_data,
    hcp_mapping,
    map_data,
    power_im_data,
    pd_im_data, 
    q_id,
    experiment_label, 
    sub_id, 
    roi_fo,
    roi_task_id,
    task_id,
    im_str,
    im_f,
):
    """Append one record per (im_code, HCP ROI) pair that contains at least one ROI vertex.

    For every binary map in `map_data` and every ROI in `hcp_mapping`, the ROI
    mask (read with `read_roi_path`) is multiplied with the map; pairs with no
    overlapping vertex are skipped.  For the others, one value is appended to
    each of these lists in `df_data`:

        roi_task_id, task_id, roi_fo, experiment_id, sub_id, quadrant_id,
        hcp_roi, im_code, vertex_count, vertex_coordinates,
        f_im_BOLD_power, f_im_phase_delay, im_type, im_f

    - hcp_roi: the ROI label with its hemisphere prefix replaced by CONTRA_/IPSI_
      and the last four characters dropped.  For q_id "Q1" labels starting with
      "L_" are CONTRA; for "Q2" labels starting with "R_" are CONTRA.
    - vertex_coordinates: `np.where` result for the overlapping vertices.
    - f_im_BOLD_power: `power_im_data` at the overlapping vertices, or None when
      `roi_task_id` is "control" or `power_im_data` is None.
    - f_im_phase_delay: `pd_im_data` at the overlapping vertices, or None when
      `task_id` differs from `roi_task_id`.

    `df_data` must already map each key to a list (e.g. ``defaultdict(list)``).

    Returns:
        The same `df_data` object, updated in place.

    Raises:
        ValueError: if `q_id` is neither "Q1" nor "Q2" (raised only once an
            (im_code, ROI) pair is actually visited).
    """
    for im_code, f_data in map_data.items():
        for hcp_name, roi_path in hcp_mapping.items():
            # Hemisphere prefix that is contralateral for this quadrant.
            contra = "L_" if q_id == "Q1" else ("R_" if q_id == "Q2" else None)
            if contra is None:
                raise ValueError(f"{q_id} not supported.")

            side = "CONTRA" if hcp_name.startswith(contra) else "IPSI"
            roi_label = f"{side}_{hcp_name[2:-4]}"

            roi_mask = read_roi_path(roi_path)
            assert roi_mask.shape == f_data.shape

            overlap = roi_mask * f_data
            selected = overlap == 1
            vertex_count = overlap.sum()
            if vertex_count == 0:
                continue

            has_power = not (roi_task_id == "control" or power_im_data is None)
            record = {
                "roi_task_id": roi_task_id,
                "task_id": task_id,
                "roi_fo": roi_fo,
                "experiment_id": experiment_label,
                "sub_id": sub_id,
                "quadrant_id": q_id,
                "hcp_roi": roi_label,
                "im_code": im_code,
                "vertex_count": vertex_count,
                "vertex_coordinates": np.where(selected),
                "f_im_BOLD_power": power_im_data[selected] if has_power else None,
                "f_im_phase_delay": None if task_id != roi_task_id else pd_im_data[selected],
                "im_type": im_str,
                "im_f": im_f,
            }
            # Same key order as the column order expected downstream.
            for column, value in record.items():
                df_data[column].append(value)

    return df_data

def calculate_percentile(values, percentile):
    """Return the linearly interpolated percentile of `values`.

    Args:
        values: any iterable of comparable numbers (list, tuple, numpy array,
            generator, ...).  It is copied and sorted; the input is not modified.
        percentile: fraction in [0, 1] (e.g. 0.8 for the 80th percentile).

    Returns:
        The value at fractional rank ``percentile * (N - 1)`` of the sorted
        data, interpolating linearly between the two neighbouring elements.

    Raises:
        ValueError: if `values` is empty (or None), or `percentile` is outside [0, 1].
    """
    # Materialise first: `not values` is ambiguous for numpy arrays and
    # meaningless for generators, whereas an empty list is unambiguous.
    sorted_values = [] if values is None else sorted(values)
    if not sorted_values:
        raise ValueError("The list of values cannot be empty.")
    if not (0 <= percentile <= 1):
        raise ValueError("Percentile must be between 0 and 1.")

    n_values = len(sorted_values)
    rank = percentile * (n_values - 1)

    lower_index = int(rank)
    # Clamp so that percentile == 1 does not index past the end.
    upper_index = min(lower_index + 1, n_values - 1)
    fraction = rank - lower_index

    lower_value = sorted_values[lower_index]
    return lower_value + (sorted_values[upper_index] - lower_value) * fraction


def generate_single_subject_maps(
    label, experiment_id, mri_id, sub_ids, 
    roi_task_ids, im_frequencies, hcp_labels,
    df_data=None,
    proportion_data=None,
    corr_type="uncp", 
    ROI_FO=.8, SUB_THRESHOLD=.5,
    LEFT=590, TOP=80, RIGHT=1140, BOTTOM=460, VERTEX_TO = 59412,
    FORCE_TASK_ID=None,
    mask_c = .41,
    PALETTE="power_surf"
):
    """Render per-subject intermodulation (IM) maps and collect per-ROI vertex data.

    For every (sub_id, roi_task_id) pair and every IM frequency in
    `im_frequencies`, the function:
      1. locates the f1, f2, IM, mask, phase-delay and power files with `find_activations`;
      2. builds an IM-only map (`convert_f_im`) and two maps contextualised by
         f1/f2 (`convert_f_im_with_f1_f2`), each truncated to `VERTEX_TO` entries;
      3. appends per-HCP-ROI label proportions to `proportion_data`;
      4. appends per-HCP-ROI vertex data to `df_data` through `append_data`;
      5. saves the IM-only map as a CIFTI file and renders PNG flatmaps with
         wbplot's `dscalar`.

    Besides its arguments the function reads these names from module scope:
    `f_1_c`, `f_2_c`, `f1f2_c`, `f_im_c` (colour values) and `hcp_mapping`.
    The helpers `set_base_dir`, `find_activations`, `convert_f_im`,
    `read_roi_path`, `load_mean_dtseries` and `get_quadrant_id` are defined
    outside the visible part of the notebook.

    Args:
        label: cohort label; used in output file names, stored as "experiment"
            in `proportion_data` and passed to `append_data` as the experiment label.
        experiment_id, mri_id, corr_type: forwarded to `find_activations`;
            `mri_id` and `corr_type` also appear in output file names.
        sub_ids, roi_task_ids: equal-length sequences, iterated pairwise.
        im_frequencies: dict of order name -> list of (im_str, im_f) tuples.
            Must contain "first_order" with f1 first and f2 second (f2 > f1 is asserted).
        hcp_labels: HCP ROI labels; each is read from "/tmp/<label>.dscalar.nii".
        df_data, proportion_data: accumulators; fresh `defaultdict(list)` when None.
        ROI_FO: fractional-overlap threshold passed to the map-building helpers.
        VERTEX_TO: number of leading entries kept from every map.
        FORCE_TASK_ID: when given, replaces `roi_task_id` for the power-file lookup
            and as the `task_id` argument of `append_data`.
        mask_c: value that marks mask-only vertices in the maps.
        PALETTE: wbplot palette for the three ROI maps.
        SUB_THRESHOLD, LEFT, TOP, RIGHT, BOTTOM: not used by the current body
            (the only `crop_and_save` calls are commented out).

    Returns:
        (df_data, proportion_data)
    """
    
    if df_data is None:
        df_data = defaultdict(list)

    if proportion_data is None:
        proportion_data = defaultdict(list)

    for ix, (sub_id, roi_task_id) in enumerate(zip(sub_ids,roi_task_ids)):

        # Task id used for the power lookup and the `task_id` column; differs from
        # `roi_task_id` only when FORCE_TASK_ID is set.
        if FORCE_TASK_ID is None:
            _roi_task_id = roi_task_id
        else:
            _roi_task_id = FORCE_TASK_ID
        """ 
        im_frequencies = {
            "first_order": [("f1",.125), ...],
            "second_order": [("f2-f1",.075), ...],
            "third_order": [("2f1-f2",.05), ...],
        }
        """
        for im_order, v in im_frequencies.items():
            im_strs = [i[0] for i in v]
            im_fs = [i[1] for i in v]
            for im_str, im_f in zip(im_strs, im_fs):
                png_out = Path(set_base_dir(f"./ComputeCanada/frequency_tagging/figures/im_frequency_mapping")) / f"label-{label}_mri-{mri_id}_sub-{sub_id}_task-{roi_task_id}_f-{im_order}-{im_str}-{im_f}_corr-{corr_type}_fo-{ROI_FO}.png"
                dscalar_out = Path(set_base_dir(f"./ComputeCanada/frequency_tagging/figures/im_frequency_mapping_cifti")) / f"label-{label}_mri-{mri_id}_sub-{sub_id}_task-{roi_task_id}_f-{im_order}-{im_str}-{im_f}_corr-{corr_type}_fo-{ROI_FO}.dtseries.nii"
                # NOTE(review): this check is a no-op -- existing PNGs are always regenerated.
                if png_out.exists():
                    pass

                # f1 and f2 are the first two "first_order" entries (label, frequency).
                f_1_str = im_frequencies["first_order"][0][0]
                f_2_str = im_frequencies["first_order"][1][0]
                f_1 = im_frequencies["first_order"][0][1]
                f_2 = im_frequencies["first_order"][1][1]
                assert f_2 > f_1, f"{f_2} <= {f_1}"
                # From here on f_1 / f_2 are rebound from frequencies to lists of matching files.
                # NOTE(review): the literal .8 is passed regardless of ROI_FO -- confirm this
                # argument of find_activations is meant to stay fixed.
                f_1 = find_activations(experiment_id, mri_id, roi_task_id, f_1, .8, sub_id, match_str="activations.dtseries.nii", corr_type=corr_type)
                f_2 = find_activations(experiment_id, mri_id, roi_task_id, f_2, .8, sub_id, match_str="activations.dtseries.nii", corr_type=corr_type)
                f_im = find_activations(experiment_id, mri_id, roi_task_id, im_f, .8, sub_id, match_str="activations.dtseries.nii", corr_type=corr_type)
                mask = find_activations(experiment_id, mri_id, roi_task_id, im_f, .8, sub_id, match_str="mask.dtseries.nii", corr_type=corr_type)
                pd_im = find_activations(experiment_id, mri_id, roi_task_id, im_f, .8, sub_id, data_split_id = "train", match_str="phasedelay.dtseries.nii", additional_match_strs=[roi_task_id,f"f-{im_f}"], corr_type=corr_type)
                # The power lookup is the only one that uses the (possibly forced) task id.
                power_im = find_activations(experiment_id, mri_id, roi_task_id, im_f, .8, sub_id, data_split_id = "test", match_str="power.dtseries.nii", additional_match_strs=[_roi_task_id,f"f-{im_f}"], corr_type=corr_type)
                # Sanity-check that each lookup returned exactly one file. For the control task of
                # "1_frequency_tagging" only the IM and mask lookups are checked; otherwise every
                # lookup except power is checked (power may legitimately be empty).
                for f_label, f in zip([f_1_str,f_2_str,im_str,"mask",f"pd_{im_str}",f"power_{im_str}"], [f_1,f_2,f_im,mask,pd_im,power_im]):
                    if roi_task_id == "control" and experiment_id == "1_frequency_tagging":
                        if f_label in [im_str,"mask"]:
                            assert len(f) == 1, f"{sub_id}, {f_label} - {f}"
                    else: # AssertionError: 2f2/0.4, 002, power_2f2 - [], 1_frequency_tagging entrain
                        if not f_label.startswith("power"):
                            assert len(f) == 1, f"{im_str}/{im_f}, {sub_id}, {f_label} - {f}, {experiment_id} {roi_task_id}"
                f_1 = f_1[0] # f_1 path
                f_2 = f_2[0] # f_2 path
                f_im = f_im[0] # f_im path
                # Create image for im only
                data = convert_f_im(f_im, fo=ROI_FO, mask=mask[0], f_im_c=f_im_c, mask_c=mask_c)
                data = data[:VERTEX_TO]
                # Create image for im, contextualized by f1 and f2
                data_contextualized_1, data_contextualized_2 = convert_f_im_with_f1_f2(f_im, f_1, f_2, fo=ROI_FO, mask=mask[0], f_1_c=f_1_c, f_2_c=f_2_c, f1f2_c=f1f2_c, f_im_c=f_im_c, mask_c=mask_c)
                data_contextualized_1 = data_contextualized_1[:VERTEX_TO]
                data_contextualized_2 = data_contextualized_2[:VERTEX_TO]

                # Get total vertex count
                c_per_label = [f_1_c,f_2_c,f1f2_c,f_im_c]
                for hcp_label in hcp_labels:
                    hcp_roi_mask = read_roi_path(f"/tmp/{hcp_label}.dscalar.nii")
                    vertex_in_hcp_roi = hcp_roi_mask.sum()
                    # Restrict the contextualised map to this ROI, then count vertices per colour value.
                    _data_contextualized_1 = data_contextualized_1 * hcp_roi_mask
                    total_vertex = 0
                    for _f_c in c_per_label:
                        total_vertex += (_data_contextualized_1 == _f_c).sum()
                    total_vertex_with_slab = total_vertex + (_data_contextualized_1 == mask_c).sum()
                    # Get proportion per label
                    if total_vertex > 0:
                        vertex_label_dict = {}
                        f_labels = ["f1","f2","f1&f2","fim"]
                        for _f_label, _f_c in zip(f_labels, c_per_label):
                            vertex_label_dict[_f_label] = (_data_contextualized_1 == _f_c).sum() / total_vertex
                        vertex_label_dict["hcp_label"] = hcp_label
                        vertex_label_dict["activated_vertex_count"] = total_vertex
                        vertex_label_dict["slab_vertex_count"] = total_vertex_with_slab
                        vertex_label_dict["hcp_vertex_count"] = vertex_in_hcp_roi
                        vertex_label_dict["sub_id"] = sub_id
                        vertex_label_dict["im_code"] = im_str
                        vertex_label_dict["roi_task_id"] = roi_task_id
                        vertex_label_dict["experiment"] = label
                        # `v` shadows the outer loop variable; harmless because im_strs/im_fs
                        # were already extracted from it.
                        for k,v in vertex_label_dict.items():
                            proportion_data[k].append(v)

                map_data = binarize_mask(data,f_im_c,mask_c,im_str)
                pd_im_data = load_mean_dtseries(pd_im[0])[:VERTEX_TO]
                # Power metrics were not calculated for control task condition (no voxels allocated to task-control ROIs)
                if roi_task_id == "control" and experiment_id == "1_frequency_tagging":
                    power_im_data = None
                # Power metrics were not calculated for some IM frequencies where voxels were not allocated
                elif len(power_im)==0:
                    power_im_data = None
                else:
                    power_im_data = load_mean_dtseries(power_im[0])[:VERTEX_TO]
                q_id = get_quadrant_id(mask[0])
                # `hcp_mapping` is the module-level dict (label -> ROI dscalar path), not a parameter.
                df_data = append_data(
                    df_data, 
                    hcp_mapping, 
                    map_data, 
                    power_im_data, 
                    pd_im_data, 
                    q_id,
                    label, 
                    sub_id,
                    ROI_FO,
                    roi_task_id,
                    _roi_task_id,
                    im_str,
                    im_f,
                )
                palette_params = {
                    "disp-zero": False,
                    "disp-neg": True,
                    "disp-pos": True,
                    "pos-user": (0, 1.),
                    "neg-user": (-1,0),
                    "interpolate": True,
                }
                # Save f1f2 map as dtseries
                # The IM-only map is written into a zero-filled single-row array as wide as the
                # source file and saved with the source header (series length forced to 1).
                f_im_img = nib.load(f_im)
                dscalar_to_save_as_cifti = np.zeros((1,f_im_img.shape[-1]))
                dscalar_to_save_as_cifti[0,:VERTEX_TO] = data
                f_im_img = nib.Cifti2Image(dscalar_to_save_as_cifti, header=f_im_img.header)
                f_im_img.header.matrix[0].number_of_series_points = 1
                nib.save(f_im_img, dscalar_out)
                dscalar(
                    png_out, data, 
                    orientation="portrait", 
                    hemisphere='right',
                    palette=PALETTE, 
                    palette_params=palette_params,
                    transparent=False,
                    flatmap=True,
                    flatmap_style='plain',
                )
                #crop_and_save(png_out, str(png_out).replace("png", "cropped.png"), LEFT, TOP, RIGHT, BOTTOM)
                # Save f1f2 map (contextualized) as dtseries
                # From here `png_out` is a str whose suffix is swapped for each further figure.
                png_out = str(png_out).replace(".png", "_contextualized.png")
                dscalar(
                    png_out, data_contextualized_1, 
                    orientation="portrait", 
                    hemisphere='right',
                    palette=PALETTE, 
                    palette_params=palette_params,
                    transparent=False,
                    flatmap=True,
                    flatmap_style='plain',
                )
                png_out = str(png_out).replace("_contextualized.png", "_contextualized_include_missing.png")
                dscalar(
                    png_out, data_contextualized_2,
                    orientation="portrait", 
                    hemisphere='right',
                    palette=PALETTE, 
                    palette_params=palette_params,
                    transparent=False,
                    flatmap=True,
                    flatmap_style='plain',
                )
                # Power and phase-delay figures are rendered only when power data is available.
                if power_im_data is not None:
                    # NOTE(review): this `continue` also skips the `track` print at the end of the iteration.
                    if len([i for i in power_im_data[power_im_data>0]])==0:
                        continue
                    png_out = str(png_out).replace("_contextualized_include_missing.png", "_bold_power.png")
                    # Upper colour limit = 80th percentile of the strictly positive power values.
                    percentile = .8
                    ub = calculate_percentile([i for i in power_im_data[power_im_data>0]], percentile)
                    print(f"Percentile [{percentile}]: {ub}")
                    palette_params = {
                        "disp-zero": False,
                        "disp-neg": False,
                        "disp-pos": True,
                        "pos-user": (0, ub),
                        "neg-user": (-1,0),
                        "interpolate": True,
                    }
                    dscalar(
                        png_out, power_im_data,
                        orientation="portrait", 
                        hemisphere='right',
                        palette="videen_style", 
                        palette_params=palette_params,
                        transparent=False,
                        flatmap=True,
                        flatmap_style='plain',
                    )
                    png_out = str(png_out).replace("_bold_power.png", "_phase_delay.png")
                    # Upper colour limit for phase delay = one period of the IM frequency.
                    ub = 1/im_f
                    palette_params = {
                        "disp-zero": False,
                        "disp-neg": False,
                        "disp-pos": True,
                        "pos-user": (0, ub),
                        "neg-user": (-1,0),
                        "interpolate": True,
                    }
                    # Zero the phase delay (in place) wherever power is not positive before plotting.
                    pd_im_data[power_im_data <= 0] = 0
                    dscalar(
                        png_out, pd_im_data,
                        orientation="portrait", 
                        hemisphere='right',
                        palette="videen_style", 
                        palette_params=palette_params,
                        transparent=False,
                        flatmap=True,
                        flatmap_style='plain',
                    )
                #crop_and_save(png_out, str(png_out).replace("png", "cropped.png"), LEFT, TOP, RIGHT, BOTTOM)
                
                # Progress/debug output: current length of every column list in df_data.
                track = [len(v) for k,v in df_data.items()]
                print(track)

    return df_data, proportion_data

In [5]:
# Output locations: pickled IM-map tables are cached under <scratch>/im_map.
scratch_dir = Path("/scratch/fastfmri")
immapdir = scratch_dir.joinpath("im_map")
# Create the cache directory (and any missing parents) on first use only.
if immapdir.exists() is False:
    immapdir.mkdir(parents=True, exist_ok=True)

Save visualizations
- 3T normal (.125/.2)
- 7T normal (.125/.2)
- 3T varying frequencies
- 7T varying frequencies

In [6]:
"""Set up for visualizing dual frequency tagging across each subject using fractional overlap
"""
# Palette and colour values; generate_single_subject_maps reads f_1_c, f_2_c, f1f2_c and
# f_im_c from module scope, so these names must keep their spelling.
PALETTE = "power_surf"
f_1_c = -.1 # red
f_2_c = .82 # blue
f1f2_c = .14 # gold
f_im_c = .52 # .52 cyan
f_1_c_not_im = .6 # orange
f_2_c_not_im = .65 # purple
f1f2_c_not_im = .75 # limegreen
mask_c = .41 # black

ROI_FOS = [.8]
corr_type = "uncp"
FORCE_RUN = False  # set True to ignore the cached pickles and regenerate everything

"""Save png
"""
immap_pkl = immapdir / f"im_map_corr-{corr_type}.pkl"
immap_proportion_pkl = immapdir / f"im_map_proportion_corr-{corr_type}.pkl"
if immap_pkl.exists() and immap_proportion_pkl.exists() and not FORCE_RUN:
    df = pd.read_pickle(immap_pkl)
    proportion_df = pd.read_pickle(immap_proportion_pkl)
else:
    subs_3t_normal = ["000", "002", "003", "004", "005", "006", "007", "008", "009"]
    subs_7t_normal = ["Pilot001", "Pilot009", "Pilot010", "Pilot011"]
    subs_vary = ["020"] * 3 + ["021"] * 3
    tasks_vary = [f"entrain{i}" for i in ["A", "B", "C", "D", "E", "F"]]
    f_1s_vary = [.125] * 3 + [.125, .15, .175]
    f_2s_vary = [.2, .175, .15] + [.2] * 3

    # One entry per cohort, processed in this order. `force_task_id` is forwarded as
    # FORCE_TASK_ID (None = use each run's own task id).
    cohorts = [
        # 3T control under entrain condition (set this to get power measurements with entrain ROIs)
        dict(label="3TNormal", experiment_id="1_frequency_tagging", mri_id="3T",
             sub_ids=subs_3t_normal,
             roi_task_ids=["entrain"] * len(subs_3t_normal),
             roi_f_1s=[.125] * len(subs_3t_normal),
             roi_f_2s=[.2] * len(subs_3t_normal),
             force_task_id="control"),
        # 3T normal
        dict(label="3TNormal", experiment_id="1_frequency_tagging", mri_id="3T",
             sub_ids=subs_3t_normal,
             roi_task_ids=["entrain"] * len(subs_3t_normal),
             roi_f_1s=[.125] * len(subs_3t_normal),
             roi_f_2s=[.2] * len(subs_3t_normal),
             force_task_id=None),
        # 7T normal
        dict(label="7TNormal", experiment_id="1_attention", mri_id="7T",
             sub_ids=subs_7t_normal,
             roi_task_ids=["AttendAway"] * len(subs_7t_normal),
             roi_f_1s=[.125] * len(subs_7t_normal),
             roi_f_2s=[.2] * len(subs_7t_normal),
             force_task_id=None),
        # 3T vary
        dict(label="3TVary", experiment_id="1_frequency_tagging", mri_id="3T",
             sub_ids=subs_vary, roi_task_ids=tasks_vary,
             roi_f_1s=f_1s_vary, roi_f_2s=f_2s_vary,
             force_task_id=None),
        # 7T vary
        dict(label="7TVary", experiment_id="1_frequency_tagging", mri_id="7T",
             sub_ids=subs_vary, roi_task_ids=tasks_vary,
             roi_f_1s=f_1s_vary, roi_f_2s=f_2s_vary,
             force_task_id=None),
    ]

    # Accumulators shared by all cohorts; the first call creates them.
    df_data = None
    proportion_data = None
    for cohort in cohorts:
        label = cohort["label"]
        experiment_id = cohort["experiment_id"]
        mri_id = cohort["mri_id"]
        for ROI_FO in ROI_FOS:
            runs = zip(cohort["sub_ids"], cohort["roi_task_ids"], cohort["roi_f_1s"], cohort["roi_f_2s"])
            for sub_id, roi_task_id, roi_f_1, roi_f_2 in runs:
                im_frequencies = get_im_frequencies(roi_f_1, roi_f_2)
                df_data, proportion_data = generate_single_subject_maps(
                    label, experiment_id, mri_id, [sub_id],
                    [roi_task_id], im_frequencies, HCP_LABELS,
                    df_data=df_data,
                    proportion_data=proportion_data,
                    corr_type=corr_type,
                    ROI_FO=ROI_FO, SUB_THRESHOLD=.5,
                    FORCE_TASK_ID=cohort["force_task_id"],
                )

    # Persist both tables so later runs can take the cached branch above.
    df = pd.DataFrame(df_data)
    df.to_pickle(immap_pkl)
    proportion_df = pd.DataFrame(proportion_data)
    proportion_df.to_pickle(immap_proportion_pkl)

# Drop the per-run progress output (clear_output is imported in the first cell).
clear_output()

In [None]:
# Stacked-bar figure: one panel per IM code, one bar per (subject, experiment, task),
# showing the share of activated vertices labelled f1 / f2 / f1&f2 / fim.
im_codes = list(df.im_code.unique())
FONTSIZE = 6
LINEWIDTH = 1.
roi_c_dict = get_roi_colour_codes()
text_dict = get_frequency_text_codes()

fig, ax_dict = plt.subplot_mosaic(
    [[i] for i in im_codes],
    layout="constrained",
    figsize=(6,10),
    dpi=600,
)

proportion_cols = ["f1", "f2", "f1&f2", "fim"]
count_cols = ["f1_count", "f2_count", "f1&f2_count", "fim_count"]
band_colours = [roi_c_dict["f1"], roi_c_dict["f2"], roi_c_dict["f1f2"], "cyan"]
frame_style = dict(c='k', linestyle='-', linewidth=LINEWIDTH, zorder=1)

for im_code, ax in ax_dict.items():
    # Work on an independent copy: the original mutated a slice of `proportion_df`
    # in place, which triggers pandas' SettingWithCopy warnings.
    _df = (
        proportion_df[proportion_df.im_code == im_code]
        .drop_duplicates(subset=["im_code","hcp_label","sub_id","roi_task_id","experiment"])
        .copy()
    )

    # Proportions -> vertex counts per ROI, summed over ROIs, then re-normalised per bar.
    for prop_col, count_col in zip(proportion_cols, count_cols):
        _df[count_col] = _df["activated_vertex_count"] * _df[prop_col]
    _df = _df.groupby(["sub_id","experiment","roi_task_id"])[count_cols].sum().reset_index()
    _df["all_count"] = _df["f1_count"] + _df["f2_count"] + _df["f1&f2_count"] + _df["fim_count"]
    for count_col in count_cols:
        _df[count_col] = _df[count_col] / _df["all_count"]

    n_bars = _df.shape[0]
    if n_bars == 0:
        # Nothing to draw for this IM code; leave the panel untouched.
        continue

    sub_ids_info = _df.sub_id.values
    roi_task_info = _df.roi_task_id.values
    experiment_info = _df.experiment.values

    # One stacked bar per row, built bottom-up in the order f1, f2, f1&f2, fim.
    for i, (_, row) in enumerate(_df[count_cols].iterrows()):
        band_bottom = 0
        for band_height, c in zip([row[col] for col in count_cols], band_colours):
            band_top = band_bottom + band_height
            ax.fill_between([i, i + 1], [band_bottom]*2, [band_top]*2, color=c, alpha=1.)
            band_bottom = band_top

    # Frame, ticks and title are drawn once per panel (previously redrawn for every bar).
    for x_edge in range(n_bars + 1):
        ax.plot([x_edge, x_edge], [0, 1], **frame_style)
    for y_edge in (0, 1):
        ax.plot([0, n_bars], [y_edge, y_edge], **frame_style)
    for _spine in ["right","bottom","top","left"]:
        ax.spines[_spine].set_visible(False)

    ax.set_ylim(0,1)
    ax.set_xlim(0,n_bars)

    ax.set_xticks([i+.5 for i in range(n_bars)])
    ax.set_xticklabels(
        [f"{s}/{t}\n{e}" for s, t, e in zip(sub_ids_info, roi_task_info, experiment_info)],
        fontsize=FONTSIZE-2, rotation=90,
    )
    ax.set_yticks([0,1])
    ax.set_yticklabels([0,1], fontsize=FONTSIZE-2)
    ax.set_title(text_dict[im_code], fontsize=FONTSIZE)

    ax.tick_params("both",pad=0,width=LINEWIDTH,length=2)

fig.savefig("im_mapping_proportion.png",dpi=600)

Generate control ROIs

In [8]:

# Control-task ROI tables for the 3T cohort, cached as pickles next to the main IM-map tables.
immap_control_pkl = immapdir / f"im_map_control_corr-{corr_type}.pkl"
immap_control_proportion_pkl = immapdir / f"im_map_control_proportion_corr-{corr_type}.pkl"
_control_cache_ready = immap_control_pkl.exists() and immap_control_proportion_pkl.exists()
if not _control_cache_ready:
    label = "3TControl"
    control_df_data, control_proportion_data = None, None
    for _roi_task_id in ["control"]:
        for ROI_FO in ROI_FOS:
            experiment_id = "1_frequency_tagging"
            mri_id = "3T"
            sub_ids = ["000", "002", "003", "004", "005", "006", "007", "008", "009"]
            n_subjects = len(sub_ids)
            roi_task_ids = [_roi_task_id] * n_subjects
            roi_f_1s = [.125] * n_subjects
            roi_f_2s = [.2] * n_subjects
            # One call per subject; the accumulators are threaded through every call.
            for sub_id, roi_task_id, roi_f_1, roi_f_2 in zip(sub_ids, roi_task_ids, roi_f_1s, roi_f_2s):
                im_frequencies = get_im_frequencies(roi_f_1, roi_f_2)
                control_df_data, control_proportion_data = generate_single_subject_maps(
                    label, experiment_id, mri_id, [sub_id],
                    [roi_task_id], im_frequencies, HCP_LABELS,
                    df_data=control_df_data,
                    proportion_data=control_proportion_data,
                    corr_type=corr_type,
                    ROI_FO=ROI_FO, SUB_THRESHOLD=.5
                )

    # Persist both tables so the next run can load them instead of recomputing.
    control_df = pd.DataFrame(control_df_data)
    control_df.to_pickle(immap_control_pkl)
    control_proportion_df = pd.DataFrame(control_proportion_data)
    control_proportion_df.to_pickle(immap_control_proportion_pkl)
else:
    control_df = pd.read_pickle(immap_control_pkl)
    control_proportion_df = pd.read_pickle(immap_control_proportion_pkl)

In [None]:
from functools import lru_cache
geodesic_arr = np.array(nib.load(geodesic_dscalar).get_fdata())
@lru_cache(maxsize=180)
def _get_geodesic_distance(hcp_label):
    """Return the mean geodesic value of an HCP ROI, averaged over hemispheres.

    The ROI name is the second "_"-separated field of `hcp_label`. For each
    hemisphere the ROI mask is read from /tmp (entries equal to 1), the
    `geodesic_arr` values under the mask are averaged, and the two hemisphere
    means are averaged again. Results are memoised per label.
    """
    roi_name = hcp_label.split("_")[1]

    hemisphere_means = []
    for hemi_prefix in ("L", "R"):
        roi_mask = nib.load(f"/tmp/{hemi_prefix}_{roi_name}_ROI.dscalar.nii").get_fdata() == 1
        hemisphere_means.append(geodesic_arr[roi_mask].mean())

    return np.mean(hemisphere_means)

def get_laterality(df,sub_id, experiment_id, hcp_label):
    """Rewrite the hemisphere prefix of `hcp_label` as CONTRA_/IPSI_.

    The quadrant is the first unique `quadrant_id` among the rows of `df`
    matching `sub_id` and `experiment_id`. For "Q1", L_ becomes CONTRA_ and
    R_ becomes IPSI_; for "Q2" the mapping is reversed.

    Raises
    ------
    ValueError
        If the quadrant / prefix combination is not one of the four above.
    """
    session_rows = df[(df.sub_id==sub_id) & (df.experiment_id==experiment_id)]
    quadrant_id = session_rows.quadrant_id.unique()[0]

    # (quadrant, hemisphere prefix) -> laterality prefix
    prefix_table = {
        ("Q1", "L_"): "CONTRA_",
        ("Q1", "R_"): "IPSI_",
        ("Q2", "L_"): "IPSI_",
        ("Q2", "R_"): "CONTRA_",
    }
    for (quadrant, hemi_prefix), laterality_prefix in prefix_table.items():
        if quadrant_id == quadrant and hcp_label.startswith(hemi_prefix):
            return hcp_label.replace(hemi_prefix, laterality_prefix, 1)

    raise ValueError(f"{quadrant_id} / {hcp_label} not supported.")

def plot_pie(ax,values,x,y,s,colors): 
    """Draw a pie of `values` on `ax`, centred at (x, y) with radius `s`."""
    pie_kwargs = {"center": (x, y), "radius": s, "colors": colors}
    ax.pie(values, **pie_kwargs)

def get_pie_info_per_hcp_and_im_label(sub_proportion_df,im_code, hcp_label_normalized):
    """Split one ROI's vertex count into frequency categories.

    Rows of `sub_proportion_df` matching `im_code` and `hcp_label_normalized`
    are selected. One or two matches are accepted; with two, only the first
    row is used.

    Returns
    -------
    int
        0 when no row matches (callers compare the result with ``== 0``).
    tuple
        ``(counts, hcp_vertex_count, geodesic_distance)`` where ``counts`` is
        ``[f1, f2, f1&f2, fim, in-slab-but-not-activated, outside-slab]``.

    Raises
    ------
    ValueError
        When more than two rows match.
    AssertionError
        When the vertex counts of the selected row are inconsistent.
    """
    matches = sub_proportion_df[
        (sub_proportion_df.im_code==im_code)
        & (sub_proportion_df.hcp_label_normalized==hcp_label_normalized)
    ]
    nrows = matches.shape[0]

    if nrows == 0:
        return 0
    if nrows > 2:
        # Report the label that was actually queried (the old message used an
        # unrelated name that is not a parameter of this function).
        raise ValueError(f"{hcp_label_normalized} {im_code} has {nrows} rows")

    def _first(column):
        # Value of `column` in the first matching row.
        return matches[column].values[0]

    activated_vertex_count = _first("activated_vertex_count")
    hcp_vertex_count = _first("hcp_vertex_count")
    slab_vertex_count = _first("slab_vertex_count")
    assert hcp_vertex_count >= slab_vertex_count

    not_slab_vertex_count = hcp_vertex_count - slab_vertex_count
    inactive_slab_vertex_count = slab_vertex_count - activated_vertex_count
    assert activated_vertex_count + inactive_slab_vertex_count + not_slab_vertex_count == hcp_vertex_count, f"{activated_vertex_count} + {inactive_slab_vertex_count} + {not_slab_vertex_count} != {hcp_vertex_count}"

    # Proportions are fractions of the activated vertices; scale back to counts.
    category_counts = [
        _first(column) * activated_vertex_count
        for column in ("f1", "f2", "f1&f2", "fim")
    ]
    # Tolerant comparison: proportion * count is floating point and need not
    # reproduce the integer count bit-for-bit.
    assert np.isclose(activated_vertex_count, np.sum(category_counts), rtol=1e-9, atol=1e-9), (
        f"category counts {category_counts} do not add up to {activated_vertex_count}"
    )

    return (
        category_counts + [inactive_slab_vertex_count, not_slab_vertex_count],
        hcp_vertex_count,
        _first("geodesic_distance"),
    )

def plot_bar(ax, hcp_ix, x_offset, bar_width, values, colors,fractionate=False):
    """Draw one stacked bar on `ax` as a pile of filled rectangles.

    The bar spans x from ``hcp_ix + x_offset`` to that value plus
    `bar_width`. Each entry of `values` is one segment stacked on the previous
    ones, filled with the matching entry of `colors`. With `fractionate`, the
    segments are first divided by their sum.
    """
    assert len(values) == len(colors)

    total = np.sum(values)
    if fractionate:
        values = [segment / total for segment in values]

    left_edge = hcp_ix + x_offset
    right_edge = left_edge + bar_width

    segment_bottom = 0
    for segment_height, segment_color in zip(values, colors):
        segment_top = segment_height + segment_bottom
        ax.fill_between(
            [left_edge, right_edge],
            [segment_bottom, segment_bottom],
            [segment_top, segment_top],
            color=segment_color, alpha=.8, linewidth=0,
        )
        segment_bottom = segment_top

def plot_proportions(ax, proportion_df,experiment, sub_id, hemi_starts_with, im_codes_to_include, pie_colors, FONTSIZE, unique_hcp_rois=None, ymax=None):
    """Plot, for one subject, a group of stacked bars per HCP ROI on `ax`.

    Rows of `proportion_df` are restricted to `sub_id`, `experiment`, labels
    starting with `hemi_starts_with`, and the codes in `im_codes_to_include`.
    Each ROI gets up to four bar slots, one per code in order. A dotted line
    marks the activated-vertex total of each drawn slot, and a marker is added
    above it for the "f1", "f2" and "f2-f1" codes.

    `unique_hcp_rois` defaults to every label of `proportion_df` with the
    hemisphere prefix. `ymax` defaults to 0.8 times the largest
    `activated_vertex_count` among the selected rows.

    Returns the `ymax` that was used, so another panel can share the scale.
    """
    in_hemisphere = proportion_df.hcp_label_normalized.str.startswith(hemi_starts_with)
    row_filter = (
        (proportion_df.sub_id == sub_id)
        & (proportion_df.experiment == experiment)
        & in_hemisphere
        & (proportion_df.im_code.isin(im_codes_to_include))
    )
    sub_proportion_df = proportion_df[row_filter]

    if unique_hcp_rois is None:
        unique_hcp_rois = proportion_df[in_hemisphere].hcp_label_normalized.unique()
    if ymax is None:
        ymax = sub_proportion_df.activated_vertex_count.max() * .8

    x_width = .2
    slot_positions = [0,1,2,3]
    slot_line_colors = ["k","k","r","k"]
    # Marker drawn above the dotted line for selected codes.
    peak_markers = {
        "f1": {"marker": 'o', "s": 5, "c": 'r'},
        "f2": {"marker": 'o', "s": 5, "c": 'b'},
        "f2-f1": {"marker": 'v', "s": 15, "c": 'g'},
    }

    for hcp_ix, hcp_label_normalized in enumerate(unique_hcp_rois):
        # Baseline under the ROI's slots and the ROI name above them.
        ax.plot([hcp_ix, hcp_ix+(x_width*4)],[0,0], color="k", linewidth=.4, alpha=1.,zorder=10)
        ax.text(hcp_ix+(x_width*2), ymax*1.05, hcp_label_normalized.split("_")[1], ha="center", fontsize=FONTSIZE-3,rotation=90,fontweight="bold")

        x_offset = 0
        for ypos, line_c, im_code in zip(slot_positions, slot_line_colors, im_codes_to_include):
            # Every slot consumes one bar width, whether or not it is drawn.
            bar_offset = x_offset
            x_offset += x_width

            pie_info = get_pie_info_per_hcp_and_im_label(sub_proportion_df,im_code, hcp_label_normalized)
            if pie_info == 0:
                continue
            pie_values, hcp_vertex_count, gd = pie_info

            ymax_plot = np.sum(pie_values[:4])
            slot_centre = hcp_ix+((x_width*ypos)+x_width/2)
            ax.plot([slot_centre, slot_centre],[0,ymax_plot], color=line_c, linewidth=.4, alpha=1.,zorder=10, linestyle="dotted")

            marker_style = peak_markers.get(im_code)
            if marker_style is not None:
                ax.scatter(slot_centre, ymax_plot + (5*ymax/100), linewidths=0, zorder=20, **marker_style)

            assert len(pie_values) == len(pie_colors)
            plot_bar(ax, hcp_ix, bar_offset, x_width, pie_values, pie_colors)

    # style plot
    for hidden_spine in ("top", "right", "bottom"):
        ax.spines[hidden_spine].set_visible(False)
    left_spine = ax.spines["left"]
    left_spine.set_bounds(0,ymax)
    left_spine.set_position(('data',-.4))
    left_spine.set_linewidth(.2)
    y_ticks = [0,ymax]
    ax.set_ylim(-ymax*.01,ymax*1.2)
    ax.set_yticks(y_ticks)
    ax.set_yticklabels([f"{tick:.1f}" for tick in y_ticks], fontsize=FONTSIZE)
    ax.set_xticks([])
    ax.tick_params(axis="y",width=.2,pad=0,length=2,labelsize=FONTSIZE)
    ax.set_title(sub_id, fontsize=FONTSIZE)

    return ymax

# Annotate the entrain proportions: CONTRA_/IPSI_ label per row, mean geodesic
# value per ROI, then order the rows by that value.
proportion_df["hcp_label_normalized"] = proportion_df.apply(lambda row: get_laterality(df, row["sub_id"], row["experiment"], row["hcp_label"]), axis=1)
proportion_df["geodesic_distance"] = proportion_df["hcp_label"].apply(_get_geodesic_distance)
proportion_df = proportion_df.sort_values(by="geodesic_distance",ascending=True)

# Same annotation for the control proportions (quadrants looked up in control_df).
control_proportion_df["hcp_label_normalized"] = control_proportion_df.apply(lambda row: get_laterality(control_df, row["sub_id"], row["experiment"], row["hcp_label"]), axis=1)
control_proportion_df["geodesic_distance"] = control_proportion_df["hcp_label"].apply(_get_geodesic_distance)
control_proportion_df = control_proportion_df.sort_values(by="geodesic_distance",ascending=True)

FONTSIZE = 6
im_codes_to_include = ["f1","f2","f2-f1","2f1"]
# One colour per entry returned by get_pie_info_per_hcp_and_im_label (six entries).
pie_colors = ["r","b","yellow","cyan","grey","k"]

experiment = "3TNormal"
sub_ids = ["000","002","003","004","005","006","007","008","009"]
# One entrain figure and one control figure per hemisphere prefix, one row per subject.
for hemi_starts_with in ["CONTRA_","IPSI_"]:
    # ROI order comes from the entrain data and is shared by both figures.
    unique_hcp_rois = proportion_df[(proportion_df.hcp_label_normalized.str.startswith(hemi_starts_with)) & (proportion_df.im_code.isin(im_codes_to_include))].hcp_label_normalized.unique()
    entrain_fig, entrain_ax_dict = plt.subplot_mosaic(mosaic=[[i] for i in sub_ids],layout="constrained", figsize=(4.5,6.6), dpi=300)
    control_fig, control_ax_dict = plt.subplot_mosaic(mosaic=[[i] for i in sub_ids],layout="constrained", figsize=(4.5,6.6), dpi=300)
    for sub_id in sub_ids:
        ax = entrain_ax_dict[sub_id]
        ymax = plot_proportions(ax, proportion_df, "3TNormal", sub_id, hemi_starts_with, im_codes_to_include, pie_colors, FONTSIZE,unique_hcp_rois=unique_hcp_rois)
        ax = control_ax_dict[sub_id]
        # The control panel reuses the entrain panel's ymax so both share a y-scale.
        _ = plot_proportions(ax, control_proportion_df, "3TControl", sub_id, hemi_starts_with, im_codes_to_include, pie_colors, FONTSIZE,unique_hcp_rois=unique_hcp_rois,ymax=ymax)

    entrain_fig.savefig(f"{experiment}_{hemi_starts_with}entrain_composition.png",dpi=600)
    control_fig.savefig(f"{experiment}_{hemi_starts_with}control_composition.png",dpi=600)

Vertex count 3T data

In [None]:
# Selection used by the 3T vertex-count comparison below.
roi_task_id = "entrain"
task_id = "entrain"
control_task_id = "control"
roi_fo = .8
FONTSIZE = 6

# Maps an im_code to the text used as panel title.
str_mapping = get_frequency_text_codes()

def count_vertices_from_df(df):
    """Total vertex count over all rows of `df`.

    For each entry of `df.vertex_coordinates`, the length of the first axis of
    its first element is added up. Returns 0 for an empty frame.
    """
    return sum(coords[0].shape[0] for coords in df.vertex_coordinates)

def round_up_to_nearest_hundred(num, round_to=100):
    """Round `num` up to the next multiple of `round_to` (100 by default)."""
    import math
    n_steps = math.ceil(num / round_to)
    return n_steps * round_to

# Paired comparison of activated-vertex counts (control vs. entrain) for every
# 3TNormal subject, one panel per im_code.
im_codes = [i for i in df.im_code.unique()]
sub_ids_3T = df[(df.experiment_id=='3TNormal')].sub_id.unique()

fig, ax_dict = plt.subplot_mosaic([im_codes], layout="constrained", figsize=(5, 1.), dpi=300)

for im_code, ax in ax_dict.items():
    _entrain_data = []
    _control_data = []
    for sub_id in sub_ids_3T:
        _df = df[(df.sub_id==sub_id) & (df.roi_task_id==roi_task_id) & (df.task_id==task_id) & (df.roi_fo==roi_fo) & (df.im_code==im_code)]
        _control_df = control_df[(control_df.sub_id==sub_id) & (control_df.roi_task_id==control_task_id) & (control_df.task_id==control_task_id) & (control_df.roi_fo==roi_fo) & (control_df.im_code==im_code)]
        entrain_count = count_vertices_from_df(_df)
        control_count = count_vertices_from_df(_control_df)
        # One dot per condition, joined by a dotted line for the same subject.
        ax.scatter(1, entrain_count, color='cyan', s=20, edgecolors='k',linewidths=.4,zorder=12)
        ax.scatter(0, control_count, color='grey', s=20, edgecolors='k',linewidths=.4,zorder=12)
        ax.plot([0,1], [control_count, entrain_count], color='k', linestyle='dotted', linewidth=.4, zorder=10)
        _entrain_data.append(entrain_count)
        _control_data.append(control_count)
    _data = _entrain_data + _control_data

    # One-sided paired test (entrain > control). `test` is reset for every
    # panel so a failed test can never fall back on the previous panel's
    # result, and is never read while undefined.
    test = None
    try:
        test = wilcoxon(_entrain_data, _control_data, alternative="greater")
        print(im_code, test)
    except Exception as err:
        # Best effort: the panel is still drawn, just without a significance star.
        print(f"{im_code}: Wilcoxon test failed ({err})")
    wilcoxon_fail = test is None

    ax.set_title(str_mapping[im_code], fontsize=FONTSIZE)
    ax.set_xlim(-.4,1.4)
    ax.set_xticks([0,1])
    ax.set_xticklabels(["Control","Entrain"], fontsize=FONTSIZE,rotation=90)

    ymax = round_up_to_nearest_hundred(max(_data))
    ax.set_ylim(-ymax*.2,ymax*1.2)
    ax.set_yticks([0,ymax/2,ymax])
    ax.set_yticklabels([0,int(ymax/2),ymax],fontsize=FONTSIZE)
    ax.tick_params("both",pad=.2,width=.4)
    if not wilcoxon_fail and test.pvalue < .05:
        ax.text(.5, max(_entrain_data)*1.15, "$*$", fontsize=FONTSIZE, ha="center", va="center")

    for _spine in ["right","bottom","top"]:
        ax.spines[_spine].set_visible(False)
    ax.spines.left.set_linewidth(.4)
    ax.spines.left.set_bounds(0,ymax)

ax_dict["f1"].set_ylabel("Vertex count", fontsize=FONTSIZE)

Vertex count vary entrainDEF

In [None]:
# Activated-vertex counts for the entrainD/E/F conditions of the "Vary"
# experiments; 3T is drawn with circles and 7T with triangles on the same panels.
fig, ax_dict = plt.subplot_mosaic([im_codes], layout="constrained", figsize=(5, 1.), dpi=300)

# Largest count seen so far on each panel. The y-axis is re-fitted on every
# pass, so it must cover the earlier scanner's points as well as the current one's.
max_count_per_panel = {}

for mri_id in ["3T","7T"]:
    marker_style = 'o'
    if mri_id=="7T":
        marker_style="^"
    for im_code, ax in ax_dict.items():
        entrain1_data = []
        entrain2_data = []
        entrain3_data = []
        entrain1 = df[(df.experiment_id==f"{mri_id}Vary") & (df.roi_task_id=="entrainD") & (df.im_code==im_code)]
        entrain2 = df[(df.experiment_id==f"{mri_id}Vary") & (df.roi_task_id=="entrainE") & (df.im_code==im_code)]
        entrain3 = df[(df.experiment_id==f"{mri_id}Vary") & (df.roi_task_id=="entrainF") & (df.im_code==im_code)]
        entrain1_data.append(count_vertices_from_df(entrain1))
        entrain2_data.append(count_vertices_from_df(entrain2))
        entrain3_data.append(count_vertices_from_df(entrain3))
        all_entrain_data = entrain1_data+entrain2_data+entrain3_data

        # Conditions sit at x = 0, 1, 2.
        ax.scatter(np.zeros_like(entrain1_data)+0,entrain1_data,c="cyan",s=20,zorder=12,edgecolors='k',linewidths=.4,marker=marker_style)
        ax.scatter(np.zeros_like(entrain2_data)+1,entrain2_data,c="cyan",s=20,zorder=12,edgecolors='k',linewidths=.4,marker=marker_style)
        ax.scatter(np.zeros_like(entrain3_data)+2,entrain3_data,c="cyan",s=20,zorder=12,edgecolors='k',linewidths=.4,marker=marker_style)

        ax.set_title(str_mapping[im_code], fontsize=FONTSIZE)
        _max = max(np.max(all_entrain_data), max_count_per_panel.get(im_code, 0))
        max_count_per_panel[im_code] = _max
        # Small counts are rounded to tens, larger ones to hundreds.
        if _max <= 100:
            round_to = 10
        else:
            round_to = 100
        ymax = round_up_to_nearest_hundred(_max,round_to=round_to)
        ax.set_ylim(-ymax*.2,ymax*1.2)
        ax.set_yticks([0,ymax/2,ymax])
        ax.set_yticklabels([0,int(ymax/2),ymax],fontsize=FONTSIZE)
        ax.set_xlim(-.4,2.4)
        ax.set_xticks([0,1,2])
        ax.set_xticklabels(["1","2","3"], fontsize=FONTSIZE,rotation=90)
        ax.tick_params("both",pad=.2,width=.4,labelsize=FONTSIZE)
        for _spine in ["right","bottom","top"]:
            ax.spines[_spine].set_visible(False)
        ax.spines.left.set_linewidth(.4)
        ax.spines.left.set_bounds(0,ymax)

# The counts come from count_vertices_from_df, i.e. surface vertices.
ax_dict["f1"].set_ylabel("Vertex count", fontsize=FONTSIZE)


# NOTE(review): the entrainABC cell also saves to "K.png"; whichever cell runs
# last overwrites the other figure. Confirm the intended file names.
fig.savefig("K.png",dpi=600)

Vertex count vary entrainABC

In [None]:

# Activated-vertex counts for the entrainA/B/C conditions of the "Vary"
# experiments; 3T is drawn with circles and 7T with triangles on the same panels.
fig, ax_dict = plt.subplot_mosaic([im_codes], layout="constrained", figsize=(5, 1.), dpi=300)

# Largest count seen so far on each panel. The y-axis is re-fitted on every
# pass, so it must cover the earlier scanner's points as well as the current one's.
max_count_per_panel = {}

for mri_id in ["3T","7T"]:
    marker_style = 'o'
    if mri_id=="7T":
        marker_style="^"
    for im_code, ax in ax_dict.items():
        entrain1_data = []
        entrain2_data = []
        entrain3_data = []
        entrain1 = df[(df.experiment_id==f"{mri_id}Vary") & (df.roi_task_id=="entrainA") & (df.im_code==im_code)]
        entrain2 = df[(df.experiment_id==f"{mri_id}Vary") & (df.roi_task_id=="entrainB") & (df.im_code==im_code)]
        entrain3 = df[(df.experiment_id==f"{mri_id}Vary") & (df.roi_task_id=="entrainC") & (df.im_code==im_code)]
        entrain1_data.append(count_vertices_from_df(entrain1))
        entrain2_data.append(count_vertices_from_df(entrain2))
        entrain3_data.append(count_vertices_from_df(entrain3))
        all_entrain_data = entrain1_data+entrain2_data+entrain3_data

        # Conditions sit at x = 0, 1, 2.
        ax.scatter(np.zeros_like(entrain1_data)+0,entrain1_data,c="cyan",s=20,zorder=12,edgecolors='k',linewidths=.4,marker=marker_style)
        ax.scatter(np.zeros_like(entrain2_data)+1,entrain2_data,c="cyan",s=20,zorder=12,edgecolors='k',linewidths=.4,marker=marker_style)
        ax.scatter(np.zeros_like(entrain3_data)+2,entrain3_data,c="cyan",s=20,zorder=12,edgecolors='k',linewidths=.4,marker=marker_style)

        ax.set_title(str_mapping[im_code], fontsize=FONTSIZE)
        _max = max(np.max(all_entrain_data), max_count_per_panel.get(im_code, 0))
        max_count_per_panel[im_code] = _max
        # Small counts are rounded to tens, larger ones to hundreds.
        if _max <= 100:
            round_to = 10
        else:
            round_to = 100
        ymax = round_up_to_nearest_hundred(_max,round_to=round_to)
        ax.set_ylim(-ymax*.2,ymax*1.2)
        ax.set_yticks([0,ymax/2,ymax])
        ax.set_yticklabels([0,int(ymax/2),ymax],fontsize=FONTSIZE)
        ax.set_xlim(-.4,2.4)
        ax.set_xticks([0,1,2])
        ax.set_xticklabels(["1","2","3"], fontsize=FONTSIZE,rotation=90)
        ax.tick_params("both",pad=.2,width=.4,labelsize=FONTSIZE)
        for _spine in ["right","bottom","top"]:
            ax.spines[_spine].set_visible(False)
        ax.spines.left.set_linewidth(.4)
        ax.spines.left.set_bounds(0,ymax)

# The counts come from count_vertices_from_df, i.e. surface vertices.
ax_dict["f1"].set_ylabel("Vertex count", fontsize=FONTSIZE)


# NOTE(review): the entrainDEF cell also saves to "K.png"; whichever cell runs
# last overwrites the other figure. Confirm the intended file names.
fig.savefig("K.png",dpi=600)

DESIGN MATRIX 

im_types = f1, f2, f2-f1, f1+f2, 2f1, 2f2, 2f1-f2, 2f2-f1
- VERTEX_ID [row], sub_id, bold_power_[im_types], binary_[im_types], geodesic_distance_from_v1

In [None]:
# Expand `df` to one row per activated vertex. The single roi_fo value asserted
# here is broadcast to every row of the expanded frame.
assert df.roi_fo.unique().shape[0] == 1

# Parallel per-vertex lists, one per output column.
# NOTE(review): these module-level names overwrite earlier notebook variables of
# the same name (`im_codes`, `hcp_rois`, `sub_ids`, ...); cells that used the old
# values cannot be re-run after this cell.
roi_task_ids = []
task_ids = []
experiment_ids = []
sub_ids = []
quadrant_ids = []
hcp_rois = []
im_codes = []
im_fs = []
vertex_coordinates = []
vertex_powers = []

for row_ix, row in df.iterrows():
    vertex_coords = row.vertex_coordinates[0]
    power = row.f_im_BOLD_power
    # Rows without power values contribute no vertices.
    if power is None or len(power) == 0:
        continue
    assert vertex_coords.shape[0] == power.shape[0]

    # Repeat the row's metadata once per (vertex, power) pair.
    for _vertex, _power in zip(vertex_coords, power):
        vertex_powers.append(_power)
        vertex_coordinates.append(_vertex)
        im_fs.append(row.im_f)
        im_codes.append(row.im_code)
        hcp_rois.append(row.hcp_roi)
        quadrant_ids.append(row.quadrant_id)
        sub_ids.append(row.sub_id)
        experiment_ids.append(row.experiment_id)
        task_ids.append(row.task_id)
        roi_task_ids.append(row.roi_task_id)

expanded_df = pd.DataFrame(
    {
        "roi_task_id": roi_task_ids,
        "task_id": task_ids,
        "roi_fo": df.roi_fo.values[0],
        "experiment_id": experiment_ids,
        "sub_id": sub_ids,
        "quadrant_id": quadrant_ids,
        "hcp_roi": hcp_rois,
        "im_code": im_codes,
        "im_f": im_fs,
        "power": vertex_powers,
        "vertex_id": vertex_coordinates
    }
)
# Add hemi column
# 1 when the ROI label starts with "CONTRA", 0 otherwise.
expanded_df["hemi"] = np.where(expanded_df.hcp_roi.str.startswith("CONTRA"), 1,0)
# Add geodesic distance column
from functools import lru_cache
@lru_cache(maxsize=180)
def _get_geodesic_distance(hcp_label):
    """Return the mean geodesic value of an HCP ROI, averaged over hemispheres.

    `hcp_label` may be a bare ROI name or a full hemisphere label of the form
    ``L_<roi>_ROI`` / ``R_<roi>_ROI``. An earlier cell defines a helper with
    this same name that takes full labels; this definition replaces it when
    the cell runs, so both forms are accepted and either call style keeps
    working regardless of which definition is live.

    For each hemisphere the ROI mask is read from /tmp (entries equal to 1),
    the `geodesic_arr` values under the mask are averaged, and the two
    hemisphere means are averaged again. Results are memoised per label.
    """
    label_parts = hcp_label.split("_")
    if len(label_parts) >= 3 and label_parts[0] in ("L", "R") and label_parts[-1] == "ROI":
        # Full hemisphere label: keep only the ROI name in the middle.
        hcp_label = "_".join(label_parts[1:-1])

    hemisphere_means = []
    for hemi_prefix in ("L", "R"):
        roi_mask = nib.load(f"/tmp/{hemi_prefix}_{hcp_label}_ROI.dscalar.nii").get_fdata() == 1
        hemisphere_means.append(geodesic_arr[roi_mask].mean())

    return np.mean(hemisphere_means)

def get_geodesic_distance(row):
    """Geodesic value for a data row, keyed by the last "_" field of `row.hcp_roi`."""
    return _get_geodesic_distance(row.hcp_roi.split("_")[-1])

# Disabled alternative kept as a bare string literal (it is never executed):
# a per-vertex lookup instead of the per-ROI mean used above.
"""
def get_geodesic_distance(row):
    return geodesic_arr[0,row.vertex_id]
"""

# The helpers above read `geodesic_arr` at call time, so loading it here is early enough.
geodesic_arr = np.array(nib.load(geodesic_dscalar).get_fdata())
expanded_df["geodesic_distance"] = expanded_df.apply(get_geodesic_distance, axis=1,)
# Grouping key used later: experiment, subject and task joined by "_".
expanded_df["experiment_sub_id"] = expanded_df.experiment_id + "_" + expanded_df.sub_id + "_" + expanded_df.task_id

expanded_df

In [None]:
# Expand `control_df` to one row per vertex, mirroring the expanded_df cell above
# but without a power column.
assert control_df.roi_fo.unique().shape[0] == 1

# Parallel per-vertex lists, one per output column.
# NOTE(review): these re-use (and overwrite) the module-level list names from the
# expanded_df cell.
roi_task_ids = []
task_ids = []
experiment_ids = []
sub_ids = []
quadrant_ids = []
hcp_rois = []
im_codes = []
im_fs = []
vertex_coordinates = []

for row_ix, row in control_df.iterrows():
    vertex_coords = row.vertex_coordinates[0]

    # Repeat the row's metadata once per vertex.
    for _vertex in vertex_coords:
        vertex_coordinates.append(_vertex)
        im_fs.append(row.im_f)
        im_codes.append(row.im_code)
        hcp_rois.append(row.hcp_roi)
        quadrant_ids.append(row.quadrant_id)
        sub_ids.append(row.sub_id)
        experiment_ids.append(row.experiment_id)
        task_ids.append(row.task_id)
        roi_task_ids.append(row.roi_task_id)

control_expanded_df = pd.DataFrame(
    {
        "roi_task_id": roi_task_ids,
        "task_id": task_ids,
        "roi_fo": control_df.roi_fo.values[0],
        "experiment_id": experiment_ids,
        "sub_id": sub_ids,
        "quadrant_id": quadrant_ids,
        "hcp_roi": hcp_rois,
        "im_code": im_codes,
        "im_f": im_fs,
        "vertex_id": vertex_coordinates
    }
)
# Add hemi column
# 1 when the ROI label starts with "CONTRA", 0 otherwise.
control_expanded_df["hemi"] = np.where(control_expanded_df.hcp_roi.str.startswith("CONTRA"), 1,0)

# Relies on get_geodesic_distance defined in the expanded_df cell.
geodesic_arr = np.array(nib.load(geodesic_dscalar).get_fdata())
control_expanded_df["geodesic_distance"] = control_expanded_df.apply(get_geodesic_distance, axis=1,)
control_expanded_df["experiment_sub_id"] = control_expanded_df.experiment_id + "_" + control_expanded_df.sub_id + "_" + control_expanded_df.task_id

control_expanded_df

In [None]:
import warnings
warnings.filterwarnings("ignore")

import statsmodels.api as sm
from statsmodels.formula.api import mixedlm
from scipy.stats import pearsonr

def get_formula(bold_power_type, bold_f_type_strs):
    """Build the model formula for one BOLD power type.

    The response is ``bold_power_<bold_power_type>``. The predictors are one
    ``binary_<type>`` per entry of `bold_f_type_strs`, excluding the response's
    own type, followed by ``hemi`` and ``geodesic_distance``. The assertion
    fails unless exactly one predictor was excluded.
    """
    own_binary = f"binary_{bold_power_type}"
    candidates = [f"binary_{f_type}" for f_type in bold_f_type_strs]
    candidates.extend(["hemi","geodesic_distance"])
    predictors = [name for name in candidates if name != own_binary]
    assert len(predictors)-1 == len(bold_f_type_strs)

    return f"bold_power_{bold_power_type} ~ " + " + ".join(predictors)


# Parallel lists describing the 34 datasets to model (18 + 4 + 6 + 6); entry k of
# each list belongs to the same dataset.
experiment_ids = ["3TNormal"]*18+["7TNormal"]*4+["3TVary"]*6+["7TVary"]*6
sub_ids = ["000","002","003","004","005","006","007","008","009"]*2+["Pilot001","Pilot009","Pilot010","Pilot011"]+["020"]*3+["021"]*3+["020"]*3+["021"]*3
roi_task_ids = ["entrain"]*18+["AttendAway"]*4+[f"entrain{i}" for i in ["A","B","C","D","E","F"]]*2
task_ids = ["control"]*9+["entrain"]*9+["AttendAway"]*4+[f"entrain{i}" for i in ["A","B","C","D","E","F"]]*2
# Frequency types modelled; "f2_sub_f1" is the column-name form of the "f2-f1" im_code.
bold_f_type_strs = ["f1","f2","f2_sub_f1","2f1"]

n_sub_ids = len(sub_ids)
n_bold_f_types = len(bold_f_type_strs)

# One figure per kind of plot, each with one panel per frequency type.
fig, ax_dict = plt.subplot_mosaic([bold_f_type_strs], layout="constrained", figsize=(8,2),dpi=400)
predict_fig, predict_ax_dict = plt.subplot_mosaic([bold_f_type_strs], layout="constrained", figsize=(8,1),dpi=400)
geo_fig, geo_ax_dict = plt.subplot_mosaic([bold_f_type_strs], layout="constrained", figsize=(8,1),dpi=400)
hemi_fig, hemi_ax_dict = plt.subplot_mosaic([bold_f_type_strs], layout="constrained", figsize=(8,1),dpi=400)
spatial_f1_fig, spatial_f1_ax_dict = plt.subplot_mosaic([bold_f_type_strs], layout="constrained", figsize=(8,1),dpi=400)
spatial_f2_fig, spatial_f2_ax_dict = plt.subplot_mosaic([bold_f_type_strs], layout="constrained", figsize=(8,1),dpi=400)
spatial_f2_sub_f1_fig, spatial_f2_sub_f1_ax_dict = plt.subplot_mosaic([bold_f_type_strs], layout="constrained", figsize=(8,1),dpi=400)
spatial_2f1_fig, spatial_2f1_ax_dict = plt.subplot_mosaic([bold_f_type_strs], layout="constrained", figsize=(8,1),dpi=400)

# Group-level p-values: one row per modelled power type, one column per predictor
# (the frequency binaries, then hemi, then geodesic distance).
mixed_glm_cohort_pvalues = np.zeros((len(bold_f_type_strs),n_bold_f_types+2))
# Un-demeaned, control-inclusive group frame per power type.
im_code_df_dict = {}
# Needed for the colourbar axes below; imported once rather than on every iteration.
from mpl_toolkits.axes_grid1 import make_axes_locatable

# im_code as stored in expanded_df -> suffix used in the bold_power_/binary_ column names.
im_code_to_suffix = {"f1": "f1", "f2": "f2", "f2-f1": "f2_sub_f1", "2f1": "2f1"}

# For every modelled power type: (1) per-dataset OLS fits, (2) a group-level mixed
# linear model, (3) diagnostic plots, (4) mean power per HCP label.
for bold_power_type_ix, bold_power_type in enumerate(bold_f_type_strs):

    ax = ax_dict[bold_power_type]
    predict_ax = predict_ax_dict[bold_power_type]
    hemi_ax = hemi_ax_dict[bold_power_type]
    geo_ax = geo_ax_dict[bold_power_type]
    spatial_f1_ax = spatial_f1_ax_dict[bold_power_type]
    spatial_f2_ax = spatial_f2_ax_dict[bold_power_type]
    spatial_f2_sub_f1_ax = spatial_f2_sub_f1_ax_dict[bold_power_type]
    spatial_2f1_ax = spatial_2f1_ax_dict[bold_power_type]

    power_col = f"bold_power_{bold_power_type}"
    binary_cols = [f"binary_{i}" for i in bold_f_type_strs]
    predictor_cols = binary_cols + ["hemi","geodesic_distance"]

    sub_frames = []
    cohort_pvalues = np.zeros((n_sub_ids,n_bold_f_types+2)) # +1 add hemi column, +1 add gd column
    for sub_ix, (experiment_id,sub_id,roi_task_id,task_id) in enumerate(zip(experiment_ids,sub_ids,roi_task_ids,task_ids)):

        # Reorganize
        # The ROI-task filter compares against this dataset's roi_task_id (it
        # used to compare the column with itself, which filtered nothing).
        sub_expanded_df = expanded_df[
            (expanded_df.sub_id==sub_id)
            & (expanded_df.experiment_id==experiment_id)
            & (expanded_df.roi_task_id==roi_task_id)
            & (expanded_df.task_id==task_id)
        ]
        if sub_expanded_df.empty:
            # Nothing recorded for this dataset: mark every predictor as n/a.
            cohort_pvalues[sub_ix,:] = -1
            continue

        # One output row per vertex. groupby(sort=False) visits vertices in order
        # of first appearance without re-scanning the frame for each vertex.
        data_dict = defaultdict(list)
        for vertex_id, _df in sub_expanded_df.groupby("vertex_id", sort=False):
            vertex_power = dict.fromkeys(im_code_to_suffix.values(), np.nan)
            vertex_binary = dict.fromkeys(im_code_to_suffix.values(), 0)
            for _, row in _df.iterrows():
                suffix = im_code_to_suffix.get(row.im_code)
                if suffix is None:
                    continue  # other im_codes are not modelled
                vertex_binary[suffix] = 1
                vertex_power[suffix] = row.power
            for suffix in im_code_to_suffix.values():
                data_dict[f"bold_power_{suffix}"].append(vertex_power[suffix])
            for suffix in im_code_to_suffix.values():
                data_dict[f"binary_{suffix}"].append(vertex_binary[suffix])
            # Vertex-level metadata is taken from the vertex's last row.
            last_row = _df.iloc[-1]
            data_dict["im_count"].append(_df.shape[0])
            data_dict["vertex_id"].append(vertex_id)
            data_dict["sub_id"].append(sub_id)
            data_dict["hcp_label"].append(last_row.hcp_roi)
            data_dict["hemi"].append(last_row.hemi)
            data_dict["geodesic_distance"].append(last_row.geodesic_distance)
            data_dict["experiment_sub_id"].append(last_row.experiment_sub_id)
        _df = pd.DataFrame(data_dict)

        """STATS
        Model a frequency's power with the spatial information of the other stimulating and IM frequencies
        """
        keep_rows = [power_col, "im_count", "hcp_label","vertex_id"] + binary_cols + ["hemi","geodesic_distance","experiment_sub_id"]
        filtered_df = _df[(pd.notna(_df[power_col]))][keep_rows]
        y = filtered_df[power_col]
        X = filtered_df[predictor_cols]
        if y.shape[0] == 0:
            cohort_pvalues[sub_ix,:] = -1
            continue

        # NOTE(review): the modelled type's own binary column is 1 on every kept
        # row; presumably add_constant then leaves X unchanged, which is what
        # keeps the positional mapping below within the six p-value columns.
        X = sm.add_constant(X)
        model = sm.OLS(y,X).fit()

        for var_ix, (var,pval) in enumerate(model.pvalues.items()):
            if np.isnan(pval):
                cohort_pvalues[sub_ix,var_ix] = -1
            else:
                cohort_pvalues[sub_ix,var_ix] = pval

        # Store data for group level analysis (mixed lm model)
        sub_frames.append(filtered_df)

    group_filtered_df = pd.concat(sub_frames) if sub_frames else None
    im_code_df_dict[bold_power_type] = group_filtered_df

    """STATS
    with mixed linear models on group level data
    """
    print(group_filtered_df.shape)
    # Remove control condition
    group_filtered_df = group_filtered_df[~group_filtered_df.experiment_sub_id.str.contains("control")].copy()
    print("After removing control:", group_filtered_df.shape)
    # Log transform gd
    #group_filtered_df["geodesic_distance"] = np.log(group_filtered_df["geodesic_distance"])
    # Demean all vars
    group_filtered_df[predictor_cols] = group_filtered_df[predictor_cols] - group_filtered_df[predictor_cols].mean()
    # Detect upperbound outlier information
    # q1/q3 are the 0.5th and 99.5th percentiles (not quartiles); only the upper fence is applied.
    q1 = group_filtered_df[power_col].quantile(.005)
    q3 = group_filtered_df[power_col].quantile(.995)
    iqr = q3-q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    outliers = (group_filtered_df[power_col] > upper_bound)
    print(f"Identified {outliers.sum()} outliers")
    outliers_df = group_filtered_df[outliers]
    group_filtered_df = group_filtered_df[~outliers]
    bold_ymax = group_filtered_df[power_col].max()

    # Get formula
    formula = get_formula(bold_power_type, bold_f_type_strs)
    # Remove maps with no vertex assignment
    # A binary column with a single distinct value carries no information. The
    # columns are demeaned by now, so a zero sum cannot identify such a column.
    remove_l = [
        i for i in group_filtered_df.columns
        if i.startswith("binary") and group_filtered_df[i].nunique() <= 1
    ]
    for r in remove_l:
        print(f"Remove {r}")
    # Rebuild the right-hand side term by term so a term can be dropped wherever it sits.
    formula_lhs, formula_rhs = formula.split("~")
    kept_terms = [term.strip() for term in formula_rhs.split("+") if term.strip() not in remove_l]
    formula = f"{formula_lhs.strip()} ~ {' + '.join(kept_terms)}"
    # Mixed LM
    model = mixedlm(
        formula,
        group_filtered_df,
        groups=group_filtered_df["experiment_sub_id"],
    )
    result = model.fit()
    # Predict BOLD power with model
    model_cols = kept_terms + ["experiment_sub_id"]
    _data = group_filtered_df[model_cols]
    y_predict = result.predict(_data)
    print(result.summary())


    """Plot single subject OLS fits
    """
    # Recode p-values in place: 1 = significant, 0 = not significant, -1 = not available.
    masked_sig = cohort_pvalues < .05
    masked_nan = cohort_pvalues == -1
    cohort_pvalues[~masked_sig] = 0
    cohort_pvalues[masked_sig] = 1
    cohort_pvalues[masked_nan] = -1
    im = ax.imshow(cohort_pvalues, interpolation='none',cmap='Reds')
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    cbar = plt.colorbar(im,cax=cax)
    cbar.ax.set_yticks([-1,0,1])
    cbar.ax.set_yticklabels(["n/a","n.s.","*"], fontsize=FONTSIZE)
    for _spine in ["top","right","bottom","left"]:
        ax.spines[_spine].set_visible(False)
    ax.set_yticks([i for i in range(len(sub_ids))])
    ax.set_yticklabels(sub_ids, fontsize=FONTSIZE-2, rotation=0)
    ax.set_xticks([i for i in range(len(bold_f_type_strs)+2)])
    ax.set_xticklabels(bold_f_type_strs+["hemi","gd"], fontsize=FONTSIZE-2, rotation=90)
    ax.set_title(bold_power_type,fontsize=FONTSIZE)
    ax.tick_params("both",pad=0,width=.25,length=2)

    """Plot model predictions
    """
    y = group_filtered_df[power_col]
    pval = pearsonr(y_predict,y).pvalue
    c = 'r' if pval < .05 else 'k'
    predict_ax.scatter(y_predict,y,s=10,c=c,alpha=.1,edgecolors="none")
    predict_ax.set_title(f"{bold_power_type}",fontsize=FONTSIZE)
    predict_ax.set_xlabel("Predicted",fontsize=FONTSIZE)
    predict_ax.set_ylabel("Observed",fontsize=FONTSIZE)
    predict_ax.set_ylim(0,bold_ymax)
    # Only the label size is changed. Freezing label text on automatically
    # located ticks would mislabel them once the limits move.
    predict_ax.tick_params(axis="both",labelsize=FONTSIZE)

    """Plot association to gd
    """
    gd = group_filtered_df["geodesic_distance"]
    y = group_filtered_df[power_col]
    pval = result.pvalues.geodesic_distance
    c = 'r' if pval < .05 else 'k'
    geo_ax.set_title(f"{bold_power_type}",fontsize=FONTSIZE)
    geo_ax.scatter(gd,y,s=10,c=c,alpha=.02,edgecolors="none")
    geo_ax.set_xlabel("geodesic distance",fontsize=FONTSIZE)
    geo_ax.set_ylabel("power",fontsize=FONTSIZE)
    geo_ax.set_ylim(0,bold_ymax)
    geo_ax.tick_params(axis="both",labelsize=FONTSIZE)

    """Plot association to hemi
    """
    spatial_panels = zip(
        ["hemi","binary_f1","binary_f2","binary_f2_sub_f1","binary_2f1"],
        [hemi_fig,spatial_f1_fig,spatial_f2_fig,spatial_f2_sub_f1_fig,spatial_2f1_fig],
        [hemi_ax,spatial_f1_ax,spatial_f2_ax,spatial_f2_sub_f1_ax,spatial_2f1_ax],
        ["hemi","f1","f2","f2-f1","2f1"]
    )
    for m, panel_fig, panel_ax, xlabel in spatial_panels:
        if m not in result.pvalues.index:
            # Term was not part of the fitted model: turn the panel off.
            panel_fig.delaxes(panel_ax)
            continue
        x = group_filtered_df[m]
        # Horizontal jitter so the points of the two predictor levels do not overlap.
        x_offset = np.random.uniform(-.2,.2,x.shape[0])
        y = group_filtered_df[power_col]

        pval = result.pvalues[m]
        c = 'r' if pval < .05 else 'grey'
        panel_ax.set_title(f"{bold_power_type}",fontsize=FONTSIZE)
        panel_ax.scatter(x+x_offset,y,s=10,c=c,alpha=.1,edgecolors="none")
        panel_ax.set_xlabel(xlabel,fontsize=FONTSIZE)
        panel_ax.set_ylabel("power",fontsize=FONTSIZE)
        panel_ax.set_ylim(0,bold_ymax)
        panel_ax.tick_params(axis="both",labelsize=FONTSIZE)

    # fill `mixed_glm_cohort_pvalues`
    for i,m in enumerate(["binary_f1","binary_f2","binary_f2_sub_f1","binary_2f1","hemi","geodesic_distance"]):
        # -1 marks a term that was not included in the model.
        mixed_glm_cohort_pvalues[bold_power_type_ix,i] = result.pvalues.get(m, -1)


    # Plot BOLD power per hcp label
    # Minimum fraction of an experiment's datasets an ROI must appear in to be plotted.
    retain_rois_per_experiment = [.75,1.,1.,1]
    experiment_labels = ["3TNormal","7TNormal","3TVary","7TVary"]
    fig, power_per_hcp_ax_dict = plt.subplot_mosaic([experiment_labels],figsize=(7,1.2),dpi=300, layout="constrained")
    for experiment_label, retain_rois_by_thr in zip(experiment_labels,retain_rois_per_experiment):
        ax = power_per_hcp_ax_dict[experiment_label]
        _df = group_filtered_df[(group_filtered_df.experiment_sub_id.str.contains(experiment_label))]
        total_experiments = len(_df.experiment_sub_id.unique())
        hcp_label_frequency_dict = {}
        # `roi_label` (not `hcp_label`) so the notebook-level `hcp_label` path
        # defined in the first cell is not overwritten.
        for roi_label in _df.hcp_label.unique():
            __df = _df[(_df.hcp_label == roi_label)]
            n_experiment = len(__df["experiment_sub_id"].unique())
            if n_experiment >= retain_rois_by_thr*total_experiments:
                hcp_label_frequency_dict[roi_label] = n_experiment

        for ms,hemi in zip(['o','^'],["CONTRA","IPSI"]):
            __df = _df[(_df["hcp_label"].isin(list(hcp_label_frequency_dict))) & (_df["hcp_label"].str.startswith(hemi))]
            X = __df.groupby("hcp_label")[[power_col,"geodesic_distance"]].mean()
            ax.scatter(X["geodesic_distance"],X[power_col],marker=ms)
            for roi_label, row in X.iterrows():
                ax.text(
                    row["geodesic_distance"],
                    row[power_col],
                    roi_label.split("_")[-1],
                    rotation=0,fontsize=FONTSIZE, va="center", ha="center"
                )

        ax.tick_params(axis="both",labelsize=FONTSIZE)
        # scientific notation
        ax.yaxis.set_major_formatter(lambda value, _pos: f"{value:.1e}")
        ax.set_title(experiment_label,fontsize=FONTSIZE)
        ax.set_ylabel("%BOLD Power",fontsize=FONTSIZE)
        # NOTE(review): the log transform above is commented out and the distance
        # is demeaned, so this axis label looks out of date; confirm the wording.
        ax.set_xlabel("log(geodesic distance)",fontsize=FONTSIZE)

In [None]:

# Summary heatmap of the group-level mixed-model p-values: rows are the modelled
# power types, columns are the predictors.
fig, ax = plt.subplots(figsize=(2,2),dpi=300,layout="tight")
# Recode into a copy: 1 = significant, 0 = not significant, -1 = term not in the model.
# -1 entries also satisfy "< .05", so the n/a mask is applied last.
masked_sig = mixed_glm_cohort_pvalues < .05
masked_nan = mixed_glm_cohort_pvalues == -1
pvalue_matrix = np.zeros_like(mixed_glm_cohort_pvalues)
pvalue_matrix[~masked_sig] = 0
pvalue_matrix[masked_sig] = 1
pvalue_matrix[masked_nan] = -1
im = ax.imshow(pvalue_matrix, interpolation='none',cmap='Reds')
# NOTE(review): make_axes_locatable is only imported inside the previous cell;
# this cell fails on a fresh kernel unless that cell ran first.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.05)
cbar = plt.colorbar(im,cax=cax)
cbar.ax.set_yticks([-1,0,1])
cbar.ax.set_yticklabels(["n/a","n.s.","*"], fontsize=FONTSIZE)
for _spine in ["top","right","bottom","left"]:
    ax.spines[_spine].set_visible(False)
ax.set_yticks([i for i in range(len(bold_f_type_strs))])
ax.set_yticklabels(bold_f_type_strs, fontsize=FONTSIZE-2, rotation=0)
ax.set_xticks([i for i in range(len(bold_f_type_strs)+2)])
ax.set_xticklabels(bold_f_type_strs+["hemi","gd"], fontsize=FONTSIZE-2, rotation=90)
ax.set_ylabel("BOLD Power",fontsize=FONTSIZE)
ax.set_xlabel("Variables",fontsize=FONTSIZE)
ax.set_title("Linear mixed GLM on power",fontsize=FONTSIZE)
ax.tick_params("both",pad=0,width=.25,length=2)


Hmm, there is a positive relationship between geodesic distance (gd) and power at f2-f1. On inspection, it appears that the .75Hz maps can produce high-power false positives in areas outside the visual hierarchy. Solution: only look at this relationship in predefined regions.

OK. Let's look at each dataset and only include ROIs that appear in >= 50% of datasets (i.e. at least 5 of 9 datasets); `thr` is used to define this threshold.

In [41]:
def update_hcp_formula(formula, df):
    """Reduce a model formula to the predictors that take exactly two values in `df`.

    Parameters
    ----------
    formula : str
        Formula of the form "<dependent> ~ a + b + ...". Whitespace is ignored.
    df : pandas.DataFrame
        Data the formula will be fitted on. Must contain every predictor named in
        `formula`, plus a "binary_f1:binary_f2" column when the dependent variable
        starts with "bold_power_f2_sub_f1".

    Returns
    -------
    tuple[str, int]
        The rebuilt formula ("<dependent> ~ a+b") and the number of predictors kept.
        When nothing is kept the right-hand side is empty and the count is 0.
    """
    def _has_two_levels(column):
        # Predictors that are constant (or many-valued) in this subset are dropped
        return len(df[column].unique()) == 2

    lhs = formula.split("~")[0].replace(" ", "")
    rhs_terms = formula.split("~")[-1].replace(" ", "").split("+")
    kept_terms = [term for term in rhs_terms if _has_two_levels(term)]

    # The f1-by-f2 interaction is only considered for the f2-f1 power model
    interaction = "binary_f1:binary_f2"
    if formula.startswith("bold_power_f2_sub_f1") and _has_two_levels(interaction):
        kept_terms.append(interaction)

    return f"{lhs} ~ {'+'.join(kept_terms)}", len(kept_terms)

def power_effects_across_hcp_labels(group_filtered_df, formula, experiment_type="All"):
    """Fit one mixed linear model per HCP label and plot the results per hemisphere.

    For the "CONTRA" and "IPSI" label groups (labels ordered by geodesic distance)
    two panels are drawn: the top one shows how many experiments contribute rows to
    each label, the bottom one shows the fitted coefficient of every predictor.
    "*" marks p < .05 and "N/A" marks predictors that could not be estimated for
    that label. The figure is saved as a PNG with a relative file name.

    Parameters
    ----------
    group_filtered_df : pandas.DataFrame
        Must provide `binary_f1`, `binary_f2`, `geodesic_distance`, `hcp_label`,
        `experiment_sub_id` and every column named in `formula`. The frame is
        copied; the caller's object is not modified.
    formula : str
        Formula of the form "<dependent> ~ a + b + ...". Everything from
        " + hemi" onwards is left out of the rows shown in the coefficient panel.
    experiment_type : str
        "All", or a substring used to select rows through `experiment_sub_id`.
    """
    assert experiment_type in ["All", "Normal", "3TNormal", "7TNormal", "3TVary", "7TVary", "3TVary_020", "7TVary_020", "3TVary_021", "7TVary_021"]

    import matplotlib.colors as mcolors

    hemis = ["CONTRA", "IPSI"]
    interaction_term = "binary_f1:binary_f2"

    # Work on a private copy so the interaction column added below never leaks into
    # (or triggers chained-assignment warnings on) the frame owned by the caller.
    group_filtered_df = group_filtered_df.copy()

    # Create interaction term for f1 and f2 vertices
    group_filtered_df[interaction_term] = (
        (group_filtered_df.binary_f1 > 0).astype(int) * (group_filtered_df.binary_f2 > 0).astype(int)
    )
    group_filtered_df[interaction_term] -= group_filtered_df[interaction_term].mean()  # Demean

    # HCP labels ordered by geodesic distance, split per hemisphere (sorted once)
    labels_by_gd = group_filtered_df.sort_values(by="geodesic_distance").hcp_label.unique()
    labels_per_hemi = {hemi: [i for i in labels_by_gd if i.startswith(hemi)] for hemi in hemis}
    # Count total ROIs to control figure size better
    contra_ipsi_hcp_label_count = sum(len(labels) for labels in labels_per_hemi.values())

    # Rows of the requested experiment type; identical for every label and hemisphere
    if experiment_type == "All":
        experiment_df = group_filtered_df
    else:
        experiment_df = group_filtered_df[group_filtered_df.experiment_sub_id.str.contains(experiment_type)]
    total_experiments = len(experiment_df.experiment_sub_id.unique())

    fig, ax_dict = plt.subplot_mosaic(
        [["CONTRA_COUNT", "IPSI_COUNT"], ["CONTRA_COEFS", "IPSI_COEFS"]],
        figsize=(.5+(6/20*contra_ipsi_hcp_label_count), 2.5), layout="constrained", dpi=300,
    )

    for hemi in hemis:

        hcp_labels_ordered_by_gd = labels_per_hemi[hemi]
        n_hcp_labels = len(hcp_labels_ordered_by_gd)
        if n_hcp_labels == 0:
            # Nothing to show for this hemisphere: hide both of its panels
            for ax_label in [f"{hemi}_COUNT", f"{hemi}_COEFS"]:
                ax_dict[ax_label].set_visible(False)
            continue

        # Predictors displayed as rows of the coefficient panel
        hcp_formula = formula.split(" + hemi")[0]
        all_variables = hcp_formula.replace(" ", "").split("~")[1].split("+")
        if hcp_formula.startswith("bold_power_f2_sub_f1"):
            all_variables.append(interaction_term)
        n_variables = len(all_variables)

        coefs = np.zeros((n_variables, n_hcp_labels))
        pvals = np.zeros((n_variables, n_hcp_labels))
        experiments_per_hcp_label = np.zeros((1, n_hcp_labels))

        for hcp_ix, hcp_label in enumerate(hcp_labels_ordered_by_gd):
            hcp_group_filtered_df = experiment_df[experiment_df.hcp_label == hcp_label]
            # Fraction of experiments contributing at least one row to this label
            experiments_per_hcp_label[0, hcp_ix] = len(hcp_group_filtered_df.experiment_sub_id.unique()) / total_experiments
            # Keep only the predictors that actually vary within this label
            label_formula, n_vars = update_hcp_formula(formula, hcp_group_filtered_df)

            # `result` stays None whenever no usable fit exists for this label
            result = None
            if n_vars != 0:
                if hcp_group_filtered_df.shape[0] <= 2:
                    print(f"{hcp_label} only has {hcp_group_filtered_df.shape[0]} experiments. Skipping.")
                else:
                    model = mixedlm(
                        label_formula,
                        hcp_group_filtered_df,
                        groups=hcp_group_filtered_df["experiment_sub_id"],
                    )
                    try:
                        result = model.fit()
                    except Exception:
                        # Best effort: a failed fit is reported as N/A in the panel.
                        # (KeyboardInterrupt/SystemExit are deliberately not caught.)
                        result = None

            fitted_terms = label_formula.split("~")[-1].replace(" ", "").split("+")
            for m_ix, m in enumerate(all_variables):
                if result is None or m not in fitted_terms:
                    pval = np.nan
                    coef = np.nan
                else:
                    pval = result.pvalues[m]
                    coef = result.params[m]

                coefs[m_ix, hcp_ix] = coef
                pvals[m_ix, hcp_ix] = pval

        # --- Top panel: number of experiments per label ---
        ax = ax_dict[f"{hemi}_COUNT"]
        norm = mcolors.Normalize(vmin=0, vmax=1)
        cax = ax.imshow(experiments_per_hcp_label, cmap="BuPu", norm=norm)
        divider = make_axes_locatable(ax)
        cbar_ax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = fig.colorbar(cax, cax=cbar_ax)
        #cbar.set_label("Experiment count", fontsize=FONTSIZE-2)
        cbar.set_ticks([0, 1])
        cbar.set_ticklabels([0, int(total_experiments)], fontsize=FONTSIZE-2)
        for i in range(experiments_per_hcp_label.shape[1]):
            n_for_label = experiments_per_hcp_label[0, i]*total_experiments
            # White text on the darkest cells (> 80% of experiments), black otherwise
            c = 'w' if n_for_label > .8*total_experiments else 'k'
            ax.text(i, 0, f"{n_for_label:.0f}", c=c, va="center", ha="center", fontsize=FONTSIZE, weight="bold")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(f"{hemi}", fontsize=FONTSIZE)

        # --- Bottom panel: model coefficients ---
        ax = ax_dict[f"{hemi}_COEFS"]
        # When no coefficient could be estimated, fall back to 0 so the colour limits
        # below stay finite; NaN limits cannot be used with TwoSlopeNorm.
        has_estimates = not np.isnan(coefs).all()
        coef_min = np.nanmin(coefs) if has_estimates else 0.0
        coef_max = np.nanmax(coefs) if has_estimates else 0.0
        # The limits must straddle the centre (0), so one-sided ranges get a tiny opposite bound
        _min = coef_min if coef_min < 0 else -.00001
        _max = coef_max if coef_max > 0 else .00001
        norm = mcolors.TwoSlopeNorm(vmin=_min, vmax=_max, vcenter=0)
        cax = ax.imshow(coefs, cmap="bwr", norm=norm)
        divider = make_axes_locatable(ax)
        cbar_ax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = fig.colorbar(cax, cax=cbar_ax)
        #cbar.set_label("GLM coefs", fontsize=FONTSIZE-2)
        cbar.set_ticks([coef_min, 0, coef_max])
        cbar.set_ticklabels([f"{i:.1e}" for i in cbar.get_ticks().astype(float)], fontsize=FONTSIZE-2)
        ax.set_yticks(range(n_variables))
        ax.set_yticklabels(all_variables, fontsize=FONTSIZE-2)
        ax.set_xticks(range(n_hcp_labels))
        ax.set_xticklabels([i.split("_")[-1] for i in hcp_labels_ordered_by_gd], fontsize=FONTSIZE-2, rotation=90)
        for row_ix, row in enumerate(pvals):
            for col_ix, element in enumerate(row):
                _FONTSIZE = FONTSIZE
                if element < .05:
                    _text = "*"
                    _FONTSIZE += 2
                elif np.isnan(element):
                    _text = "N/A"
                    _FONTSIZE -= 2
                else:
                    _text = ""
                ax.text(col_ix, row_ix, _text, ha="center", va="center", fontsize=_FONTSIZE, weight="bold")

    # Frameless panels with (almost) invisible ticks
    for ax_key, ax in ax_dict.items():
        for _spine in ["right", "bottom", "top", "left"]:
            ax.spines[_spine].set_visible(False)
        ax.tick_params("both", pad=.5, width=0, length=0.25)

    # Title shows the formula without the hemi / geodesic-distance terms
    title_formula = formula.split("+ hemi")[0]
    title_formula = title_formula.split("+ geo")[0]
    fig.suptitle(f"{title_formula}, {experiment_type}", fontsize=FONTSIZE)

    frequency_type = formula.replace(" ", "").split("~")[0]
    fig.savefig(f"experiment-{experiment_type}_frequency-{frequency_type}_power_across_hcp_labels.png", dpi='figure')

In [None]:
import warnings
from mpl_toolkits.axes_grid1 import make_axes_locatable

warnings.filterwarnings("ignore")

# Analysis variants. Each value is a 3-tuple:
#   [0] substring an `experiment_sub_id` must contain to enter the group model (None = no extra filter)
#   [1] experiment substring and [2] task substring selecting the datasets used to threshold ROIs
additional_rules_dict = {
    "All": (None, "Normal", "Normal"),
    "Normal": ("Normal","3TNormal", "entrain"),
    "3TNormal": ("3TNormal","3TNormal","entrain"),
    "7TNormal": ("7TNormal","7TNormal","AttendAway"),
    "3TVary": ("3TVary","3TNormal","entrain"),
    "7TVary": ("7TVary","7TNormal","AttendAway"),
}
filter_outliers = True
apply_log_to_gd = True

# Cohort layout: the four lists are zipped together, one entry per dataset
experiment_ids = ["3TNormal"]*18+["7TNormal"]*4+["3TVary"]*6+["7TVary"]*6
sub_ids = ["000","002","003","004","005","006","007","008","009"]*2+["Pilot001","Pilot009","Pilot010","Pilot011"]+["020"]*3+["021"]*3+["020"]*3+["021"]*3
roi_task_ids = ["entrain"]*18+["AttendAway"]*4+[f"entrain{i}" for i in ["A","B","C","D","E","F"]]*2
task_ids = ["control"]*9+["entrain"]*9+["AttendAway"]*4+[f"entrain{i}" for i in ["A","B","C","D","E","F"]]*2
bold_f_type_strs = ["f1","f2","f2_sub_f1","2f1"]

n_sub_ids = len(sub_ids)
n_bold_f_types = len(bold_f_type_strs)
# Predictors of every model: one binary map per frequency type, +1 hemi column, +1 gd column
model_term_names = [f"binary_{i}" for i in bold_f_type_strs] + ["hemi","geodesic_distance"]
n_model_terms = len(model_term_names)
# `im_code` values found in `expanded_df` -> suffix used in the column names built below
im_code_to_suffix = {"f1": "f1", "f2": "f2", "f2-f1": "f2_sub_f1", "2f1": "2f1"}

all_dfs_to_plot = {}
for k, v in additional_rules_dict.items():
    additional_rules = v[0]
    experiment_label = v[1]
    task_condition = v[2]
    # Verbose
    if filter_outliers:
        print("Filtering outliers during mixed GLM.")
    if apply_log_to_gd:
        print("Applying log to geodesic distance during mixed GLM.")
    # Get ROIs by filtering across the reference datasets and based on pct of experiments with that ROI
    dfs_to_plot = {}
    for thr in [.5]:
        keep_rois = {}
        for im_type in bold_f_type_strs:
            im_df = im_code_df_dict[im_type].copy()
            # Select only the reference experiment & task condition of this variant
            filter_im_df = im_df[(im_df.experiment_sub_id.str.contains(experiment_label)) & (im_df.experiment_sub_id.str.contains(task_condition))]
            n_experiments = len(filter_im_df.experiment_sub_id.unique())
            # Calculate threshold based on number of experiments
            _thr = n_experiments * thr
            # Get ROI count across all experiments (number of experiments each ROI appears in)
            roi_count = {}
            for experiment_sub_id in filter_im_df.experiment_sub_id.unique():
                for hcp_label in filter_im_df[(filter_im_df.experiment_sub_id == experiment_sub_id)].hcp_label.unique():
                    roi_count[hcp_label] = roi_count.get(hcp_label, 0) + 1
            # Filter out ROIs with less than threshold
            thresholded_roi_list = [hcp_label for hcp_label, count in roi_count.items() if count >= _thr]
            # Filter df to exclude vertices not in `thresholded_roi_list`
            im_df = im_df[(im_df.hcp_label.isin(thresholded_roi_list))]
            keep_rois[im_type] = thresholded_roi_list

            # Column sanity check per experiment. Nothing is drawn here; `power` and `gd`
            # are only looked up so that a missing column is reported.
            for experiment_sub_id in im_df.experiment_sub_id.unique():
                try:
                    sub_im_df = im_df[(im_df.experiment_sub_id==experiment_sub_id)]
                    power = sub_im_df[f"bold_power_{im_type}"]
                    gd = sub_im_df["geodesic_distance"]
                except KeyError:
                    # Report the experiment being iterated (previously an unrelated `sub_id`)
                    print(f"Skip {experiment_sub_id}")

        # Run statistics
        fig, ax_dict = plt.subplot_mosaic([bold_f_type_strs], layout="constrained", figsize=(8,2),dpi=400)
        mixed_glm_cohort_pvalues = np.zeros((n_bold_f_types, n_model_terms))
        mixed_glm_cohort_coefs = np.zeros((n_bold_f_types, n_model_terms))
        for bold_power_type_ix, bold_power_type in enumerate(bold_f_type_strs):

            ax = ax_dict[bold_power_type]
            power_col = f"bold_power_{bold_power_type}"

            group_frames = []
            cohort_pvalues = np.zeros((n_sub_ids, n_model_terms))
            for sub_ix, (experiment_id,sub_id,roi_task_id,task_id) in enumerate(zip(experiment_ids,sub_ids,roi_task_ids,task_ids)):

                # Reorganize: one output row per vertex of this dataset.
                # The ROI-defining task is matched against the zipped `roi_task_id`
                # (the column used to be compared with itself, which filtered nothing).
                sub_expanded_df = expanded_df[
                    (expanded_df.sub_id==sub_id)
                    & (expanded_df.experiment_id==experiment_id)
                    & (expanded_df.roi_task_id==roi_task_id)
                    & (expanded_df.task_id==task_id)
                ]
                data_dict = defaultdict(list)
                for vertex_id in sub_expanded_df.vertex_id.unique():
                    _df = sub_expanded_df[sub_expanded_df.vertex_id==vertex_id]
                    # Power is NaN and the binary flag 0 unless the vertex has a row for that frequency
                    power_by_suffix = {suffix: np.nan for suffix in bold_f_type_strs}
                    present_by_suffix = {suffix: 0 for suffix in bold_f_type_strs}
                    for _, row in _df.iterrows():
                        suffix = im_code_to_suffix.get(row.im_code)
                        if suffix is None:
                            # Other IM codes are not modelled here
                            continue
                        present_by_suffix[suffix] = 1
                        power_by_suffix[suffix] = row.power
                    for suffix in bold_f_type_strs:
                        data_dict[f"bold_power_{suffix}"].append(power_by_suffix[suffix])
                    for suffix in bold_f_type_strs:
                        data_dict[f"binary_{suffix}"].append(present_by_suffix[suffix])
                    data_dict["im_count"].append(_df.shape[0])
                    data_dict["vertex_id"].append(vertex_id)
                    data_dict["sub_id"].append(sub_id)
                    # Vertex-level attributes are taken from the last row of the vertex
                    data_dict["hcp_label"].append(row.hcp_roi)
                    data_dict["hemi"].append(row.hemi)
                    data_dict["geodesic_distance"].append(row.geodesic_distance)
                    data_dict["experiment_sub_id"].append(row.experiment_sub_id)
                _df = pd.DataFrame(data_dict)
                if _df.empty:
                    # No vertices for this dataset: mark the whole row as n/a
                    print(f"No vertices found for {experiment_id} {sub_id} ({task_id}). Skipping.")
                    cohort_pvalues[sub_ix, :] = -1
                    continue

                # STATS
                # Model a frequency's power with spatial information of the other
                # stimulating and IM frequencies
                keep_rows = [power_col, "im_count", "hcp_label","vertex_id"] + [f"binary_{i}" for i in bold_f_type_strs] + ["hemi","geodesic_distance","experiment_sub_id"]
                filtered_df = _df[(pd.notna(_df[power_col]))][keep_rows]
                y = filtered_df[power_col]
                X = filtered_df[model_term_names]
                if y.shape[0] == 0:
                    cohort_pvalues[sub_ix, :] = -1
                    continue

                # NOTE(review): the column indices below line up with `model_term_names`
                # only while add_constant leaves X unchanged, which it does by default
                # when X already holds a constant column (binary_<bold_power_type> looks
                # constant after the notna filter) - confirm against the statsmodels docs.
                X = sm.add_constant(X)
                model = sm.OLS(y,X).fit()

                for var_ix, (var,pval) in enumerate(model.pvalues.items()):
                    cohort_pvalues[sub_ix,var_ix] = -1 if np.isnan(pval) else pval
                # Store data for group level analysis (mixed lm model)
                group_frames.append(filtered_df)

            if len(keep_rois[bold_power_type]) == 1:
                print(f"Only one ROI found for {bold_power_type}, can't perform mixed GLM due to singular matrix")
                continue
            if not group_frames:
                print(f"No data found for {bold_power_type}, can't perform mixed GLM")
                continue
            group_filtered_df = pd.concat(group_frames)
            # Filter vertices that contain ROIs based on keep_rois
            group_filtered_df = group_filtered_df[(group_filtered_df.hcp_label.isin(keep_rois[bold_power_type]))]
            dfs_to_plot[bold_power_type] = group_filtered_df

            # STATS with mixed linear models on group level data
            # Remove control condition (and apply the variant-specific experiment filter)
            keep_mask = ~group_filtered_df.experiment_sub_id.str.contains("control")
            if additional_rules is not None:
                keep_mask = keep_mask & group_filtered_df.experiment_sub_id.str.contains(additional_rules)
            # Explicit copy: the columns assigned below must not touch `dfs_to_plot`
            group_filtered_df = group_filtered_df[keep_mask].copy()
            print("After removing control:", group_filtered_df.shape)
            # Log transform gd
            if apply_log_to_gd:
                group_filtered_df["geodesic_distance"] = np.log(group_filtered_df["geodesic_distance"])
            # Demean all vars
            group_filtered_df["geodesic_distance"] = group_filtered_df.geodesic_distance - group_filtered_df.geodesic_distance.mean()
            group_filtered_df[model_term_names] = group_filtered_df[model_term_names] - group_filtered_df[model_term_names].mean()
            # Detect upperbound outlier information (1st/99th percentile fences)
            q1 = group_filtered_df[power_col].quantile(.01)
            q3 = group_filtered_df[power_col].quantile(.99)
            iqr = q3-q1
            lower_bound = q1 - 1.5 * iqr
            upper_bound = q3 + 1.5 * iqr
            outliers = (group_filtered_df[power_col] > upper_bound)
            print(f"Identified {outliers.sum()} outliers")
            outliers_df = group_filtered_df[outliers]
            bold_ymax = group_filtered_df[~outliers][power_col].max()
            outlier_hcp_labels = group_filtered_df[outliers]["hcp_label"].unique()
            print("OUTLIER ROIS INCLUDE:", outlier_hcp_labels, outliers_df.shape)
            if filter_outliers:
                group_filtered_df = group_filtered_df[~outliers]
            # Get formula
            formula = get_formula(bold_power_type, bold_f_type_strs)
            # Remove hemi regressor if only 1 laterality is found
            if not any(i.startswith("CONTRA") for i in keep_rois[bold_power_type]) or not any(i.startswith("IPSI") for i in keep_rois[bold_power_type]):
                formula = formula.replace("+ hemi ", " ")

            # Remove maps with no vertex assignment.
            # NOTE(review): the binary columns are already demeaned here, so a sum of
            # exactly 0 flags constant columns; a non-constant column whose demeaned
            # values happen to sum to exactly 0.0 would be dropped as well.
            remove_l = []
            for i in group_filtered_df.columns:
                if i.startswith("binary"):
                    _sum = group_filtered_df[i].sum()
                    if _sum == 0:
                        remove_l.append(i)
            for r in remove_l:
                formula = formula.replace(f"+ {r} "," ")
            # Mixed LM
            try:
                model = mixedlm(
                    formula,
                    group_filtered_df,
                    groups=group_filtered_df["experiment_sub_id"],
                )
                result = model.fit()
            except np.linalg.LinAlgError as e:
                print(f"An error occured {type(e).__name__} - {e}")
                continue
            # Predict BOLD power with model
            model_cols = [i.strip() for i in formula.split("~")[-1].split("+")] + ["experiment_sub_id"]
            _data = group_filtered_df[model_cols]
            y_predict = result.predict(_data)
            print(result.summary())

            # Plot single subject OLS fits (1 = p < .05, 0 = n.s., -1 = n/a)
            masked_sig = cohort_pvalues < .05
            masked_nan = cohort_pvalues == -1
            cohort_pvalues[~masked_sig] = 0
            cohort_pvalues[masked_sig] = 1
            cohort_pvalues[masked_nan] = -1
            im = ax.imshow(cohort_pvalues, interpolation='none',cmap='Reds')
            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.05)
            cbar = plt.colorbar(im,cax=cax)
            cbar.ax.set_yticks([-1,0,1])
            cbar.ax.set_yticklabels(["n/a","n.s.","*"], fontsize=FONTSIZE)
            for _spine in ["top","right","bottom","left"]:
                ax.spines[_spine].set_visible(False)
            ax.set_yticks([i for i in range(len(sub_ids))])
            ax.set_yticklabels(sub_ids, fontsize=FONTSIZE-2, rotation=0)
            ax.set_xticks([i for i in range(len(bold_f_type_strs)+2)])
            ax.set_xticklabels(bold_f_type_strs+["hemi","gd"], fontsize=FONTSIZE-2, rotation=90)
            ax.set_title(bold_power_type,fontsize=FONTSIZE)
            ax.tick_params("both",pad=0,width=.25,length=2)

            # fill `mixed_glm_cohort_pvalues`
            for i,m in enumerate(model_term_names):
                try:
                    pval = result.pvalues[m]
                    mixed_glm_cohort_pvalues[bold_power_type_ix,i] = pval
                    coef = result.params[m]
                    mixed_glm_cohort_coefs[bold_power_type_ix,i] = coef
                except KeyError:
                    mixed_glm_cohort_pvalues[bold_power_type_ix,i] = -1 # Not included in model
                    mixed_glm_cohort_coefs[bold_power_type_ix,i] = 0

            # Plot association to power effects across HCP labels
            power_effects_across_hcp_labels(group_filtered_df, formula, experiment_type=k)

        # Plot mixed GLM p-values
        fig, ax = plt.subplots(figsize=(2,2),dpi=300,layout="tight")
        masked_sig = mixed_glm_cohort_pvalues < .05
        masked_nan = mixed_glm_cohort_pvalues == -1
        pvalue_matrix = np.zeros_like(mixed_glm_cohort_pvalues)
        pvalue_matrix[~masked_sig] = 0
        pvalue_matrix[masked_sig] = 1
        pvalue_matrix[masked_nan] = -1
        im = ax.imshow(pvalue_matrix, interpolation='none',cmap='Reds',)
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = plt.colorbar(im,cax=cax)
        cbar.ax.set_yticks([-1,0,1])
        cbar.ax.set_yticklabels(["n/a","n.s.","*"], fontsize=FONTSIZE)
        for _spine in ["top","right","bottom","left"]:
            ax.spines[_spine].set_visible(False)
        ax.set_yticks([i for i in range(len(bold_f_type_strs))])
        ax.set_yticklabels(bold_f_type_strs, fontsize=FONTSIZE-2, rotation=0)
        ax.set_xticks([i for i in range(len(bold_f_type_strs)+2)])
        ax.set_xticklabels(bold_f_type_strs+["hemi","gd"], fontsize=FONTSIZE-2, rotation=90)
        ax.set_ylabel("BOLD Power",fontsize=FONTSIZE)
        ax.set_xlabel("Variables",fontsize=FONTSIZE)
        ax.set_title(f"{thr} Linear mixed GLM on power",fontsize=FONTSIZE)
        ax.tick_params("both",pad=0,width=.25,length=2)
        # Plot mixed GLM coefficients (colour range clipped to a quarter of the largest |coef|)
        fig, ax = plt.subplots(figsize=(2,2),dpi=300,layout="tight")
        _max = np.abs(mixed_glm_cohort_coefs).max()
        cax = ax.imshow(mixed_glm_cohort_coefs, cmap='bwr',vmin=-_max/4, vmax=_max/4)
        cbar = fig.colorbar(cax, ax=ax, shrink=.3)
        for _spine in ["top","right","bottom","left"]:
            ax.spines[_spine].set_visible(False)
        ax.set_yticks([i for i in range(len(bold_f_type_strs))])
        ax.set_yticklabels(bold_f_type_strs, fontsize=FONTSIZE-2, rotation=0)
        ax.set_xticks([i for i in range(len(bold_f_type_strs)+2)])
        ax.set_xticklabels(bold_f_type_strs+["hemi","gd"], fontsize=FONTSIZE-2, rotation=90)
        ax.set_ylabel("BOLD Power",fontsize=FONTSIZE)
        ax.set_xlabel("Variables",fontsize=FONTSIZE)
        ax.set_title(f"{additional_rules}/{thr} Linear mixed GLM on power",fontsize=FONTSIZE)
        ax.tick_params("both",pad=0,width=.25,length=2)

    all_dfs_to_plot[k] = dfs_to_plot

Get HCP color map

In [None]:
"""Get HCP labels
"""
dlabel_dir = Path("/opt/app/notebooks/data/dlabels")
hcp_label = dlabel_dir / "Q1-Q6_RelatedValidation210.CorticalAreas_dil_Final_Final_Areas_Group_Colors.32k_fs_LR.dlabel.nii"

_HCP_INFO = !wb_command -file-information {hcp_label}
HCP_LABELS = []
HCP_COUNTER = 0
hcp_c_dict = {}
for i in _HCP_INFO:
    if len(i) == 60 and any(["L_" in i, "R_" in i]):
        hcp_colors = tuple([float(f"0.{k}") for k in [j.split(' ') [0] for j in i.split('0.')][-3:]] + [1])
        if "R_" in i:
            hcp_c_dict[i.split("R_")[-1].split("_ROI")[0]]=hcp_colors

hcp_c_dict["V1"]

Plot only contralateral HCP rois vs geodesic distance

In [None]:
log_gd = False

mosaic = ["f1","f2","f2_sub_f1","2f1"]
for k, v in additional_rules_dict.items():

    # One row of panels per analysis variant, one panel per BOLD power type
    fig, ax_dict = plt.subplot_mosaic([mosaic], layout="constrained", figsize=(5.5, 1.5), dpi=300)

    dfs_to_plot = all_dfs_to_plot[k].copy()
    for bold_power_type in mosaic:
        ax = ax_dict[bold_power_type]
        power_col = f"bold_power_{bold_power_type}"
        # Get df for power of interest; variants without a fitted model have no entry
        try:
            group_filtered_df = dfs_to_plot[bold_power_type].copy()
        except KeyError:
            continue
        # Refilter dataframes: one row per (experiment, contralateral HCP label)
        # holding the mean geodesic distance and mean power of its vertices
        plot_dict = defaultdict(list)
        for experiment_sub_id in group_filtered_df.experiment_sub_id.unique():
            experiment_df = group_filtered_df[group_filtered_df.experiment_sub_id == experiment_sub_id]
            for hcp_label in experiment_df.hcp_label.unique():
                if hcp_label.startswith("IPSI"):
                    continue

                experiment_hcp_df = experiment_df[experiment_df.hcp_label == hcp_label]
                vertex_count = experiment_hcp_df.shape[0]
                gd = experiment_hcp_df["geodesic_distance"].mean()
                power = experiment_hcp_df[power_col].mean()
                plot_dict["hcp_label"].append(hcp_label.split("_")[-1])
                plot_dict["experiment_sub_id"].append(experiment_sub_id)
                plot_dict["vertex_count"].append(vertex_count)
                plot_dict["geodesic_distance"].append(gd)
                plot_dict[power_col].append(power)
        plot_df = pd.DataFrame(plot_dict)
        if plot_df.empty:
            # No contralateral label at all: leave an empty, titled panel
            ax.set_title(f"{k}, {bold_power_type}")
            continue

        if log_gd:
            plot_df["geodesic_distance"] = np.log(plot_df.geodesic_distance)

        power_max = 0
        gd_max = 0
        gd_min = None
        for experiment_sub_id in plot_df.experiment_sub_id.unique():
            if "control" in experiment_sub_id:
                continue
            # v[0] is the experiment filter of this variant (None = keep everything)
            if v[0] is not None and v[0] not in experiment_sub_id:
                continue
            experiment_sub_df = plot_df[(plot_df.experiment_sub_id==experiment_sub_id) & (plot_df.vertex_count>1)]
            if experiment_sub_df.empty:
                # Skipping avoids NaN min/max values, which would otherwise stick to
                # `gd_min` for the rest of the loop
                continue
            experiment_sub_df = experiment_sub_df.sort_values("geodesic_distance") # Sort by gd
            # Update min/max
            power_max = max(power_max, experiment_sub_df[power_col].max())
            gd_max = max(gd_max, experiment_sub_df.geodesic_distance.max())
            experiment_gd_min = experiment_sub_df.geodesic_distance.min()
            gd_min = experiment_gd_min if gd_min is None else min(gd_min, experiment_gd_min)

            # Thin line joining the ROIs of one experiment, markers coloured per HCP label
            ax.plot(experiment_sub_df.geodesic_distance, experiment_sub_df[power_col],c='k',zorder=5, linewidth=.2, alpha=.2)
            for _, row in experiment_sub_df.iterrows():
                ax.scatter(
                    row.geodesic_distance,
                    row[power_col],
                    s=10+row["vertex_count"]/10, c=hcp_c_dict[row.hcp_label],
                    marker="o", edgecolor='lightgrey',linewidth=.2, alpha=.6,
                    zorder=10
                )

        if gd_min is None:
            # Nothing was drawn, so there are no limits to build the ticks from
            ax.set_title(f"{k}, {bold_power_type}")
            continue

        ax.set_yticks([0,power_max])
        ax.set_yticklabels([f"{i:.2e}" if i!=0 else "0" for i in [0,power_max]],fontsize=FONTSIZE)
        # Blank x ticks placed 10% of gd_max outside the data range
        ax.set_xticks([gd_min-gd_max*.1, gd_max+gd_max*.1])
        ax.set_title(f"{k}, {bold_power_type}")
        ax.set_xticklabels([" "]*2, fontsize=FONTSIZE)

        for _spine in ["top","right","bottom",]:
            ax.spines[_spine].set_visible(False)
        ax.spines.left.set_linewidth(.25)
        ax.spines.left.set_bounds(0,power_max)

        ax.tick_params("y",pad=0,width=.25,length=4)
        ax.tick_params("x",pad=0,width=.25,length=0)

        if bold_power_type == "f1":
            ax.set_ylabel("%BOLD Power",fontsize=FONTSIZE)
        ax.set_xlabel("Geodesic distance from V1",fontsize=FONTSIZE)

Plot vertex count across HCP rois

parcellated

In [None]:

# Global variables
VERTEX_TO = 59412  # length of the per-vertex map handed to `dscalar`
hemi_dict = {
    "CONTRA": "L",
    "IPSI": "R",
}
palette_params = {
    "disp-zero": False,
    "disp-neg": False,
    "disp-pos": True,
    "pos-user": (0, 1),
    "neg-user": (-1,0),
    "interpolate": True,
}
PALETTE = "magma"

# Re-sort DF: one row per (experiment, IM code, HCP ROI)
vertex_count_per_roi = expanded_df.groupby(["experiment_sub_id","im_code","hcp_roi"]).count().reset_index()

# Inputs (the three lists are zipped: label, experiment_sub_id pattern, number of experiments)
n_experiments_all = [9,4,3,3,3,3]
hcp_roi_count_labels = ["3TNormalEntrain","7TNormalEntrain","3TVary020Entrain","7TVary020Entrain","3TVary021Entrain","7TVary021Entrain"]
patterns = ["3TNormal.*entrain","7TNormal.*AttendAway","3TVary_020.*entrain.*","7TVary_020.*entrain.*","3TVary_021.*entrain.*","7TVary_021.*entrain.*"]
im_codes = ["f1","f2","f2-f1","f1+f2","2f1","2f2","2f1-f2","2f2-f1"]

# ROI masks are the same for every cohort and IM code, so each file is read only once
roi_mask_cache = {}

for hcp_roi_count_label, pattern, n_experiments in zip(hcp_roi_count_labels, patterns, n_experiments_all):
    for im_code in im_codes:
        _vertex_count_per_roi = vertex_count_per_roi[(vertex_count_per_roi.experiment_sub_id.str.contains(pattern)) & (vertex_count_per_roi.im_code==im_code)] # Filter df based on im type
        print(hcp_roi_count_label, im_code, n_experiments)
        # Fraction of the cohort's experiments in which each ROI appears
        roi_counts = _vertex_count_per_roi.groupby("hcp_roi").size().reset_index(name="row_count")
        roi_counts["row_count"] = roi_counts["row_count"] / n_experiments
        cohort_count_map = np.zeros((VERTEX_TO,))
        for hemi_type in ["CONTRA","IPSI"]:
            hemi_roi_counts = roi_counts[(roi_counts.hcp_roi.str.startswith(hemi_type))]
            # CONTRA ROIs are painted on the left hemisphere, IPSI ROIs on the right
            hemi_to_map_to = hemi_dict[hemi_type]
            for roi, roi_fraction in zip(hemi_roi_counts.hcp_roi, hemi_roi_counts.row_count):
                hcp_suffix = roi.split("_")[-1]
                hcp_dscalar = f"/tmp/{hemi_to_map_to}_{hcp_suffix}_ROI.dscalar.nii"
                if hcp_dscalar not in roi_mask_cache:
                    roi_mask_cache[hcp_dscalar] = nib.load(hcp_dscalar).get_fdata()[0,:]==1
                cohort_count_map[roi_mask_cache[hcp_dscalar]] = roi_fraction

        # Render the same map three times: left surface, right surface, left flat map
        for hemisphere, flatmap, mapstyle_str in zip(["left","right","left"], [False,False,True], ["lh-surf","rh-surf","flat"]):
            png_out = f"{hcp_roi_count_label}_map-{mapstyle_str}_im-{im_code}_hcp-cohort-roi-count.png" # Save png path
            dscalar(
                png_out, cohort_count_map,
                orientation="portrait",
                hemisphere=hemisphere,
                palette=PALETTE,
                palette_params=palette_params,
                transparent=False,
                flatmap=flatmap,
                flatmap_style='hcp_border',
            )

clear_output()

In [None]:

# Cohort-level HCP ROI maps for the 3T control runs.
# For every im_code, each HCP ROI is assigned (number of experiments in which
# the ROI appears in `control_expanded_df`) / n_experiments, and the resulting
# map is rendered as three PNGs through wbplot's `dscalar`.

# Global variables
VERTEX_TO = 59412  # length of the map array handed to `dscalar`
# CONTRA_* ROIs are drawn with the L_* ROI masks, IPSI_* ROIs with the R_* masks.
hemi_dict = {
    "CONTRA": "L",
    "IPSI": "R",
}
# Forwarded unchanged to `dscalar(palette_params=...)`.
palette_params = {
    "disp-zero": False,
    "disp-neg": False,
    "disp-pos": True,
    "pos-user": (0, 1),
    "neg-user": (-1,0),
    "interpolate": True,
}
PALETTE = "magma"

# One row per (experiment_sub_id, im_code, hcp_roi) combination present in the data
vertex_count_per_roi = control_expanded_df.groupby(["experiment_sub_id","im_code","hcp_roi"]).count().reset_index()

# Inputs
n_experiments_all = [9]
hcp_roi_count_labels = ["3TControl"]
patterns = ["3TControl.*control"]
im_codes = ["f1","f2","f2-f1","f1+f2","2f1","2f2","2f1-f2","2f2-f1"]

# ROI masks are identical across im_codes, so each dscalar is read from disk only once.
roi_mask_cache = {}

for hcp_roi_count_label, pattern, n_experiments in zip(hcp_roi_count_labels, patterns, n_experiments_all):
    # The cohort filter does not depend on im_code: evaluate the regex once per cohort.
    in_cohort = vertex_count_per_roi.experiment_sub_id.str.contains(pattern)
    for im_code in im_codes:
        _vertex_count_per_roi = vertex_count_per_roi[in_cohort & (vertex_count_per_roi.im_code==im_code)]
        print(hcp_roi_count_label, im_code, n_experiments)
        # Number of experiments containing each ROI, as a fraction of the cohort size
        roi_counts = _vertex_count_per_roi.groupby("hcp_roi").size().reset_index(name="row_count")
        roi_counts["row_count"] = roi_counts["row_count"] / n_experiments
        cohort_count_map = np.zeros((VERTEX_TO,))
        for hemi_type in ["CONTRA","IPSI"]:
            hemi_to_map_to = hemi_dict[hemi_type]
            hemi_roi_counts = roi_counts[roi_counts.hcp_roi.str.startswith(hemi_type)]
            # hcp_roi is unique after the groupby, so walking label/value pairs
            # replaces the per-ROI re-filtering of roi_counts.
            for roi, row_count in zip(hemi_roi_counts.hcp_roi, hemi_roi_counts.row_count):
                hcp_suffix = roi.split("_")[-1]
                hcp_dscalar = f"/tmp/{hemi_to_map_to}_{hcp_suffix}_ROI.dscalar.nii"
                if hcp_dscalar not in roi_mask_cache:
                    # Boolean mask: entries of the dscalar's first row that equal 1
                    roi_mask_cache[hcp_dscalar] = nib.load(hcp_dscalar).get_fdata()[0,:]==1
                hcp_coords = roi_mask_cache[hcp_dscalar]
                cohort_count_map[hcp_coords] = row_count

        # Three renderings of the same map: left surface, right surface, and a
        # flatmap requested with hemisphere="left".
        for hemisphere, flatmap, mapstyle_str in zip(["left","right","left"], [False,False,True], ["lh-surf","rh-surf","flat"]):
            png_out = f"{hcp_roi_count_label}_map-{mapstyle_str}_im-{im_code}_hcp-cohort-roi-count.png" # Save png path
            dscalar(
                png_out, cohort_count_map,
                orientation="portrait",
                hemisphere=hemisphere,
                palette=PALETTE,
                palette_params=palette_params,
                transparent=False,
                flatmap=flatmap,
                flatmap_style='hcp_border',
            )

# Clears this cell's displayed output, including the progress prints above.
clear_output()

In [None]:
# Cohort-level HCP ROI maps for the 3T control runs.
# For every im_code, each HCP ROI is assigned (number of experiments in which
# the ROI appears in `control_expanded_df`) / n_experiments, and the resulting
# map is rendered as three PNGs through wbplot's `dscalar`.
# NOTE(review): this cell uses the same 3TControl inputs as the cell directly
# before it and therefore rewrites the same PNG files — consider keeping only one.

# Global variables
VERTEX_TO = 59412  # length of the map array handed to `dscalar`
# CONTRA_* ROIs are drawn with the L_* ROI masks, IPSI_* ROIs with the R_* masks.
hemi_dict = {
    "CONTRA": "L",
    "IPSI": "R",
}
# Forwarded unchanged to `dscalar(palette_params=...)`.
palette_params = {
    "disp-zero": False,
    "disp-neg": False,
    "disp-pos": True,
    "pos-user": (0, 1),
    "neg-user": (-1,0),
    "interpolate": True,
}
PALETTE = "magma"

# One row per (experiment_sub_id, im_code, hcp_roi) combination present in the data
vertex_count_per_roi = control_expanded_df.groupby(["experiment_sub_id","im_code","hcp_roi"]).count().reset_index()

# Inputs
n_experiments_all = [9]
hcp_roi_count_labels = ["3TControl"]
patterns = ["3TControl.*control"]
im_codes = ["f1","f2","f2-f1","f1+f2","2f1","2f2","2f1-f2","2f2-f1"]

# ROI masks are identical across im_codes, so each dscalar is read from disk only once.
roi_mask_cache = {}

for hcp_roi_count_label, pattern, n_experiments in zip(hcp_roi_count_labels, patterns, n_experiments_all):
    # The cohort filter does not depend on im_code: evaluate the regex once per cohort.
    in_cohort = vertex_count_per_roi.experiment_sub_id.str.contains(pattern)
    for im_code in im_codes:
        _vertex_count_per_roi = vertex_count_per_roi[in_cohort & (vertex_count_per_roi.im_code==im_code)]
        print(hcp_roi_count_label, im_code, n_experiments)
        # Number of experiments containing each ROI, as a fraction of the cohort size
        roi_counts = _vertex_count_per_roi.groupby("hcp_roi").size().reset_index(name="row_count")
        roi_counts["row_count"] = roi_counts["row_count"] / n_experiments
        cohort_count_map = np.zeros((VERTEX_TO,))
        for hemi_type in ["CONTRA","IPSI"]:
            hemi_to_map_to = hemi_dict[hemi_type]
            hemi_roi_counts = roi_counts[roi_counts.hcp_roi.str.startswith(hemi_type)]
            # hcp_roi is unique after the groupby, so walking label/value pairs
            # replaces the per-ROI re-filtering of roi_counts.
            for roi, row_count in zip(hemi_roi_counts.hcp_roi, hemi_roi_counts.row_count):
                hcp_suffix = roi.split("_")[-1]
                hcp_dscalar = f"/tmp/{hemi_to_map_to}_{hcp_suffix}_ROI.dscalar.nii"
                if hcp_dscalar not in roi_mask_cache:
                    # Boolean mask: entries of the dscalar's first row that equal 1
                    roi_mask_cache[hcp_dscalar] = nib.load(hcp_dscalar).get_fdata()[0,:]==1
                hcp_coords = roi_mask_cache[hcp_dscalar]
                cohort_count_map[hcp_coords] = row_count

        # Three renderings of the same map: left surface, right surface, and a
        # flatmap requested with hemisphere="left".
        for hemisphere, flatmap, mapstyle_str in zip(["left","right","left"], [False,False,True], ["lh-surf","rh-surf","flat"]):
            png_out = f"{hcp_roi_count_label}_map-{mapstyle_str}_im-{im_code}_hcp-cohort-roi-count.png" # Save png path
            dscalar(
                png_out, cohort_count_map,
                orientation="portrait",
                hemisphere=hemisphere,
                palette=PALETTE,
                palette_params=palette_params,
                transparent=False,
                flatmap=flatmap,
                flatmap_style='hcp_border',
            )

# Clears this cell's displayed output, including the progress prints above.
clear_output()

Dense (per-vertex) cohort count maps

In [None]:
# Dense (per-vertex) cohort maps for the entrainment experiments.
# For every (cohort, im_code) pair, each vertex is assigned
# (number of times it is listed across the cohort's experiments) / n_experiments,
# and the map is rendered as three PNGs through wbplot's `dscalar`.

# Global variables
VERTEX_TO = 59412  # length of the map array handed to `dscalar`
hemi_dict = {
    "CONTRA": "L",
    "IPSI": "R",
}
# Forwarded unchanged to `dscalar(palette_params=...)`.
palette_params = {
    "disp-zero": False,
    "disp-neg": False,
    "disp-pos": True,
    "pos-user": (0, 1),
    "neg-user": (-1,0),
    "interpolate": True,
}
PALETTE = "magma"
# Regexes for cohorts that are defined by an explicit list of ids instead of a
# single pattern (used when the entry in `patterns` is None).
# NOTE(review): these alternations are not anchored to "3TNormal" — confirm no
# other experiment_sub_id contains e.g. "_000_entrain".
pattern_dict = {
    "3TNormalEntrainL": "|".join([f"_{i}_entrain" for i in ["000","002","003","004","009"]]),
    "3TNormalEntrainR": "|".join([f"_{i}_entrain" for i in ["005","006","007","008"]])
}

# One row per (experiment_sub_id, im_code) holding the list of its vertex ids
dense_vertex_count = expanded_df.groupby(["experiment_sub_id","im_code"]).agg({"vertex_id": lambda x: list(x)}).reset_index()

# Inputs
n_experiments_all = [5,4,9,4,3,3,3,3]
hcp_roi_count_labels = ["3TNormalEntrainL","3TNormalEntrainR","3TNormalEntrain","7TNormalEntrain","3TVary020Entrain","7TVary020Entrain","3TVary021Entrain","7TVary021Entrain"]
patterns = [None,None,"3TNormal.*entrain","7TNormal.*AttendAway","3TVary_020.*entrain.*","7TVary_020.*entrain.*","3TVary_021.*entrain.*","7TVary_021.*entrain.*"]
im_codes = ["f1","f2","f2-f1","f1+f2","2f1","2f2","2f1-f2","2f2-f1"]

# zip() stops at the shortest list; fail loudly instead of silently dropping a cohort.
assert len(hcp_roi_count_labels) == len(patterns) == len(n_experiments_all), "Cohort input lists must have equal length."

for hcp_roi_count_label, pattern, n_experiments in zip(hcp_roi_count_labels, patterns, n_experiments_all):
    # Resolve the cohort regex once, before looping over im_codes. Rebinding
    # `pattern` inside the im_code loop made only the first im_code take the
    # `None` branch (a row-wise match over every stringified column, including
    # the vertex lists) while the remaining im_codes matched experiment_sub_id.
    # Every cohort is now selected the same way, on experiment_sub_id only.
    if pattern is None:
        pattern = pattern_dict[hcp_roi_count_label]
    in_cohort = dense_vertex_count.experiment_sub_id.str.contains(pattern)
    for im_code in im_codes:
        _dense_vertex_count = dense_vertex_count[in_cohort & (dense_vertex_count.im_code==im_code)]
        print(hcp_roi_count_label, im_code, n_experiments)
        cohort_count_map = np.zeros((VERTEX_TO,))
        for vertex_ids in _dense_vertex_count.vertex_id:
            # Unbuffered add: repeated ids within one list are each counted,
            # exactly like a per-vertex `+= 1` loop. The explicit integer dtype
            # keeps an empty list usable as an index array.
            np.add.at(cohort_count_map, np.asarray(vertex_ids, dtype=np.intp), 1)
        cohort_count_map = cohort_count_map / n_experiments

        # Three renderings of the same map: left surface, right surface, and a
        # flatmap requested with hemisphere="left".
        for hemisphere, flatmap, mapstyle_str in zip(["left","right","left"], [False,False,True], ["lh-surf","rh-surf","flat"]):
            png_out = f"{hcp_roi_count_label}_map-{mapstyle_str}_im-{im_code}_dense-cohort-roi-count.png" # Save png path
            dscalar(
                png_out, cohort_count_map,
                orientation="portrait",
                hemisphere=hemisphere,
                palette=PALETTE,
                palette_params=palette_params,
                transparent=False,
                flatmap=flatmap,
                flatmap_style='hcp_border',
            )

# Clears this cell's displayed output, including the progress prints above.
clear_output()

In [None]:

# Dense (per-vertex) cohort maps for the 3T control experiments.
# For every (cohort, im_code) pair, each vertex is assigned
# (number of times it is listed across the cohort's experiments) / n_experiments,
# and the map is rendered as three PNGs through wbplot's `dscalar`.

# Global variables
VERTEX_TO = 59412  # length of the map array handed to `dscalar`
hemi_dict = {
    "CONTRA": "L",
    "IPSI": "R",
}
# Forwarded unchanged to `dscalar(palette_params=...)`.
palette_params = {
    "disp-zero": False,
    "disp-neg": False,
    "disp-pos": True,
    "pos-user": (0, 1),
    "neg-user": (-1,0),
    "interpolate": True,
}
PALETTE = "magma"
# Regexes for cohorts that are defined by an explicit list of ids instead of a
# single pattern (used when the entry in `patterns` is None).
pattern_dict = {
    "3TNormalControlL": "|".join([f"_{i}_control" for i in ["000","002","003","004","009"]]),
    "3TNormalControlR": "|".join([f"_{i}_control" for i in ["005","006","007","008"]])
}

# One row per (experiment_sub_id, im_code) holding the list of its vertex ids
dense_vertex_count = control_expanded_df.groupby(["experiment_sub_id","im_code"]).agg({"vertex_id": lambda x: list(x)}).reset_index()

# Inputs
n_experiments_all = [5,4]
hcp_roi_count_labels = ["3TNormalControlL","3TNormalControlR"]
patterns = [None,None]
im_codes = ["f1","f2","f2-f1","f1+f2","2f1","2f2","2f1-f2","2f2-f1"]

# zip() stops at the shortest list; fail loudly instead of silently dropping a cohort.
assert len(hcp_roi_count_labels) == len(patterns) == len(n_experiments_all), "Cohort input lists must have equal length."

for hcp_roi_count_label, pattern, n_experiments in zip(hcp_roi_count_labels, patterns, n_experiments_all):
    # Resolve the cohort regex once, before looping over im_codes. Rebinding
    # `pattern` inside the im_code loop made only the first im_code take the
    # `None` branch (a row-wise match over every stringified column, including
    # the vertex lists) while the remaining im_codes matched experiment_sub_id.
    # Every cohort is now selected the same way, on experiment_sub_id only.
    if pattern is None:
        pattern = pattern_dict[hcp_roi_count_label]
    in_cohort = dense_vertex_count.experiment_sub_id.str.contains(pattern)
    for im_code in im_codes:
        _dense_vertex_count = dense_vertex_count[in_cohort & (dense_vertex_count.im_code==im_code)]
        print(hcp_roi_count_label, im_code, n_experiments)
        cohort_count_map = np.zeros((VERTEX_TO,))
        for vertex_ids in _dense_vertex_count.vertex_id:
            # Unbuffered add: repeated ids within one list are each counted,
            # exactly like a per-vertex `+= 1` loop. The explicit integer dtype
            # keeps an empty list usable as an index array.
            np.add.at(cohort_count_map, np.asarray(vertex_ids, dtype=np.intp), 1)
        cohort_count_map = cohort_count_map / n_experiments

        # Three renderings of the same map: left surface, right surface, and a
        # flatmap requested with hemisphere="left".
        for hemisphere, flatmap, mapstyle_str in zip(["left","right","left"], [False,False,True], ["lh-surf","rh-surf","flat"]):
            png_out = f"{hcp_roi_count_label}_map-{mapstyle_str}_im-{im_code}_dense-cohort-roi-count.png" # Save png path
            dscalar(
                png_out, cohort_count_map,
                orientation="portrait",
                hemisphere=hemisphere,
                palette=PALETTE,
                palette_params=palette_params,
                transparent=False,
                flatmap=flatmap,
                flatmap_style='hcp_border',
            )

# Clears this cell's displayed output, including the progress prints above.
clear_output()