In [1]:
import numpy as np
import os
from utils import check_paths

import mne
from mne.stats import permutation_cluster_1samp_test

import scipy
from scipy.stats import zscore
from scipy.sparse import coo_matrix, save_npz

import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib qt

**PLOTTING FUNCS**

In [48]:
def plot_rect_topo_from_epochs(data, epochs_info, cmap='YlGn', vmin=None, vmax=None, title='',
                               grid_size=14):
    """
    Plot one coloured, labelled square per EEG channel on a rectangular grid.

    Channel positions come from ``mne.channels.make_eeg_layout(epochs_info)``;
    each position is snapped to the nearest cell of a
    ``(grid_size + 1) x (grid_size + 1)`` grid.

    Parameters
    ----------
    data : array-like, shape (n_channels,)
        Scalar value per channel (e.g., PAC strength). Values are matched to
        channels by position, i.e. ``data[i]`` is drawn for ``layout.names[i]``.
    epochs_info : instance of mne.Info
        Info object from Epochs to extract channel positions.
    cmap : str or Colormap
        Colormap to use for values.
    vmin, vmax : float or None
        Limits for color scaling. ``None`` falls back to the NaN-ignoring
        minimum / maximum of ``data``.
    title : str
        Title for the plot.
    grid_size : int
        Largest grid index along each axis (default 14, the value that used
        to be hard-coded).

    Returns
    -------
    fig : matplotlib.figure.Figure
    ax : matplotlib.axes.Axes

    Raises
    ------
    ValueError
        If ``data`` does not contain exactly one entry per layout channel.

    Notes
    -----
    Channels whose positions round to the same grid cell are drawn on top of
    each other; the later channel in the layout hides the earlier one.
    """
    # Get rectangular layout from MNE
    layout = mne.channels.make_eeg_layout(epochs_info)
    ch_names = layout.names
    pos_2d = layout.pos[:, :2]

    # NOTE(review): the layout may contain fewer channels than `epochs_info`
    # (make_eeg_layout appears to skip bad channels by default - confirm).
    # Fail loudly instead of silently pairing values with the wrong labels.
    data = np.asarray(data)
    if data.shape[:1] != (len(ch_names),):
        raise ValueError(
            f'data has shape {data.shape}, but the layout contains '
            f'{len(ch_names)} channels; expected one value per layout channel.'
        )

    def _grid_index(coords):
        """Scale coordinates to integer grid indices in [0, grid_size]."""
        span = np.ptp(coords)
        if span == 0:
            # All channels share this coordinate: avoid a division by zero
            return np.zeros(len(coords), dtype=int)
        return np.round((coords - np.min(coords)) / span * grid_size).astype(int)

    # Normalize positions to grid indices
    x_idx = _grid_index(pos_2d[:, 0])
    y_idx = _grid_index(pos_2d[:, 1])

    # Prepare grid size
    nrows = y_idx.max() + 1
    ncols = x_idx.max() + 1

    # Start plotting. The y-axis is left un-inverted (the original code
    # inverted it twice, which cancelled out), so larger layout y is higher up.
    fig, ax = plt.subplots(figsize=(ncols, nrows))
    ax.set_xlim(0, ncols)
    ax.set_ylim(0, nrows)
    ax.set_xticks([])
    ax.set_yticks([])

    norm = plt.Normalize(vmin if vmin is not None else np.nanmin(data),
                         vmax if vmax is not None else np.nanmax(data))
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)

    for ch, col, row, value in zip(ch_names, x_idx, y_idx, data):
        rect = patches.Rectangle((col, row), 1, 1, facecolor=sm.to_rgba(value), edgecolor='black')
        ax.add_patch(rect)
        ax.text(col + 0.5, row + 0.5, ch, ha='center', va='center', fontsize=7)

    fig.colorbar(sm, ax=ax, shrink=0.8, label='Value')
    ax.set_title(title)
    fig.tight_layout()
    plt.show()

    return fig, ax


In [49]:
def plot_matrix_topo_from_epochs(data, epochs_info, cmap='YlGn', vmin=None, vmax=None, title='',
                                 grid_size=14):
    """
    Plot a topographic layout of 2D matrices (e.g., PAC) per channel using epochs.info.

    Channel positions come from ``mne.channels.make_eeg_layout(epochs_info)``;
    each position is snapped to the nearest cell of a
    ``(grid_size + 1) x (grid_size + 1)`` grid and the channel's matrix is
    drawn inside that unit cell.

    Parameters
    ----------
    data : array-like, shape (n_channels, height, width)
        A 2D matrix per channel (e.g., PAC frequency x frequency). Each matrix
        is transposed before drawing, so its first axis runs along x.
        ``data[i]`` is drawn for ``layout.names[i]``.
    epochs_info : instance of mne.Info
        Info object to extract channel layout.
    cmap : str or Colormap
        Colormap to use.
    vmin, vmax : float or None
        Color scale limits. ``None`` falls back to the NaN-ignoring
        minimum / maximum of ``data``.
    title : str
        Title for the entire plot.
    grid_size : int
        Largest grid index along each axis (default 14, the value that used
        to be hard-coded).

    Returns
    -------
    fig : matplotlib.figure.Figure
    ax : matplotlib.axes.Axes

    Raises
    ------
    ValueError
        If ``data`` is not 3-D or does not hold one matrix per layout channel.

    Notes
    -----
    Channels whose positions round to the same grid cell are drawn on top of
    each other; the later channel in the layout hides the earlier one.
    """
    data = np.asarray(data)
    if data.ndim != 3:
        raise ValueError(f'data must be 3-D (n_channels, height, width), got shape {data.shape}')

    # Get layout info
    layout = mne.channels.make_eeg_layout(epochs_info)
    ch_names = layout.names
    pos_2d = layout.pos[:, :2]

    # NOTE(review): the layout may contain fewer channels than `epochs_info`
    # (make_eeg_layout appears to skip bad channels by default - confirm).
    # Fail loudly instead of silently pairing matrices with the wrong labels.
    if data.shape[0] != len(ch_names):
        raise ValueError(
            f'data holds {data.shape[0]} matrices, but the layout contains '
            f'{len(ch_names)} channels; expected one matrix per layout channel.'
        )

    def _grid_index(coords):
        """Scale coordinates to integer grid indices in [0, grid_size]."""
        span = np.ptp(coords)
        if span == 0:
            # All channels share this coordinate: avoid a division by zero
            return np.zeros(len(coords), dtype=int)
        return np.round((coords - np.min(coords)) / span * grid_size).astype(int)

    # Normalize positions to grid indices
    x_idx = _grid_index(pos_2d[:, 0])
    y_idx = _grid_index(pos_2d[:, 1])

    # Grid dimensions
    nrows = y_idx.max() + 1
    ncols = x_idx.max() + 1

    # Set up figure. Fixing the limits first also switches autoscaling off,
    # so the per-channel imshow calls below do not move the axes limits.
    fig, ax = plt.subplots(figsize=(ncols, nrows))
    ax.set_xlim(0, ncols)
    ax.set_ylim(0, nrows)
    ax.set_xticks([])
    ax.set_yticks([])

    # Color normalization shared by every channel so the matrices are comparable
    norm = plt.Normalize(vmin if vmin is not None else np.nanmin(data),
                         vmax if vmax is not None else np.nanmax(data))
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)

    for ch, col, row, ch_matrix in zip(ch_names, x_idx, y_idx, data):
        # Plot small matrix inside the unit cell belonging to this channel
        extent = (col, col + 1, row, row + 1)
        ax.imshow(ch_matrix.T, cmap=cmap, norm=norm, extent=extent, origin='lower', aspect='auto')

        # Draw frame and label
        rect = patches.Rectangle((col, row), 1, 1, fill=False, edgecolor='black', linewidth=0.5)
        ax.add_patch(rect)
        ax.text(col + 0.5, row + 0.5, ch, ha='center', va='center', fontsize=6, color='black')

    fig.colorbar(sm, ax=ax, shrink=0.7, label='Matrix Value')
    ax.set_title(title)
    fig.tight_layout()
    plt.show()

    return fig, ax


In [74]:
def plot_significant_topomap(T_obs, clusters, cluster_p_values, info, p_thresh=0.05, vlim=(-4, 4),
                             group=None, task=None, task_stage=None, block_name=None):
    """
    Plots a topomap highlighting electrodes in significant clusters.

    Parameters
    ----------
    T_obs : array, shape (n_channels,)
        Observed T-values.
    clusters : list of boolean arrays or index tuples
        One entry per cluster: either a boolean mask of shape (n_channels,)
        or a tuple of index arrays into ``T_obs``.
    cluster_p_values : array
        P-values for each cluster.
    info : instance of mne.Info
        EEG info with channel locations.
    p_thresh : float
        Significance threshold; clusters with ``p <= p_thresh`` are highlighted.
    vlim : tuple of (float, float) or None
        Colour limits passed to ``mne.viz.plot_topomap``. ``None`` uses limits
        symmetric around zero, spanning the largest absolute T-value.
    group, task, task_stage, block_name : str or None
        Pieces of the figure title, concatenated as
        ``'{group}_{task}{task_stage}{block_name}: Significant Electrodes'``.
        Pieces that are ``None`` are left out.

    Returns
    -------
    fig : matplotlib.figure.Figure
    ax : matplotlib.axes.Axes
    """
    task = '' if task is None else task
    task_stage = '' if task_stage is None else task_stage
    block_name = '' if block_name is None else block_name
    # Previously a missing group leaked the literal text "None_" into the title
    group_prefix = '' if group is None else f'{group}_'

    # Combine significant cluster masks
    sig_mask = np.zeros_like(T_obs, dtype=bool)
    for cluster, p_val in zip(clusters, cluster_p_values):
        if p_val <= p_thresh:
            if isinstance(cluster, tuple):
                # Cluster given as index arrays rather than as a mask
                sig_mask[cluster] = True
            else:
                sig_mask |= np.asarray(cluster, dtype=bool)

    # Symmetric colour limits when none were given. This has to be a
    # (min, max) pair; the bare scalar computed before was not one.
    if vlim is None:
        t_abs_max = np.nanmax(np.abs(T_obs))
        vlim = (-t_abs_max, t_abs_max)

    title = f'{group_prefix}{task}{task_stage}{block_name}: Significant Electrodes'
    # Start plotting
    fig, ax = plt.subplots()

    # Plot topomap with significant electrodes highlighted
    im, _ = mne.viz.plot_topomap(
        T_obs,
        info,
        cmap='PiYG',
        vlim=vlim,
        show=False,
        mask=sig_mask,
        mask_params=dict(marker='h', markersize=15,
                         markerfacecolor='y',
                         markeredgecolor='k'),
        contours=0,
        axes=ax,
        sensors=False # Hide insignificant sensors
    )

    fig.colorbar(im, ax=ax, shrink=0.6, label='T-value')
    ax.set_title(title)
    fig.tight_layout()
    plt.show()

    # Hand the figure back so callers can save or close it explicitly
    return fig, ax

**TWO CONDITIONS**

In [62]:
# Each condition name maps to the (task, stage, block) fragments that are
# concatenated into the PAC file name. A block of None means "no block suffix";
# for the MAIN task it means "use both blocks".
conditions = dict(
    BL_go=("_BL", "_go", None),
    BL_plan=("_BL", "_plan", None),
    MAIN_go_baseline=("_MAIN", "_go", "_baseline"),
    MAIN_plan_baseline=("_MAIN", "_plan", "_baseline"),
    MAIN_go_adaptation=("_MAIN", "_go", "_adaptation"),
    MAIN_plan_adaptation=("_MAIN", "_plan", "_adaptation"),
    MAIN_go_combined=("_MAIN", "_go", None),    # both blocks
    MAIN_plan_combined=("_MAIN", "_plan", None),  # both blocks
)

# Pairs of condition names to contrast (first minus second)
comparisons = [
    # go vs. plan within a task / block
    ("BL_go", "BL_plan"),
    ("MAIN_go_baseline", "MAIN_plan_baseline"),
    ("MAIN_go_adaptation", "MAIN_plan_adaptation"),
    ("MAIN_go_combined", "MAIN_plan_combined"),
    # MAIN vs. BL within a stage
    ("MAIN_go_combined", "BL_go"),
    ("MAIN_plan_combined", "BL_plan"),
]


In [63]:
def _load_pac_channels_first(pac_dir, sub_name, task, stage, block):
    """Load one PAC ``.npy`` file and swap its first two axes."""
    # The file name uses only the last five characters of the subject folder name
    file_name = f"pac_mi_TOPO_{sub_name[-5:]}{task}{stage}{block}.npy"
    pac = np.load(os.path.join(pac_dir, file_name))
    return np.transpose(pac, (1, 0, 2))


def load_condition_data(eeg_data_dir, subjects, condition_key, conditions):
    """
    Load the z-scored PAC data of every subject for one condition.

    For each subject the file(s)
    ``<eeg_data_dir>/<subject>/pac_results/pac_mi_TOPO_<id><task><stage><block>.npy``
    are loaded and their first two axes swapped. When the condition has no
    block and the task is ``"_MAIN"``, the ``"_baseline"`` and
    ``"_adaptation"`` blocks are both loaded and averaged element-wise so the
    array shape stays the same as for single-block conditions.

    Parameters
    ----------
    eeg_data_dir : str
        Directory containing one sub-directory per subject.
    subjects : sequence of str
        Subject directory names; the last five characters are used as the
        subject ID in the file name.
    condition_key : str
        Key into ``conditions``.
    conditions : dict
        Maps condition names to ``(task, stage, block)`` file-name fragments.

    Returns
    -------
    pac_zscore_all : ndarray, shape (n_subjects, ...)
        Per-subject z-scored PAC arrays stacked along a new first axis.
        The notebook output reports (24, 60, 20, 20), i.e.
        subjects x electrodes x phase freqs x amplitude freqs.

    Raises
    ------
    ValueError
        If ``subjects`` is empty.

    Notes
    -----
    The z-score is computed separately for every subject along axis 0 of the
    transposed array (scipy's default axis). It is NOT a z-score across
    subjects. NOTE(review): axis 0 looks like the electrode axis given the
    reported shapes - confirm that this is the intended normalisation.
    """
    task, stage, block = conditions[condition_key]

    if block is None and task == "_MAIN":
        # Combine both blocks for MAIN
        block_suffixes = ["_baseline", "_adaptation"]
    else:
        # Single block or BL task
        block_suffixes = ["" if block is None else block]

    if len(subjects) == 0:
        raise ValueError(f'No subjects given for condition {condition_key!r}')

    pac_zscore_list = []
    for sub_name in subjects:
        pac_dir = os.path.join(eeg_data_dir, sub_name, "pac_results")
        per_block = [_load_pac_channels_first(pac_dir, sub_name, task, stage, suffix)
                     for suffix in block_suffixes]

        if len(per_block) > 1:
            # Average across blocks to keep the data shape consistent
            pac_sub = np.mean(np.stack(per_block, axis=0), axis=0)
        else:
            pac_sub = per_block[0]

        # Per-subject z-score along axis 0 (made explicit; same as scipy's default)
        pac_zscore_list.append(zscore(pac_sub, axis=0))

    # Stack along a new first axis (subject axis)
    pac_zscore_all = np.stack(pac_zscore_list, axis=0)
    print('z-scored PAC array shape:', pac_zscore_all.shape)

    return pac_zscore_all

def load_two_conditions(eeg_data_dir, subjects, cond1, cond2, conditions):
    """Return the loaded data of ``cond1`` and ``cond2`` (in that order) for all subjects."""
    first, second = (
        load_condition_data(eeg_data_dir, subjects, cond_key, conditions)
        for cond_key in (cond1, cond2)
    )
    return first, second

def iterate_comparisons(eeg_data_dir, subjects, conditions, comparisons):
    """Lazily yield ``(cond1, cond2, data1, data2)`` for every pair in ``comparisons``."""
    for pair in comparisons:
        cond1, cond2 = pair
        loaded = load_two_conditions(eeg_data_dir, subjects, cond1, cond2, conditions)
        yield (cond1, cond2, *loaded)


# SENSORS

In [76]:
group = 'O'
eeg_data_dir = f'D:\\BonoKat\\research project\\# study 1\\eeg_data\\set\\{group}'
group_data_dir = f'D:\\BonoKat\\research project\\# study 1\\eeg_data\\set\\{group}'

# Only sub-directories are subjects. Sorting makes the subject order (and the
# subject used below as the channel-info template) independent of the file system.
subjects = sorted(
    entry for entry in os.listdir(eeg_data_dir)
    if os.path.isdir(os.path.join(eeg_data_dir, entry))
)
if not subjects:
    raise FileNotFoundError(f'No subject folders found in {eeg_data_dir}')

# Create directories for saving stats
group_save_path = f'D:\\BonoKat\\research project\\# study 1\\eeg_data\\set\\{group} group'
pac_stats_save_path = os.path.join(group_save_path, 'pac_stats', 'conditions')
check_paths(pac_stats_save_path)

# Create directories for saving figures
fig_group_path = os.path.join(group_save_path, 'pac_stats', 'figs')
fig_group_save_path = os.path.join(fig_group_path, group)
fig_task_save_path = os.path.join(fig_group_path, 'conditions')
check_paths(fig_task_save_path)

############### ANALYSIS SETTINGS #############
tail = 0 # two-tailed test
cluster_forming_p = 0.01  # p-value that defines the cluster-forming t-threshold
n_permutations = 10000  # number of permutations
alpha = 0.05  # significance threshold for cluster p-values
perm_seed = 42  # fixed seed so the permutation p-values are reproducible
close_figs_after_save = True  # set to False to keep every figure window open

############### CREATE ADJACENCY MATRIX FOR STATISTICAL TEST #############
# find_ch_adjacency first attempts to find an existing "neighbor"
# (adjacency) file for given sensor layout.
# If such a file doesn't exist, an adjacency matrix is computed on the fly,
# using Delaunay triangulations.

# Load one epoch file to get info
epochs_path = os.path.join(eeg_data_dir, subjects[0], 'preproc', 'analysis')
epochs = mne.read_epochs(os.path.join(epochs_path, f"{subjects[0]}_BL_epochs_plan-epo.fif"), preload=True)
eeg_channel_names = epochs.copy().pick("eeg").ch_names
epochs.pick(eeg_channel_names)

# Create channel adjacency matrix
sensor_adjacency, ch_names = mne.channels.find_ch_adjacency(epochs.info, "eeg")
print(f'Adjacency matrix shape: {sensor_adjacency.shape}')

############### RUN STATISTICAL COMPARISON #############
for cond1, cond2, data1, data2 in iterate_comparisons(eeg_data_dir, subjects, conditions, comparisons):
    print(f"Running stats for {cond1} vs {cond2}...")

    pac_zscore_diff = data1 - data2
    print('z-scored diff PAC array shape:', pac_zscore_diff.shape) # (24, 60, 20, 20) subs x electrodes x ph_freqs x amp_freqs

    # Average z-scored PAC over phase and amplitude frequencies
    pac_zscore_diff_ave = np.mean(pac_zscore_diff, axis=(2, 3)) # (24, 60) subs x electrodes
    print(pac_zscore_diff_ave.shape)

    # Save the PAC data
    np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}_{cond1}_vs_{cond2}_ZSCORE_freqs_ave.npy"), pac_zscore_diff_ave)

    ############# PLOT AND SAVE Z-SCORED PAC AVERAGED ACROSS PARTICIPANTS #############
    # Figures are saved through their own handles instead of pyplot's
    # "current figure", which only pointed at the right figure by coincidence.
    pac_plot, ax1 = plot_rect_topo_from_epochs(np.mean(pac_zscore_diff_ave, axis=(0)), epochs.info,
                                            title=f'{group}_{cond1}_vs_{cond2}: Averaged z-scored PAC MI',
                                            cmap='PiYG', vmin=-0.5, vmax=0.5)
    pac_plot.savefig(os.path.join(fig_task_save_path, f"pac_mi_{group}_{cond1}_vs_{cond2}_PAC_MI_AVE_TOPO.png"), dpi=300)
    if close_figs_after_save:
        plt.close(pac_plot)

    pac_ave_plot, ax2 = plot_matrix_topo_from_epochs(np.mean(pac_zscore_diff, axis=(0)), epochs.info,
                                                    title=f'{group}_{cond1}_vs_{cond2}: z-scored PAC MI',
                                                    cmap='PiYG', vmin=-0.5, vmax=0.5)
    pac_ave_plot.savefig(os.path.join(fig_task_save_path, f"pac_mi_{group}_{cond1}_vs_{cond2}_PAC_MI_TOPO.png"), dpi=300)
    if close_figs_after_save:
        plt.close(pac_ave_plot)


    ############# RUN CLUSTER-BASED PERMUTATION TEST #############
    # Set the threshold for including data bins in clusters with t-value corresponding to p=0.01
    # Because we conduct a two-tailed test, we divide the p-value by 2 (which means we're making use of both tails of the distribution).
    # As the degrees of freedom, we specify the number of observations (here: subjects) minus 1.
    # Finally, we subtract 0.01 / 2 from 1, to get the critical t-value on the right tail
    degrees_of_freedom = pac_zscore_diff.shape[0] - 1
    t_thresh = scipy.stats.t.ppf(1 - cluster_forming_p / 2, df=degrees_of_freedom)

    #!
    # threshold_tfce = dict(start=0, step=0.2) # Threshold-free cluster enhancement (TFCE) - more conservative, similar results

    # Run the analysis
    T_obs, clusters, cluster_p_values, H0 = permutation_cluster_1samp_test(
        pac_zscore_diff_ave,
        n_permutations=n_permutations,
        threshold=t_thresh,
        tail=tail,
        adjacency=sensor_adjacency,
        out_type="mask",
        max_step=1,
        seed=perm_seed,
        verbose=True,
    )

    # Save the results
    np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}_{cond1}_vs_{cond2}_freqs_ave_T_obs.npy"), T_obs)
    np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}_{cond1}_vs_{cond2}_freqs_ave_clusters.npy"), np.array(clusters, dtype=object))
    np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}_{cond1}_vs_{cond2}_freqs_ave_cluster_p_values.npy"), cluster_p_values)
    np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}_{cond1}_vs_{cond2}_freqs_ave_H0.npy"), H0)

    # SANITY CHECKS
    print(f't_thresh = {t_thresh}')
    print(f'T_obs_mean = {T_obs.mean()}')
    print(f'cluster_p_values = {cluster_p_values}')

    # Same rule (p <= alpha) as plot_significant_topomap, so the printed count
    # always agrees with the electrodes highlighted in the figure.
    significant_clusters = [i for i, p in enumerate(cluster_p_values) if p <= alpha]
    print(f"Found {len(significant_clusters)} significant clusters")


    ####### PLOT THE RESULTS #######
    plot_significant_topomap(T_obs, clusters, cluster_p_values, epochs.info, p_thresh=alpha,
                             group=group, task=f'{cond1}_vs_{cond2}')
    # The topomap was the last figure created, so it is the current figure
    stats_fig = plt.gcf()
    stats_fig.savefig(os.path.join(fig_task_save_path, f"pac_cluster_stats_{group}_{cond1}_vs_{cond2}_freq_ave_TOPO.png"), dpi=300)
    if close_figs_after_save:
        plt.close(stats_fig)

Reading D:\BonoKat\research project\# study 1\eeg_data\set\O\s1_pac_sub12\preproc\analysis\s1_pac_sub12_BL_epochs_plan-epo.fif ...
    Read a total of 1 projection items:
        Average EEG reference (1 x 60) active
    Found the data of interest:
        t =    -500.00 ...     500.00 ms
        0 CTF compensation matrices available
Adding metadata with 10 columns
102 matching events found
No baseline correction applied
Created an SSP operator (subspace dimension = 1)
1 projection items activated
Could not find a adjacency matrix for the data. Computing adjacency based on Delaunay triangulations.
-- number of adjacent vertices : 60
Adjacency matrix shape: (60, 60)
z-scored PAC array shape: (24, 60, 20, 20)
z-scored PAC array shape: (24, 60, 20, 20)
Running stats for BL_go vs BL_plan...
z-scored diff PAC array shape: (24, 60, 20, 20)
(24, 60)
stat_fun(H1): min=-3.3557483125625653 max=3.038159054736123
Running initial clustering …
Found 3 clusters


  0%|          | Permuting : 0/9999 [00:00<?,       ?it/s]

t_thresh = 2.8073356837675227
T_obs_mean = -0.09311410184643125
cluster_p_values = [0.2111 0.0037 0.1216]
Found 1 significant clusters


  fig, ax = plt.subplots()


z-scored PAC array shape: (24, 60, 20, 20)
z-scored PAC array shape: (24, 60, 20, 20)
Running stats for MAIN_go_baseline vs MAIN_plan_baseline...
z-scored diff PAC array shape: (24, 60, 20, 20)
(24, 60)
stat_fun(H1): min=-3.763331328120345 max=2.191138401812913
Running initial clustering …
Found 1 cluster


  0%|          | Permuting : 0/9999 [00:00<?,       ?it/s]

t_thresh = 2.8073356837675227
T_obs_mean = -0.12808056339509985
cluster_p_values = [0.0003]
Found 1 significant clusters
z-scored PAC array shape: (24, 60, 20, 20)
z-scored PAC array shape: (24, 60, 20, 20)
Running stats for MAIN_go_adaptation vs MAIN_plan_adaptation...
z-scored diff PAC array shape: (24, 60, 20, 20)
(24, 60)
stat_fun(H1): min=-3.909610482642881 max=2.9049820564453404
Running initial clustering …
Found 3 clusters


  0%|          | Permuting : 0/9999 [00:00<?,       ?it/s]

t_thresh = 2.8073356837675227
T_obs_mean = -0.34243597901827283
cluster_p_values = [0.0007 0.0024 0.2267]
Found 2 significant clusters
z-scored PAC array shape: (24, 60, 20, 20)
z-scored PAC array shape: (24, 60, 20, 20)
Running stats for MAIN_go_combined vs MAIN_plan_combined...
z-scored diff PAC array shape: (24, 60, 20, 20)
(24, 60)
stat_fun(H1): min=-4.3429072117575425 max=3.103761879630875
Running initial clustering …
Found 2 clusters


  0%|          | Permuting : 0/9999 [00:00<?,       ?it/s]

t_thresh = 2.8073356837675227
T_obs_mean = -0.24811695135131515
cluster_p_values = [2.139e-01 1.000e-04]
Found 1 significant clusters
z-scored PAC array shape: (24, 60, 20, 20)
z-scored PAC array shape: (24, 60, 20, 20)
Running stats for MAIN_go_combined vs BL_go...
z-scored diff PAC array shape: (24, 60, 20, 20)
(24, 60)
stat_fun(H1): min=-3.9138399866958875 max=2.815817050403034
Running initial clustering …
Found 3 clusters


  0%|          | Permuting : 0/9999 [00:00<?,       ?it/s]

t_thresh = 2.8073356837675227
T_obs_mean = -0.1402274597676461
cluster_p_values = [0.3235 0.0052 0.2743]
Found 1 significant clusters
z-scored PAC array shape: (24, 60, 20, 20)
z-scored PAC array shape: (24, 60, 20, 20)
Running stats for MAIN_plan_combined vs BL_plan...
z-scored diff PAC array shape: (24, 60, 20, 20)
(24, 60)
stat_fun(H1): min=-2.1100181369024886 max=3.60430421220729
Running initial clustering …
Found 1 cluster


  0%|          | Permuting : 0/9999 [00:00<?,       ?it/s]

t_thresh = 2.8073356837675227
T_obs_mean = -0.04830961938546022
cluster_p_values = [0.0059]
Found 1 significant clusters


_________________________________

# SOURCES