## BL SOURCE-LEVEL PAC AND STATS

**This script:**
1. Creates theta-gamma PAC comodulograms for condition (all vertices) for each subject and saves PAC data as numpy array
2. Runs cluster-besed permutation test on PAC data

**OUTCOME: PAC estimates for all vertices for each subject and statistical assessment of the results**

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import mne
import os
from utils import check_paths
import pandas as pd
from scipy.io import loadmat
import joblib
import matplotlib.gridspec as gridspec

from pactools import Comodulogram, REFERENCES, raw_to_mask

from mne.channels.layout import find_layout
from functools import partial
from mne.defaults import _handle_default

from mne.viz.topo import _erfimage_imshow_unified, _plot_topo

from mne.viz.utils import (
    _setup_vmin_vmax,
    add_background_image
)
from collections import namedtuple

from mne.stats import permutation_cluster_1samp_test

import scipy
from scipy.stats import zscore

%matplotlib qt

1. PAC analysis per condition per subject

In [2]:
eeg_data_dir = 'D:\\BonoKat\\research project\\# study 1\\eeg_data\\set'
group = 'Y'
subs = os.listdir(os.path.join(eeg_data_dir, group))
tasks = ['_BL'] # ['_BL', '_MAIN']
task_stages = ['_plan', '_go'] # ['_plan', '_go']

theta_range = np.linspace(4, 8, 20)  # Phase: 4-8 Hz
gamma_range = np.linspace(30, 80, 20)  # Amplitude: 30-80 Hz

tmin = 0.0
tmax = 0.495

In [4]:
subs[16:]

['s1_pac_sub63',
 's1_pac_sub64',
 's1_pac_sub66',
 's1_pac_sub67',
 's1_pac_sub71',
 's1_pac_sub76',
 's1_pac_sub77']

In [None]:
for sub_name in subs[16:]:
    print(f"Processing subject: {sub_name}")
    analysis_dir = os.path.join(eeg_data_dir, group, sub_name, 'preproc', 'analysis')
    pac_save_path = os.path.join(analysis_dir, 'source', 'PAC')
    check_paths(pac_save_path)

    for task in tasks:
        for task_stage in task_stages: # task_stages #############################
            if task == '_BL':
                stcs_path = os.path.join(analysis_dir, 'source', 'morphed_stcs', task, task_stage) 

                stcs = []
                stcs_data = []

                for stc_file in os.listdir(stcs_path):
                    if stc_file.endswith('-rh.stc'): # MNE will load both hemispheres anyway
                        stc_path = os.path.join(stcs_path, stc_file)
                        stc = mne.read_source_estimate(stc_path, subject=sub_name)
                        stc.crop(tmin=tmin, tmax=tmax)
                        stcs.append(stc)
                        stcs_data.append(stc.data)

                source_array = np.stack(stcs_data, axis=0)
                print(source_array.shape)  # (epochs x vertices x time)

                times = stcs[0].times

                #Estimate PAC
                estimator = Comodulogram(
                    fs=stcs[0].sfreq,
                    low_fq_range=theta_range,  # Phase frequencies (theta)
                    high_fq_range=gamma_range, # Amplitude frequencies (gamma)
                    method='tort',
                    progress_bar=True
                    )

                pac_results = np.empty(
                    (len(theta_range), source_array.shape[1], len(gamma_range))
                )

                for i in range(source_array.shape[1]):
                    print(f"Processing source {i+1}/{source_array.shape[1]}")

                    data_flat = np.reshape(source_array[:, i], -1)[None, :]
                    pac = estimator.fit(
                            data_flat,
                            data_flat,
                        )
                    pac_results[:, i] = pac.comod_

                    # if i in range(10):
                    #     # Convert the plot to a Plotly figure (if supported)
                    #     fig = pac.plot(tight_layout=False, cmap='magma')
                    #     # Add a title
                    #     plt.title(f"PAC MI {sub_name[-5:]} - source={i}: {task}{task_stage}")

                    #     # Save the plot
                    #     plt.show()

                np.save(os.path.join(pac_save_path, f"PAC_MI_SOURCE_{sub_name[-5:]}{task}{task_stage}.npy"), pac_results)
        else:
            continue

Processing subject: s1_pac_sub63
(98, 5124, 249)
Processing source 1/5124
[........................................] 100% | 0.24 sec | comodulogram: tort 
Processing source 2/5124
[........................................] 100% | 0.24 sec | comodulogram: tort 
Processing source 3/5124
[........................................] 100% | 0.24 sec | comodulogram: tort 
Processing source 4/5124
[........................................] 100% | 0.23 sec | comodulogram: tort 
Processing source 5/5124
[........................................] 100% | 0.25 sec | comodulogram: tort 
Processing source 6/5124
[........................................] 100% | 0.24 sec | comodulogram: tort 
Processing source 7/5124
[........................................] 100% | 0.25 sec | comodulogram: tort 
Processing source 8/5124
[........................................] 100% | 0.24 sec | comodulogram: tort 
Processing source 9/5124
[........................................] 100% | 0.23 sec | comodulogram: tor

  amplitude_dist * np.log(amplitude_dist * n_bins))
  amplitude_dist * np.log(amplitude_dist * n_bins))


[........................................] 100% | 0.23 sec | comodulogram: tort 
Processing source 601/5124
[........................................] 100% | 0.23 sec | comodulogram: tort 
Processing source 602/5124
[........................................] 100% | 0.23 sec | comodulogram: tort 
Processing source 603/5124
[........................................] 100% | 0.24 sec | comodulogram: tort 
Processing source 604/5124
[........................................] 100% | 0.22 sec | comodulogram: tort 
Processing source 605/5124
[........................................] 100% | 0.24 sec | comodulogram: tort 
Processing source 606/5124
[........................................] 100% | 0.23 sec | comodulogram: tort 
Processing source 607/5124
[........................................] 100% | 0.23 sec | comodulogram: tort 
Processing source 608/5124
[........................................] 100% | 0.23 sec | comodulogram: tort 
Processing source 609/5124
[...........................

_________________

In [None]:
# Plotting the first 10 vertices of the first epoch
plt.figure(figsize=(10, 6))

for i in range(10):
    plt.plot(times, source_array[0, i], label=f'Vertex {i}')


____________

# STATISTICS
**Cluster-based permutation test**

In [3]:
eeg_data_dir = 'D:\\BonoKat\\research project\\# study 1\\eeg_data\\set'

groups = ['Y']
task = '_BL' # ['_BL', '_MAIN']
task_stages = ['_plan', '_go']
block_names = ['_baseline', '_adaptation']

**STATS FOR DATA AVERAGED ACROSS PHASE AND AMPLITUDE FREQUENCIES**

**PER CONDITION**

In [4]:
# Main script to process PAC data and run cluster-based permutation tests

############### CREATE ADJACENCY MATRIX FOR STATISTICAL TEST #############
# find_ch_adjacency first attempts to find an existing "neighbor"
# (adjacency) file for given sensor layout.
# If such a file doesn't exist, an adjacency matrix is computed on the fly,
# using Delaunay triangulations.
src_fname = 'D:\\BonoKat\\research project\\# study 1\\mri_data\\fs_output\\freesurfer\\sub_dir\\Y\\fsaverage_bem\\bem\\fsaverage-ico4-src.fif'
src = mne.read_source_spaces(src_fname)
# src.plot(subjects_dir='D:\\BonoKat\\research project\\# study 1\\mri_data\\fs_output\\freesurfer\\sub_dir\\Y')
source_adjacency = mne.spatial_src_adjacency(src)
adj = source_adjacency.tocsr()  # Ensure adjacency is CSR format for fast indexing
print('adjacency shape:', source_adjacency.shape)


############## PROCESS PAC DATA FOR EACH GROUP AND TASK STAGE #############
for group in groups:
    group_save_path = os.path.join(eeg_data_dir, f'{group} group')
    pac_stats_save_path = os.path.join(group_save_path, 'source_pac_stats')
    check_paths(pac_stats_save_path)
    
    # # Create directories for saving figures
    # fig_group_path = os.path.join(pac_stats_save_path, 'figs')
    # fig_group_save_path = os.path.join(fig_group_path, group)
    # fig_task_save_path = os.path.join(fig_group_path, group, task)
    # check_paths(fig_group_path, fig_group_save_path, fig_task_save_path)

    subs = os.listdir(os.path.join(eeg_data_dir, group))

    for task_stage in task_stages: # [task_stages[0]]

        print(f'Processing {group} group, {task} task, {task_stage} stage')

        ############# STACK PAC DATA OF INDIVIDUAL PARTICIPANTS #############

        # Create a list to store the PAC data for each subject
        pac_list = []
        pac_zscore_list = []

        for sub_name in subs:

            sub_dir = os.path.join(eeg_data_dir, group, sub_name)
            pac_dir = os.path.join(sub_dir, 'preproc', 'analysis', 'source', 'PAC')

            # Load PAC data
            pac = np.load(os.path.join(pac_dir, f"PAC_MI_SOURCE_{sub_name[-5:]}{task}{task_stage}.npy"))
            pac_t = np.transpose(pac, (1, 0, 2))

            ### NAN Imputation for PAC Matrices ###
            pac_imputed = pac_t.copy()

            # Step 1: Detect vertices with any NaN in their 20×20 PAC
            nan_mask = np.isnan(pac_t).any(axis=(1, 2))  # shape (5124,)

            if nan_mask.any() == True:
                print(f"Found {nan_mask.sum()} vertices with NaN values.")

                # Step 2: Impute NaNs from neighbors
                for vtx in np.where(nan_mask)[0]:
                    neighbors = adj[[vtx]].indices
                    valid_neighbors = [n for n in neighbors if not nan_mask[n]]
                    # Average PAC matrices from neighbors
                    pac_imputed[vtx] = np.nanmean(pac_t[valid_neighbors], axis=0)

                print(f"NaN imputation for {sub_name} complete.")

            pac_list.append(pac_imputed)
            pac_zscore_list.append(zscore(pac_imputed, axis=0, nan_policy='omit')) # 'omit' ignores NaN values in the z-score calculation

        # Stack them along a new first axis (subject axis)
        pac_all = np.stack(pac_list, axis=0)

        # Z-score the PAC data across subjects
        pac_zscore_all = np.stack(pac_zscore_list, axis=0)
        # pac_zscore_all = zscore(pac_all, axis=1, nan_policy='omit') # another way to zscore the data
        print('PAC array shape:', pac_all.shape) # subs x electrodes x ph_freqs x amp_freqs
        print('z-scored PAC array shape:', pac_zscore_all.shape)

        # Averafe z-scored PAC over phase and amplitude frequencies
        # pac_zscore_all_ave = np.mean(pac_zscore_all, axis=(2, 3)) # (23, 5124) subs x electrodes
        # pac_zscore_all_med = np.median(pac_zscore_all, axis=(2, 3)) # produces more significant clusters

        ### Global normalization
        pac_all_ave = np.mean(pac_all, axis=(2, 3))
        pac_zscore_all_ave = (pac_all_ave - np.nanmean(pac_all_ave)) / np.nanstd(pac_all_ave)
        ###

        # # Save the PAC data
        np.save(os.path.join(pac_stats_save_path, f"PAC_MI_SOURCE_{group}{task}{task_stage}_ZSCORE_freqs_ave.npy"), pac_zscore_all_ave)

        # # ############# PLOT AND SAVE Z-SCORED PAC AVERAGED ACROSS PARTICIPANTS #############
        # pac_plot, ax1 = plot_rect_topo_from_epochs(np.mean(pac_zscore_all_ave, axis=(0)), epochs.info,
        #                                         title=f'{group}{task}{task_stage}{block_name}: Averaged z-scored PAC MI',
        #                                         cmap='PiYG', vmin=-0.5, vmax=0.5)
        # plt.savefig(os.path.join(fig_task_save_path, f"pac_mi_{group}{task}{task_stage}{block_name}_PAC_MI_AVE_TOPO.png"), dpi=300)

        # pac_ave_plot, ax2 = plot_matrix_topo_from_epochs(np.mean(pac_zscore_all, axis=(0)), epochs.info,
        #                                                 title=f'{group}{task}{task_stage}{block_name}: z-scored PAC MI',
        #                                                 cmap='PiYG', vmin=-0.5, vmax=0.5)
        # plt.savefig(os.path.join(fig_task_save_path, f"pac_mi_{group}{task}{task_stage}{block_name}_PAC_MI_TOPO.png"), dpi=300)


        ############# RUN CLUSTER-BASED PERMUTATION TEST #############

        tail = 0 # two-tailed test

        # Set the threshold for including data bins in clusters with t-value corresponding to p=0.01
        # Because we conduct a two-tailed test, we divide the p-value by 2 (which means we're making use of both tails of the distribution).
        # As the degrees of freedom, we specify the number of observations (here: subjects) minus 1.
        # Finally, we subtract 0.01 / 2 from 1, to get the critical t-value on the right tail
        p_threshold = 0.01
        degrees_of_freedom = pac_all.shape[0] - 1
        t_thresh = scipy.stats.t.ppf(1 - p_threshold / 2, df=degrees_of_freedom)

        #!
        # threshold_tfce = dict(start=0, step=0.2) # Threshold-free cluster enhancement (TFCE) - more conservative, similar results

        # Set the number of permutations
        n_permutations = 10000

        # Run the analysis
        T_obs, clusters, cluster_p_values, H0 = permutation_cluster_1samp_test(
            pac_zscore_all_ave,
            n_permutations=n_permutations,
            threshold=t_thresh, # None - default threshold based on t-distribution
            tail=tail,
            adjacency=adj,
            step_down_p=0.05,  # step-down p-value correction instead of max stats
            out_type="mask",
            max_step=1,
            n_jobs=-1 # to use all available CPU cores
        )

        # # Save the results
        np.save(os.path.join(pac_stats_save_path, f"PAC_MI_SOURCE_{group}{task}{task_stage}_freqs_ave_T_obs.npy"), T_obs)
        np.save(os.path.join(pac_stats_save_path, f"PAC_MI_SOURCE_{group}{task}{task_stage}_freqs_ave_clusters.npy"), np.array(clusters, dtype=object))
        np.save(os.path.join(pac_stats_save_path, f"PAC_MI_SOURCE_{group}{task}{task_stage}_freqs_ave_cluster_p_values.npy"), cluster_p_values)
        np.save(os.path.join(pac_stats_save_path, f"PAC_MI_SOURCE_{group}{task}{task_stage}_freqs_ave_H0.npy"), H0)

        # SANITY CHECKS
        print(f't_thresh = {t_thresh}')
        print(f'T_obs_mean = {T_obs.mean()}')
        print(f'cluster_p_values = {cluster_p_values}')

        alpha = 0.05  # significance threshold
        significant_clusters = [i for i, p in enumerate(cluster_p_values) if p < alpha]
        print(f"Condition {task_stage}: Found {len(significant_clusters)} significant clusters out of {len(cluster_p_values)} total clusters.")


        # ####### PLOT THE RESULTS #######
        # plot_significant_topomap(T_obs, clusters, cluster_p_values, epochs.info, group=group, task=task, task_stage=task_stage, block_name=block_name)
        # plt.savefig(os.path.join(fig_task_save_path, f"pac_cluster_stats_{group}{task}{task_stage}_freq_ave_TOPO.png"), dpi=300)

    Reading a source space...
    Computing patch statistics...
    Patch information added...
    [done]
    Reading a source space...
    Computing patch statistics...
    Patch information added...
    [done]
    2 source spaces read
-- number of adjacent vertices : 5124
adjacency shape: (5124, 5124)
Processing Y group, _BL task, _plan stage
Found 5 vertices with NaN values.
NaN imputation for s1_pac_sub01 complete.
Found 3 vertices with NaN values.
NaN imputation for s1_pac_sub07 complete.
Found 3 vertices with NaN values.
NaN imputation for s1_pac_sub10 complete.
Found 13 vertices with NaN values.
NaN imputation for s1_pac_sub11 complete.
Found 9 vertices with NaN values.
NaN imputation for s1_pac_sub22 complete.
Found 17 vertices with NaN values.
NaN imputation for s1_pac_sub24 complete.
Found 15 vertices with NaN values.
NaN imputation for s1_pac_sub26 complete.
Found 17 vertices with NaN values.
NaN imputation for s1_pac_sub29 complete.
Found 9 vertices with NaN values.
NaN imp

  0%|          | Permuting : 0/9999 [00:00<?,       ?it/s]

Step-down-in-jumps iteration #1 found 1 cluster to exclude from subsequent iterations


  0%|          | Permuting : 0/9999 [00:00<?,       ?it/s]

Step-down-in-jumps iteration #2 found 0 additional clusters to exclude from subsequent iterations
t_thresh = 2.818756060596369
T_obs_mean = -0.13270681161022274
cluster_p_values = [0.8343 0.5975 0.9991 0.9955 0.9957 0.9937 0.9849 0.9983 0.9993 0.6229
 0.9637 0.6387 0.0258 0.9993 0.9468 0.1773 0.9981 0.9437 0.8722 0.9488
 0.9827 0.443  0.7675 0.9989 0.9991 0.9991 0.9976 0.9811 0.997  0.9994
 0.8617 0.6057 0.9981 0.9943 0.7198 0.1992 0.5123 0.9987 0.9824 0.8119
 0.9994 0.9991 0.8713 0.9994 0.7255 0.9683 0.9982 0.4899 0.8325 0.8694
 0.9669 0.9938 0.9993 0.8911 0.8754 0.9835 0.9981]
Condition _plan: Found 1 significant clusters out of 57 total clusters.
Processing Y group, _BL task, _go stage
Found 5 vertices with NaN values.
NaN imputation for s1_pac_sub01 complete.
Found 3 vertices with NaN values.
NaN imputation for s1_pac_sub07 complete.
Found 3 vertices with NaN values.
NaN imputation for s1_pac_sub10 complete.
Found 13 vertices with NaN values.
NaN imputation for s1_pac_sub11 complet

  0%|          | Permuting : 0/9999 [00:00<?,       ?it/s]

Step-down-in-jumps iteration #1 found 0 clusters to exclude from subsequent iterations
t_thresh = 2.818756060596369
T_obs_mean = -0.2694719219444982
cluster_p_values = [0.6116 0.6915 0.9944 0.6275 0.8469 0.99   0.6611 0.9962 0.8289 0.9413
 0.996  0.9943 0.1385 0.9342 0.6507 0.4575 0.9929 0.6113 0.624  0.9924
 0.7682 0.8216 0.1162 0.5946 0.8547 0.7218 0.9408 0.3775 0.0823 0.996
 0.6418 0.6892 0.3351 0.991  0.9884 0.6294 0.8277 0.9886 0.9963 0.8303]
Condition _go: Found 0 significant clusters out of 40 total clusters.


______________________________

Dirty field

PLOTTING PAC DISTRIBUTIONS

In [None]:
# Global distribution of all PAC values across all subjects and vertices
# Flatten all PAC values into a 1D array
all_pac_values = pac_all.flatten()

# Remove NaNs if present
all_pac_values = all_pac_values[~np.isnan(all_pac_values)]

# Plot histogram
plt.figure(figsize=(8, 5))
plt.hist(all_pac_values, bins=100, color='skyblue', edgecolor='k')
plt.title("Distribution of all PAC z-scores")
plt.xlabel("PAC z-score")
plt.ylabel("Count")
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Distribution per subject (e.g., mean PAC per subject)
# Mean PAC per subject, averaged over all space and frequencies
subject_means = np.nanmean(pac_all, axis=(1, 2, 3))  # shape (23,)

plt.figure(figsize=(8, 5))
plt.hist(subject_means, bins=15, color='coral', edgecolor='k')
plt.title("Mean PAC z-scores per subject")
plt.xlabel("Mean PAC z-score")
plt.ylabel("Count")
plt.grid(True)
plt.tight_layout()
plt.show()


In [21]:
# Violin plot: per-subject PAC distributions
import seaborn as sns

# Sample 5000 random PAC values per subject (if needed for speed)
np.random.seed(42)
subset = [pac_all[i].flatten() for i in range(23)]
subset = [x[~np.isnan(x)] for x in subset]
subset = [np.random.choice(x, 5000, replace=False) if len(x) > 5000 else x for x in subset]

# Create DataFrame for seaborn
import pandas as pd
df = pd.DataFrame({
    'PAC': np.concatenate(subset),
    'Subject': np.concatenate([[f'Subj {i+1}'] * len(x) for i, x in enumerate(subset)])
})

plt.figure(figsize=(12, 6))
sns.violinplot(data=df, x='Subject', y='PAC', inner='box')
plt.title("PAC value distribution per subject")
plt.ylabel("PAC z-score")
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()


In [18]:
np.min(pac_all)

np.float64(5.687941503192036e-07)