In [144]:
import numpy as np
import os
from utils import check_paths

import mne
from mne.stats import permutation_cluster_1samp_test

import scipy
from scipy.stats import zscore
from scipy.sparse import coo_matrix, save_npz

import matplotlib.pyplot as plt
%matplotlib qt

**Interpretaion of z-scored PAC stats:**
Negative z-scores don’t mean “negative PAC,” just “PAC lower than the group mean.”

**Significant cluster interpretaition:**
Talking about “significant clusters” can be convenient, but you must be aware of all associated caveats! For example, it is invalid to interpret the cluster p value as being spatially or temporally specific. A cluster with sufficiently low (for example < 0.05) p value at specific location does not allow you to say that the significant effect is at that particular location. The p value only tells you about the probability of obtaining similar or stronger/larger cluster anywhere in the data if there were no differences between the compared conditions. So it only allows you to draw conclusions about the differences in the data “in general”, not at specific locations.

**How NOT to interpret results from a cluster-based permutation test:**
https://www.fieldtriptoolbox.org/faq/stats/clusterstats_interpretation/

In [141]:
eeg_data_dir = 'D:\\BonoKat\\research project\\# study 1\\eeg_data\\set'

groups = ['Y', 'O']
task = '_MAIN' # ['_BL', '_MAIN']
task_stages = ['_plan', '_go']
block_names = ['_baseline', '_adaptation']


In [None]:
for group in groups:
    group_save_path = os.path.join(eeg_data_dir, f'{group} group')
    pac_stats_save_path = os.path.join(group_save_path, 'pac_stats')
    check_paths(pac_stats_save_path)

    subs = os.listdir(os.path.join(eeg_data_dir, group))

    for task_stage in task_stages:
        for block_name in block_names:

            print(f'Processing {group} group, {task} task, {task_stage} stage, {block_name} block...')

            ############# STACK PAC DATA OF INDIVIDUAL PARTICIPANTS #############

            # Create a list to store the PAC data for each subject
            pac_list = []
            pac_zscore_list = []

            for sub_name in subs:

                sub_dir = os.path.join(eeg_data_dir, group, sub_name)
                pac_dir = os.path.join(sub_dir, 'pac_results')
                
                # Get info about chnnals from one participant for adjacency matrix
                if sub_name == subs[0]: # read one epochs file to extract info
                    # Load EEG data
                    epochs_path = os.path.join(eeg_data_dir, group, sub_name, 'preproc', 'analysis') 
                    epochs = mne.read_epochs(os.path.join(epochs_path, f"{sub_name}{task}_epochs{task_stage}{block_name}-epo.fif"), preload=True)
                    eeg_channel_names = epochs.copy().pick("eeg").ch_names
                    epochs.pick(eeg_channel_names)
                    # info = epochs.info
                    # epochs.pick(choi)

                # Load PAC data
                pac = np.load(os.path.join(pac_dir, f"pac_mi_TOPO_{sub_name[-5:]}{task}{task_stage}{block_name}.npy"))
                pac_t = np.transpose(pac, (1, 0, 2))
                pac_list.append(pac_t)
                pac_zscore_list.append(zscore(pac_t))
            
            # Stack them along a new first axis (subject axis)
            pac_all = np.stack(pac_list, axis=0)

            # Z-score the PAC data across subjects
            pac_zscore_all = np.stack(pac_zscore_list, axis=0)
            print(pac_all.shape) # (24, 60, 20, 20) subs x electrodes x ph_freqs x amp_freqs
            print(pac_zscore_all.shape)

            # Average z-scored PAC over subjects
            pac_zscore_mean = pac_zscore_all.mean(axis=0)  # shape: (60, 20, 20)

            # Save the PAC data
            np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}{task}{task_stage}{block_name}_RAW.npy"), pac_all)
            np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}{task}{task_stage}{block_name}_ZSCORE.npy"), pac_zscore_all)
            np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}{task}{task_stage}{block_name}_ZSCORE_MEAN.npy"), pac_zscore_mean)

            ############### CREATE ADJACENCY MATRIX FOR STATISTICAL TEST #############
            # find_ch_adjacency first attempts to find an existing "neighbor"
            # (adjacency) file for given sensor layout.
            # If such a file doesn't exist, an adjacency matrix is computed on the fly,
            # using Delaunay triangulations.
            sensor_adjacency, ch_names = mne.channels.find_ch_adjacency(epochs.info, "eeg")
            adjacency = mne.stats.combine_adjacency(
                                                    sensor_adjacency,
                                                    pac_zscore_all.shape[2],
                                                    pac_zscore_all.shape[3]
                                                    )
            print(adjacency.shape)

            # Save adjacency matrix
            adjacency_matrix = coo_matrix(adjacency)
            save_npz(f"{group}{task}{task_stage}{block_name}_adjacency_matrix.npz", adjacency_matrix)

            ############# RUN CLUSTER-BASED PERMUTATION TEST #############

            tail = 0 # two-tailed test

            # Set the threshold for including data bins in clusters with t-value corresponding to p=0.01
            # Because we conduct a two-tailed test, we divide the p-value by 2 (which means we're making use of both tails of the distribution).
            # As the degrees of freedom, we specify the number of observations (here: subjects) minus 1.
            # Finally, we subtract 0.01 / 2 from 1, to get the critical t-value on the right tail
            degrees_of_freedom = pac_all.shape[0] - 1
            t_thresh = scipy.stats.t.ppf(1 - 0.01 / 2, df=degrees_of_freedom)

            # Set the number of permutations
            n_permutations = 10000

            # Run the analysis
            T_obs, clusters, cluster_p_values, H0 = permutation_cluster_1samp_test(
                pac_zscore_all,
                n_permutations=n_permutations,
                threshold=t_thresh,
                tail=tail,
                adjacency=adjacency,
                out_type="mask",
                max_step=1,
                verbose=True,
            )

            # Save the results
            np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}{task}{task_stage}{block_name}_T_obs.npy"), T_obs)
            np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}{task}{task_stage}{block_name}_clusters.npy"), np.array(clusters, dtype=object))
            np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}{task}{task_stage}{block_name}_cluster_p_values.npy"), cluster_p_values)
            np.save(os.path.join(pac_stats_save_path, f"pac_mi_{group}{task}{task_stage}{block_name}_H0.npy"), H0)

            # SANITY CHECKS
            print(f't_thresh = {t_thresh}')
            print(f'T_obs_mean = {T_obs.mean()}')
            print(f'cluster_p_values = {cluster_p_values}')

            alpha = 0.05  # significance threshold
            significant_clusters = [i for i, p in enumerate(cluster_p_values) if p < alpha]
            print(f"Found {len(significant_clusters)} significant clusters")


            ####### PLOT THE RESULTS #######
            # Create directories for saving figures
            fig_group_path = os.path.join(pac_stats_save_path, 'figs')
            fig_group_save_path = os.path.join(fig_group_path, group)
            fig_task_save_path = os.path.join(fig_group_path, group, task)
            check_paths(fig_group_path, fig_group_save_path, fig_task_save_path)

            # Reshape the stats results
            T_obs_reshaped = T_obs.reshape(pac_zscore_all.shape[1:])  # (30, 20, 20)
            sig_mask_reshaped = np.zeros_like(T_obs_reshaped, dtype=bool)

            for i in significant_clusters:
                sig_mask_reshaped[clusters[i].reshape(pac_zscore_all.shape[1:])] = True


            for elec_idx in range(pac_zscore_mean.shape[0]):
                # Create masked array: T-values where significant, NaN elsewhere
                masked_T = np.where(sig_mask_reshaped[elec_idx], T_obs_reshaped[elec_idx], np.nan)

                # # Build a colormap with gray for NaNs
                cmap = plt.cm.PiYG.copy()  # try 'RdYlBu' or 'PRGn' (purple-green)
                cmap.set_bad(color='lightgray')  # this sets the NaNs to gray

                # Plot
                plt.figure(figsize=(6, 5))
                im = plt.imshow(
                    masked_T,
                    origin='lower',
                    aspect='equal',
                    cmap=cmap,
                    interpolation='none',
                    vmin=-10,
                    vmax=10
                )
                plt.colorbar(im, label='T-value')
                plt.title(f'{group}{task}{task_stage}{block_name}: PAC Cluster stats - {eeg_channel_names[elec_idx]}')
                plt.xlabel('Amplitude Freq Index')
                plt.ylabel('Phase Freq Index')
                plt.tight_layout()
                plt.show()

                plt.savefig(os.path.join(fig_task_save_path, f"pac_cluster_stats_{group}{task}{task_stage}{block_name}_{eeg_channel_names[elec_idx]}.png"), dpi=300)


Reading D:\BonoKat\research project\# study 1\eeg_data\set\Y\s1_pac_sub01\preproc\analysis\s1_pac_sub01_MAIN_epochs_plan_baseline-epo.fif ...
    Read a total of 1 projection items:
        Average EEG reference (1 x 60) active
    Found the data of interest:
        t =    -500.00 ...     500.00 ms
        0 CTF compensation matrices available
Adding metadata with 15 columns
50 matching events found
No baseline correction applied
Created an SSP operator (subspace dimension = 1)
1 projection items activated
(24, 60, 20, 20)
(24, 60, 20, 20)
Could not find a adjacency matrix for the data. Computing adjacency based on Delaunay triangulations.
-- number of adjacent vertices : 60
(24000, 24000)
stat_fun(H1): min=-8.746169828510352 max=3.267540114221343
Running initial clustering …
Found 73 clusters


100%|██████████| Permuting : 9999/9999 [13:45<00:00,   12.12it/s]


t_thresh = 2.8073356837675227
T_obs_mean = -0.1795172628423524
cluster_p_values = [9.928e-01 1.000e+00 1.000e+00 1.000e+00 9.976e-01 1.000e+00 1.000e+00
 1.000e+00 1.000e+00 1.000e+00 8.693e-01 1.400e-03 8.536e-01 1.000e+00
 2.000e-04 9.348e-01 9.996e-01 1.000e+00 1.000e+00 1.000e+00 4.868e-01
 1.000e+00 9.663e-01 9.876e-01 4.430e-02 1.000e+00 9.674e-01 9.994e-01
 5.400e-03 9.653e-01 1.000e+00 9.996e-01 1.000e+00 1.000e+00 1.000e+00
 9.623e-01 1.910e-01 9.999e-01 1.600e-02 1.000e+00 1.000e+00 7.275e-01
 1.000e+00 1.000e+00 8.920e-01 1.000e+00 1.000e+00 1.000e+00 1.000e+00
 1.000e+00 1.000e+00 1.000e+00 9.994e-01 6.964e-01 1.000e+00 1.000e+00
 1.000e+00 1.000e+00 1.000e+00 1.000e+00 1.000e+00 1.000e+00 1.000e+00
 1.000e+00 1.000e+00 9.993e-01 1.000e+00 9.999e-01 1.000e+00 6.936e-01
 9.931e-01 1.000e+00 1.000e+00]
Found 5 significant clusters


  plt.figure(figsize=(6, 5))


Reading D:\BonoKat\research project\# study 1\eeg_data\set\Y\s1_pac_sub01\preproc\analysis\s1_pac_sub01_MAIN_epochs_plan_adaptation-epo.fif ...
    Read a total of 1 projection items:
        Average EEG reference (1 x 60) active
    Found the data of interest:
        t =    -500.00 ...     500.00 ms
        0 CTF compensation matrices available
Adding metadata with 15 columns
135 matching events found
No baseline correction applied
Created an SSP operator (subspace dimension = 1)
1 projection items activated
(24, 60, 20, 20)
(24, 60, 20, 20)
Could not find a adjacency matrix for the data. Computing adjacency based on Delaunay triangulations.
-- number of adjacent vertices : 60
(24000, 24000)
stat_fun(H1): min=-10.14524048806355 max=4.509423663389078
Running initial clustering …
Found 90 clusters


 34%|███▍      | Permuting : 3410/9999 [05:16<10:05,   10.88it/s]