# SWB Connectivity Analysis: Beta Coherence

Created: 08/22/2024 \
Updated: 09/27/2024

*updated using connectivity epochs data*

In [1]:
import numpy as np
import mne
from glob import glob
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
from scipy.stats import zscore, linregress, ttest_ind, ttest_rel, ttest_1samp, pearsonr, spearmanr
import pandas as pd
from mne.preprocessing.bads import _find_outliers
import os 
import joblib
import re
import datetime
import scipy
import random
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.regression.mixed_linear_model import MixedLM 
from joblib import Parallel, delayed
import pickle
import itertools
import time 
from matplotlib.ticker import StrMethodFormatter


import mne_connectivity
from mne_connectivity import phase_slope_index, seed_target_indices, spectral_connectivity_epochs
# import fooof
# Import plotting functions
# from fooof.plts.spectra import plot_spectrum, plot_spectra
# # Import the FOOOF object
# from fooof import FOOOF
# from fooof import FOOOFGroup

from tqdm import tqdm
from IPython.display import clear_output

from joblib import delayed, Parallel
from statsmodels.stats import multitest
import warnings
warnings.filterwarnings('ignore')
# print('\n'.join(f'{m.__name__}=={m.__version__}' for m in globals().values() if getattr(m, '__version__', None)))

%load_ext autoreload
%autoreload 2



In [2]:
# Root locations for un-archived data, results and analysis scripts.
# Every *_dir string ends with '/' because later cells build file paths by
# plain string interpolation.
base_dir   = '/sc/arion/projects/guLab/Alie/SWB/'
script_dir = '/hpc/users/finka03/swb_ephys_analysis/scripts/'

# folders derived from the project root
neural_dir = f'{base_dir}ephys_analysis/data/'
behav_dir  = f'{base_dir}ephys_analysis/behav/data/'
save_dir   = f'{base_dir}ephys_analysis/results/connectivity/coherence/beta/'

# the results folder must exist before any csv is written into it
os.makedirs(save_dir, exist_ok=True)

# run-date stamp (MMDDYYYY) that is embedded in output file names
date = f'{datetime.date.today():%m%d%Y}'
print(date)

# alternative locations, currently disabled:
# anat_dir   = f'{base_dir}ephys_analysis/recon_labels/'
# behav_dir  = f'{base_dir}swb_behav_models/data/behavior_preprocessed/'


09292024


In [3]:
# Make the project-local packages importable, then import them.
# Each sys.path.append must run before the import that follows it, so the
# order of the statements in this cell matters.
import sys
sys.path.append(f'{base_dir}ephys_analysis/LFPAnalysis/')

# oscillation_utils provides compute_connectivity, used by the coherence loop
from LFPAnalysis import analysis_utils,oscillation_utils

sys.path.append(f'{script_dir}analysis_notebooks/')

# NOTE(review): star import - the names it adds to the notebook namespace are
# not visible from this file
from ieeg_tools import *

sys.path.append(f'{script_dir}behav/')

# NOTE(review): format_all_behav is called later without an explicit import, so
# it presumably comes from one of these star imports (likely behav_utils) - confirm
from behav_utils import *
from swb_subj_behav import *


In [4]:
# Subject roster: the first column (PatientID) of the 'Usable_Subjects' sheet
subj_info_path = f'{base_dir}ephys_analysis/subj_info/SWB_subjects.xlsx'
subj_ids = list(pd.read_excel(subj_info_path, sheet_name='Usable_Subjects', usecols=[0])['PatientID'])
n_subj = len(subj_ids)
# subj_ids


# Load Behav + Elec ROI Data
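- `all_behav`: the per-subject task dataframes (`{subj_id}_task_df.csv`), combined and formatted with `format_all_behav` (behav_utils formatting)
- `roi_reref_labels_master_df`: ROI labels for the re-referenced channels, loaded from `roi_reref_labels_master.csv` as in previous analyses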

In [5]:
# all_behav = pd.read_csv(f'{behav_dir}all_behav.csv') ## this isn't normalized yet 

# one task dataframe per subject, in the same order as subj_ids
task_df_paths = [f'{behav_dir}{subj_id}_task_df.csv' for subj_id in subj_ids]
raw_behav = [pd.read_csv(path) for path in task_df_paths]

# with drops_data=True the formatter returns two objects: the combined behavior
# table and the per-subject drop information
all_behav, drops_data = format_all_behav(raw_behav, drops_data=True)

all_behav

Unnamed: 0,subj_id,bdi,bdi_thresh,Round,RT,TrialOnset,ChoiceOnset,DecisionOnset,FeedbackOnset,ChoicePos,...,choiceEV_t1,rpe_t1,res_type_t1,cf_t1,max_cf_t1,cpe_t1,max_cpe_t1,keep_epoch,keep_epoch_t1,CpeOnset
0,MS002,14,low,1,2.059852,513.380590,513.390239,515.450091,515.457173,right,...,-0.748193,-0.553325,gamble_bad,-0.151490,0.017828,-0.451721,-0.457514,keep,keep,517.450091
1,MS002,14,low,2,1.954564,522.640856,522.641563,524.596127,526.627092,right,...,1.074702,0.954543,gamble_good,0.278569,0.017828,0.731568,0.723865,keep,keep,526.596127
2,MS002,14,low,3,1.583462,531.174799,531.175599,532.759061,534.780269,right,...,-0.089733,-0.008658,safe_good,-0.458674,-0.387722,0.437278,0.328575,keep,keep,534.759061
3,MS002,14,low,4,2.491611,545.592613,545.593355,548.084966,548.092333,left,...,0.769731,0.702276,gamble_good,0.340006,0.017828,0.400492,0.526220,keep,keep,550.084966
4,MS002,14,low,5,1.768936,555.337336,555.345720,557.114656,559.135069,left,...,-0.089733,-0.008658,safe_good,-0.642985,-0.539803,0.621209,0.463333,keep,keep,559.114656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4045,DA039,22,high,146,1.079701,2259.827656,2259.828749,2260.908450,2262.926195,right,...,-0.128169,0.007477,safe_good,-1.322517,-0.943820,0.970134,0.636137,keep,keep,2262.908450
4046,DA039,22,high,147,1.837272,2267.502359,2267.534059,2269.371331,2269.377701,right,...,-0.128169,0.007477,safe_bad,0.374778,0.215896,-0.312218,-0.167905,keep,keep,2271.371331
4047,DA039,22,high,148,4.030006,2282.349445,2282.350662,2286.380667,2286.389886,left,...,-0.128169,0.007477,safe_bad,0.383664,0.221967,-0.318932,-0.172115,keep,keep,2288.380667
4048,DA039,22,high,149,3.167144,2293.040983,2293.042042,2296.209186,2296.218136,left,...,-1.591335,-1.093440,gamble_bad,-0.433881,-0.033049,-1.379726,-1.047721,keep,keep,2298.209186


In [6]:
# Save a date-stamped snapshot of the formatted behavior table next to the
# coherence results so the outputs can be traced back to the behavior they used.
band = 'beta'
behav_snapshot_path = f'{save_dir}all_behav_{band}_coh_{date}.csv'
all_behav.to_csv(behav_snapshot_path, index=False)

In [7]:
# Master table of ROI labels for the re-referenced channels. Later cells read
# its subj_id, roi and reref_ch_names columns; the saved index column
# ('Unnamed: 0') is discarded on load.
roi_label_files = glob(f'{base_dir}ephys_analysis/results/roi_info/roi_reref_labels_master.csv')
roi_reref_labels_master_df = pd.read_csv(roi_label_files[0]).drop(columns=['Unnamed: 0'])

# roi_reref_labels_master_df #= roi_reref_labels_master_df



In [9]:
# #### update all subjects epochs data and resave 
# epoch_id = 'CpeOnset'
# # iterate through subjects
# for subj_id in subj_ids:
#     print(subj_id)
#     # load & format rereferenced epoch data 
#     subj_epochs = mne.read_epochs(f'{neural_dir}{subj_id}/{epoch_id}_epochs.fif', preload=True)
#     # drop bad trials
#     subj_drops = drops_data[subj_id]
#     subj_epochs.drop(subj_drops)
#     # replace old metadata with updated subject data
#     subj_epochs.metadata = all_behav[all_behav.subj_id == subj_id]
#     # save updated epochs data
#     subj_epochs.save(f'{neural_dir}{subj_id}/{epoch_id}_epochs-clean.fif', overwrite=True)
#     del subj_epochs

# Connectivity Computations : Beta Coherence 

In [8]:
# ---- connectivity analysis parameters ----

# wavelet settings: 30 log-spaced frequencies between 2 and 200, each with a
# whole number of cycles log-spaced between 3 and 10
freqs    = np.logspace(*np.log10([2, 200]), num=30)
n_cycles = np.floor(np.logspace(*np.log10([3, 10]), num=30))

# [low, high] limits of every frequency band
freq_dict = dict(theta=[4, 8],
                 alpha=[8, 13],
                 beta=[13, 30],
                 gamma=[30, 70],
                 hfa=[70, 200])

# connectivity metric and the band it is computed in
metric, band = 'coh', 'beta'
# number of surrogates for the permutations, and buffer time in ms
n_surr, buf_ms = 500, 1000

# epochs to analyse and the rois entering the pairwise coherence
epoch_id = 'CpeOnset'
coh_rois = ['acc','ains','ofc','dlpfc','vlpfc','amy','pins','dmpfc']
# coh_rois = ['acc','ains','ofc','dlpfc','vlpfc','amy']

# roi -> ids of the subjects that have at least one electrode labelled with it
roi_subj_ids = {
    roi: roi_reref_labels_master_df.loc[roi_reref_labels_master_df.roi == roi, 'subj_id'].unique().tolist()
    for roi in coh_rois
}

# every unordered roi pair as [roi1, roi2], generated rather than hard coded
pairs = list(map(list, itertools.combinations(coh_rois, 2)))
# pairs


In [12]:
# Show the ROI pairs currently queued for the coherence loop.
# NOTE(review): the recorded output of this cell lists 11 pairs, fewer than the
# 28 combinations of the 8 ROIs, so `pairs` had presumably already been sliced
# in an earlier kernel execution that is not part of the saved notebook.
# (disabled) drop the first queued pair:
# pairs = pairs[1:]
pairs



[['acc', 'dlpfc'],
 ['acc', 'vlpfc'],
 ['acc', 'amy'],
 ['acc', 'pins'],
 ['acc', 'dmpfc'],
 ['ains', 'ofc'],
 ['ains', 'dlpfc'],
 ['ains', 'vlpfc'],
 ['ains', 'amy'],
 ['ains', 'pins'],
 ['ains', 'dmpfc']]

In [13]:
# Queue the ROI pairs that still need coherence computed.
# The queue is rebuilt from the full list of ROI combinations and bounded by
# pair NAME, so this cell gives the same result every time it is run (also in
# a fresh kernel). Slicing `pairs` by position in place dropped one more pair
# on every re-execution and depended on how often earlier cells had been run.
first_pending_pair = ['acc', 'vlpfc']   # first pair that still has to be computed
last_pending_pair  = ['ains', 'dmpfc']  # last pair handled by this run

# itertools.combinations emits pairs in the order of coh_rois, so the slice
# between the two named pairs is well defined; .index raises ValueError if a
# bound is not a valid pair of coh_rois
all_roi_pairs = [list(tup) for tup in itertools.combinations(coh_rois, 2)]
pairs = all_roi_pairs[all_roi_pairs.index(first_pending_pair):
                      all_roi_pairs.index(last_pending_pair) + 1]
pairs

[['acc', 'vlpfc'],
 ['acc', 'amy'],
 ['acc', 'pins'],
 ['acc', 'dmpfc'],
 ['ains', 'ofc'],
 ['ains', 'dlpfc'],
 ['ains', 'vlpfc'],
 ['ains', 'amy'],
 ['ains', 'pins'],
 ['ains', 'dmpfc']]

In [None]:
### resume coherence calculation with remaining pairs 
# For every ROI pair in `pairs`, and every subject with electrodes in both ROIs:
#   1. load the subject's connectivity epochs
#   2. pair every source-ROI channel with every target-ROI channel
#   3. compute `metric` connectivity within the `band` frequency range
#   4. reshape to long format (one row per epoch x channel pair), attach the
#      subject/behavior labels and save a per-subject csv
# then concatenate all subjects and save one csv per ROI pair.

for pair in pairs:
    
    source_region = pair[0]
    target_region = pair[1]    
    
    # unique pair id roi1_roi2
    pair_id = '_'.join([source_region,target_region])
    
    # find subj with elecs in each roi 
    source_subj = roi_subj_ids[source_region]
    target_subj = roi_subj_ids[target_region]
    # find subj with elecs in both rois; sorted because the iteration order of a
    # set of strings can change between kernel sessions, which would change the
    # processing order and the row order of the saved files
    pair_subj = sorted(set(source_subj).intersection(target_subj))
    if len(pair_subj) == 0:
        # nothing to compute for this pair, and pd.concat raises on an empty list
        print(f'no subj with elecs in both {source_region} and {target_region} - skipping {pair_id}')
        continue
    
    # initialize the storage list 
    all_subj_pair_df = []
    
    # iterate through pair subjects
    for subj_id in pair_subj:
        
        # load & format rereferenced epoch data 
        subj_epochs = mne.read_epochs(f'{neural_dir}{subj_id}/{subj_id}_conn_epochs_{epoch_id}.fif', preload=False)
        subj_elecs  = subj_epochs.ch_names
        
        # construct the seed-to-target mapping based on subject's roi coverage 
        elec_roi_df = roi_reref_labels_master_df[roi_reref_labels_master_df.subj_id==subj_id].reset_index(drop=True)
        # get ch names of subj elecs in roi 
        source_ch_names  = elec_roi_df.reref_ch_names[np.where(elec_roi_df.roi == source_region)[0]].tolist()
        target_ch_names  = elec_roi_df.reref_ch_names[np.where(elec_roi_df.roi == target_region)[0]].tolist()
        # get idx of ch in subj_elecs list (will correspond to idx in epochs array)
        source_elec_idx = [subj_elecs.index(elec) for elec in source_ch_names]
        target_elec_idx = [subj_elecs.index(elec) for elec in target_ch_names]
        # make seed to target indices using mne function 
        seed_to_target = seed_target_indices(
                        source_elec_idx,
                        target_elec_idx)
        
        # channel names of every seed/target combination, in seed_to_target order.
        # The two names are kept as separate lists so they never have to be
        # recovered by splitting a joined string on '_' (which would break for
        # channel names that contain an underscore).
        pair_source_ch = [subj_elecs[idx] for idx in seed_to_target[0]]
        pair_target_ch = [subj_elecs[idx] for idx in seed_to_target[1]]
        # elec name for every elec pair 
        subj_pair_ch = ['_'.join([src_ch, tgt_ch]) for src_ch, tgt_ch in zip(pair_source_ch, pair_target_ch)]
        # unique elec name for every elec pair 
        unique_ch_pair = ['_'.join([subj_id, ch_pair]) for ch_pair in subj_pair_ch]
        
        # compute pwise coherence 
        # NOTE(review): the result is consumed below as [epoch, channel pair] -
        # confirm against oscillation_utils.compute_connectivity
        pwise = oscillation_utils.compute_connectivity(subj_epochs, 
                                           band = freq_dict[band], 
                                           metric = metric, 
                                           indices = seed_to_target, 
                                           freqs = freqs, 
                                           n_cycles = n_cycles,
                                           buf_ms = buf_ms, 
                                           n_surr=n_surr,
                                           avg_over_dim='time',
                                           band1 = freq_dict[band],
                                           parallelize=True)
        n_epochs = pwise.shape[0]

        # Behavior labels for this subject, matched to the connectivity rows BY
        # POSITION. Assigning the all_behav columns as Series would align on index
        # labels instead: coh_df has a repeated 0..n_epochs-1 index while the
        # all_behav rows keep their labels from the combined table, so the labels
        # would end up NaN or attached to the wrong rows.
        subj_behav = all_behav[all_behav.subj_id == subj_id].reset_index(drop=True)
        if len(subj_behav) == n_epochs:
            epoch_labels = subj_behav.epoch.to_numpy()
            round_labels = subj_behav.Round.to_numpy()
        else:
            # behavior table and epochs are out of sync: keep the (expensive)
            # connectivity values but do not guess the trial labels.
            # print is used because the warnings module is silenced in this notebook
            print(f'WARNING: {subj_id} has {n_epochs} connectivity epochs but {len(subj_behav)} behav trials - '
                  f'Round set to NaN and epoch left as positional index')
            epoch_labels = np.arange(0, n_epochs)
            round_labels = np.full(n_epochs, np.nan)

        # long format: one block of n_epochs rows per channel pair
        pair_frames = []
        for ch_ix, ch_name in enumerate(unique_ch_pair):
            pair_frames.append(pd.DataFrame({'epoch':epoch_labels,'coh':pwise[:,ch_ix],
                                             'unique_ch_pair':ch_name,
                                             'roi_pair_chans':subj_pair_ch[ch_ix],
                                             'roi1_ch_names':pair_source_ch[ch_ix],
                                             'roi2_ch_names':pair_target_ch[ch_ix],
                                             # index of THIS pair's electrodes in subj_elecs
                                             # (not the whole index array repeated in every row)
                                             'roi1_elec_idx':seed_to_target[0][ch_ix],
                                             'roi2_elec_idx':seed_to_target[1][ch_ix]}))
        coh_df = pd.concat(pair_frames)

        coh_df['subj_id']  = subj_id
        coh_df['bdi']      = subj_behav.bdi.unique().tolist()[0] if len(subj_behav) > 0 else np.nan
        # rows are ordered channel pair by channel pair, so the per-epoch labels repeat once per pair
        coh_df['Round']    = np.tile(round_labels, len(pair_frames))
        coh_df['band']     = band
        coh_df['metric']   = metric
        coh_df['pair_id']  = pair_id
        coh_df['roi1']     = source_region
        coh_df['roi2']     = target_region

        # one pair one subj data 
        coh_df.to_csv(f'{save_dir}{subj_id}_{pair_id}_{metric}_{band}_df.csv')
        all_subj_pair_df.append(coh_df)
        # free memory before loading the next subject
        del coh_df, subj_epochs, pwise
#         print(f'finished {subj_id} {pair_id}')
        
    # one pair all subj data 
#     print(f'finished all {pair_id}')
    all_subj_pair_df = pd.concat(all_subj_pair_df).reset_index()
    # save roi pair df separately 
    all_subj_pair_df.to_csv(f'{save_dir}{pair_id}_{metric}_{band}_df_1sec.csv', index=False)
    
    del all_subj_pair_df
    

Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/data/MS016/MS016_conn_epochs_CpeOnset.fif ...
    Found the data of interest:
        t =   -1000.00 ...    2000.00 ms
        0 CTF compensation matrices available
Not setting metadata
150 matching events found
No baseline correction applied
0 projection items activated
Loading data for 150 events and 1501 original time points ...
Loading data for 150 events and 1501 original time points ...
Loading data for 150 events and 1501 original time points ...
   Processing epoch 147 / 150 ...
   Processing epoch 148 / 150 ...
   Processing epoch 149 / 150 ...
   Processing epoch 150 / 150 ...
[Connectivity computation done]


In [None]:
######## to compute coh after notebook ends in middle of roi pair
# pairs = pairs[-4:]
# pairs
# pair_id = 'ofc_amy'
# pair    = pairs[0]

# source_region = pair[0]
# target_region = pair[1]    

# # unique pair id roi1_roi2
# pair_id = '_'.join([source_region,target_region])

# # find subj with elecs in each roi 
# source_subj = roi_subj_ids[source_region]
# target_subj = roi_subj_ids[target_region]

# # find subj with elecs in both rois
# pair_subj = list(set(source_subj).intersection(target_subj))    
# # find completed subj files
# complete_subj_files = glob(f'{save_dir}*ofc_amy_coh_beta_df.csv')
# complete_subj = [file.split('/')[-1].split('_')[0] for file in complete_subj_files]
# # find subj from pair_subj with no saved data
# incomplete_subj = list(np.setdiff1d(pair_subj, complete_subj)) # should be ['MS027','MS017']

# # initialize the storage list 
# all_subj_pair_df = []

# # run incomplete subj only! 
# for subj_id in incomplete_subj:

#     # load & format rereferenced epoch data 
#     subj_epochs = mne.read_epochs(f'{neural_dir}{subj_id}/{epoch_id}_epochs-clean.fif', preload=True)
#     subj_elecs  = subj_epochs.ch_names

#     # construct the seed-to-target mapping based on subject's roi coverage 
#     elec_roi_df = roi_reref_labels_master_df[roi_reref_labels_master_df.subj_id==subj_id].reset_index(drop=True)
#     # get ch names of subj elecs in roi 
#     source_ch_names  = elec_roi_df.reref_ch_names[np.where(elec_roi_df.roi == source_region)[0]].tolist()
#     target_ch_names  = elec_roi_df.reref_ch_names[np.where(elec_roi_df.roi == target_region)[0]].tolist()
#     # get idx of ch in subj_elecs list (will correspond to idx in epochs array)
#     source_elec_idx = [subj_elecs.index(elec) for elec in source_ch_names]
#     target_elec_idx = [subj_elecs.index(elec) for elec in target_ch_names]
#     # make seed to target indices using mne function 
#     seed_to_target = seed_target_indices(
#                     source_elec_idx,
#                     target_elec_idx)

#     # elec name for every elec pair 
#     subj_pair_ch = list(map(lambda x,y: '_'.join([x,y]), 
#                               [subj_elecs[idx] for idx in  seed_to_target[0]], 
#                               [subj_elecs[idx] for idx in  seed_to_target[1]]))
#     # unique elec name for every elec pair 
#     unique_ch_pair = list(map(lambda x,y: '_'.join([x,y]), [subj_id]*len(subj_pair_ch), subj_pair_ch))

#     # compute pwise coherence 
#     pwise = oscillation_utils.compute_connectivity(subj_epochs.copy(), 
#                                        band = freq_dict[band], 
#                                        metric = metric, 
#                                        indices = seed_to_target, 
#                                        freqs = freqs, 
#                                        n_cycles = n_cycles,
#                                        buf_ms = buf_ms, 
#                                        n_surr=n_surr,
#                                        avg_over_dim='time',
#                                        band1 = freq_dict[band],
#                                        parallelize=True)


#     coh_df = pd.concat([pd.DataFrame({'epoch':np.arange(0,pwise.shape[0]),'coh':pwise[:,ch_ix],
#                                       'unique_ch_pair':[ch_name]*pwise.shape[0],
#                                       'roi_pair_chans':['_'.join(ch_name.split('_')[1:])]*pwise.shape[0],
#                                       'roi1_ch_names':[ch_name.split('_')[1]]*pwise.shape[0],
#                                       'roi2_ch_names':[ch_name.split('_')[2]]*pwise.shape[0],
#                                       'roi1_elec_idx':[seed_to_target[0]]*pwise.shape[0],
#                                       'roi2_elec_idx':[seed_to_target[1]]*pwise.shape[0]}) 
#                         for ch_ix, ch_name in enumerate(unique_ch_pair)])



#     coh_df['subj_id']  = subj_id
#     coh_df['bdi']      = all_behav[all_behav.subj_id == subj_id].bdi.unique().tolist()[0]
#     coh_df['Round']    = all_behav[all_behav.subj_id == subj_id].Round
#     coh_df['epoch']    = all_behav[all_behav.subj_id == subj_id].epoch
#     coh_df['band']     = band
#     coh_df['metric']   = metric
#     coh_df['pair_id']  = pair_id
#     coh_df['roi1']     = source_region
#     coh_df['roi2']     = target_region

#     # one pair one subj data 
#     coh_df.to_csv(f'{save_dir}{subj_id}_{pair_id}_{metric}_{band}_df.csv')
#     all_subj_pair_df.append(coh_df)
#     del coh_df, subj_epochs,pwise

# # add completed subj dfs to concat list 
# for file in complete_subj_files:
#     subj_id = file.split('/')[-1].split('_')[0]
#     coh_df = pd.read_csv(file)
#     all_subj_pair_df.append(coh_df)

# # make pair df 
# all_subj_pair_df = pd.concat(all_subj_pair_df).reset_index()
# # save roi pair df  
# all_subj_pair_df.to_csv(f'{save_dir}{pair_id}_{metric}_{band}_df.csv', index=False)

    