# SWB Anatomical Coverage 
Created: 04/07/2024

In [1]:
import numpy as np
import mne
from glob import glob
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
from scipy.stats import zscore, linregress, ttest_ind, ttest_rel, ttest_1samp, pearsonr, spearmanr
import pandas as pd
from mne.preprocessing.bads import _find_outliers
import os 
import joblib
import re
import datetime
import scipy
import random
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import variance_inflation_factor

import warnings
warnings.filterwarnings('ignore')




In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import sys
sys.path.append('/sc/arion/projects/guLab/Alie/SWB/ephys_analysis/LFPAnalysis/')

In [4]:
from LFPAnalysis import lfp_preprocess_utils, sync_utils, analysis_utils, nlx_utils

In [5]:
# Specify root directory for un-archived data and results 
base_dir = '/sc/arion/projects/guLab/Alie/SWB/'
anat_dir = f'{base_dir}ephys_analysis/recon_labels/'
neural_dir = f'{base_dir}ephys_analysis/data/'
behav_dir = f'{base_dir}swb_behav_models/data/behavior_preprocessed/'

date = datetime.date.today().strftime('%m%d%Y')
print(date)

04082024


## Data Loading
- ROI labels for all bp reref elecs 
- Current subj list for CCN 2024

In [6]:
# Subj list for CCN 2024
subj_ids = list(pd.read_excel(f'{base_dir}ephys_analysis/subj_info/SWB_subjects.xlsx', sheet_name='Usable_Subjects', usecols=[0]).PatientID)
n_subj = len(subj_ids)
# subj_ids


In [7]:
# load bp reref ch names 
reref_ch_all_subj  = {}

for subj_id in subj_ids:
    # load reref data 
    bp_data = mne.io.read_raw_fif(f'{neural_dir}{subj_id}/bp_ref_ieeg.fif', preload=False,verbose=0)
    subj_ch = bp_data.ch_names
    # save info 
    reref_ch_all_subj[subj_id]  = subj_ch
# reref_ch_all_subj

In [8]:
# load anat recon files 
anat_info_all_subj = {}

for subj_id in subj_ids:
    # load anat recon file 
    subj_anat_df = pd.read_csv(f'{anat_dir}{subj_id}_labels.csv')
    # save info 
    anat_info_all_subj[subj_id] = subj_anat_df 
# anat_info_all_subj

In [9]:
# Anatomical Localization Info - Lab YBA ROIs  

# ROI labels for YBA regions
roi_label_info = pd.read_excel(f'{base_dir}ephys_analysis/LFPAnalysis/LFPAnalysis/YBA_ROI_labelled.xlsx',
                               usecols=['Hemisphere','Lobe','Region','Custom','Long.name'])
roi_label_info = roi_label_info.apply(lambda x: x.str.lower())
roi_label_info = roi_label_info.apply(lambda x: x.str.strip())
roi_label_info

Unnamed: 0,Hemisphere,Lobe,Region,Custom,Long.name
0,left,temporal,temporal pole,temporal pole,left superior temporal pole a
1,left,temporal,temporal pole,temporal pole,left superior temporal pole b
2,left,temporal,temporal pole,temporal pole,left superior temporal pole b
3,left,temporal,temporal pole,temporal pole,left superior temporal pole c
4,left,temporal,temporal pole,temporal pole,left superior temporal pole d
...,...,...,...,...,...
685,right,insula,insula,pins,right anterior long insular gyrus d
686,right,insula,insula,pins,right posterior long insular gyrus a
687,right,insula,insula,pins,right posterior long insular gyrus b
688,right,insula,insula,pins,right posterior long insular gyrus c


## ROI labels 

In [10]:
roi_reref_labels_all_subj = {}
roi_reref_labels_master = []
unknown_subj = {}

for subj_id in subj_ids:
    # get subj reref ch
    reref_ch_names = reref_ch_all_subj[subj_id]
    # get anat file 
    anat_df = anat_info_all_subj[subj_id]
    anat_df['label'] = anat_df['label'].str.lower()
    anat_df['final_loc'] = [row.ManualExamination.lower().strip() if pd.isna(row.ManualExamination) == False else row.YBA_1.lower().strip() for index, row in anat_df.iterrows()]
    if np.sum(anat_df.final_loc.unique().tolist().count('unknown'))!=0:
        unknown_subj[subj_id] = np.sum(anat_df.final_loc.unique().tolist().count('unknown'))
    
    # check which elec should be used for roi label 
    loc4roi = []
    ch_label4roi = []
    ch_type4roi = []
    for ch in reref_ch_names:
        anode = ch.split('-')[0]
        cathode = ch.split('-')[1]
        anode_loc = anat_df.final_loc[anat_df.label == anode].str.lower().tolist()
        cathode_loc = anat_df.final_loc[anat_df.label == cathode].str.lower().tolist()
        if (anode_loc[0] == 'wm') | (anode_loc[0] == 'unknown'): # if anode is in wm or labeled unknown check cathode
            if (cathode_loc[0] == 'wm') | (cathode_loc[0] == 'unknown'): # check if cathode also wm or unknown 
                if (anode_loc[0] == 'wm') & (cathode_loc[0] == 'unknown'): # if anode is wm and cathode is unknown, use anode label
                    loc4roi.append('wm') # roi location detail 
                    ch_label4roi.append(anode) # ch label used for roi 
                    ch_type4roi.append('anode') # whether anode vs cathode was used 
                else: # if cathode is wm and anode is wm or unknown, use cathode, if cathode is unknown and anode is unknown use cathode
                    loc4roi.append(cathode_loc[0]) # roi location detail 
                    ch_label4roi.append(cathode) # ch label used for roi 
                    ch_type4roi.append('cathode') # whether anode vs cathode was used 
            else: # if cathode is in gm use cathode
                loc4roi.append(cathode_loc[0]) # roi location detail 
                ch_label4roi.append(cathode) # ch label used for roi
                ch_type4roi.append('cathode') # whether anode vs cathode was used 
        else: # if anode is in gm use anode 
            loc4roi.append(anode_loc[0]) # roi location detail 
            ch_label4roi.append(anode) # ch label used for roi
            ch_type4roi.append('anode') # whether anode vs cathode was used 
            
    roi_reref_labels_all_subj[subj_id] = {'subj_id':[subj_id]*len(reref_ch_names),
                                          'reref_ch_names':reref_ch_names,
                                          'loc4roi':loc4roi,
                                          'ch_label4roi':ch_label4roi,
                                         'ch_type4roi':ch_type4roi }
    roi_reref_labels_master.append(pd.DataFrame({'subj_id':[subj_id]*len(reref_ch_names),
                                          'reref_ch_names':reref_ch_names,
                                          'ch_label4roi':ch_label4roi,
                                         'ch_type4roi':ch_type4roi,
                                                 'loc4roi':loc4roi }))
  

 
roi_reref_labels_master_df = pd.concat(roi_reref_labels_master).reset_index()
    
    

In [11]:
unknown_subj

{'MS009': 1, 'MS015': 1, 'MS022': 1, 'MS024': 1}

In [12]:
# add hemisphere to df by taking first letter of channel used for localization
roi_reref_labels_master_df['hemi'] = pd.Series(roi_reref_labels_master_df.ch_label4roi.str[0])
# np.unique(roi_reref_labels_master_df.hemi) #MS004 has a probe that starts with p - replace with second letter of label
roi_reref_labels_master_df.hemi.loc[roi_reref_labels_master_df.index[roi_reref_labels_master_df.hemi=='p']] = roi_reref_labels_master_df.loc[roi_reref_labels_master_df.index[roi_reref_labels_master_df.hemi=='p']].ch_label4roi.str[1]
# np.unique(roi_reref_labels_master_df.hemi)
roi_reref_labels_master_df = roi_reref_labels_master_df.drop(columns=['index']) # clean df remove old index column
roi_reref_labels_master_df



Unnamed: 0,subj_id,reref_ch_names,ch_label4roi,ch_type4roi,loc4roi,hemi
0,MS002,lacas1-lacas2,lacas1,anode,left cingulate gyrus d,l
1,MS002,lacas2-lacas3,lacas2,anode,left cingulate gyrus e,l
2,MS002,lacas3-lacas4,lacas3,anode,left cingulate gyrus f,l
3,MS002,lacas4-lacas5,lacas4,anode,left cingulate gyrus f,l
4,MS002,lacas5-lacas6,lacas5,anode,left cingulate gyrus g,l
...,...,...,...,...,...,...
1343,MS041,rmolf4-rmolf5,rmolf4,anode,right frontal orbital 2 c,r
1344,MS041,rmolf9-rmolf10,rmolf10,cathode,right anterior pars triangularis a,r
1345,MS041,rmolf10-rmolf11,rmolf10,anode,right anterior pars triangularis a,r
1346,MS041,rppvnh1-rppvnh2,rppvnh1,anode,right caudate,r


In [13]:
(roi_reref_labels_master_df.loc4roi.unique().tolist())

['left cingulate gyrus d',
 'left cingulate gyrus e',
 'left cingulate gyrus f',
 'left cingulate gyrus g',
 'left superior frontal gyrus 1 a',
 'left superior frontal gyrus 2 b',
 'left superior frontal gyrus 2 c',
 'left amygdala inferior',
 'left superior middle temporal gyrus a',
 'left middle short insular gyrus c',
 'left anterior short insular gyrus b',
 'left anterior short insular gyrus a',
 'left pars opercularis c',
 'left superior frontal gyrus 4 d',
 'left hippocampus pes',
 'left hippocampus body',
 'left superior middle temporal gyrus d',
 'left inferior middle temporal gyrus d',
 'left frontal orbital 4 d',
 'left middle pars triangularis a',
 'left posterior pars triangularis a',
 'left frontal orbital 3 b',
 'left anterior pars triangularis b',
 'left posterior long insular gyrus b',
 'left anterior long insular gyrus a',
 'left middle frontal gyrus 7 a',
 'left superior frontal gyrus 5 d',
 'left pars opercularis a',
 'left anterior motor b',
 'left posterior motor i

In [14]:
roi_label_info.Custom[~roi_label_info.Custom.isnull()].unique()

array(['temporal pole', 'temporal', 'sts', 'parietal', 'motor', 'dmpfc',
       'dlpfc', 'vlpfc', 'ofc', 'vmpfc', 'phg', 'acc', 'mcc', 'pcc',
       'hpc', 'amy', 'ains', 'pins'], dtype=object)

In [15]:
yba_roi = roi_label_info.Custom[~roi_label_info.Custom.isnull()].unique().tolist()
all_roi = yba_roi+['caudate','thalamus']
all_roi

['temporal pole',
 'temporal',
 'sts',
 'parietal',
 'motor',
 'dmpfc',
 'dlpfc',
 'vlpfc',
 'ofc',
 'vmpfc',
 'phg',
 'acc',
 'mcc',
 'pcc',
 'hpc',
 'amy',
 'ains',
 'pins',
 'caudate',
 'thalamus']

In [16]:
roi_label_info.Custom[]

SyntaxError: invalid syntax (1114721983.py, line 1)

In [None]:
roi_reref_labels_master_df.loc4roi.str.contains('thalamus').iloc[idx]

In [17]:
#### currently there are a few bad ch with locs listed as 'wm', 'gm', 'unknown' 
bad_rois = ['wm', 'gm', 'unknown','oob'] # no oob but adding just in case 
roi_labels = []

#### add roi to df 
for idx, row in roi_reref_labels_master_df.iterrows():
    loc = row.loc4roi
    if any(loc in x  for x in bad_rois): #pd.Series(row['loc4roi']).isin(bad_rois)[0]
        roi_labels.append(np.nan)
    else: 
        if roi_reref_labels_master_df.loc4roi.str.contains('thalamus').iloc[idx]: 
            roi = 'thalamus'
            roi_labels.append(roi)
        elif roi_reref_labels_master_df.loc4roi.str.contains('caudate').iloc[idx]: 
            roi = 'caudate'
            roi_labels.append(roi)
        elif pd.Series(row['loc4roi'].strip()).isin(roi_label_info['Long.name'])[0]:
            roi = roi_label_info['Custom'][np.where(row['loc4roi'].strip() == roi_label_info['Long.name'])[0]].item() 
            roi_labels.append(roi)
        else: 
            roi_labels.append(np.nan)
    
roi_reref_labels_master_df['roi'] = roi_labels


In [18]:
#### list of labels without ROIs - either poorly labeled or isn't labeled in our YBA roi key
bad_ch_df = roi_reref_labels_master_df[roi_reref_labels_master_df.roi.isnull()]
bad_ch_df.loc4roi.unique().tolist()

['left inferior middle temporal gyrus d',
 'right inferior middle temporal gyrus d',
 'left inferior middle temporal gyrus a',
 'right somatosensory m',
 'right supramarginal gyrus 1 d',
 'wm',
 'gm',
 'left supramarginal gyrus 4 a',
 'left somatosensory m',
 'left medial occipitotemporal gyrus d',
 'left inferior middle temporal gyrus e',
 'right basal inferior temporal gyrus b',
 'right inferior middle temporal gyrus f',
 'unknown',
 'left inferior middle temporal gyrus c',
 'left supramarginal gyrus 3 a',
 'left somatosensory f',
 'right medial occipitotemporal gyrus a',
 'left supramarginal gyrus 5 e',
 'left supramarginal gyrus 1 b',
 'left superior angular gyrus b',
 'right inferior middle temporal gyrus b',
 'right superior angular gyrus b',
 'left inferior middle temporal gyrus b',
 'left medial occipitotemporal gyrus e',
 'right inferior middle temporal gyrus a',
 'right medial occipitotemporal gyrus e',
 'left basal inferior temporal gyrus e',
 'left lateral inferior temporal

In [19]:
roi_reref_labels_master_df

Unnamed: 0,subj_id,reref_ch_names,ch_label4roi,ch_type4roi,loc4roi,hemi,roi
0,MS002,lacas1-lacas2,lacas1,anode,left cingulate gyrus d,l,acc
1,MS002,lacas2-lacas3,lacas2,anode,left cingulate gyrus e,l,acc
2,MS002,lacas3-lacas4,lacas3,anode,left cingulate gyrus f,l,acc
3,MS002,lacas4-lacas5,lacas4,anode,left cingulate gyrus f,l,acc
4,MS002,lacas5-lacas6,lacas5,anode,left cingulate gyrus g,l,acc
...,...,...,...,...,...,...,...
1343,MS041,rmolf4-rmolf5,rmolf4,anode,right frontal orbital 2 c,r,ofc
1344,MS041,rmolf9-rmolf10,rmolf10,cathode,right anterior pars triangularis a,r,vlpfc
1345,MS041,rmolf10-rmolf11,rmolf10,anode,right anterior pars triangularis a,r,vlpfc
1346,MS041,rppvnh1-rppvnh2,rppvnh1,anode,right caudate,r,caudate


In [20]:
roi_reref_labels_master_df.to_csv(f'{base_dir}ephys_analysis/subj_info/roi_reref_labels_master_{date}.csv')

In [21]:
roi_reref_labels_master_df['roi'].value_counts()

dmpfc            179
acc              155
ofc              142
sts              101
dlpfc             88
amy               79
hpc               78
temporal          74
vlpfc             70
ains              50
pins              46
thalamus          36
mcc               28
parietal          22
phg               19
motor             17
vmpfc             17
temporal pole     12
caudate            3
pcc                1
Name: roi, dtype: int64

In [22]:
counts_all_subj = roi_reref_labels_master_df.groupby('subj_id')['roi'].value_counts()

roi_subj_counts = []

for subj_id in subj_ids:
    subj_roi_nums = counts_all_subj[subj_id]
    count_df = pd.DataFrame()
    count_df['subj_id'] = [subj_id]*len(subj_roi_nums.to_list())
    count_df['roi'] = subj_roi_nums.index.tolist()
    count_df['count'] = subj_roi_nums.to_list()
    roi_subj_counts.append(count_df)

roi_subj_counts = pd.concat(roi_subj_counts)

In [23]:
roi_subj_counts.to_csv(f'{base_dir}ephys_analysis/subj_info/roi_subj_counts_{date}.csv')

# Check anat by BDI

In [30]:
bdi_list = pd.read_excel(f'{base_dir}ephys_analysis/subj_info/SWB_subjects.xlsx', sheet_name='Usable_Subjects', usecols=[3])
bdi_list = list(bdi_list.SWB_BDI)
# list(zip(subj_ids,bdi_list))
subj_info_df = pd.DataFrame({'subj_id':subj_ids,'bdi':bdi_list})
subj_info_df

Unnamed: 0,subj_id,bdi
0,MS002,14
1,MS003,8
2,MS004,7
3,MS009,16
4,MS011,13
5,MS015,26
6,MS016,10
7,MS017,26
8,MS019,12
9,MS022,10


In [32]:
high_bdi_subj = subj_info_df.subj_id[subj_info_df.bdi>=20]
low_bdi_subj = subj_info_df.subj_id[subj_info_df.bdi<20]


In [None]:
high_bdi_roi

low_bdi_roi 

In [33]:
roi_subj_counts.roi.unique()

array(['vlpfc', 'dmpfc', 'sts', 'ofc', 'acc', 'ains', 'dlpfc', 'hpc',
       'amy', 'pins', 'motor', 'vmpfc', 'mcc', 'phg', 'temporal',
       'temporal pole', 'parietal', 'thalamus', 'pcc', 'caudate'],
      dtype=object)

In [53]:
roi_counts_by_bdi = []

for region in roi_subj_counts.roi.unique().tolist():
#     high_bdi_info = roi_subj_counts[roi_subj_counts.subj_id.isin(high_bdi_subj)]
#     low_bdi_info = roi_subj_counts[roi_subj_counts.subj_id.isin(low_bdi_subj)]
    high_bdi_num = np.sum(roi_subj_counts['count'][(roi_subj_counts.subj_id.isin(high_bdi_subj))
                                                   &(roi_subj_counts.roi==region)].values)
    low_bdi_num = np.sum(roi_subj_counts['count'][(roi_subj_counts.subj_id.isin(low_bdi_subj))
                                                  &(roi_subj_counts.roi==region)].values)
    region_bdi_counts = pd.DataFrame({'roi':region,'high_bdi_num':high_bdi_num,'low_bdi_num':low_bdi_num},index=[0])
    roi_counts_by_bdi.append(region_bdi_counts)
    
roi_counts_by_bdi = pd.concat(roi_counts_by_bdi)
roi_counts_by_bdi
    
    

Unnamed: 0,roi,high_bdi_num,low_bdi_num
0,vlpfc,13,57
0,dmpfc,43,136
0,sts,8,93
0,ofc,37,105
0,acc,34,121
0,ains,11,39
0,dlpfc,18,70
0,hpc,22,56
0,amy,19,60
0,pins,18,28


In [54]:
roi_counts_by_bdi.to_csv(f'{base_dir}ephys_analysis/subj_info/roi_counts_by_bdi_{date}.csv')