# SWB analysis pipeline part 1: 

- Create a master DataFrame of ROI labels for the bipolar re-referenced channels
- Determine ROI coverage for each subject and by BDI score


*Created: 04/07/2024* \
*Updated: 06/24/2024* 


In [None]:
import numpy as np
import pandas as pd
import mne
from glob import glob
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
from scipy.stats import zscore, linregress, ttest_ind, ttest_rel, ttest_1samp, pearsonr, spearmanr
from mne.preprocessing.bads import _find_outliers
import os 
import joblib
import re
import datetime
import scipy
import random
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import variance_inflation_factor

import warnings
warnings.filterwarnings('ignore')




In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys
sys.path.append('/sc/arion/projects/guLab/Alie/SWB/ephys_analysis/LFPAnalysis/')

In [None]:
from LFPAnalysis import lfp_preprocess_utils, sync_utils, analysis_utils, nlx_utils

In [None]:
# Root directory for un-archived data and results; every other path is built from it.
base_dir = '/sc/arion/projects/guLab/Alie/SWB/'

# Sub-directories used by this notebook.
anat_dir = f'{base_dir}ephys_analysis/recon_labels/'
neural_dir = f'{base_dir}ephys_analysis/data/'
behav_dir = f'{base_dir}swb_behav_models/data/behavior_preprocessed/'

# Today's date as MMDDYYYY (only referenced by the commented-out dated output paths).
date = f'{datetime.date.today():%m%d%Y}'
print(date)

## Data Loading

In [None]:
# Subject list for CCN 2024: first column (PatientID) of the 'Usable_Subjects' sheet.
subj_sheet = pd.read_excel(
    f'{base_dir}ephys_analysis/subj_info/SWB_subjects.xlsx',
    sheet_name='Usable_Subjects',
    usecols=[0],
)
subj_ids = list(subj_sheet['PatientID'])
n_subj = len(subj_ids)
subj_ids


In [None]:
# One anatomical label table per subject, read from '<subj_id>_labels.csv'
# and keyed by the subject id rendered as a string.
anat_info_all_subj = {
    str(sid): pd.read_csv(f'{anat_dir}{sid}_labels.csv')
    for sid in subj_ids
}


In [None]:
anat_info_all_subj

In [None]:
anat_info_all_subj.keys()

In [None]:
# Names of the bipolar re-referenced channels that are not marked bad, per subject.
reref_ch_all_subj = {}

for subj_id in subj_ids:
    fif_path = f'{neural_dir}{subj_id}/bp_ref_ieeg.fif'
    # preload=False: only the channel metadata is used here, not the signal
    raw_bp = mne.io.read_raw_fif(fif_path, preload=False, verbose=12)
    # drop_channels works in place, so raw_bp keeps only the good channels
    raw_bp.drop_channels(raw_bp.info['bads'])
    reref_ch_all_subj[subj_id] = raw_bp.ch_names
    del raw_bp

reref_ch_all_subj

## BP Re-Ref ROI Labels
- Choose which bipolar re-ref channel (anode vs cathode) should be used for ROI label
- Compile yba/manual anatomical information from anat dfs
- Find custom ROI label for yba labels

#### FUTURE IMPLEMENTATION: 
- USE SALMAN'S FUNCTION TO FIND OTHER DEFAULTS FOR MISSING ELECS
- DEAL WITH LESION ELECS 

In [None]:
# Lab key that maps YBA atlas regions to custom ROI names.
# Every retained column is normalised to lower case with surrounding
# whitespace removed so later string comparisons are exact.
roi_label_info = pd.read_excel(
    f'{base_dir}ephys_analysis/LFPAnalysis/LFPAnalysis/YBA_ROI_labelled.xlsx',
    usecols=['Hemisphere', 'Lobe', 'Region', 'Custom', 'Long.name'],
)
roi_label_info = roi_label_info.apply(lambda col: col.str.lower().str.strip())
roi_label_info

In [None]:
roi_label_info.Custom.unique().tolist()

In [None]:
# Build one row per good bipolar channel with the contact chosen to represent
# it anatomically, that contact's location label, and its MNI coordinates.
roi_reref_labels_master = []
# subj_id -> number of electrodes whose final location is 'unknown'
# (only subjects with at least one such electrode are recorded)
unknown_subj = {}

# Location labels that carry no region information.
nonspecific_locs = ('wm', 'unknown')

for subj_id in subj_ids:
    # get subj reref ch
    reref_ch_names = reref_ch_all_subj[subj_id]
    # get anat file (edited in place: 'label' is lower-cased, 'final_loc' is added)
    anat_df = anat_info_all_subj[subj_id]
    anat_df['label'] = anat_df['label'].str.lower()

    # The manual examination label overrides the YBA_1 label whenever it is present.
    manual_loc = anat_df['ManualExamination'].astype(object)
    final_loc = manual_loc.where(manual_loc.notna(), anat_df['YBA_1'])
    if final_loc.isna().any():
        # Fail with the offending electrodes instead of an AttributeError on a float.
        missing = anat_df.loc[final_loc.isna(), 'label'].tolist()
        raise ValueError(
            f'{subj_id}: electrodes with neither ManualExamination nor YBA_1 label: {missing}'
        )
    anat_df['final_loc'] = final_loc.str.lower().str.strip()

    # Count the electrodes labelled 'unknown' (counting within the unique
    # labels, as before, could only ever give 0 or 1).
    n_unknown = int((anat_df['final_loc'] == 'unknown').sum())
    if n_unknown:
        unknown_subj[subj_id] = n_unknown

    # check which elec should be used for roi label
    loc4roi      = []  # roi location detail
    ch_label4roi = []  # ch label used for roi
    ch_type4roi  = []  # whether anode vs cathode was used

    for ch in reref_ch_names:
        anode, cathode = ch.split('-')[:2]
        anode_loc = anat_df.final_loc[anat_df.label == anode].tolist()
        cathode_loc = anat_df.final_loc[anat_df.label == cathode].tolist()

        # Use the anode when it has a specific location, or when it is 'wm'
        # and the cathode is 'unknown'; in every other case use the cathode.
        # The cathode location is only read when the anode is non-specific.
        use_anode = (
            anode_loc[0] not in nonspecific_locs
            or (anode_loc[0] == 'wm' and cathode_loc[0] == 'unknown')
        )
        if use_anode:
            loc4roi.append(anode_loc[0])
            ch_label4roi.append(anode)
            ch_type4roi.append('anode')
        else:
            loc4roi.append(cathode_loc[0])
            ch_label4roi.append(cathode)
            ch_type4roi.append('cathode')

    # add mni coordinate info for plot recon roi script
    # (.item() requires exactly one anat row per chosen contact)
    mni_coords = {
        axis: [anat_df[axis][anat_df.label == ch].item() for ch in ch_label4roi]
        for axis in ('mni_x', 'mni_y', 'mni_z')
    }

    roi_reref_subj_df = pd.DataFrame({'subj_id': [subj_id] * len(reref_ch_names),
                                      'reref_ch_names': reref_ch_names,
                                      'ch_label4roi': ch_label4roi,
                                      'ch_type4roi': ch_type4roi,
                                      'loc4roi': loc4roi,
                                      **mni_coords})

    roi_reref_labels_master.append(roi_reref_subj_df)

roi_reref_labels_master_df = pd.concat(roi_reref_labels_master).reset_index(drop=True)
    
    

In [None]:
roi_reref_labels_master_df

In [None]:
unknown_subj

In [None]:
# Hemisphere code = first character of the contact label used for localization.
roi_reref_labels_master_df['hemi'] = roi_reref_labels_master_df['ch_label4roi'].str.get(0)

In [None]:
roi_reref_labels_master_df['hemi'].unique().tolist()

In [None]:
# Hemisphere code = first character of the contact label used for localization;
# the updated table is then displayed.
roi_reref_labels_master_df['hemi'] = roi_reref_labels_master_df['ch_label4roi'].str.get(0)
roi_reref_labels_master_df



In [None]:
(roi_reref_labels_master_df.loc4roi.unique().tolist())

In [None]:
roi_label_info.Custom[~roi_label_info.Custom.isnull()].unique()

In [None]:
# Distinct custom ROI names present in the YBA key (missing entries excluded),
# followed by the two ROIs that are assigned by substring match instead.
yba_roi = roi_label_info['Custom'].dropna().unique().tolist()
all_roi = [*yba_roi, 'caudate', 'thalamus']
all_roi

In [None]:
#### currently there are a few bad ch with locs listed as 'wm', 'gm', 'unknown'
bad_rois = ['wm', 'gm', 'unknown', 'oob']  # no oob but adding just in case

# YBA long name -> custom ROI, built once instead of searching the key per row.
# Rows that repeat the same (Long.name, Custom) pair are collapsed; a long name
# that still maps to several custom ROIs is ambiguous and is rejected on use.
yba_key = (roi_label_info[['Long.name', 'Custom']]
           .dropna(subset=['Long.name'])
           .drop_duplicates())
ambiguous_long_names = set(yba_key.loc[yba_key['Long.name'].duplicated(), 'Long.name'])
custom_roi_by_long_name = dict(zip(yba_key['Long.name'], yba_key['Custom']))


def _roi_from_location(loc):
    """Return the ROI for one location label, or NaN when it has none.

    Labels listed in ``bad_rois`` get NaN. Any label containing 'thalamus' or
    'caudate' maps to that ROI. Everything else is looked up by its stripped
    text in the YBA long-name key; labels absent from the key get NaN.

    Raises:
        ValueError: if the label matches a long name that maps to more than
            one custom ROI.
    """
    # Exact membership: the previous `loc in x` test checked whether the
    # label was a substring of a bad name, which was not the intent.
    if loc in bad_rois:
        return np.nan
    if 'thalamus' in loc:
        return 'thalamus'
    if 'caudate' in loc:
        return 'caudate'
    long_name = loc.strip()
    if long_name in ambiguous_long_names:
        raise ValueError(f'YBA long name {long_name!r} maps to more than one custom ROI')
    return custom_roi_by_long_name.get(long_name, np.nan)


#### add roi to df
roi_labels = [_roi_from_location(loc) for loc in roi_reref_labels_master_df['loc4roi']]
roi_reref_labels_master_df['roi'] = roi_labels


In [None]:
#### list of labels without ROIs - either poorly labeled or isn't labeled in our YBA roi key
bad_ch_df = roi_reref_labels_master_df[roi_reref_labels_master_df.roi.isnull()]
bad_ch_df.loc4roi.unique().tolist()

In [None]:
roi_reref_labels_master_df

In [None]:
# Save the master ROI table. The output directory is created first because
# to_csv does not create missing directories.
# Dated alternative, kept for reference:
# roi_reref_labels_master_df.to_csv(f'{base_dir}ephys_analysis/results/roi_reref_labels_master_{date}.csv')
roi_info_dir = f'{base_dir}ephys_analysis/results/roi_info/'
os.makedirs(roi_info_dir, exist_ok=True)
roi_reref_labels_master_df.to_csv(f'{roi_info_dir}roi_reref_labels_master.csv')



In [None]:
roi_reref_labels_master_df['roi'].value_counts()

In [None]:
# Number of channels per ROI for each subject, stacked in subj_ids order.
# value_counts ignores channels whose roi is NaN.
counts_all_subj = roi_reref_labels_master_df.groupby('subj_id')['roi'].value_counts()
# A subject with no ROI-labelled channel is absent from counts_all_subj;
# indexing it would raise KeyError, so such subjects are skipped.
subj_with_counts = set(counts_all_subj.index.get_level_values(0))

roi_subj_counts = []

for subj_id in subj_ids:
    if subj_id not in subj_with_counts:
        continue
    subj_roi_nums = counts_all_subj[subj_id]
    count_df = pd.DataFrame({'subj_id': [subj_id] * len(subj_roi_nums),
                             'roi': subj_roi_nums.index.tolist(),
                             'count': subj_roi_nums.tolist()})
    roi_subj_counts.append(count_df)

# Each subject's rows keep their own 0..k-1 index, as in the per-subject frames.
roi_subj_counts = pd.concat(roi_subj_counts)

In [None]:
roi_subj_counts[roi_subj_counts.subj_id=='MS048']

In [None]:
# Save the per-subject ROI counts. The output directory is created first
# because to_csv does not create missing directories.
# Dated alternative, kept for reference:
# roi_subj_counts.to_csv(f'{base_dir}ephys_analysis/results/roi_subj_counts_{date}.csv')
roi_info_dir = f'{base_dir}ephys_analysis/results/roi_info/'
os.makedirs(roi_info_dir, exist_ok=True)
roi_subj_counts.to_csv(f'{roi_info_dir}roi_subj_counts.csv')


In [None]:
roi_reref_labels_master_df

# Check anat by BDI

In [None]:
# BDI scores: column index 3 (SWB_BDI) of the same sheet the subject ids were
# read from, paired with subj_ids by row position.
bdi_list = list(
    pd.read_excel(
        f'{base_dir}ephys_analysis/subj_info/SWB_subjects.xlsx',
        sheet_name='Usable_Subjects',
        usecols=[3],
    )['SWB_BDI']
)
subj_info_df = pd.DataFrame(dict(subj_id=subj_ids, bdi=bdi_list))
subj_info_df

In [None]:
# Split subject ids at a BDI score of 20: >= 20 is "high", < 20 is "low".
# A subject with a missing score satisfies neither comparison.
scored_high = subj_info_df['bdi'] >= 20
scored_low = subj_info_df['bdi'] < 20
high_bdi_subj = subj_info_df.loc[scored_high, 'subj_id']
low_bdi_subj = subj_info_df.loc[scored_low, 'subj_id']


In [None]:
# Total channel count per ROI, summed separately over the subjects in
# high_bdi_subj and in low_bdi_subj. One output row per ROI, in order of
# first appearance in roi_subj_counts.
channel_counts = roi_subj_counts['count']
from_high_bdi = roi_subj_counts.subj_id.isin(high_bdi_subj)
from_low_bdi = roi_subj_counts.subj_id.isin(low_bdi_subj)

roi_counts_by_bdi = pd.concat([
    pd.DataFrame(
        {'roi': region,
         'high_bdi_num': np.sum(channel_counts[from_high_bdi & (roi_subj_counts.roi == region)].values),
         'low_bdi_num': np.sum(channel_counts[from_low_bdi & (roi_subj_counts.roi == region)].values)},
        index=[0],
    )
    for region in roi_subj_counts.roi.unique().tolist()
]).reset_index(drop=True)

    
    

In [None]:
roi_counts_by_bdi

In [None]:
# Save the ROI counts split by BDI group. The output directory is created
# first because to_csv does not create missing directories.
# Dated alternative, kept for reference:
# roi_counts_by_bdi.to_csv(f'{base_dir}ephys_analysis/results/roi_counts_by_bdi_{date}.csv')
roi_info_dir = f'{base_dir}ephys_analysis/results/roi_info/'
os.makedirs(roi_info_dir, exist_ok=True)
roi_counts_by_bdi.to_csv(f'{roi_info_dir}roi_counts_by_bdi.csv')
