# SWB analysis pipeline part 3: 

- aggregate CPE epoch power data + metadata + ROI locations
- make dataframes for trial-averaged regressions for all frequency bands


*Created:* 04/08/2024 \
*Updated:* 11/21/2024 \
**Updated with PT data + scripts**

In [1]:
import numpy as np
import mne
from glob import glob
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
from scipy.stats import zscore, linregress, ttest_ind, ttest_rel, ttest_1samp, pearsonr, spearmanr
import pandas as pd
from mne.preprocessing.bads import _find_outliers
import os 
import joblib
import re
import datetime
import scipy
import random
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.regression.mixed_linear_model import MixedLM 
from joblib import Parallel, delayed
import pickle
import itertools
import time 
from matplotlib.ticker import StrMethodFormatter
import sys


import warnings
warnings.filterwarnings('ignore')




In [4]:
# Specify root directory for un-archived data and results.
# Every path below is built by plain string interpolation, so base_dir (and
# each sub-directory) must keep its trailing slash.
base_dir   = '/sc/arion/projects/guLab/Alie/SWB/ephys_analysis/'
anat_dir   = f'{base_dir}recon_labels/'  # not referenced again in the visible notebook
neural_dir = f'{base_dir}epoch_data/'  # per-subject '<subj>_<epoch>-tfr.h5' power files are read from here
behav_dir  = f'{base_dir}behav/behav_data/'  # per-subject '<subj>_task_df.csv' files are read from here
raw_dir    = f'{base_dir}behav/raw_data/'  # not referenced again in the visible notebook
pt_path    = f'{base_dir}behav/subj_pt_data/'  # handed to format_all_behav as pt_dir
save_dir   = f'{base_dir}results/cpe_encoding/regression_dfs/'  # destination of the '<band>_master_df.csv' files

# Create the output folder up front; exist_ok=True keeps re-runs from failing.
os.makedirs(save_dir,exist_ok=True)

# Today's date as MMDDYYYY; used to stamp the saved behavior CSV.
date = datetime.date.today().strftime('%m%d%Y')
print(date)

11212024


In [5]:
%load_ext autoreload
%autoreload 2


sys.path.append(f'{base_dir}LFPAnalysis/')

from LFPAnalysis import analysis_utils

script_dir = '/hpc/users/finka03/swb_ephys_analysis/scripts/'
sys.path.append(f'{script_dir}analysis_notebooks/')

from ieeg_tools import *

sys.path.append(f'{script_dir}behav/')

from behav_utils import *
from swb_subj_behav import *

In [6]:
# Subject IDs come from the first column (PatientID) of the
# 'Usable_Subjects' sheet in the subject-info workbook.
subj_ids = list(
    pd.read_excel(
        f'{base_dir}subj_info/SWB_subjects.xlsx',
        sheet_name='Usable_Subjects',
        usecols=[0],
    )['PatientID']
)
n_subj = len(subj_ids)
# subj_ids


In [7]:
# Frequency bands for the band-power regressions, as [low, high] edges.
# Both edges are applied inclusively to the TFR `freq` values further down.
analysis_freqs = {
    'theta': [4, 8],
    'alpha': [8, 13],
    'beta': [13, 30],
    'gamma': [30, 70],
    'hfa': [70, 200],
}
analysis_freqs

{'theta': [4, 8],
 'alpha': [8, 13],
 'beta': [13, 30],
 'gamma': [30, 70],
 'hfa': [70, 200]}

In [8]:
### load ROI reref master
# Table of re-referenced channels; the columns used later in this notebook are
# subj_id, reref_ch_names, roi and hemi.
# The path contains no wildcard, so it is read directly: a missing file raises
# FileNotFoundError naming the path rather than an IndexError from glob(...)[0].
roi_reref_labels_master_df = pd.read_csv(f'{base_dir}results/roi_info/roi_reref_labels_master.csv')
# 'Unnamed: 0' is presumably the index column saved with the CSV; it carries
# no information needed here, so drop it.
roi_reref_labels_master_df = roi_reref_labels_master_df.drop(columns=['Unnamed: 0'])
roi_reref_labels_master_df



Unnamed: 0,subj_id,reref_ch_names,ch_label4roi,ch_type4roi,loc4roi,mni_x,mni_y,mni_z,hemi,roi
0,MS002,lacas1-lacas2,lacas1,anode,left cingulate gyrus d,-6.382462,37.158688,-3.130044,l,acc
1,MS002,lacas2-lacas3,lacas2,anode,left cingulate gyrus e,-6.368174,38.606223,2.270621,l,acc
2,MS002,lacas3-lacas4,lacas3,anode,left cingulate gyrus f,-6.390079,39.941566,7.640265,l,acc
3,MS002,lacas4-lacas5,lacas4,anode,left cingulate gyrus f,-6.914519,41.546899,12.993427,l,acc
4,MS002,lacas5-lacas6,lacas5,anode,left cingulate gyrus g,-6.933604,42.745784,18.267675,l,acc
...,...,...,...,...,...,...,...,...,...,...
1875,DA039,rtp6-rtp7,rtp6,anode,right superior middle temporal pole d,40.532268,7.354525,-33.253436,r,temporal pole
1876,DA039,rtp7-rtp8,rtp7,anode,right superior middle temporal pole d,43.911226,7.341369,-31.915085,r,temporal pole
1877,DA039,rsgcc6-rsgcc7,rsgcc7,cathode,right anterior pars triangularis b,42.378987,31.277597,2.471854,r,vlpfc
1878,DA039,rsgcc7-rsgcc8,rsgcc7,anode,right anterior pars triangularis b,42.378987,31.277597,2.471854,r,vlpfc


## load behav data

In [9]:
# Read each subject's task dataframe (one CSV per subject, in subj_ids order)
raw_behav = [
    pd.read_csv(f'{behav_dir}{sid}_task_df.csv')
    for sid in subj_ids
]

# Format and combine the per-subject tables. Nothing is dropped at this stage
# (drop_bads / drop_bads_t1 are False); the drop information is returned
# separately because return_drops=True. Regressors are z-scored (norm_type).
all_behav, beh_drops = format_all_behav(
    raw_behav,
    return_drops=True,
    drop_bads=False,
    drop_bads_t1=False,
    norm=True,
    norm_type='zscore',
    pt_dir=pt_path,
)
all_behav

Unnamed: 0,subj_id,bdi,bdi_thresh,Round,TrialNum,RT,TrialOnset,ChoiceOnset,DecisionOnset,FeedbackOnset,...,P_Safe_raw,P_Safe_t1_raw,util_LowBet_raw,util_LowBet_t1_raw,util_HighBet_raw,util_HighBet_t1_raw,risk_raw,risk_t1_raw,Choice_Util_raw,Choice_Util_t1_raw
0,MS002,14,low,1,25.0,2.059852,513.380590,513.390239,515.450091,515.457173,...,0.897969,0.638282,-0.734723,-0.529325,0.030942,-0.995865,0.819173,0.819173,0.301649,-1.159835
1,MS002,14,low,2,117.0,1.954564,522.640856,522.641563,524.596127,526.627092,...,0.638282,0.341225,-0.525205,1.110125,-0.999406,1.493454,0.819173,0.819173,-1.165390,1.285700
2,MS002,14,low,3,79.0,1.583462,531.174799,531.175599,532.759061,534.780269,...,0.341225,0.714422,1.116755,-0.314041,1.498244,0.893763,0.819173,0.819173,1.287589,0.302752
3,MS002,14,low,4,42.0,2.491611,545.592613,545.593355,548.084966,548.092333,...,0.714422,0.483386,-0.309590,1.110125,0.896547,0.945208,0.819173,0.819173,0.301649,1.069216
4,MS002,14,low,5,85.0,1.768936,555.337336,555.345720,557.114656,559.135069,...,0.483386,0.882421,1.116755,-0.738523,0.948163,0.261760,0.819173,0.819173,1.070446,0.302752
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4045,DA039,22,high,146,79.0,1.079701,2259.827656,2259.828749,2260.908450,2262.926195,...,0.209864,0.723269,0.941137,-1.286902,1.505088,-0.112975,1.187095,1.187095,1.703533,-0.190891
4046,DA039,22,high,147,30.0,1.837272,2267.502359,2267.534059,2269.371331,2269.377701,...,0.723269,0.509159,-1.275583,0.331666,-0.133242,-0.373925,1.187095,1.187095,-0.190203,-0.190891
4047,DA039,22,high,148,13.0,4.030006,2282.349445,2282.350662,2286.380667,2286.389886,...,0.509159,0.585308,0.339517,-0.118750,-0.382021,-0.360674,1.187095,1.187095,-0.190203,-0.190891
4048,DA039,22,high,149,18.0,3.167144,2293.040983,2293.042042,2296.209186,2296.218136,...,0.585308,0.884229,-0.109935,-3.139208,-0.369389,-0.830554,1.187095,1.187095,-0.190203,-2.805472


In [12]:
# ### load mood info all subj 
# raw_mood = [pd.read_csv(f'{behav_dir}{subj_id}_mood_df.csv') for subj_id in subj_ids]
# all_mood, mood_drops = format_all_mood(raw_mood, all_behav, return_drops=True, drop_bads=False)
# all_mood

In [13]:
# Save the formatted behavior table under results/cpe_encoding/, stamped with
# today's date so earlier exports are not overwritten.
all_behav.to_csv(
    path_or_buf=f'{base_dir}results/cpe_encoding/all_behav_regression_data_{date}.csv',
)

# Make band-power regression dataframes for each subject

In [15]:
# Epoch type to analyze. It names both the sub-folder of neural_dir and the
# '<subj_id>_<regression_epoch>-tfr.h5' file loaded for every subject.
regression_epoch = 'CpeOnset'

In [16]:
# Build regression_dfs[subj_id][band]: a long-format dataframe with one row per
# (epoch, re-referenced channel) holding the mean band power, joined to that
# subject's behavioral regressors and ROI labels.
#
# NOTE: bad epochs are NOT removed here. The aggregation step further down
# keeps only rows whose keep_epoch column equals 'keep', so the per-subject
# bad-epoch list from beh_drops is not needed in this loop.
regression_dfs = {}

for subj_id in subj_ids:
    # behavioral regressors for this subject (must contain an 'epoch' column
    # for the merge below)
    behav_df = all_behav[all_behav.subj_id == subj_id]

    # ROI / hemisphere label for each of this subject's re-referenced channels
    subj_roi_labels = roi_reref_labels_master_df.loc[
        roi_reref_labels_master_df['subj_id'] == subj_id,
        ['subj_id', 'reref_ch_names', 'roi', 'hemi'],
    ]

    power_epochs = mne.time_frequency.read_tfrs(f'{neural_dir}{regression_epoch}/{subj_id}_{regression_epoch}-tfr.h5')
    power_df = power_epochs.to_data_frame()
    del power_epochs  # only the dataframe is used from here on; free the TFR object

    subj_band_dfs = {}

    # extract power df with mean freq-specific power for each epoch for each elec
    for band, freq_range in analysis_freqs.items():
        fmin, fmax = freq_range
        # band edges are inclusive on both sides
        in_band = (power_df.freq >= fmin) & (power_df.freq <= fmax)
        # average every numeric column within each epoch; this collapses the
        # time and frequency samples (the 'time' and 'freq' columns themselves
        # become per-epoch means)
        band_df = power_df[in_band].groupby('epoch').mean(numeric_only=True).reset_index()

        # wide -> long: every column that is not an id column is a channel, so
        # no positional column slice is needed
        band_df = band_df.melt(id_vars=['epoch', 'time', 'freq'],
                               var_name='reref_ch_names', value_name='band_pow')
        band_df['band'] = band

        # attach behavior by epoch, then ROI labels by channel (inner joins:
        # rows without a match on either side are dropped)
        subj_df = pd.merge(band_df, behav_df, on='epoch')
        subj_df = pd.merge(subj_df, subj_roi_labels, on=['reref_ch_names', 'subj_id'])

        # channel id that is unique across subjects: '<subj_id>_<reref_ch_names>'
        subj_df['unique_reref_ch'] = subj_df['subj_id'] + '_' + subj_df['reref_ch_names']

        subj_band_dfs[band] = subj_df
        del band_df, subj_df

    del power_df  # delete full power df to save memory

    regression_dfs[subj_id] = subj_band_dfs
    

    

Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/CpeOnset/MS002_CpeOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/CpeOnset/MS003_CpeOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/CpeOnset/MS009_CpeOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/CpeOnset/MS011_CpeOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/CpeOnset/MS015_CpeOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/CpeOnset/MS016_CpeOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/CpeOnset/MS017_CpeOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/CpeOnset/MS019_CpeOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/CpeOnset/MS022_CpeOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/CpeOn

# Aggregate CPE epoch data and save dataframes

In [17]:
# For every band, stack all subjects' dataframes (rows flagged
# keep_epoch == 'keep' only) into one master table with a fresh 0..n-1 index
# and write it to save_dir as '<band>_master_df.csv'.
for band in analysis_freqs:
    band_master_df = pd.concat(
        (
            regression_dfs[subj_id][band].loc[lambda df: df['keep_epoch'] == 'keep']
            for subj_id in subj_ids
        ),
        ignore_index=True,
    )
    band_master_df.to_csv(f'{save_dir}{band}_master_df.csv')

In [18]:
# Preview the master dataframe left over from the loop above. The name is
# rebound on every iteration, so this shows the final band in analysis_freqs.
band_master_df

Unnamed: 0,epoch,time,freq,reref_ch_names,band_pow,band,subj_id,bdi,bdi_thresh,Round,...,util_LowBet_t1_raw,util_HighBet_raw,util_HighBet_t1_raw,risk_raw,risk_t1_raw,Choice_Util_raw,Choice_Util_t1_raw,roi,hemi,unique_reref_ch
0,0,1.5,118.986976,lacas1-lacas2,-0.027361,hfa,MS002,14,low,1,...,-0.529325,0.030942,-0.995865,0.819173,0.819173,0.301649,-1.159835,acc,l,MS002_lacas1-lacas2
1,1,1.5,118.986976,lacas1-lacas2,-0.013610,hfa,MS002,14,low,2,...,1.110125,-0.999406,1.493454,0.819173,0.819173,-1.165390,1.285700,acc,l,MS002_lacas1-lacas2
2,2,1.5,118.986976,lacas1-lacas2,0.095670,hfa,MS002,14,low,3,...,-0.314041,1.498244,0.893763,0.819173,0.819173,1.287589,0.302752,acc,l,MS002_lacas1-lacas2
3,3,1.5,118.986976,lacas1-lacas2,0.096172,hfa,MS002,14,low,4,...,1.110125,0.896547,0.945208,0.819173,0.819173,0.301649,1.069216,acc,l,MS002_lacas1-lacas2
4,4,1.5,118.986976,lacas1-lacas2,0.082093,hfa,MS002,14,low,5,...,-0.738523,0.948163,0.261760,0.819173,0.819173,1.070446,0.302752,acc,l,MS002_lacas1-lacas2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
269233,145,1.5,118.986976,rvmot6-rvmot7,-0.189824,hfa,DA039,22,high,146,...,-1.286902,1.505088,-0.112975,1.187095,1.187095,1.703533,-0.190891,motor,r,DA039_rvmot6-rvmot7
269234,146,1.5,118.986976,rvmot6-rvmot7,-0.086939,hfa,DA039,22,high,147,...,0.331666,-0.133242,-0.373925,1.187095,1.187095,-0.190203,-0.190891,motor,r,DA039_rvmot6-rvmot7
269235,147,1.5,118.986976,rvmot6-rvmot7,-0.076524,hfa,DA039,22,high,148,...,-0.118750,-0.382021,-0.360674,1.187095,1.187095,-0.190203,-0.190891,motor,r,DA039_rvmot6-rvmot7
269236,148,1.5,118.986976,rvmot6-rvmot7,-0.097313,hfa,DA039,22,high,149,...,-3.139208,-0.369389,-0.830554,1.187095,1.187095,-0.190203,-2.805472,motor,r,DA039_rvmot6-rvmot7


In [None]:
# all_behav = format_all_behav(raw_behav,norm=False)
# all_behav.to_csv(f'{base_dir}ephys_analysis/behav/all_behav_nodrop_nonorm.csv')
# all_behav = format_all_behav(raw_behav)
# all_behav.to_csv(f'{base_dir}ephys_analysis/behav/all_behav_nodrop.csv')
# all_behav = format_all_behav(raw_behav,drop_bads=True)
# all_behav.to_csv(f'{base_dir}ephys_analysis/behav/all_behav.csv')
# all_behav = format_all_behav(raw_behav,drop_bads=True,drop_bads_t1=True)
# all_behav.to_csv(f'{base_dir}ephys_analysis/behav/all_behav_alldrops.csv')
# all_behav, drops_data = format_all_behav(raw_behav,return_drops=True,drop_bads=True,drop_bads_t1=True)
# all_behav, drops_data = format_all_behav(raw_behav,return_drops=True,drop_bads=True)
# all_behav

In [None]:
# for band in list(analysis_freqs.keys()):
    
#     band_master_df = []
    
#     for subj_id in subj_ids: 
#         subj_pow   = all_subj_power[subj_id][band]
#         subj_behav = all_behav[all_behav.subj_id == subj_id]
        
#         # merge by epochs
#         merge_df = pd.merge(subj_pow, subj_behav,left_on=['epoch'],
#                                     right_on=['epoch']).reset_index(drop=True)
#         # melt df to get pow estimates for each elec in one col
#         subj_df = merge_df.melt(id_vars=subj_behav.columns.tolist(),
#                                             value_vars=subj_pow.columns.drop(['epoch','time','freq','band']).tolist().copy(),
#                                             var_name='reref_ch_names', value_name='band_pow',
#                                             ignore_index = False)
#         # add roi info to df 
#         subj_roi_labels = roi_reref_labels_master_df[roi_reref_labels_master_df['subj_id']==subj_id][
#                                     ['subj_id','reref_ch_names','roi','hemi']]
#         subj_df = pd.merge(subj_df, subj_roi_labels,left_on=['reref_ch_names','subj_id'],
#                                             right_on=['reref_ch_names','subj_id']).reset_index(drop=True)
        
#         subj_df['unique_reref_ch'] = subj_df[['subj_id', 'reref_ch_names']].agg('_'.join, axis=1)
#         # drop bad epochs
#         subj_drops = drops_data[subj_id]
#         subj_df = subj_df[~subj_df['epoch'].isin(subj_drops)]
        
#         # add to master list 
#         band_master_df.append(subj_df)
#         # delete for memory
#         del subj_pow,subj_behav,merge_df

    
    
#     band_master_df = pd.concat(band_master_df).reset_index(drop=True)
#     # reorder columns 
# #     band_master_df = band_master_df[['subj_id','bdi','reref_ch_names','unique_reref_ch','epoch','freq','band',
# #                                      'band_pow','roi','hemi','ch_label4roi','ch_type4roi','loc4roi',
# #                                      'logRT_t1','logRT','RT','Round', 'Round_t1','TrialType','TrialType_t1',
# #                                      'GambleEV', 'GambleEV_t1','SafeBet','GambleChoice','GambleChoice_t1','TrialEV',                                 
# #                                      'Profit','TotalProfit','RPE','decisionCPE', 'decisionRegret','decisionRelief']]

#     band_master_df.to_csv(f'{save_dir}{band}_master_df_{epoch_len}.csv')
#     regression_band_data[band] = band_master_df
# #     del band_master_df


