# Format Mood Epochs Data for Regressions: 

*Created:* 10/05/2024 \
*Updated:* 11/26/2024 \
**Updated with new epochs from 11/21 and new behav scripts + pt data**

In [1]:
import numpy as np
import mne
from glob import glob
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
from scipy.stats import zscore, linregress, ttest_ind, ttest_rel, ttest_1samp, pearsonr, spearmanr
import pandas as pd
from mne.preprocessing.bads import _find_outliers
import os 
import joblib
import re
import datetime
import scipy
import random
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.regression.mixed_linear_model import MixedLM 
from joblib import Parallel, delayed
import pickle
import itertools
import time 
from matplotlib.ticker import StrMethodFormatter
import sys


import warnings
# NOTE(review): no category or module filter is passed, so this silences every
# warning for the rest of the session (including deprecation warnings).
warnings.filterwarnings('ignore')




In [2]:
# Root directory for un-archived data and results; every path below is built from it
base_dir = '/sc/arion/projects/guLab/Alie/SWB/ephys_analysis/'

# Data locations
anat_dir = f'{base_dir}recon_labels/'
behav_dir = f'{base_dir}behav/behav_data/'
pt_path = f'{base_dir}behav/subj_pt_data/'
neural_dir = f'{base_dir}epoch_data/'

# Output location for the per-band regression dataframes (created if missing)
save_dir = f'{base_dir}results/mood_results/regression_dfs/'
os.makedirs(save_dir, exist_ok=True)

# Run-date stamp in MMDDYYYY form, used in output file names
date = datetime.date.today().strftime('%m%d%Y')
print(date)

11262024


In [3]:
# Auto-reload imported modules before each cell runs, so edits to the local
# helper scripts are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


# Make the LFPAnalysis package under the project directory importable
sys.path.append(f'{base_dir}LFPAnalysis/')

from LFPAnalysis import analysis_utils

# User-specific location of the analysis scripts
script_dir = '/hpc/users/finka03/swb_ephys_analysis/scripts/'
sys.path.append(f'{script_dir}analysis_notebooks/')

# NOTE(review): star import; the names it brings in are not visible from this notebook
from ieeg_tools import *

sys.path.append(f'{script_dir}behav/')

# NOTE(review): format_all_behav / format_all_mood used below are not defined in this
# notebook; presumably they come from one of these star imports. Confirm.
from behav_utils import *
from swb_subj_behav import *

In [4]:
##### MS050 + MS028 CANNOT BE INCLUDED - THEY DID NOT DO THE MOOD TASK
no_mood_task_subj = ['MS050', 'MS028']

# Read the usable-subject list (first column of the 'Usable_Subjects' sheet)
usable_subj = pd.read_excel(f'{base_dir}subj_info/SWB_subjects.xlsx',
                            sheet_name='Usable_Subjects', usecols=[0]).PatientID

# Filter instead of list.remove(): remove() raises ValueError when an excluded ID is
# not in the spreadsheet, which would break this cell if the sheet is ever
# updated to leave those subjects out.
subj_ids = [subj_id for subj_id in usable_subj if subj_id not in no_mood_task_subj]
n_subj = len(subj_ids)
subj_ids


['MS002',
 'MS003',
 'MS009',
 'MS011',
 'MS015',
 'MS016',
 'MS017',
 'MS019',
 'MS022',
 'MS024',
 'MS025',
 'MS026',
 'MS027',
 'MS029',
 'MS030',
 'MS033',
 'MS035',
 'MS041',
 'MS043',
 'MS048',
 'DA8',
 'DA023',
 'DA026',
 'DA037',
 'DA039']

In [5]:
### load ROI reref master
# Read the master channel -> ROI table, drop the saved index column, and keep
# only the subjects included in this analysis.
roi_labels_path = glob(f'{base_dir}results/roi_info/roi_reref_labels_master.csv')[0]
roi_reref_labels_master_df = (
    pd.read_csv(roi_labels_path)
    .drop(columns=['Unnamed: 0'])
    .loc[lambda labels_df: labels_df.subj_id.isin(subj_ids)]
)
roi_reref_labels_master_df



Unnamed: 0,subj_id,reref_ch_names,ch_label4roi,ch_type4roi,loc4roi,mni_x,mni_y,mni_z,hemi,roi
0,MS002,lacas1-lacas2,lacas1,anode,left cingulate gyrus d,-6.382462,37.158688,-3.130044,l,acc
1,MS002,lacas2-lacas3,lacas2,anode,left cingulate gyrus e,-6.368174,38.606223,2.270621,l,acc
2,MS002,lacas3-lacas4,lacas3,anode,left cingulate gyrus f,-6.390079,39.941566,7.640265,l,acc
3,MS002,lacas4-lacas5,lacas4,anode,left cingulate gyrus f,-6.914519,41.546899,12.993427,l,acc
4,MS002,lacas5-lacas6,lacas5,anode,left cingulate gyrus g,-6.933604,42.745784,18.267675,l,acc
...,...,...,...,...,...,...,...,...,...,...
1875,DA039,rtp6-rtp7,rtp6,anode,right superior middle temporal pole d,40.532268,7.354525,-33.253436,r,temporal pole
1876,DA039,rtp7-rtp8,rtp7,anode,right superior middle temporal pole d,43.911226,7.341369,-31.915085,r,temporal pole
1877,DA039,rsgcc6-rsgcc7,rsgcc7,cathode,right anterior pars triangularis b,42.378987,31.277597,2.471854,r,vlpfc
1878,DA039,rsgcc7-rsgcc8,rsgcc7,anode,right anterior pars triangularis b,42.378987,31.277597,2.471854,r,vlpfc


In [6]:
# Number of re-referenced channels per ROI across the included subjects
roi_reref_labels_master_df['roi'].value_counts()

roi
dmpfc            193
ofc              179
acc              167
sts              135
hpc              124
amy              102
temporal         100
dlpfc             95
vlpfc             75
pins              58
ains              57
thalamus          43
temporal pole     31
mcc               28
phg               25
motor             25
parietal          24
vmpfc             22
caudate            3
pcc                1
Name: count, dtype: int64

In [7]:
# Frequency bands of interest as band name -> [low, high] edges.
# Adjacent bands share an edge value (e.g. 8 appears in both theta and alpha).
analysis_freqs = dict(
    theta=[4, 8],
    alpha=[8, 13],
    beta=[13, 30],
    gamma=[30, 70],
    hfa=[70, 200],
)
analysis_freqs

{'theta': [4, 8],
 'alpha': [8, 13],
 'beta': [13, 30],
 'gamma': [30, 70],
 'hfa': [70, 200]}

## load behav + mood data

In [8]:
# Load every subject's task dataframe into one list, then format them together.
behav_paths = [f'{behav_dir}{subj_id}_task_df.csv' for subj_id in subj_ids]
raw_behav = list(map(pd.read_csv, behav_paths))

# Bad trials are kept here (drop_bads / drop_bads_t1 are False); the trials that
# would be dropped are returned separately in beh_drops.
all_behav, beh_drops = format_all_behav(
    raw_behav,
    return_drops=True,
    drop_bads=False,
    drop_bads_t1=False,
    norm=True,
    norm_type='zscore',
    pt_dir=pt_path,
)
all_behav

Unnamed: 0,subj_id,bdi,bdi_thresh,Round,TrialNum,RT,TrialOnset,ChoiceOnset,DecisionOnset,FeedbackOnset,...,risk_raw,risk_t1_raw,loss_raw,loss_t1_raw,temp_raw,temp_t1_raw,P_Safe_raw,P_Safe_t1_raw,Unnamed: 0_raw,Unnamed: 0_t1_raw
0,MS002,14,low,1,25.0,2.059852,513.380590,513.390239,515.450091,515.457173,...,0.819173,0.819173,2.373595,2.373595,2.247505,2.247505,0.897969,0.638282,0,1.0
1,MS002,14,low,2,117.0,1.954564,522.640856,522.641563,524.596127,526.627092,...,0.819173,0.819173,2.373595,2.373595,2.247505,2.247505,0.638282,0.341225,1,2.0
2,MS002,14,low,3,79.0,1.583462,531.174799,531.175599,532.759061,534.780269,...,0.819173,0.819173,2.373595,2.373595,2.247505,2.247505,0.341225,0.714422,2,3.0
3,MS002,14,low,4,42.0,2.491611,545.592613,545.593355,548.084966,548.092333,...,0.819173,0.819173,2.373595,2.373595,2.247505,2.247505,0.714422,0.483386,3,4.0
4,MS002,14,low,5,85.0,1.768936,555.337336,555.345720,557.114656,559.135069,...,0.819173,0.819173,2.373595,2.373595,2.247505,2.247505,0.483386,0.882421,4,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3745,DA039,22,high,146,79.0,1.079701,2259.827656,2259.828749,2260.908450,2262.926195,...,1.187095,1.187095,0.864195,0.864195,2.252215,2.252215,0.209864,0.723269,145,146.0
3746,DA039,22,high,147,30.0,1.837272,2267.502359,2267.534059,2269.371331,2269.377701,...,1.187095,1.187095,0.864195,0.864195,2.252215,2.252215,0.723269,0.509159,146,147.0
3747,DA039,22,high,148,13.0,4.030006,2282.349445,2282.350662,2286.380667,2286.389886,...,1.187095,1.187095,0.864195,0.864195,2.252215,2.252215,0.509159,0.585308,147,148.0
3748,DA039,22,high,149,18.0,3.167144,2293.040983,2293.042042,2296.209186,2296.218136,...,1.187095,1.187095,0.864195,0.864195,2.252215,2.252215,0.585308,0.884229,148,149.0


In [9]:
# Full list of columns in the formatted behavioral dataframe (as a plain list,
# so the display is not abbreviated like the Index repr)
all_behav.columns.tolist()

['subj_id',
 'bdi',
 'bdi_thresh',
 'Round',
 'TrialNum',
 'RT',
 'TrialOnset',
 'ChoiceOnset',
 'DecisionOnset',
 'FeedbackOnset',
 'TrialType',
 'SafeBet',
 'LowBet',
 'HighBet',
 'GambleChoice',
 'Outcome',
 'Profit',
 'TotalProfit',
 'epoch',
 'CpeOnset',
 'logRT',
 'GambleEV',
 'TrialEV',
 'CR',
 'choiceEV',
 'rpe',
 'res_type',
 'cf',
 'cpe',
 'Round_t1',
 'TrialNum_t1',
 'RT_t1',
 'TrialType_t1',
 'SafeBet_t1',
 'LowBet_t1',
 'HighBet_t1',
 'GambleChoice_t1',
 'Outcome_t1',
 'Profit_t1',
 'TotalProfit_t1',
 'epoch_t1',
 'logRT_t1',
 'GambleEV_t1',
 'TrialEV_t1',
 'CR_t1',
 'choiceEV_t1',
 'rpe_t1',
 'res_type_t1',
 'cf_t1',
 'cpe_t1',
 'keep_epoch',
 'keep_epoch_t1',
 'util_HighBet',
 'util_HighBet_t1',
 'P_Gamble',
 'P_Gamble_t1',
 'Choice_Util',
 'Choice_Util_t1',
 'util_SafeBet',
 'util_SafeBet_t1',
 'util_Gamble',
 'util_Gamble_t1',
 'Choice_Prob',
 'Choice_Prob_t1',
 'util_LowBet',
 'util_LowBet_t1',
 'risk',
 'risk_t1',
 'loss',
 'loss_t1',
 'temp',
 'temp_t1',
 'P_Safe',


In [10]:
### TO DO - edit format_all_mood code to remove cols from all_behav, instead of inputting all predictors manually.
### Also, remove Psafe/Pgamble + risk/loss/temp from normalization vars

# Predictor columns from all_behav, listed in two groups: the plain column names
# first, then the columns carrying the `_raw` suffix. Order is preserved.
base_vars = [
    'SafeBet', 'LowBet', 'HighBet', 'Profit', 'TotalProfit', 'GambleEV',
    'TrialEV', 'CR', 'choiceEV', 'rpe', 'cf', 'cpe',
    'keep_epoch', 'Choice_Util', 'util_HighBet', 'util_Gamble',
    'util_SafeBet', 'util_LowBet',
]

raw_vars = [
    'SafeBet_raw', 'LowBet_raw', 'HighBet_raw', 'Profit_raw', 'TotalProfit_raw',
    'GambleEV_raw', 'TrialEV_raw', 'CR_raw', 'choiceEV_raw', 'rpe_raw',
    'cf_raw', 'cpe_raw', 'Choice_Prob_raw', 'Choice_Util_raw', 'P_Safe_raw',
    'temp_raw', 'util_HighBet_raw', 'P_Gamble_raw', 'util_Gamble_raw',
    'risk_raw', 'loss_raw', 'util_SafeBet_raw', 'util_LowBet_raw',
]

behav_vars = base_vars + raw_vars


In [11]:
###### behav_vars is entered manually: it should hold the predictor variables from
###### all_behav that format_all_mood is to include in the mood df
### load mood info for all subjects
mood_paths = [f'{behav_dir}{subj_id}_mood_df.csv' for subj_id in subj_ids]
raw_mood = list(map(pd.read_csv, mood_paths))

# Bad ratings are kept (drop_bads=False); the would-be drops come back in mood_drops
all_mood, mood_drops = format_all_mood(
    raw_mood,
    all_behav,
    return_drops=True,
    drop_bads=False,
    behav_vars=behav_vars,
)


In [12]:
# Display the formatted mood dataframe returned by format_all_mood
all_mood

Unnamed: 0,subj_id,bdi,bdi_thresh,Round,Rating,RatingOnset,RT,Round_t1_idx,Round_t2_idx,Round_t3_idx,...,loss_raw_t3,util_SafeBet_raw_t1,util_SafeBet_raw_t2,util_SafeBet_raw_t3,util_LowBet_raw_t1,util_LowBet_raw_t2,util_LowBet_raw_t3,mood_epoch_len,next_round_start,norm_mood
0,MS002,14,low,4,70.0,539.343131,5.210698,3,2,1,...,2.373595,0.990841,-0.969323,0.309105,1.116755,-0.525205,-0.734723,5.310698,545.592613,0.277664
1,MS002,14,low,7,70.0,572.673634,3.332630,6,5,4,...,2.373595,0.309105,1.127574,0.309105,-0.734723,1.116755,-0.309590,3.432630,577.049490,0.277664
2,MS002,14,low,10,69.0,605.109393,3.499831,9,8,7,...,2.373595,0.309105,0.309105,-1.946547,-0.734723,0.478076,-1.257242,3.599831,609.642231,0.134538
3,MS002,14,low,13,71.0,638.791836,3.884104,12,11,10,...,2.373595,0.309105,0.695491,0.847710,-0.734723,1.116755,1.116755,3.984104,643.713750,0.420789
4,MS002,14,low,16,71.0,671.977904,4.250371,15,14,13,...,2.373595,0.309105,1.259415,-1.946547,-1.270286,1.116755,-1.502501,4.350371,677.264040,0.420789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1245,DA039,22,high,139,45.0,2176.828649,2.673696,138,137,136,...,0.864195,-1.402674,-0.054942,1.881424,-1.811231,0.613067,0.941137,2.773696,2180.513505,-0.524890
1246,DA039,22,high,142,45.0,2211.211948,2.961470,141,140,139,...,0.864195,1.504581,-1.728339,-0.509104,0.941137,-2.476313,0.013637,3.061470,2215.197736,-0.524890
1247,DA039,22,high,145,55.0,2246.799223,3.347751,144,143,142,...,0.864195,-0.789875,1.504581,-0.054942,-0.861147,0.941137,-0.109935,3.447751,2251.180979,0.330135
1248,DA039,22,high,148,68.0,2276.912826,4.403968,147,146,145,...,0.864195,-0.054942,1.141662,1.504581,-1.275583,0.941137,0.941137,4.503968,2282.349445,1.441667


In [13]:
# Save the formatted mood dataframe, stamped with today's run date.
# The dataframe index is written as the first (unnamed) CSV column.
all_mood_csv_path = f'{base_dir}results/mood_results/all_mood_regression_data_{date}.csv'
all_mood.to_csv(all_mood_csv_path)


In [14]:
# Column index of the formatted mood dataframe (the repr is abbreviated for wide frames)
all_mood.columns

Index(['subj_id', 'bdi', 'bdi_thresh', 'Round', 'Rating', 'RatingOnset', 'RT',
       'Round_t1_idx', 'Round_t2_idx', 'Round_t3_idx',
       ...
       'loss_raw_t3', 'util_SafeBet_raw_t1', 'util_SafeBet_raw_t2',
       'util_SafeBet_raw_t3', 'util_LowBet_raw_t1', 'util_LowBet_raw_t2',
       'util_LowBet_raw_t3', 'mood_epoch_len', 'next_round_start',
       'norm_mood'],
      dtype='object', length=143)

## make regression dfs

In [15]:
# Epoch type to analyse. It selects which time-frequency files are read below:
# {neural_dir}{regression_epoch}/{subj_id}_{regression_epoch}-tfr.h5
regression_epoch = 'MoodChoiceOnset'

In [16]:
# Build the per-subject, per-band regression dataframes.
# mood_regression_dfs[subj_id][band] is a long-format dataframe in which band-averaged
# power for each (epoch, re-referenced channel) is merged with that epoch's mood/behavior
# columns and with the channel's ROI + hemisphere labels.
mood_regression_dfs = {}

# Identifier columns kept when reshaping power from wide (one column per channel) to long
id_cols = ['epoch', 'time', 'freq']

for subj_id in subj_ids:
    mood_df = all_mood[all_mood.subj_id == subj_id]

    # NOTE: mood_drops is not applied here (the epoch-drop filter was already disabled
    # in this cell); rows are filtered on keep_mood when the band dfs are aggregated.

    # Channel -> ROI / hemisphere lookup for this subject
    subj_roi_labels = roi_reref_labels_master_df.loc[
        roi_reref_labels_master_df['subj_id'] == subj_id,
        ['subj_id', 'reref_ch_names', 'roi', 'hemi']]

    # Load this subject's time-frequency epochs and flatten to a dataframe,
    # then release the TFR object right away to save memory.
    power_epochs = mne.time_frequency.read_tfrs(f'{neural_dir}{regression_epoch}/{subj_id}_{regression_epoch}-tfr.h5')
    power_df = power_epochs.to_data_frame()
    del power_epochs

    subj_band_dfs = {}

    # Extract mean band-specific power for each epoch and each electrode
    for band, freq_range in analysis_freqs.items():
        # NOTE(review): both edges are inclusive, so a frequency lying exactly on an edge
        # shared by two bands (e.g. 8 Hz for theta/alpha) is averaged into both bands.
        in_band = (power_df.freq >= freq_range[0]) & (power_df.freq <= freq_range[1])

        # Mean over every time point and in-band frequency within each epoch. `time` and
        # `freq` are averaged as well, so afterwards they hold the mean time / frequency
        # of the averaged samples rather than real sample coordinates.
        band_df = power_df[in_band].groupby(['epoch']).agg('mean', numeric_only=True).reset_index()

        # Select channel columns by name rather than by position (previously columns[3:]),
        # so the reshape does not depend on the column order of the averaged frame.
        ch_cols = [col for col in band_df.columns if col not in id_cols]
        band_df = band_df.melt(id_vars=id_cols,
                               value_vars=ch_cols,
                               var_name='reref_ch_names', value_name='band_pow')

        band_df['band'] = band

        # Attach the mood / behavior predictors for each epoch (inner join on epoch)
        subj_df = pd.merge(band_df, mood_df, on='epoch')

        # Attach ROI labels; inner join, so channels missing from the ROI table are dropped
        subj_df = pd.merge(subj_df, subj_roi_labels, on=['reref_ch_names', 'subj_id'])

        # Channel identifier that is unique across subjects, e.g. 'MS002_lacas1-lacas2'
        # (vectorised string concatenation instead of a row-wise join)
        subj_df['unique_reref_ch'] = subj_df['subj_id'] + '_' + subj_df['reref_ch_names']

        subj_band_dfs[band] = subj_df
        del band_df, subj_df, in_band

    del power_df  # delete full power df to save memory

    mood_regression_dfs[subj_id] = subj_band_dfs
    del subj_band_dfs
    

    

Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/MoodChoiceOnset/MS002_MoodChoiceOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/MoodChoiceOnset/MS003_MoodChoiceOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/MoodChoiceOnset/MS009_MoodChoiceOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/MoodChoiceOnset/MS011_MoodChoiceOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/MoodChoiceOnset/MS015_MoodChoiceOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/MoodChoiceOnset/MS016_MoodChoiceOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/MoodChoiceOnset/MS017_MoodChoiceOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_data/MoodChoiceOnset/MS019_MoodChoiceOnset-tfr.h5 ...
Reading /sc/arion/projects/guLab/Alie/SWB/ephys_analysis/epoch_d

## aggregate mood epoch data and save dfs

In [17]:
# Inspect the columns of one example regression dataframe. Every band dataframe is
# built by the same code, so the columns are the same for all bands. Pick the
# subject and band explicitly instead of relying on the `band` loop variable left over
# from the previous cell.
example_band = next(iter(analysis_freqs))
mood_regression_dfs[subj_ids[0]][example_band].columns.tolist()

['epoch',
 'time',
 'freq',
 'reref_ch_names',
 'band_pow',
 'band',
 'subj_id',
 'bdi',
 'bdi_thresh',
 'Round',
 'Rating',
 'RatingOnset',
 'RT',
 'Round_t1_idx',
 'Round_t2_idx',
 'Round_t3_idx',
 'epoch_t1_idx',
 'epoch_t2_idx',
 'epoch_t3_idx',
 'keep_mood',
 'logRT',
 'MoodChoiceOnset',
 'SafeBet_t1',
 'SafeBet_t2',
 'SafeBet_t3',
 'LowBet_t1',
 'LowBet_t2',
 'LowBet_t3',
 'HighBet_t1',
 'HighBet_t2',
 'HighBet_t3',
 'Profit_t1',
 'Profit_t2',
 'Profit_t3',
 'TotalProfit_t1',
 'TotalProfit_t2',
 'TotalProfit_t3',
 'GambleEV_t1',
 'GambleEV_t2',
 'GambleEV_t3',
 'TrialEV_t1',
 'TrialEV_t2',
 'TrialEV_t3',
 'CR_t1',
 'CR_t2',
 'CR_t3',
 'choiceEV_t1',
 'choiceEV_t2',
 'choiceEV_t3',
 'rpe_t1',
 'rpe_t2',
 'rpe_t3',
 'cf_t1',
 'cf_t2',
 'cf_t3',
 'cpe_t1',
 'cpe_t2',
 'cpe_t3',
 'keep_epoch_t1',
 'keep_epoch_t2',
 'keep_epoch_t3',
 'Choice_Util_t1',
 'Choice_Util_t2',
 'Choice_Util_t3',
 'util_HighBet_t1',
 'util_HighBet_t2',
 'util_HighBet_t3',
 'util_Gamble_t1',
 'util_Gamble_t2',

In [19]:
# One empty list per frequency band, keyed by band name
mood_band_data = {band_name: [] for band_name in analysis_freqs}


In [20]:
# For each band: stack every subject's dataframe (keeping only rows flagged
# keep_mood == 'keep') into one master dataframe and write it to save_dir.
for band in analysis_freqs:

    kept_subj_dfs = [
        mood_regression_dfs[subj_id][band]
        .loc[lambda band_df: band_df.keep_mood == 'keep']
        .reset_index(drop=True)
        for subj_id in subj_ids
    ]

    # band_master_df is reassigned each iteration, so after the loop it holds
    # only the last band's data
    band_master_df = pd.concat(kept_subj_dfs).reset_index(drop=True)
    band_master_df.to_csv(f'{save_dir}{band}_mood_epochs_df.csv')


In [21]:
# NOTE: band_master_df is reassigned on every pass of the aggregation loop above, so
# at this point it holds only the last band in analysis_freqs ('hfa').
band_master_df

Unnamed: 0,epoch,time,freq,reref_ch_names,band_pow,band,subj_id,bdi,bdi_thresh,Round,...,util_SafeBet_raw_t3,util_LowBet_raw_t1,util_LowBet_raw_t2,util_LowBet_raw_t3,mood_epoch_len,next_round_start,norm_mood,roi,hemi,unique_reref_ch
0,0,-0.5,118.986976,lacas1-lacas2,0.018702,hfa,MS002,14,low,4,...,0.309105,1.116755,-0.525205,-0.734723,5.310698,545.592613,0.277664,acc,l,MS002_lacas1-lacas2
1,1,-0.5,118.986976,lacas1-lacas2,-0.076237,hfa,MS002,14,low,7,...,0.309105,-0.734723,1.116755,-0.309590,3.432630,577.049490,0.277664,acc,l,MS002_lacas1-lacas2
2,2,-0.5,118.986976,lacas1-lacas2,0.221728,hfa,MS002,14,low,10,...,-1.946547,-0.734723,0.478076,-1.257242,3.599831,609.642231,0.134538,acc,l,MS002_lacas1-lacas2
3,3,-0.5,118.986976,lacas1-lacas2,-0.029262,hfa,MS002,14,low,13,...,0.847710,-0.734723,1.116755,1.116755,3.984104,643.713750,0.420789,acc,l,MS002_lacas1-lacas2
4,4,-0.5,118.986976,lacas1-lacas2,0.076720,hfa,MS002,14,low,16,...,-1.946547,-1.270286,1.116755,-1.502501,4.350371,677.264040,0.420789,acc,l,MS002_lacas1-lacas2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80265,31,-0.5,118.986976,rvmot6-rvmot7,0.271121,hfa,DA039,22,high,97,...,-1.728339,0.339517,-1.275583,-3.162552,3.631755,1668.345561,0.843150,motor,r,DA039_rvmot6-rvmot7
80266,32,-0.5,118.986976,rvmot6-rvmot7,0.115498,hfa,DA039,22,high,100,...,-1.089041,-0.001675,0.941137,-0.861147,4.200405,1708.109957,1.014155,motor,r,DA039_rvmot6-rvmot7
80267,33,-0.5,118.986976,rvmot6-rvmot7,0.009869,hfa,DA039,22,high,103,...,-0.054942,-0.592812,0.941137,-0.592812,3.884470,1744.677006,0.672145,motor,r,DA039_rvmot6-rvmot7
80268,34,-0.5,118.986976,rvmot6-rvmot7,-0.171878,hfa,DA039,22,high,106,...,-0.054942,0.560514,0.941137,-0.109935,4.610689,1784.225934,-1.379915,motor,r,DA039_rvmot6-rvmot7


In [22]:
# Column index of the last band's master dataframe (the repr is abbreviated for wide frames)
band_master_df.columns

Index(['epoch', 'time', 'freq', 'reref_ch_names', 'band_pow', 'band',
       'subj_id', 'bdi', 'bdi_thresh', 'Round',
       ...
       'util_SafeBet_raw_t3', 'util_LowBet_raw_t1', 'util_LowBet_raw_t2',
       'util_LowBet_raw_t3', 'mood_epoch_len', 'next_round_start', 'norm_mood',
       'roi', 'hemi', 'unique_reref_ch'],
      dtype='object', length=151)

In [27]:
# Sanity check: number of distinct rounds kept for subject MS002.
# dropna=False counts NaN as a value, matching len(Series.unique()).
band_master_df.loc[band_master_df.subj_id == 'MS002', 'Round'].nunique(dropna=False)

50