In [2]:
import pandas as pd
import numpy as np
import seaborn as sn
import sklearn

df = pd.read_csv("./kieranFeatures_27-Jun-2024.csv")
df.head()

Unnamed: 0,ID,EDA_TonicMean_version02,EDA_TonicMean_version03,EDA_TonicMean_version04,EDA_TonicMean_version05,EDA_TonicMean_version09,EDA_TonicMean_version10,EDA_TonicMean_version11,EDA_TonicMean_version12,EDA_TonicMean_version16,...,EEG_n170_poz_version16,EEG_n170_poz_version17,EEG_n170_poz_version19,EEG_n170_poz_version20,EEG_n170_poz_version22,EEG_n170_poz_version23,adjSA1,adjSA2,adjSA3,adjSAtotal
0,5,-0.085436,-0.180806,-1.240107,-1.645483,-0.232165,-0.505124,-0.736092,-0.905539,-0.288854,...,-0.05357,-0.266902,0.0,0.0,0.0,0.0,0.094009,1.527268,-0.721172,0.365941
1,5,-0.113606,-0.015427,1.570607,1.874802,-0.322842,-0.005955,0.917734,1.070093,-0.326261,...,-0.000853,0.022885,0.0,0.0,0.0,0.0,0.104321,-1.487409,0.822773,-0.20392
2,5,-0.125999,-0.138711,1.680446,0.912605,-0.362735,-0.378065,1.008151,0.482729,-0.342719,...,0.089682,0.075,0.0,0.0,0.0,0.0,-1.026176,0.980463,-1.571474,-0.855062
3,5,-0.187367,-0.165227,1.701432,1.249241,-0.56027,-0.458101,1.090228,0.740325,-0.424209,...,0.164293,0.1764,0.0,0.0,0.0,0.0,-0.64602,-0.257742,0.932179,0.111856
4,5,-0.192078,-0.272304,-0.480807,-1.039028,-0.575435,-0.781291,-0.386282,-0.692473,-0.430465,...,-0.176117,0.066671,0.0,0.0,0.0,0.0,-0.311617,0.799208,-1.454544,-0.563575


In [3]:
# Create binary variables for high and low 
rows = df.shape[0]

adj_SA_1_median = np.median(df["adjSA1"])
adj_SA_2_median = np.median(df["adjSA2"])
adj_SA_3_median = np.median(df["adjSA3"])

# Will be high if adjusted SA level score is equal to or above median, low otherwise
df["Lv_1_Lo"] = (df["adjSA1"] < adj_SA_1_median).astype(int)
df["Lv_1_Hi"] = (df["adjSA1"] >= adj_SA_1_median).astype(int)
df["Lv_2_Lo"] = (df["adjSA2"] < adj_SA_2_median).astype(int)
df["Lv_2_Hi"] = (df["adjSA1"] >= adj_SA_2_median).astype(int)
df["Lv_3_Lo"] = (df["adjSA3"] < adj_SA_2_median).astype(int)
df["Lv_3_Hi"] = (df["adjSA1"] >= adj_SA_2_median).astype(int)

In [3]:
# Get dataframe for ECG_pNN50 columns (correlation with SA 1)
pNN50_cols = [col for col in df.columns if "ECG_pNN50" in col]
SA1_data = df.loc[:, pNN50_cols + ["adjSA1", "Lv_1_Lo", "Lv_1_Hi"]]
SA1_data

Unnamed: 0,ECG_pNN50_version02,ECG_pNN50_version03,ECG_pNN50_version04,ECG_pNN50_version05,ECG_pNN50_version09,ECG_pNN50_version10,ECG_pNN50_version19,ECG_pNN50_version20,ECG_pNN50_version22,adjSA1,Lv_1_Lo,Lv_1_Hi
0,0.784491,0.371088,-0.009137,-0.019937,-0.150832,-0.14172,0.0,0.0,0.0,0.094009,0,1
1,-1.529229,0.371088,-1.250573,-0.019937,-1.204083,-0.14172,0.0,0.0,0.0,0.104321,0,1
2,0.784491,0.371088,-0.009137,-0.019937,-0.150832,-0.14172,0.0,0.0,0.0,-1.026176,1,0
3,0.784491,0.371088,-0.009137,-0.019937,-0.150832,-0.14172,0.0,0.0,0.0,-0.646020,1,0
4,0.784491,0.371088,1.953674,1.401106,-0.150832,-0.14172,0.0,0.0,0.0,-0.311617,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
198,-0.100167,0.371088,-0.483804,-0.019937,0.000000,0.00000,0.0,0.0,0.0,-0.053761,1,0
199,-0.196325,0.371088,-0.535398,-0.019937,0.000000,0.00000,0.0,0.0,0.0,-1.704308,1,0
200,-0.225603,0.371088,-0.551107,-0.019937,0.000000,0.00000,0.0,0.0,0.0,-1.200400,1,0
201,-0.984824,-2.539907,-0.958470,-2.305963,0.000000,0.00000,0.0,0.0,0.0,1.096837,0,1


In [4]:
# Get datframe for EDA_Phasic columns (correlation with SA 2 (phasic electrodermal level) and 3 (pretrial data))
phasic_cols = [col for col in df.columns if "EDA_Phasic" in col]
SA2_and_3_data = df.loc[:, phasic_cols + ["adjSA2", "Lv_2_Lo", "Lv_2_Hi"]]
SA2_and_3_data

Unnamed: 0,EDA_PhasicMean_version02,EDA_PhasicMean_version03,EDA_PhasicMean_version04,EDA_PhasicMean_version05,EDA_PhasicMean_version09,EDA_PhasicMean_version10,EDA_PhasicMean_version11,EDA_PhasicMean_version12,EDA_PhasicMean_version16,EDA_PhasicMean_version17,...,EDA_PhasicMax_version12,EDA_PhasicMax_version16,EDA_PhasicMax_version17,EDA_PhasicMax_version19,EDA_PhasicMax_version20,EDA_PhasicMax_version22,EDA_PhasicMax_version23,adjSA2,Lv_2_Lo,Lv_2_Hi
0,2.838623,0.562525,-0.671030,-1.788668,-0.318871,-1.254703,-0.231677,-0.354268,-0.204832,-0.210528,...,-0.281661,-0.199833,-0.232730,0.0,0.0,0.0,0.0,1.527268,0,0
1,3.627534,5.157661,0.040489,1.710840,0.097864,1.411513,-0.150325,0.141003,-0.186261,-0.094027,...,-0.037180,-0.136221,-0.072318,0.0,0.0,0.0,0.0,-1.487409,1,0
2,2.912867,1.925504,-0.168255,-0.417926,-0.279652,-0.463868,-0.180779,-0.177311,-0.203085,-0.175972,...,-0.197869,-0.188034,-0.172852,0.0,0.0,0.0,0.0,0.980463,0,0
3,2.156099,-0.416636,2.137353,0.198651,-0.679409,-1.822838,1.066664,-0.195258,-0.220900,-0.235352,...,-0.164746,-0.219727,-0.257344,0.0,0.0,0.0,0.0,-0.257742,1,0
4,1.154209,1.059655,-0.119670,0.008588,-1.208648,-0.966256,-0.209360,-0.143752,-0.244485,-0.197924,...,-0.258534,-0.181016,-0.207461,0.0,0.0,0.0,0.0,0.799208,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,0.557486,-0.036165,1.391179,0.793433,0.000000,0.000000,1.395097,0.662460,0.000000,0.000000,...,0.450293,0.000000,0.000000,0.0,0.0,0.0,0.0,-0.585629,1,0
199,1.720750,1.751326,1.703686,1.686390,0.000000,0.000000,0.645585,0.822849,0.000000,0.000000,...,0.930931,0.000000,0.000000,0.0,0.0,0.0,0.0,-1.683620,1,0
200,0.818826,2.617283,-0.773017,0.809586,0.000000,0.000000,-0.308129,0.037635,0.000000,0.000000,...,-0.163277,0.000000,0.000000,0.0,0.0,0.0,0.0,-1.594971,1,0
201,1.136268,-0.132988,0.144776,-0.638952,0.000000,0.000000,-0.160637,-0.339043,0.000000,0.000000,...,-0.261168,0.000000,0.000000,0.0,0.0,0.0,0.0,0.372913,0,1


In [5]:
# Clear memory
del df