Date Created: 2022-08-04

## Import Packages

In [1]:
import sys
# sys.path.insert(0, '../../IDEaSv2')

In [2]:
import pandas as pd
import numpy as np
import os
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import neurokit2 as nk
from scipy.stats import skew, kurtosis, iqr

from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import classification_report
from feat_functions.main_utils import *

# Processing 

In [3]:
def windowSegments(signal:pd.DataFrame, fs:float, window_size_sec:int, signal_col:str='ecg_'):
    """
    Tag consecutive, non-overlapping windows of a signal with a window id.

    The index of ``signal`` is reset in place (so the caller's frame is
    modified as well) and the ``'index'`` column is overwritten with a window
    id: 10 for the first window, 20 for the second, and so on. Every window
    holds exactly ``fs * window_size_sec`` rows.

    Parameters
    ----------
    signal : frame with one row per sample.
    fs : sampling rate, in samples per second.
    window_size_sec : window length in seconds.
    signal_col : not used by this function; kept so existing calls still work.

    Returns
    -------
    A copy of the rows covered by complete windows. Trailing rows that do not
    fill a whole window are left out.

    Raises
    ------
    ValueError
        If the window would contain less than one sample (the loop below
        could never advance in that case).
    """
    # Slice bounds must be integers even when fs is given as a float.
    window_size = int(round(fs * window_size_sec))
    if window_size <= 0:
        raise ValueError('fs * window_size_sec must be at least one sample')

    signal.reset_index(inplace=True, drop=False)

    start = 0
    counter = 10
    while start + window_size <= len(signal):
        # .loc slices include their end label, hence the "- 1": without it
        # the first row of the next window (or one extra trailing row for
        # the last window) would be tagged as well.
        signal.loc[start:start + window_size - 1, 'index'] = counter
        start += window_size
        counter += 10

    # `start` is now the first row that is NOT part of a complete window.
    return signal.iloc[:start].copy()

def labelMean(signal:pd.DataFrame, window_size:int):
    """
    Attach a rounded per-group mean of the ``'label'`` column as ``'meanLabel'``.

    The index of ``signal`` is reset in place; the resulting ``'index'``
    column is then filled group by group and finally renamed to
    ``'meanLabel'``. The same (mutated) frame is returned.

    NOTE(review): ``window_size`` is not used. Groups are hard-coded as nine
    blocks of six rows (rows 0-53). Because the label-based assignment
    includes its end row, row 54 receives the mean of the last block, and
    rows after 54 keep their original index value in ``'meanLabel'``.
    Confirm whether this is intended before generalising.
    """
    signal.reset_index(inplace=True, drop=False)

    block_len = 6
    for first in range(0, 54, block_len):
        last = first + block_len
        # Mean over rows first..last-1 (positional, end-exclusive) ...
        block_mean = signal['label'].iloc[first:last].mean()
        # ... written to rows first..last (label-based, end-inclusive).
        signal.loc[first:last, 'index'] = np.round(block_mean)

    signal.rename(columns={'index': 'meanLabel'}, inplace=True)
    return signal

import support_functions
from  support_functions import compute_ecg_eda_features

def _features_as_frame(features):
    """Convert one window's feature output into a DataFrame that can be concatenated."""
    if isinstance(features, pd.DataFrame):
        return features
    if isinstance(features, pd.Series):
        # One Series is one row; infer_objects restores numeric dtypes that
        # the transpose turns into object.
        return features.to_frame().T.infer_objects()
    if isinstance(features, dict):
        return pd.DataFrame([features])
    return pd.DataFrame(features)


def make_window_for_ECGEDA(signal:np.ndarray, fs:float, overlap:int, window_size_sec:int, signal_type:str) -> pd.DataFrame:
    """
    Cut a signal into (optionally overlapping) windows and extract features per window.

    Parameters
    ----------
    signal : samples along the first axis; sliced positionally.
    fs : sampling rate, in samples per second.
    overlap : overlap between successive windows, as a percentage of the
        window length (0 means back-to-back windows).
    window_size_sec : window length in seconds.
    signal_type : ``'ecg'`` selects ``compute_ecg_eda_features._ecg_features``;
        any other value selects ``compute_ecg_eda_features._eda_features``.

    Returns
    -------
    One row (or block of rows) of features per complete window, with a fresh
    0..n-1 index. An empty DataFrame when the signal is shorter than one
    window.

    Raises
    ------
    ValueError
        If the window is shorter than one sample or the overlap leaves no
        forward step (overlap >= 100 %), which would otherwise loop forever.
    """
    # Integer sample counts are required for slicing, even if fs is a float.
    window_size = int(round(fs * window_size_sec))
    overlap = int(window_size * (overlap / 100))
    step = window_size - overlap
    if window_size <= 0 or step <= 0:
        raise ValueError('window must be at least one sample long and overlap must be below 100 %')

    if signal_type == 'ecg':
        extract = compute_ecg_eda_features._ecg_features
    else:
        extract = compute_ecg_eda_features._eda_features

    # Collect per-window results and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every call.
    rows = []
    start = 0
    while start + window_size <= len(signal):
        segment = signal[start:start + window_size]
        rows.append(_features_as_frame(extract(segment, fs)))
        start += step

    if not rows:
        return pd.DataFrame()
    return pd.concat(rows, ignore_index=True)

def _stack_feature_frames(frames):
    """Concatenate non-empty feature frames; return an empty DataFrame when there are none."""
    kept = [frame for frame in frames if not frame.empty]
    if not kept:
        return pd.DataFrame()
    return pd.concat(kept, ignore_index=True)


def make_windows_1min(df:pd.DataFrame, fs:float, overlap:int, window_size_sec:int):
    """
    Split a recording into one-minute chunks and extract windowed ECG/EDA features from each.

    Every complete 60-second chunk of ``df`` is passed to
    ``make_window_for_ECGEDA`` twice: once with the ``'ECG LL-RA'`` column and
    once with the ``'GSR Conductance CAL'``, ``'EDA_Tonic'`` and
    ``'EDA_Phasic'`` columns. The mean of the chunk's ``'meanLabel'`` column is
    repeated once per ECG feature row of that chunk.

    Parameters
    ----------
    df : recording with the columns named above, one row per sample.
    fs : sampling rate, in samples per second.
    overlap : overlap between feature windows, in percent of the window length.
    window_size_sec : feature window length in seconds.

    Returns
    -------
    (ecgSegments, edaSegments, labelSegments): two feature DataFrames and a
    list of labels. Samples after the last complete minute are ignored.

    Raises
    ------
    ValueError
        If one minute at ``fs`` is less than one sample.
    """
    numSec = 60 #seconds
    windowSize = int(round(fs * numSec))
    if windowSize <= 0:
        raise ValueError('fs must give at least one sample per minute')

    # Gather per-minute results and concatenate once at the end;
    # DataFrame.append no longer exists in pandas 2.0+.
    ecgFrames = []
    edaFrames = []
    labelSegments = []

    for start in range(0, len(df) - windowSize + 1, windowSize):
        # Positional slice: group frames keep their original (non-zero based) index.
        dfOnemin = df.iloc[start:start + windowSize]
        ecgOne = dfOnemin['ECG LL-RA'].values
        edaOne = dfOnemin[['GSR Conductance CAL', 'EDA_Tonic', 'EDA_Phasic']].values
        labelOne = dfOnemin['meanLabel'].mean()

        ecgArr = make_window_for_ECGEDA(ecgOne, fs, overlap, window_size_sec, 'ecg')
        edaArr = make_window_for_ECGEDA(edaOne, fs, overlap, window_size_sec, 'eda')

        ecgFrames.append(ecgArr)
        edaFrames.append(edaArr)
        # One label per ECG feature row of this minute.
        labelSegments.extend([labelOne] * ecgArr.shape[0])

    return _stack_feature_frames(ecgFrames), _stack_feature_frames(edaFrames), labelSegments

In [21]:
# Build per-subject feature sets: for every sub-folder of mainPath, read
# <subject>/<subject>.csv, split it by experiment ('exp' column) and extract
# windowed ECG/EDA features plus labels for each experiment.
mainPath = r'X:\Thesis\matb2\ECG_EDA_Combined'
listDir = os.listdir(mainPath)
samplingRate=256
numSec = 60 #seconds
overlapValue = 50 
windowSegLength = 10
# subject id -> list with one entry per experiment
ecgSamples = {}
edaSamples = {}
labelSamples = {}
for subs in listDir:
    csvPath = os.path.join(mainPath, f'{subs}', f'{subs}.csv')
    # Only the read can legitimately fail with a missing file; a missing CSV
    # raises FileNotFoundError (not FileExistsError), so catch exactly that
    # and move on to the next subject.
    try:
        dfMain = pd.read_csv(csvPath)
    except FileNotFoundError:
        print(f'File Not found! {csvPath}')
        continue

    ecgSegs = []
    edaSegs = []
    labelSegs = []
    # consider each experiment separately
    for grp, df in dfMain.groupby(by='exp'):
        # split the session into 1-minute chunks and create overlapping
        # feature windows inside each chunk
        ecgSegments, edaSegments, labelSegments = make_windows_1min(df, samplingRate, overlapValue, windowSegLength)
        ecgSegs.append(ecgSegments)
        edaSegs.append(edaSegments)
        labelSegs.append(labelSegments)

    ecgSamples[subs] = ecgSegs
    edaSamples[subs] = edaSegs
    labelSamples[subs] = labelSegs

In [9]:
# Inspect the entropy feature at row label 1 of whichever ECG feature frame
# was assigned to `ecgSegments` last. The recorded output is a
# (value, settings-dict) tuple, which is why the export cell later keeps only x[0].
ecgSegments['ecg_entropy_features'][1]

(0.3215858323131756,
 {'Dimension': 2,
  'Delay': 1,
  'Tolerance': 0.19180572570953183,
  'Corrected': True})

In [16]:
# List the ECG feature column names for subject '1105', second entry
# (list position 1) of its per-experiment frames.
ecgSamples['1105'][1].columns

Index(['ecg_mean_features', 'ecg_std_features', 'ecg_min_features',
       'ecg_max_features', 'ecg_skew_features', 'ecg_kurtosis_features',
       'ecg_median_features', 'ecg_entropy_features', 'ecg_iqr_features',
       'ecg_area_ts', 'ecg_sq_area_ts', 'ecg_mad_ts', 'ecg_HRV_MeanNN',
       'ecg_HRV_SDNN', 'ecg_HRV_SDANN1', 'ecg_HRV_SDNNI1', 'ecg_HRV_SDANN2',
       'ecg_HRV_SDNNI2', 'ecg_HRV_SDANN5', 'ecg_HRV_SDNNI5', 'ecg_HRV_RMSSD',
       'ecg_HRV_SDSD', 'ecg_HRV_CVNN', 'ecg_HRV_CVSD', 'ecg_HRV_MedianNN',
       'ecg_HRV_MadNN', 'ecg_HRV_MCVNN', 'ecg_HRV_IQRNN', 'ecg_HRV_Prc20NN',
       'ecg_HRV_Prc80NN', 'ecg_HRV_pNN50', 'ecg_HRV_pNN20', 'ecg_HRV_MinNN',
       'ecg_HRV_MaxNN', 'ecg_HRV_HTI', 'ecg_HRV_TINN', 'ecg_nni_counter',
       'ecg_nni_mean', 'ecg_nni_min', 'ecg_nni_max', 'ecg_hr_mean',
       'ecg_hr_min', 'ecg_hr_max', 'ecg_hr_std', 'ecg_nni_diff_mean',
       'ecg_nni_diff_min', 'ecg_nni_diff_max', 'ecg_ulf_peak', 'ecg_vlf_peak',
       'ecg_lf_peak', 'ecg_hf_peak', '

In [22]:
# Number of labels for subject '1105', list position 1; make_windows_1min
# emits one label per ECG feature row.
len(labelSamples['1105'][1])

99

In [23]:
# Persist the three per-subject dictionaries as pickles in one output folder.
path_pickle = r'X:\TAFFC_dataset\matb2\Processed_Data'

mk_dirs(path_pickle)

# (file name, object) pairs, written in this order.
for pickle_name, payload in (
    ('cola_ecg.pickle', ecgSamples),
    ('cola_eda.pickle', edaSamples),
    ('cola_labels.pickle', labelSamples),
):
    with open(os.path.join(path_pickle, pickle_name), 'wb') as handle:
        pickle.dump(payload, handle, protocol = pickle.HIGHEST_PROTOCOL)

In [24]:
# Display the whole subject -> list-of-ECG-feature-frames mapping.
ecgSamples

{'1105': [    ecg_mean_features  ecg_std_features  ecg_min_features  ecg_max_features  \
  0            0.332634          5.212547        -10.164570         93.590432   
  1           -0.006191          0.631911         -2.119824          3.493008   
  2           -0.007362          0.644941         -2.188423          3.555990   
  3           -0.006626          0.645022         -2.188423          3.555990   
  4           -0.006835          0.646915         -2.169433          3.537451   
  ..                ...               ...               ...               ...   
  94          -0.008121          0.626763         -2.039113          3.507335   
  95          -0.005276          0.622834         -2.044674          3.521118   
  96          -0.005467          0.616965         -2.044674          3.521118   
  97          -0.009197          0.630426         -1.980002          3.465511   
  98           0.000142          0.616907         -1.950635          3.411695   
  
      ecg_skew_fe

In [86]:
# For every subject: join ECG features, EDA features and labels side by side
# for each experiment, stack the experiments, flatten the entropy columns and
# write one CSV per subject.
for k in ecgSamples:
    dataList_ecg = ecgSamples[k]
    dataList_eda = edaSamples[k]

    print(len(dataList_ecg))

    frames = []
    for d, (ecg_df, eda_df) in enumerate(zip(dataList_ecg, dataList_eda)):
        # Give every EDA column exactly one 'eda_' prefix (existing 'eda_'
        # occurrences are stripped first). This renames the stored frame in place.
        eda_df.columns = ['eda_' + col.replace('eda_', '') for col in eda_df.columns]

        dataList_label = pd.DataFrame(labelSamples[k][d], columns=['label'])
        comData = pd.concat([ecg_df, eda_df, dataList_label], axis=1)
        frames.append(comData)

    if not frames:
        # Nothing was extracted for this subject; there is nothing to export
        # and the entropy columns below would not exist.
        print(f'No data for {k}, skipping')
        continue

    # Single concat instead of DataFrame.append, which pandas 2.0 removed.
    df = pd.concat(frames)

    # Entropy features are stored as (value, settings) tuples; keep the value only.
    for entropy_col in (
        'ecg_entropy_features',
        'eda_entropy_features',
        'eda_ph_entropy_features',
        'eda_ton_entropy_features',
    ):
        df[entropy_col] = df[entropy_col].apply(lambda x: x[0])

    df.reset_index(inplace=True, drop=True)

    # Raw f-string: the backslashes are path separators, not escape sequences.
    df.to_csv(rf'X:\TAFFC_dataset\data\{k}.csv', index=False)

4
4
4
4
4
4
4
4
3
1
3
3
4
2
3
4
4
4
2


In [90]:
# Show the tonic-EDA entropy column of `df` (the frame left over from the
# last subject exported) after the tuples were reduced to their first element.
df['eda_ton_entropy_features']

0      0.002046
1      0.002026
2      0.000465
3      0.000400
4      0.000774
         ...   
193    0.000503
194    0.000383
195    0.000337
196    0.003558
197    0.000946
Name: eda_ton_entropy_features, Length: 198, dtype: float64