In [1]:
import pandas as pd
import numpy as np
import scipy.signal as signal
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from scipy.stats import zscore, ttest_ind
from matplotlib.colors import LogNorm, Normalize
import os
import pickle


In [2]:
FS = 4 # E4 sampling rate

PANDAS_RESAMPLE_RATE = "{:.4f}".format(1/FS)+'S' # find missing samples (datetime)
MODALITY = 'eda'

In [3]:
RIGHT_DATA_FOLDER_PATH = '/media/bayesian-posterior/sdc/sensecode_data/' + MODALITY + '/right_hand_eda/'
RIGHT_DATA_FOLDER = os.fsencode(RIGHT_DATA_FOLDER_PATH)

LEFT_DATA_FOLDER_PATH = '/media/bayesian-posterior/sdc/sensecode_data/' + MODALITY + '/'
LEFT_DATA_FOLDER = os.fsencode(LEFT_DATA_FOLDER_PATH)

In [4]:
def plot_signal(df_eda,
                fs,
                label,
                ylim = None):
    
    plt.figure(figsize=(15, 5))
    
    t = df_eda.index.to_series()
    x = df_eda[label]
    xtick_freq = int(fs*60*60*24*7)
    
    plt.plot(range(len(t)), x, label = label)
    plt.legend(loc=1)
    plt.xticks(range(len(t))[::xtick_freq], t[::xtick_freq], rotation='vertical')

    if ylim is not None:
        plt.ylim(ylim)
    plt.xlabel('Date')
    plt.ylabel('EDA [uS]')
    plt.grid()

In [5]:
def remove_artifacts_and_interpolate(df, 
                                    fs, 
                                    pandas_resample_rate,
                                    interpolate_method = 'time',
                                    z_score = True):
    
    # if z_score:
    #     print('modified z score')
    #     med_eda = df[MODALITY].median()
    #     med_dev_eda = (np.abs(df[MODALITY] - med_eda)).median()
    #     df[MODALITY] = df[MODALITY].apply(lambda x : 0.6745*(x - med_eda) / med_dev_eda)
    
    df_without_artifacts = df.resample(pandas_resample_rate).mean()
    
    # From the appendix of Stirling et al., 2021 - do not use, seems 'cheating'
    # df_without_artifacts['hour'] = df_without_artifacts.index.to_series().apply(lambda x : x.hour)    
    # df_without_artifacts[MODALITY] = df_without_artifacts.groupby('hour', group_keys=False)[MODALITY].apply(lambda x: x.fillna(x.mean()))
    
    df_without_artifacts.fillna(df_without_artifacts[MODALITY].median(), inplace=True)
    
    if z_score:
        print('z-scored')
        df_without_artifacts[MODALITY] = zscore(df_without_artifacts[MODALITY])

    return df_without_artifacts

In [6]:
def apply_fir(eda: np.ndarray,
              fs,
              cutoff,
              filter_type, # bandpass, lowpass, highpass, bandstop
              transition_band,
              window) -> np.ndarray:
    
    if window == 'hann' or window == 'hamming' or window == 'bartlett':
        M = int(4 * fs / transition_band)
    elif window == 'blackman':
        M = int(6 * fs / transition_band)
    else:
        raise ValueError('Length estimation for this window not implemented')
        
    # print('Using ' + window + ' window for ' + filter_type + ' FIR filter.')
    h = signal.firwin(numtaps = M, 
                      cutoff = cutoff,
                      fs = fs,
                      pass_zero = filter_type,
                      window = window)
    
    eda = np.squeeze(eda)
    return signal.lfilter(h, [1.0], eda) 
    # return signal.filtfilt(h, [1.0], eda) # avoid phase shift of single filter

In [7]:
left_eda_mean_dict, right_eda_mean_dict = {}, {}

for file in os.listdir(LEFT_DATA_FOLDER_PATH):
    
    filename = os.fsdecode(file)
    
    if filename.endswith("worn_left.h5"):
        
        subject = filename.split('_')[0]
        left_filepath = LEFT_DATA_FOLDER_PATH+filename
        print(left_filepath, subject)
        
        left_df = pd.read_hdf(left_filepath)
        
        # plot_signal(df_eda = df_eda, label = 'eda', fs = FS)
        left_df_without_artifacts = remove_artifacts_and_interpolate(df = left_df, 
                                                                    fs = FS, 
                                                                    pandas_resample_rate = PANDAS_RESAMPLE_RATE, 
                                                                    z_score = False)
        
        # plot_signal(df_eda = df_eda_without_artifacts, label = 'eda', fs = FS)
        left_df_without_artifacts['low_passed_eda'] = apply_fir(eda = left_df_without_artifacts[MODALITY].to_numpy(), 
                                                                 fs = FS, 
                                                                 cutoff = 0.1, 
                                                                 transition_band = 0.5,
                                                                 filter_type = 'lowpass', 
                                                                 window = 'hamming')
        

        right_filepath = RIGHT_DATA_FOLDER_PATH+subject+'_eda_worn_right.h5'
        print(right_filepath, subject)
        
        right_df = pd.read_hdf(right_filepath)
        
        # plot_signal(df_eda = df_eda, label = 'eda', fs = FS)
        right_df_without_artifacts = remove_artifacts_and_interpolate(df = right_df, 
                                                                    fs = FS, 
                                                                    pandas_resample_rate = PANDAS_RESAMPLE_RATE, 
                                                                    z_score = False)
        
        # plot_signal(df_eda = df_eda_without_artifacts, label = 'eda', fs = FS)
        right_df_without_artifacts['low_passed_eda'] = apply_fir(eda = right_df_without_artifacts[MODALITY].to_numpy(), 
                                                                 fs = FS, 
                                                                 cutoff = 1, 
                                                                 transition_band = 0.1,
                                                                 filter_type = 'lowpass', 
                                                                 window = 'hamming')
        
        left_eda_mean_dict[subject] = left_df_without_artifacts['low_passed_eda'].resample('W').median()
        right_eda_mean_dict[subject] = right_df_without_artifacts['low_passed_eda'].resample('W').median()
        print(len(left_eda_mean_dict[subject]), len(right_eda_mean_dict[subject]))
        
        

/media/bayesian-posterior/sdc/sensecode_data/eda/SP22_eda_worn_left.h5 SP22
/media/bayesian-posterior/sdc/sensecode_data/eda/right_hand_eda/SP22_eda_worn_right.h5 SP22
12 13
/media/bayesian-posterior/sdc/sensecode_data/eda/SP69_eda_worn_left.h5 SP69
/media/bayesian-posterior/sdc/sensecode_data/eda/right_hand_eda/SP69_eda_worn_right.h5 SP69
13 13
/media/bayesian-posterior/sdc/sensecode_data/eda/SP28_eda_worn_left.h5 SP28
/media/bayesian-posterior/sdc/sensecode_data/eda/right_hand_eda/SP28_eda_worn_right.h5 SP28
13 13
/media/bayesian-posterior/sdc/sensecode_data/eda/SP21_eda_worn_left.h5 SP21
/media/bayesian-posterior/sdc/sensecode_data/eda/right_hand_eda/SP21_eda_worn_right.h5 SP21
12 12
/media/bayesian-posterior/sdc/sensecode_data/eda/SP59_eda_worn_left.h5 SP59
/media/bayesian-posterior/sdc/sensecode_data/eda/right_hand_eda/SP59_eda_worn_right.h5 SP59
13 13
/media/bayesian-posterior/sdc/sensecode_data/eda/SP72_eda_worn_left.h5 SP72
/media/bayesian-posterior/sdc/sensecode_data/eda/right

In [10]:
left_mean_list, right_mean_list = [], []

for subject, left_eda in left_eda_mean_dict.items():
    left_mean_list.append(np.median(left_eda))
    right_mean_list.append(np.median(right_eda_mean_dict[subject]))
    
    
ttest_ind(left_mean_list, right_mean_list)

Ttest_indResult(statistic=-1.0596296491622894, pvalue=0.29306251467174205)

In [9]:
file1 = open('left_eda_mean_dict.pkl', 'wb')
pickle.dump(left_eda_mean_dict, file1)

file2 = open('right_eda_mean_dict.pkl', 'wb')
pickle.dump(right_eda_mean_dict, file2)