In [1]:
## Should have all the same signal processing functions from the signal package in R:
## https://cran.r-project.org/web/packages/signal/signal.pdf
from scipy.signal import butter, lfilter

## https://docs.scipy.org/doc/scipy/reference/signal.html

import pandas as pd
import numpy as np
import csv
import glob
import matplotlib.pyplot as plt
import datetime

In [92]:
output_path = r'/Users/dancohen/Dropbox/E4 stuff/test data/EDA_All_Data'

Define helper functions to convert from the unix timestamp to UTC and to add time to an input date based on a sample rate (will be 4hz for this code)

In [93]:
def get_utc_date_time(ts):
    return datetime.datetime.fromtimestamp(ts)
#.strftime('%H:%M:%S:%f')

def add_fs(sample_rate, date):
    return date + datetime.timedelta(milliseconds=1.0/(sample_rate) * 1000.0)

Found some code on stack overflow and in SciPy's community website that implements a band pass butterworth filter for a 1d array: https://scipy-cookbook.readthedocs.io/items/ButterworthBandpass.html#.  I dont know what order filter we need for this data, but for exploratory data analysis, it appears to work ok

In [94]:
def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a

## May need to better understand which order filter is needed for this dataset
def butter_bandpass_filter(data, lowcut, highcut, fs, column_name, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    y = pd.DataFrame(y, columns=[column_name])
    return y

Helper function that will generate the list of timestamps that will be added as a new column to our dataframe

In [132]:
def time_range(df_length, start_timestamp):
    # Generate date time range based on sample rate and starting timestamp
    time_range = []
    t_0 = get_utc_date_time(float(start_timestamp))
    time_range.append(t_0)
    
    next_date = t_0
    
    for count in range(df_length-1):
        next_date = add_fs(sample_rate, next_date)
        time_range.append(next_date)
    return time_range

Helper function that returns a list of floats iterated upwards by sample rate

In [147]:
def iterate_sample_rate(df_length, fs):
    result = []
    curr_time = 0.0
    result.append(curr_time)
    
    for i in range(df_length-1):
        curr_time += 1.0/fs
        result.append(curr_time)
    return result

Change file_path to point to where the folders containing EDA data will be

In [148]:
file_path = r'/Users/dancohen/Dropbox/E4 stuff/test data'
output_path = r'/Users/dancohen/Dropbox/E4 stuff/test data/EDA_All_Data'
all_folders = glob.glob(file_path + "/PRF*")

In [149]:
fs = 4.0 ## Sampling frequency.  This test data was gathered at 4hz
lowcut = 0.05 ## Lower bound for the filter, as directed by Empatica documentation
highcut = 1.0  ## Upper bound for the filter, as directed by Empatica documentation

Now that we have a list of file paths that each will contain the files we are interested in, we can start to do work with the csv files they contain

In [156]:
eda_list = []

for folder in all_folders:
    spl = folder.split('/')
    part_id = spl[len(spl)-1][3:]
    
    eda_df_raw = pd.read_csv(folder+"/eda.csv")
    starting_timestamp = eda_df_raw.columns[0]
    sample_rate = eda_df_raw[starting_timestamp][0]
    
    # Drop the first 2 rows, as we already have the timestamp and sample rate
    eda_df_raw = eda_df_raw.drop(eda_df_raw.index[0:2])
    
    # Rename the column to the participant ID
    eda_df_raw = eda_df_raw.rename(columns={starting_timestamp: part_id})
    #time_col = time_range(len(eda_df_raw), starting_timestamp)
    time_col = iterate_sample_rate(len(eda_df_raw), sample_rate)
    
    eda_df_filtered = butter_bandpass_filter(eda_df_raw[part_id], lowcut, highcut, fs, part_id)
    eda_df_filtered['Timestamp'] = time_col
    
    #Set the index to be the timestamp, so that when we concatenate the data together we get an easy join
    eda_df_filtered = eda_df_filtered.set_index('Timestamp')
    #eda_df_filtered = eda_df_filtered.transpose()
    eda_list.append(eda_df_filtered.transpose())

In [157]:
eda_all_participants = pd.concat(eda_list)
eda_all_participants

Timestamp,0.0,0.25,0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.25,...,1980.25,1980.5,1980.75,1981.0,1981.25,1981.5,1981.75,1982.0,1982.25,1982.5
12,0.035781,0.224242,0.613521,0.984608,1.104027,1.07577,1.103839,1.127801,0.94893,0.526488,...,,,,,,,,,,
205,0.011937,0.07325,0.188914,0.260583,0.197817,0.071141,0.007552,0.009351,-0.003641,-0.057317,...,,,,,,,,,,
20,0.002046,0.012965,0.034941,0.051446,0.043791,0.020799,0.00553,0.001418,-0.003526,-0.012286,...,,,,,,,,,,
2,0.001216,0.008277,0.023869,0.037354,0.033335,0.016045,0.003916,0.001096,-0.001983,-0.006556,...,-0.032933,-0.004779,0.016179,0.013743,0.011977,0.020638,0.026583,0.02522,0.024721,0.027532


In [152]:
#eda_all_participants = eda_all_participants.transpose()
eda_all_participants = eda_all_participants.transpose()

In [153]:
eda_all_participants.to_csv(output_path+"/EDA_All_Data.csv")