In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os

In [99]:
# Raw E4 recordings; searched two directory levels deep for folders holding ACC/BVP/EDA/HR/TEMP.csv.
input_path = "../../data/input/e4/"
# Destination of the timestamped per-modality CSVs; its E4_mapped/ subfolder is read from here too.
output_path = "../../data/output/"
# Holds responses.csv and one folder per participant whose files provide a "#timestamp" column.
working_path = "../../data/working/"

In [2]:
#adding timestamps per sample for all modalities
# Spacing between two consecutive samples of each modality, in the same unit as
# the start time stored in the raw file (presumably seconds -- confirm).
step_eda = 0.25
step_temp = 0.25
step_bvp = 0.015625
step_acc = 0.03125
step_hr = 1.0


def _with_timestamps(raw, step, data_columns):
    """Return the samples of a raw E4 frame with a per-sample timestamp column.

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame read with ``header=None``. The value in row 0 of column ``0`` is
        used as the time of the first sample; rows 0 and 1 are metadata and are
        dropped (row 1 is presumably the sample rate -- confirm against the
        E4 export format).
    step : float
        Time between two consecutive samples.
    data_columns : sequence of str
        Names given to the data columns (one per column of ``raw``).

    Returns
    -------
    pandas.DataFrame
        New frame with a fresh 0..n-1 index, the renamed data columns and a
        trailing ``timestamp`` column. ``raw`` itself is left untouched.

    Raises
    ------
    IndexError
        If ``raw`` has no rows at all.
    ValueError
        If ``data_columns`` does not match the number of columns in ``raw``.
    """
    start_time = raw[0].iat[0]

    samples = raw.iloc[2:].reset_index(drop=True)
    samples.columns = list(data_columns)

    # Build the time axis from an integer sample counter: np.arange with a
    # float step and a computed stop value can come out one element too long
    # or too short, which would make the column assignment fail.
    timestamps = start_time + np.arange(len(samples)) * step
    return samples.assign(timestamp=timestamps)


def eda_timestamps(eda):
    """Timestamp the EDA samples (spacing ``step_eda``); returns ``data``/``timestamp``."""
    return _with_timestamps(eda, step_eda, ["data"])


def temp_timestamps(temp):
    """Timestamp the TEMP samples (spacing ``step_temp``); returns ``data``/``timestamp``."""
    return _with_timestamps(temp, step_temp, ["data"])


def bvp_timestamps(bvp):
    """Timestamp the BVP samples (spacing ``step_bvp``); returns ``data``/``timestamp``."""
    return _with_timestamps(bvp, step_bvp, ["data"])


def hr_timestamps(hr):
    """Timestamp the HR samples (spacing ``step_hr``); returns ``data``/``timestamp``."""
    return _with_timestamps(hr, step_hr, ["data"])


def acc_timestamps(acc):
    """Timestamp the ACC samples (spacing ``step_acc``).

    Returns the columns ``acc_x``, ``acc_y``, ``acc_z`` and ``timestamp``.
    """
    return _with_timestamps(acc, step_acc, ["acc_x", "acc_y", "acc_z"])

In [24]:
# Raw file stem -> helper that attaches per-sample timestamps to that modality.
timestamp_funcs = {
    "EDA": eda_timestamps,
    "TEMP": temp_timestamps,
    "BVP": bvp_timestamps,
    "ACC": acc_timestamps,
    "HR": hr_timestamps,
}

# Group the session folders (<input_path>/<participant folder>/<session>/) by
# participant. The ID is the first three characters of the participant folder
# name (e.g. "p01"), taken from the path components rather than from fixed
# character offsets so it does not depend on the length of input_path.
sessions_by_participant = {}
for g in sorted(glob.glob(input_path + '/*/*/')):
    participant_id = os.path.relpath(g, input_path).split(os.sep)[0][:3]
    sessions_by_participant.setdefault(participant_id, []).append(g)

# One output file per participant and modality, holding all of the
# participant's sessions in sorted order. Each file is written exactly once
# (overwrite mode): appending session by session would repeat the header row
# in the middle of the file and would duplicate the data whenever this cell
# is run again.
for participant_id, session_dirs in sessions_by_participant.items():
    for modality, add_timestamps in timestamp_funcs.items():
        per_session = [
            add_timestamps(pd.read_csv(session_dir + modality + ".csv", header=None))
            for session_dir in session_dirs
        ]
        # Every session keeps its own 0..n-1 index in the written file.
        pd.concat(per_session).to_csv(output_path + participant_id + '_' + modality + '.csv')

In [44]:
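# Disabled cleanup step: it re-reads every file in output_path and keeps only the rows whose
# first column differs from that column's own name, i.e. it drops rows that repeat the header.
# NOTE(review): the g[32:44] slice below does not line up with the current output_path
# (18 characters), so the target file name must be corrected before re-enabling this cell.
# for g in sorted(glob.glob(output_path + '*')):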

#     df = pd.read_csv(g, index_col=0)
#     df1 = df[df.iloc[:, 0] != df.columns[0]]
#     df1.to_csv(output_path + str(g[32:44]))

In [47]:
# Positions (within each sorted per-modality listing) of the files to leave out.
# NOTE(review): presumably these are the participants missing from the
# `participants` list used later -- confirm, the selection is purely positional.
indices = {2, 3}


def _kept_files(suffix):
    """Return the sorted files in output_path ending in `suffix`, minus the positions in `indices`."""
    matching = sorted(name for name in os.listdir(output_path) if name.endswith(suffix))
    return [name for position, name in enumerate(matching) if position not in indices]


eda_list = _kept_files('EDA.csv')
bvp_list = _kept_files('BVP.csv')
temp_list = _kept_files('TEMP.csv')
acc_list = _kept_files('ACC.csv')
hr_list = _kept_files('HR.csv')

In [92]:
#specify eye tracking data path and output_path for mapped timestamps from E4

participants = ["p01","p02","p05","p06","p07","p08","p09","p10","p11","p12","p13","p14","p15"]
save_path = "../../data/output/E4_mapped/"

#function to map E4 timestamps with eye tracking timestamps
def E4_split(participant, pnum, clock_offset_s=3600):
    """Cut one participant's E4 signals into one file per eye-tracking recording.

    For every file found in ``working_path/<participant>/`` (sorted order), the
    first and last value of its ``#timestamp`` column define a time window.
    The rows of each E4 signal whose shifted timestamp falls inside that window
    (both ends included) are written to
    ``save_path/<participant>_d<NN>_<MODALITY>.csv``, where NN is the 1-based
    position of the recording.

    Parameters
    ----------
    participant : str
        Participant ID; used as the folder name under ``working_path`` and as
        the prefix of the output file names.
    pnum : int
        Position of this participant's files in ``eda_list``, ``bvp_list``,
        ``temp_list``, ``acc_list`` and ``hr_list``.
    clock_offset_s : float, optional
        Value subtracted from the E4 timestamps before they are compared with
        the eye-tracking window. Defaults to 3600 (presumably a one-hour
        clock/time-zone difference between the two devices -- confirm).

    Raises
    ------
    IndexError
        If ``pnum`` is out of range for a file list, or a recording has no rows.
    """
    import warnings

    # to_csv does not create missing folders.
    os.makedirs(save_path, exist_ok=True)

    files_by_modality = {
        "EDA": eda_list,
        "BVP": bvp_list,
        "TEMP": temp_list,
        "ACC": acc_list,
        "HR": hr_list,
    }

    signals = {}
    for modality, file_list in files_by_modality.items():
        file_name = file_list[pnum]
        # Files are paired with participants by position only, so a missing or
        # extra file would silently shift every later participant.
        if not file_name.startswith(participant):
            warnings.warn(
                "E4 file %r was selected by position for participant %r; "
                "check that the file lists and the participant list are aligned"
                % (file_name, participant)
            )
        signals[modality] = pd.read_csv(output_path + file_name, index_col=0)

    for i, g in enumerate(sorted(glob.glob(working_path + "/%s/*" % participant))):

        timestamp = pd.read_csv(g)["#timestamp"].values

        # Window bounds scaled by 1e-3 to match the E4 timestamps (the
        # eye-tracking values are presumably milliseconds -- confirm).
        window_start = timestamp[0] * (10 ** -3)
        window_end = timestamp[-1] * (10 ** -3)

        document = '_d' + str(i + 1).zfill(2)
        for modality, signal in signals.items():
            shifted = signal["timestamp"] - clock_offset_s
            in_window = signal[shifted.between(window_start, window_end)]
            in_window.to_csv(save_path + '%s' % participant + document + '_' + modality + '.csv', index=True)

In [93]:
# The position of a participant in `participants` is also the position of that
# participant's files in the per-modality file lists, so pass both together.
for p_num, p in enumerate(participants):
    E4_split(p, p_num)

In [107]:
labels = pd.read_csv(working_path + 'responses.csv', index_col=0)

#adding labels per participant and document
# Only the .csv files are processed, so other files in the folder are ignored.
for g in sorted(glob.glob(output_path + "E4_mapped/*.csv")):

    # File names have the form "<participant>_<document>_<modality>.csv".
    # Take the parts from the name itself: fixed character offsets cut off
    # names of a different length (e.g. "..._TEMP.csv" became "..._TEMP.cs").
    file_name = os.path.basename(g)
    participant, document = file_name.split("_")[:2]

    e4_data = pd.read_csv(g, index_col=0)
    e4_data["participant"] = participant
    e4_data["document"] = document

    # Left join: every E4 row is kept, rows without a matching response get NaN labels.
    label_data = pd.merge(e4_data, labels, how="left", on=['participant', 'document'])
    # NOTE: this overwrites the file that was just read.
    label_data.to_csv(output_path + 'E4_mapped/' + file_name)