In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from scipy.signal import cheby2, sosfilt
from datetime import datetime, timedelta
from IPython.display import display

In [4]:
# Make the Empatica data folder the working directory: concatenate_data() looks
# up each subject folder relative to the current directory.
# NOTE(review): this absolute path is under the 'katgm' user profile, while the
# output path used by the CSV export later in this notebook is under 'Owner' --
# confirm both exist on the machine that runs the notebook.
os.chdir('C:\\Users\\katgm\\Rutgers University\\Michelle Chen - Rutgers_Neuropsych_Lab\\COVID_Fatigue\\RC_award\\Data\\Empatica')
# The returned path is not assigned or printed here.
os.getcwd()

def _sample_times(start, n_samples, hz):
    """Return `n_samples` timestamps spaced 1/hz seconds apart, beginning at `start`.

    The spacing is an integer number of microseconds, so `hz` must divide
    1,000,000 evenly (true for the 4, 32 and 64 Hz rates used below). Exact
    spacing is what lets samples of different signals fall on identical
    timestamps so they can be merged by equality.
    """
    step = pd.Timedelta(microseconds=1_000_000 // hz)
    return pd.date_range(start=start, periods=n_samples, freq=step)


def _stamped_frame(samples, columns, start, hz):
    """Wrap `samples` in a DataFrame and add a 'timestamp' column starting at `start`."""
    frame = pd.DataFrame(data=samples, columns=columns)
    frame['timestamp'] = _sample_times(start, len(frame), hz)
    return frame


def _load_session(session_dir):
    """Read one session folder and return `(start_time, frame)`.

    `frame` has one row per BVP sample; the slower signals are left-joined on
    the timestamp and forward-filled within this session only.
    """

    def read_raw(file_name):
        return pd.read_csv(os.path.join(session_dir, file_name), header=None).to_numpy()

    eda = read_raw('EDA.csv').flatten()
    bvp = read_raw('BVP.csv').flatten()
    acc = read_raw('ACC.csv')
    temp = read_raw('TEMP.csv').flatten()

    # Row 0 of the EDA file is used as the start time for every signal.
    # datetime.fromtimestamp() yields a naive datetime in the machine's local time.
    init_time = datetime.fromtimestamp(eda[0])

    # Rows 0 and 1 are the timestamp and sampling-frequency rows, not samples.
    bvp_df = _stamped_frame(bvp[2:], ['BVP (64 Hz)'], init_time, 64)
    eda_df = _stamped_frame(eda[2:], ['EDA (4 Hz)'], init_time, 4)
    acc_df = _stamped_frame(
        acc[2:], ['ACC X (32 Hz)', 'ACC Y (32 Hz)', 'ACC Z (32 Hz)'], init_time, 32
    )
    temp_df = _stamped_frame(temp[2:], ['TEMP (4 Hz)'], init_time, 4)

    # BVP is the fastest signal, so it defines the row grid; the others only
    # have a value on the rows where their own sample time coincides.
    merged = bvp_df
    for slower in (eda_df, acc_df, temp_df):
        merged = merged.merge(slower, on='timestamp', how='left')

    # Fill the gaps between slower samples here, per session, so a value is
    # never carried across the recording gap into the next session.
    return init_time, merged.ffill(axis=0)


def concatenate_data(SubjID):
    """Concatenate every recording session of one subject into a single frame.

    Session folders are read from ``<current working directory>/<SubjID>``.
    Entries named ``<SubjID>_baseline`` or ``<SubjID>_zipped`` are ignored, as
    is anything that is not a directory. Each remaining folder must contain
    ``EDA.csv``, ``BVP.csv``, ``ACC.csv`` and ``TEMP.csv``.

    Parameters
    ----------
    SubjID : str
        Name of the subject folder inside the current working directory.

    Returns
    -------
    pandas.DataFrame
        One row per BVP sample with columns ``timestamp``, ``BVP (64 Hz)``,
        ``EDA (4 Hz)``, ``TEMP (4 Hz)``, ``ACC X (32 Hz)``, ``ACC Y (32 Hz)``
        and ``ACC Z (32 Hz)``. Sessions appear in order of their start time and
        the index is a fresh 0..n-1 range. The frame is empty (same columns)
        when the subject folder holds no session folders.

    Raises
    ------
    FileNotFoundError
        If the subject folder, or one of the four CSV files in a session
        folder, does not exist.
    """
    new_cols = ['timestamp', 'BVP (64 Hz)', 'EDA (4 Hz)', 'TEMP (4 Hz)',
                'ACC X (32 Hz)', 'ACC Y (32 Hz)', 'ACC Z (32 Hz)']

    subject_dir = os.path.join(os.getcwd(), SubjID)
    skipped = {SubjID + '_baseline', SubjID + '_zipped'}

    # os.listdir() returns entries in arbitrary order; sort for determinism and
    # keep only real session directories.
    session_dirs = [
        os.path.join(subject_dir, entry)
        for entry in sorted(os.listdir(subject_dir))
        if entry not in skipped and os.path.isdir(os.path.join(subject_dir, entry))
    ]

    sessions = [_load_session(path) for path in session_dirs]
    if not sessions:
        return pd.DataFrame(columns=new_cols)

    # Chronological order regardless of how the folders happen to be named.
    sessions.sort(key=lambda pair: pair[0])

    # Concatenate once (not inside the loop) and renumber the rows so index
    # labels are unique across sessions.
    df = pd.concat([frame for _, frame in sessions], ignore_index=True)
    return df[new_cols]
    


In [None]:
# Build one concatenated frame per subject. The CSV export cell at the end of
# this notebook writes cov4, cov8 and cov14, so all three have to be created
# here for a fresh-kernel "Restart & Run All" to succeed.
# NOTE(review): the "Cov8" and "Cov14" folder names are inferred from the export
# file names -- confirm they match the subject folders on disk.
cov4 = concatenate_data("Cov4")
cov8 = concatenate_data("Cov8")
cov14 = concatenate_data("Cov14")

In [9]:
# Render the concatenated Cov4 frame in the cell output as a sanity check.
display(cov4)

Unnamed: 0,timestamp,BVP (64 Hz),EDA (4 Hz),TEMP (4 Hz),ACC X (32 Hz),ACC Y (32 Hz),ACC Z (32 Hz)
0,2023-02-28 10:03:33.000000,-0.00,0.000000,19.37,60.0,22.0,-2.0
1,2023-02-28 10:03:33.015625,-0.00,0.000000,19.37,60.0,22.0,-2.0
2,2023-02-28 10:03:33.031250,-0.00,0.000000,19.37,59.0,20.0,-5.0
3,2023-02-28 10:03:33.046875,-0.00,0.000000,19.37,59.0,20.0,-5.0
4,2023-02-28 10:03:33.062500,-0.00,0.000000,19.37,61.0,18.0,-13.0
...,...,...,...,...,...,...,...
3617719,2023-03-08 11:58:44.859375,0.49,0.032035,25.29,-60.0,-2.0,9.0
3617720,2023-03-08 11:58:44.875000,1.86,0.032035,25.29,-60.0,-2.0,9.0
3617721,2023-03-08 11:58:44.890625,3.27,0.032035,25.29,-60.0,-2.0,9.0
3617722,2023-03-08 11:58:44.906250,4.50,0.032035,25.29,-60.0,-2.0,9.0


In [8]:
# Folder that receives the per-subject concatenated CSV files.
# NOTE(review): this absolute path is under the 'Owner' user profile, while the
# os.chdir() call earlier in this notebook uses 'katgm' -- confirm both exist on
# the machine that runs the notebook.
nwd = 'C:\\Users\\Owner\\Rutgers University\\Michelle Chen - Rutgers_Neuropsych_Lab\\COVID_Fatigue\\RC_award\\Data\\Concatenated_Data\\'


def _write_remote_csv(frame, file_name):
    """Write `frame` to `nwd` + `file_name` as CSV, leaving the index out of the file."""
    frame.to_csv(nwd + file_name, index=False)


# cov4, cov8 and cov14 must already exist in the kernel when this cell runs.
_write_remote_csv(cov4, 'Cov4_remote.csv')
_write_remote_csv(cov8, 'Cov8_remote.csv')
_write_remote_csv(cov14, 'Cov14_remote.csv')