In [56]:
import pandas as pd
import numpy as np
import csv
import glob
import matplotlib.pyplot as plt
import datetime
from decimal import Decimal

In [43]:
output_path = r'/Users/dancohen/Dropbox/E4 stuff/test data/EDA_All_Data'

Helper function that returns a list of floats iterated upwards by sample rate


In [44]:
def iterate_sample_rate(df_length, fs):
    # returns list of floats starting from zero iterated upward by sample rate
    result = []
    curr_time = 0.0
    result.append(curr_time)
    
    for i in range(df_length-1):
        curr_time += 1.0/fs
        result.append(curr_time)
    return result

In [96]:
file_path = r'/Users/dancohen/Dropbox/E4 stuff/test data'
output_path = r'/Users/dancohen/Dropbox/E4 stuff/test data/IBI_All_Data'
all_folders = glob.glob(file_path + "/PRF*")

Helper function that finds the number of seconds differenc between (t_0, a unix timestamp) and a datetime

In [97]:
def t_0_diff_seconds(t_0, date_time):
    return (date_time - pd.to_datetime(t_0, unit='s', infer_datetime_format = True)).total_seconds()

Helper function that takes a dataframe with a column in seconds and normalizes the values in that column to start at 0.00 seconds

In [254]:
def normalize_df(start_df, col_label):
    #Make a copy here to prevent chaining assignment in the dataframe
    start_seconds = start_df.iloc[0][0]
    cp = start_df.copy()
    cp[col_label] = start_df.apply(lambda x: x[col_label]-start_seconds, axis=1)
    cp = cp.reset_index(drop=True)
    #cp = cp.set_index(col_label)
    return cp

Helper function to calculate summary stats for IBI

In [255]:
def ibi_sum_stats(data_col):
    nn_50 = np.sum(np.abs(np.diff(data_col)) > 0.05)*1
    rmssd = np.sqrt(np.mean(np.square(np.diff(data_col))))
    sdnn = np.std(data_col)
    
    return nn_50, rmssd, sdnn

In [263]:
ibi_list = []
ibi_1_2 = []
ibi_2_3 = []
ibi_3_end = []

for folder in all_folders:
    spl = folder.split('/')
    
    #Folder names are 'PRF###' (participant ID) and we are interested in the ID
    part_id = spl[len(spl)-1][3:]
    
    # Grab the csv containing the timestamps that mark the start and end times of the scripts
    script_times = pd.read_csv(folder+"/tags.csv", header=None)
    script_times = script_times.apply(lambda x: pd.to_datetime(x, unit='s'))

        
    ibi_df_raw = pd.read_csv(folder+"/IBI.csv")
    starting_timestamp = ibi_df_raw.columns[0]
    
    try:
        begin_s1 = t_0_diff_seconds(starting_timestamp, script_times.loc[1].iat[0])
        end_s1 = t_0_diff_seconds(starting_timestamp, script_times.loc[2].iat[0])
        
        begin_s2 = t_0_diff_seconds(starting_timestamp, script_times.loc[3].iat[0])    
        end_s2 = t_0_diff_seconds(starting_timestamp, script_times.loc[4].iat[0])

        begin_s3 = t_0_diff_seconds(starting_timestamp, script_times.loc[5].iat[0])
        end_s3 = t_0_diff_seconds(starting_timestamp, script_times.loc[6].iat[0])
    except Exception as err:
        print("Error occurred parsing tags.csv for PRF{}.  Error: {}. This file will be skipped.".format(part_id, err))
        continue
    
    start_seconds = ibi_df_raw.loc[0].iat[0]
    #Normalize T0 down to 0 for the rest of the column.  This will make plotting data against each other easier later
    normalized_time = ibi_df_raw[starting_timestamp].apply(lambda x: x-start_seconds)

    ibi_df_raw[starting_timestamp] = normalized_time
    
    ibi_df = ibi_df_raw.rename(columns={starting_timestamp: "Time_After_T0", " IBI":part_id})
    
    df_1_2 = ibi_df[(ibi_df['Time_After_T0'] >= end_s1) & (ibi_df['Time_After_T0'] < begin_s2)]
    df_2_3 = ibi_df[(ibi_df['Time_After_T0'] >= end_s2) & (ibi_df['Time_After_T0'] < begin_s3)]
    df_3_end = ibi_df[(ibi_df['Time_After_T0'] >= end_s3)]
    
    #Normalize each frame's timestamp down to 0.00 seconds once we know where the timing cutoffs for each frame are
    #See lines above
    norm_1_2 = df_1_2.iloc[0][1]
    cp_1_2 = normalize_df(df_1_2, 'Time_After_T0')
    #Generate summary stats for each participant for each S1_2, S2_3, S3_end
    nn_50, rmssd, sdnn = ibi_sum_stats(cp_1_2[part_id])
    cp_1_2 = cp_1_2.transpose()
    cp_1_2['NN50'] = nn_50
    cp_1_2['RMSSD'] = rmssd
    cp_1_2['SDNN'] = sdnn
    
    norm_2_3 = df_2_3.iloc[0][1]
    cp_2_3 = normalize_df(df_2_3, 'Time_After_T0')
    nn_50, rmssd, sdnn = ibi_sum_stats(cp_2_3[part_id])
    cp_2_3 = cp_2_3.transpose()
    cp_2_3['NN50'] = nn_50
    cp_2_3['RMSSD'] = rmssd
    cp_2_3['SDNN'] = sdnn
    
    norm_3_end = df_3_end .iloc[0][1]
    cp_3_end  = normalize_df(df_3_end, 'Time_After_T0')
    nn_50, rmssd, sdnn = ibi_sum_stats(cp_3_end[part_id])
    cp_3_end = cp_3_end.transpose()
    cp_3_end['NN50'] = nn_50
    cp_3_end['RMSSD'] = rmssd
    cp_3_end['SDNN'] = sdnn
    
    ibi_1_2.append(cp_1_2)
    ibi_2_3.append(cp_2_3)
    ibi_3_end.append(cp_3_end)
    ibi_list.append(ibi_df.transpose())
    
    #plt.figure(figsize=(30, 7))
    #plt.plot(ibi_df['Time_After_T0'], ibi_df[part_id] )
    #plt.title("PRF{}".format(part_id))
    #plt.ylabel('IBI')
    #plt.xlabel('Time After T0')
    #plt.savefig(output_path+"/IBI{}.pdf".format(part_id))
    #plt.show()

Error occurred parsing tags.csv for PRF020.  Error: 'the label [6] is not in the [index]'. This file will be skipped.


In [266]:
df_1_2 = pd.concat(ibi_1_2)
df_2_3 = pd.concat(ibi_2_3)
df_3_end = pd.concat(ibi_3_end)

In [267]:
df_2_3

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,180,181,182,183,184,185,186,NN50,RMSSD,SDNN
Time_After_T0,0.0,0.640654,1.328186,9.500435,10.109838,10.719241,11.344269,11.984924,12.594327,13.20373,...,171.039079,171.742236,172.445394,173.164177,173.929837,174.726748,175.476782,21,0.036177,0.047658
012,0.640654,0.640654,0.687531,0.609403,0.609403,0.609403,0.625029,0.640654,0.609403,0.609403,...,0.734409,0.703157,0.703157,0.718783,0.76566,0.796911,0.750034,21,0.036177,0.047658
Time_After_T0,0.0,5.922146,7.015946,8.000366,8.859781,9.672318,10.672364,21.797873,22.594784,23.422947,...,,,,,,,,54,0.117402,0.103147
205,0.703157,1.0938,1.0938,0.98442,0.859414,0.812537,1.000046,0.87504,0.796911,0.828163,...,,,,,,,,54,0.117402,0.103147
Time_After_T0,0.0,0.703157,1.406315,2.140723,2.859506,3.609541,4.359575,5.109609,5.859644,6.640929,...,,,,,,,,6,0.054741,0.045245
002,0.734409,0.703157,0.703157,0.734409,0.718783,0.750034,0.750034,0.750034,0.750034,0.781286,...,,,,,,,,6,0.054741,0.045245


In [270]:
df_3_end

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,53,54,55,56,57,58,59,NN50,RMSSD,SDNN
Time_After_T0,0.0,0.65628,1.31256,4.515831,5.187737,5.844017,6.500297,7.187829,7.890986,8.547266,...,37.986113,38.736148,39.501808,40.220591,40.954999,41.689408,42.439442,4,0.034583,0.044505
012,0.609403,0.65628,0.65628,0.687531,0.671906,0.65628,0.65628,0.687531,0.703157,0.65628,...,0.734409,0.750034,0.76566,0.718783,0.734409,0.734409,0.750034,4,0.034583,0.044505
Time_After_T0,0.0,0.640654,14.750675,15.578838,20.610318,21.391604,22.17289,23.15731,23.844842,29.548228,...,,,,,,,,7,0.18131,0.1194
205,0.671906,0.640654,0.921917,0.828163,0.87504,0.781286,0.781286,0.98442,0.687531,1.031297,...,,,,,,,,7,0.18131,0.1194
Time_After_T0,0.0,0.703157,1.437566,2.171974,2.890757,3.60954,4.3752,5.156486,5.90652,6.594052,...,,,,,,,,12,0.085411,0.065446
002,0.76566,0.703157,0.734409,0.734409,0.718783,0.718783,0.76566,0.781286,0.750034,0.687531,...,,,,,,,,12,0.085411,0.065446


In [271]:
df_1_2.to_csv(output_path+"/IBI_1_2.csv", float_format='%.6f')
df_2_3.to_csv(output_path+"/IBI_2_3.csv", float_format='%.6f')
df_3_end.to_csv(output_path+"/IBI_3_end.csv", float_format='%.6f')

In [272]:
df = pd.concat(ibi_list)
df = df.transpose()

In [273]:
df.to_csv(output_path+"/IBI_All_Participants.csv", float_format='%.6f')