In [4]:
import os
import numpy as np
import pandas as pd
from datetime import datetime

In [174]:
def convert_to_seconds(time):
    """Convert an ``HHMMSS`` or ``HHMMSS.ffffff`` clock string to seconds.

    The callers in this file pass ``str(x)`` of values loaded by pandas. If
    pandas parsed the column as a number, leading zeros are gone (``000503.5``
    arrives as ``"503.5"``) and ``strptime`` would silently assign the digits
    to the wrong fields. The integer part is therefore left-padded with zeros
    to six digits before parsing.

    Args:
        time: Clock time as a string of digits, optionally followed by ``.``
            and a fractional-second part (``%f`` accepts up to six digits).

    Returns:
        Seconds since midnight; a float when ``time`` contains a ``.``,
        otherwise an int.

    Raises:
        ValueError: If the padded string does not match the expected format.
    """
    whole, dot, fraction = time.partition('.')
    # Restore the fixed-width HHMMSS layout so each field gets two digits.
    padded = whole.zfill(6) + dot + fraction

    if dot:
        parsed = datetime.strptime(padded, "%H%M%S.%f")
        return (parsed.hour * 3600 + parsed.minute * 60 + parsed.second
                + parsed.microsecond / 1e6)

    parsed = datetime.strptime(padded, "%H%M%S")
    return parsed.hour * 3600 + parsed.minute * 60 + parsed.second



def segment_ecg_data(ecg_data, start_time, end_time, num_segments, target_len=3000):
    """Cut the samples of a time window into consecutive fixed-length segments.

    The window starts at the first row whose ``time`` is at least
    ``start_time`` and stops just before the last row whose ``time`` is at
    most ``end_time + 1``. Only complete segments are returned; a trailing
    remainder shorter than ``target_len`` is dropped.

    Row positions (not index labels) are used throughout, so the result does
    not depend on the frame having a default ``RangeIndex``.

    Args:
        ecg_data: DataFrame with a ``time`` column.
        start_time: Lower bound of the window, in the units of ``time``.
        end_time: Upper bound of the window; one extra unit is allowed.
        num_segments: Unused; kept so existing callers keep working.
        target_len: Number of rows per segment.

    Returns:
        A list of DataFrame slices, each with exactly ``target_len`` rows.
        The list is empty when no row falls inside the window.
    """
    times = ecg_data.time.to_numpy()
    at_or_after_start = np.flatnonzero(times >= start_time)
    at_or_before_end = np.flatnonzero(times <= end_time + 1)
    if at_or_after_start.size == 0 or at_or_before_end.size == 0:
        return []

    first_pos = int(at_or_after_start[0])
    last_pos = int(at_or_before_end[-1])
    # The end position is exclusive, matching the original slicing.
    window = ecg_data.iloc[first_pos:last_pos]

    return [
        window.iloc[k * target_len:(k + 1) * target_len]
        for k in range(len(window) // target_len)
    ]

def pair_segments(rest_segments, offset_rest_segments, test_segments, subject):
    """Build labelled segment pairs for one subject.

    Label ``0`` pairs are formed from adjacent rest segments (0 with 1, 2 with
    3, ...); an unpaired trailing rest segment is ignored. Label ``1`` pairs
    combine an offset-rest segment with the test segment at the same position,
    and there are never more of them than label ``0`` pairs.

    Returns:
        A list of ``(pair_id, seg1, seg2, label)`` tuples, where ``pair_id``
        is ``"<subject>-<n>"`` and ``n`` counts up from zero across both
        groups.
    """
    rest_pair_count = len(rest_segments) // 2
    usable = 2 * rest_pair_count

    same_state = zip(rest_segments[0:usable:2], rest_segments[1:usable:2])
    # zip stops at the shorter input, so slicing both to the rest-pair count
    # yields min(len(test), len(offset_rest), rest_pair_count) pairs.
    cross_state = zip(offset_rest_segments[:rest_pair_count],
                      test_segments[:rest_pair_count])

    labelled = [(first, second, 0) for first, second in same_state]
    labelled += [(first, second, 1) for first, second in cross_state]

    return [
        (subject + "-" + str(counter), first, second, label)
        for counter, (first, second, label) in enumerate(labelled)
    ]
        

def read_data(folder, subject):
    """Load one subject's ECG recording and build its labelled segment pairs.

    Reads the tab-separated ``BitalinoECG.txt`` and ``Triggers.txt`` files
    from ``<folder>/<subject>``, converts all clock times to seconds, and
    shifts them so the first ECG sample is at time zero. Segments are then
    cut from three windows:

    * the whole ``BIOFEEDBACK-REST`` trigger interval,
    * 15 to 200 time units after the rest trigger starts,
    * the first 200 time units of the "test" trigger, which is whichever
      trigger name appears on the second row of the trigger file.

    Args:
        folder: Dataset root directory.
        subject: Name of the subject's sub-directory; also used as the prefix
            of the pair identifiers.

    Returns:
        The list produced by ``pair_segments``, or an empty list when the
        subject has no ``BIOFEEDBACK-REST`` trigger.
    """
    ecg_file_path = os.path.join(folder, subject, "BitalinoECG.txt")
    triggers_file_path = os.path.join(folder, subject, "Triggers.txt")

    ecg_data = pd.read_csv(ecg_file_path, sep='\t', names=['ecg', 'time', 'nothing'], engine='python')
    ecg_data = ecg_data.drop(columns=['nothing'])

    ecg_data.time = ecg_data.time.apply(lambda x: convert_to_seconds(str(x)))
    # Time of the first ECG sample; everything is expressed relative to it.
    normalize_time = ecg_data.iloc[0, 1]
    ecg_data.time = ecg_data.time - normalize_time

    trigger_data = pd.read_csv(triggers_file_path, sep='\t', names=['name', 'start', 'end'], engine='python')

    # Decide whether to skip before touching any trigger row, so a subject
    # without a rest phase cannot fail on the lookups below.
    if 'BIOFEEDBACK-REST' not in trigger_data.name.values:
        print(f"Skipping subject {subject}: 'BIOFEEDBACK-REST' not found.")
        return []

    trigger_data.start = trigger_data.start.apply(lambda x: convert_to_seconds(str(x))) - normalize_time
    trigger_data.end = trigger_data.end.apply(lambda x: convert_to_seconds(str(x))) - normalize_time

    rest_trigger = trigger_data[trigger_data.name == 'BIOFEEDBACK-REST']
    test_trigger = trigger_data[trigger_data.name == trigger_data.name.iloc[1]]

    rest_start = rest_trigger.start.values[0]
    rest_end = rest_trigger.end.values[0]
    test_start = test_trigger.start.values[0]

    rest_segments = segment_ecg_data(ecg_data, rest_start, rest_end, 10)
    offset_rest_segments = segment_ecg_data(ecg_data, rest_start + 15, rest_start + 200, 5)
    test_segments = segment_ecg_data(ecg_data, test_start, test_start + 200, 5)

    return pair_segments(rest_segments, offset_rest_segments, test_segments, subject)

def save_ecg_data_to_file(all_data, ecg_file_path):
    """Write all segment pairs to a single long-format CSV file.

    Each pair contributes one row per sample of its first segment, with the
    columns ``subject``, ``ecg1``, ``time1``, ``ecg2``, ``time2`` and
    ``label``. The identifier of every pair is printed as it is processed.

    The file is written once, to ``ecg_file_path``, after all pairs have been
    collected. Nothing is written when there are no rows to save.

    Args:
        all_data: Iterable of ``(subject, seg1, seg2, label)`` tuples, where
            ``seg1`` and ``seg2`` are DataFrames with ``ecg`` and ``time``
            columns.
        ecg_file_path: Destination path of the CSV file.
    """
    frames = []
    for subject, seg1, seg2, label in all_data:
        print(subject)
        ecg1 = seg1.ecg.values
        row_count = len(ecg1)
        if row_count == 0:
            continue
        # Build the rows of a pair column-wise instead of one dict per sample;
        # the second segment is cut to the length of the first.
        frames.append(pd.DataFrame({
            'subject': subject,
            'ecg1': ecg1,
            'time1': seg1.time.values,
            'ecg2': seg2.ecg.values[:row_count],
            'time2': seg2.time.values[:row_count],
            'label': label,
        }))

    if not frames:
        return

    pd.concat(frames, ignore_index=True).to_csv(ecg_file_path, index=False)
    

# Root directory of the dataset; every directory entry whose name starts with
# "VP" is treated as one subject.
folder = "./electrocardiogram-skin-conductance-and-respiration-from-spider-fearful-individuals-watching-spider-video-clips-1.0.0"

subjects = [entry for entry in os.listdir(folder) if entry.startswith("VP")]

# Collect the labelled segment pairs of all subjects, printing each subject
# name as it is processed.
all_data = []
for sub in subjects:
    print(sub)
    subject_data = read_data(folder, sub)
    all_data += subject_data

save_ecg_data_to_file(all_data, "even_segmented_data.csv")


VP05
VP02
VP33
VP69
VP56
VP51
VP32
VP35
VP03
VP50
VP68
VP57
VP61
VP59
VP66
VP44
VP75
VP72
VP26
Skipping subject VP26: 'BIOFEEDBACK-REST' not found.
VP17
VP73
VP74
VP80
VP42
VP45
VP11
VP29
VP20
VP18
VP27
VP63
VP64
VP06
VP39
VP30
VP08
VP54
VP53
VP65
VP62
VP36
VP09
VP38
VP14
VP47
VP78
VP71
VP76
VP15
VP12
VP24
VP23
VP77
VP48
VP70
VP79
VP41
VP05-0
VP05-1
VP05-2
VP05-3
VP05-4
VP05-5
VP05-6
VP05-7
VP05-8
VP05-9
VP02-0
VP02-1
VP02-2
VP02-3
VP02-4
VP02-5
VP02-6
VP02-7
VP02-8
VP02-9
VP33-0
VP33-1
VP33-2
VP33-3
VP33-4
VP33-5
VP33-6
VP33-7
VP33-8
VP33-9
VP69-0
VP69-1
VP69-2
VP69-3
VP69-4
VP69-5
VP69-6
VP69-7
VP69-8
VP69-9
VP56-0
VP56-1
VP56-2
VP56-3
VP56-4
VP56-5
VP56-6
VP56-7
VP56-8
VP56-9
VP51-0
VP51-1
VP51-2
VP51-3
VP51-4
VP51-5
VP51-6
VP51-7
VP51-8
VP51-9
VP32-0
VP32-1
VP32-2
VP32-3
VP32-4
VP32-5
VP32-6
VP32-7
VP32-8
VP32-9
VP35-0
VP35-1
VP35-2
VP35-3
VP35-4
VP35-5
VP35-6
VP35-7
VP35-8
VP35-9
VP03-0
VP03-1
VP03-2
VP03-3
VP03-4
VP03-5
VP50-0
VP50-1
VP50-2
VP50-3
VP50-4
VP50-5
VP50-6
VP50-7
VP50