In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import scipy.signal as scisig
from scipy.signal import welch
from collections import Counter

In [2]:
class SubjectData:
    """Wrist signals and labels for one subject, read from a pickle file.

    The file is looked up at ``<main_path>/S<n>/S<n>.pkl`` where ``n`` is
    ``subject_number``. The unpickled object must provide
    ``['signal']['wrist']['BVP']``, ``['signal']['wrist']['ACC']`` and
    ``['label']``; these are exposed as ``bvp``, ``acc`` and ``labels``.
    The full unpickled object is kept on ``data`` and the ``S<n>`` tag on
    ``name``.
    """

    def __init__(self, main_path, subject_number):
        subject_tag = f'S{subject_number}'
        self.name = subject_tag

        pkl_path = os.path.join(main_path, subject_tag, subject_tag + '.pkl')
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(pkl_path, 'rb') as handle:
            self.data = pickle.load(handle, encoding='latin1')

        wrist_signals = self.data['signal']['wrist']
        self.bvp = wrist_signals['BVP']
        self.acc = wrist_signals['ACC']
        self.labels = self.data['label']

In [3]:
def bvp_to_hrv(bvp_signal, fs):
    """Derive per-beat heart-rate features from a BVP segment.

    Peaks are located with ``scipy.signal.find_peaks`` using a minimum
    spacing of 0.4 s worth of samples. Consecutive peak distances give the
    inter-beat intervals (IBI), from which HR and SDNN are computed.

    Parameters
    ----------
    bvp_signal : 1-D array-like
        BVP samples (``find_peaks`` requires a 1-D sequence).
    fs : int or float
        Sampling rate of ``bvp_signal`` in Hz.

    Returns
    -------
    pandas.DataFrame
        Empty when fewer than three peaks are found. Otherwise one row per
        inter-beat interval with columns:

        * ``timestamps`` - offset (in seconds from the start of the
          segment) of the beat that closes the interval, converted with
          ``pd.to_datetime(..., unit='s')``.
        * ``HR`` - instantaneous heart rate in bpm.
        * ``SDNN`` - ``np.std`` of all IBIs in ms; the same scalar is
          repeated on every row.
    """
    # find_peaks raises ValueError for distance < 1, which int(fs * 0.4)
    # produces when fs < 2.5, so clamp the spacing to at least one sample.
    min_peak_distance = max(1, int(fs * 0.4))
    peaks, _ = scisig.find_peaks(bvp_signal, distance=min_peak_distance)

    if len(peaks) < 3:
        return pd.DataFrame()

    # Inter-beat intervals in milliseconds.
    ibi = np.diff(peaks) / fs * 1000

    # Instantaneous heart rate in beats per minute.
    hr = (60 * 1000) / ibi

    # SDNN: at least three peaks are guaranteed here, so there are always
    # two or more intervals and no NaN fallback is needed.
    sdnn = np.std(ibi)

    # Align HR/IBI timestamps with the second beat of each interval.
    timestamps = peaks[1:] / fs

    return pd.DataFrame({
        'timestamps': pd.to_datetime(timestamps, unit='s'),
        'HR': hr,
        'SDNN': sdnn,
    })

In [None]:
def feature_extract(subject_id, WINDOW_IN_SECONDS, raw_path="../../data/raw/WESAD"):
    """Extract windowed ACC and HR/SDNN features for one subject and save them as CSV.

    The label stream is cut into consecutive, non-overlapping windows of
    ``WINDOW_IN_SECONDS`` seconds. For every window the matching ACC and BVP
    slices are located by rescaling the label-sample indices with the
    sampling rates in ``fs_dict``. Windows with no ACC samples, or whose BVP
    slice yields an empty result from ``bvp_to_hrv``, are skipped.

    This function reads two notebook-level globals that must be defined
    before it is called:

    * ``fs_dict`` - mapping with ``'ACC'``, ``'BVP'`` and ``'label'``
      sampling rates.
    * ``save_path`` - directory that receives ``S<subject_id>.csv``; it is
      created if missing.

    Parameters
    ----------
    subject_id : int
        Subject number passed to ``SubjectData``.
    WINDOW_IN_SECONDS : int
        Window length in seconds.
    raw_path : str, optional
        Folder containing the per-subject pickle directories.

    Returns
    -------
    None
        The feature table is written to disk and a summary line is printed.
    """
    subject = SubjectData(main_path=raw_path, subject_number=subject_id)

    # Signals
    bvp = subject.bvp.flatten()
    acc = subject.acc
    labels = subject.labels

    # Sampling rates are loop-invariant; look them up once.
    fs_label = fs_dict['label']
    fs_acc = fs_dict['ACC']
    fs_bvp = fs_dict['BVP']

    # Windows are expressed in label samples.
    window_len = fs_label * WINDOW_IN_SECONDS
    n_windows = len(labels) // window_len

    all_window = []

    for i in range(n_windows):
        start = i * window_len
        end = start + window_len

        # Window start time, offset from a fixed reference date.
        start_sec = start / fs_label
        timestamp = pd.Timestamp("2025-04-30") + pd.to_timedelta(start_sec, unit='s')

        # Majority vote over labels in {1, 2, 3, 4}; -1 when none are present.
        # Counter keeps first-seen order, so ties resolve to the label that
        # appears first in the window.
        label_window = np.asarray(labels[start:end])
        valid_labels = label_window[np.isin(label_window, [1, 2, 3, 4])]
        if valid_labels.size == 0:
            label = -1
        else:
            label = Counter(valid_labels.tolist()).most_common(1)[0][0]

        # --- ACC ---
        acc_window = acc[start * fs_acc // fs_label: end * fs_acc // fs_label, :]
        if acc_window.shape[0] == 0:
            continue
        acc_x, acc_y, acc_z = acc_window[:, 0], acc_window[:, 1], acc_window[:, 2]
        net_acc = np.sqrt(acc_x ** 2 + acc_y ** 2 + acc_z ** 2)
        acc_features = {
            'ACC_x_mean': np.mean(acc_x),
            'ACC_y_mean': np.mean(acc_y),
            'ACC_z_mean': np.mean(acc_z),
            'net_acc_mean': np.mean(net_acc),
            'net_acc_std': np.std(net_acc)
        }

        # --- BVP / HRV ---
        bvp_window = bvp[start * fs_bvp // fs_label: end * fs_bvp // fs_label]
        hrv_df = bvp_to_hrv(bvp_window, fs_bvp)
        if hrv_df.empty:
            continue
        hrv_mean = hrv_df[['HR', 'SDNN']].mean()

        all_window.append({
            'timestamp': timestamp,
            **acc_features,
            'HR': hrv_mean['HR'],
            'SDNN': hrv_mean['SDNN'],
            'label': label,
            'subject': subject_id
        })

    df = pd.DataFrame(all_window)
    # to_csv fails when the target directory is missing, so create it first.
    os.makedirs(save_path, exist_ok=True)
    df.to_csv(os.path.join(save_path, f'S{subject_id}.csv'), index=False)
    print(f'Subject {subject_id} processed with {len(df)} samples.')

In [5]:
# Sampling rates (samples per second) used to map label-sample indices onto
# the ACC and BVP arrays.
# NOTE(review): the ACC rate must match the wrist ACC array loaded from the
# pickle - confirm 50 against the dataset documentation.
fs_dict = dict(ACC=50, BVP=64, label=700)

# Subject numbers to process: 2 through 17, with 12 omitted.
subject_id = [n for n in range(2, 18) if n != 12]

# Window lengths (seconds) for which feature files are generated.
WINDOW_IN_SECONDS = [1, 10, 15, 30]

# Root folder for the processed output; one sub-folder per window size.
main_path = "../../data/processed/apple_watch_format/"

In [6]:
# Generate one feature CSV per subject for every window size.
for wind_size in WINDOW_IN_SECONDS:
    # feature_extract reads `save_path` from the notebook globals. It depends
    # only on the window size, so it is set once per outer iteration.
    save_path = main_path + f'{wind_size}s/'
    # `sid` instead of `id` so the builtin id() is not shadowed for the rest
    # of the notebook session.
    for sid in subject_id:
        feature_extract(sid, WINDOW_IN_SECONDS=wind_size)

Subject 2 processed with 61 samples.
Subject 3 processed with 64 samples.
Subject 4 processed with 30 samples.
Subject 5 processed with 81 samples.
Subject 6 processed with 25 samples.


KeyboardInterrupt: 