In [7]:
import os
import pickle
import numpy as np
import pandas as pd
import scipy.signal as scisig
import scipy.stats
from collections import Counter

## Feature engineering

In [9]:
# Sampling rates in Hz, used to convert label-timeline indices into signal indices.
# The WESAD wrist device (Empatica E4) records BVP at 64 Hz and ACC at 32 Hz;
# labels are at 700 Hz. Using 32 for BVP would slice only half of every
# window and double every inter-beat interval.
fs_dict = {'ACC': 32, 'BVP': 64, 'label': 700}
# Length of one feature window, in seconds.
WINDOW_IN_SECONDS = 30

# Save path
save_path = "../data/processed/apple_watch_format"

### Loading subject data

In [10]:
class SubjectData:
    """Load one subject's pickled recording and expose the signals used here.

    The file is read from ``<main_path>/S<subject_number>/S<subject_number>.pkl``.

    Attributes
    ----------
    name : str
        ``'S'`` followed by the subject number.
    data : object
        The full unpickled payload.
    labels, bvp, acc
        ``data['label']`` and the ``'BVP'`` / ``'ACC'`` entries of
        ``data['signal']['wrist']``.
    """

    def __init__(self, main_path, subject_number):
        subject_tag = f'S{subject_number}'
        self.name = subject_tag

        pkl_path = os.path.join(main_path, subject_tag, subject_tag + '.pkl')
        # NOTE: unpickling can execute arbitrary code -- only load trusted files.
        with open(pkl_path, 'rb') as handle:
            recording = pickle.load(handle, encoding='latin1')

        self.data = recording
        self.labels = recording['label']
        wrist_signals = recording['signal']['wrist']
        self.bvp = wrist_signals['BVP']
        self.acc = wrist_signals['ACC']

### Calculate HR, IBI, HRV from BVP

In [11]:
def bvp_to_hrv(bvp_signal, fs):
    """Derive beat-level HR/IBI and an RMSSD value from a BVP trace.

    Peaks are located with ``scipy.signal.find_peaks`` using a minimum spacing
    of ``0.4 * fs`` samples; the spacing between consecutive peaks gives the
    inter-beat intervals.

    Parameters
    ----------
    bvp_signal : 1-D array-like
        BVP samples.
    fs : number
        Sampling rate of ``bvp_signal`` in Hz.

    Returns
    -------
    pandas.DataFrame
        One row per interval between consecutive peaks, with columns
        ``timestamps`` (time of the interval's closing peak, as seconds
        converted to a datetime), ``HR`` (beats per minute), ``IBI``
        (milliseconds) and ``HRV`` (RMSSD of the IBIs over the whole input,
        repeated on every row; NaN when there are fewer than two intervals).
        The frame is empty when fewer than two peaks are found.
    """
    min_peak_gap = fs * 0.4
    peak_idx = scisig.find_peaks(bvp_signal, distance=min_peak_gap)[0]

    # Peak spacing: samples -> seconds -> milliseconds.
    ibi_ms = np.diff(peak_idx) / fs * 1000
    bpm = (60 * 1000) / ibi_ms

    # RMSSD: root mean square of successive IBI differences.
    successive_diff = np.diff(ibi_ms)
    if len(successive_diff) > 0:
        rmssd = np.sqrt(np.mean(successive_diff ** 2))
    else:
        rmssd = np.nan

    # Each interval is stamped with the time of the peak that closes it.
    beat_times = peak_idx[1:] / fs

    columns = {
        'timestamps': pd.to_datetime(beat_times, unit='s'),
        'HR': bpm,
        'IBI': ibi_ms,
        'HRV': [rmssd] * len(bpm),
    }
    return pd.DataFrame(columns)

### Build per-window features (ACC statistics + HR/IBI/HRV) and export to CSV

In [14]:
def apple_watch_format(subject_id, main_path="../data/raw/WESAD"):
    """Build and save a per-window feature table for one subject.

    The recording is cut into consecutive, non-overlapping windows of
    ``WINDOW_IN_SECONDS`` seconds on the label timeline. For each window:

    * the label is the majority vote over samples labelled 1, 2 or 3
      (windows with none of these labels are skipped); the output ``label``
      is 1 when that majority is 2 and 0 otherwise
      (NOTE(review): label 2 is presumably the stress condition -- confirm);
    * ACC features are the per-axis means plus mean/std of the magnitude;
    * HR, IBI and HRV are the window means of ``bvp_to_hrv`` output (windows
      for which it returns no rows are skipped).

    The table is written to ``<save_path>/S<subject_id>_apple_watch.csv``; the
    directory is created if it is missing.

    Parameters
    ----------
    subject_id : int
        Subject number; also stored in the ``subject`` column.
    main_path : str, optional
        Directory holding the per-subject folders. Defaults to the path the
        notebook used originally.

    Returns
    -------
    pandas.DataFrame
        The table that was written to disk.
    """
    subject = SubjectData(main_path=main_path, subject_number=subject_id)
    bvp = subject.bvp.flatten()
    acc = subject.acc
    labels = np.asarray(subject.labels)

    label_fs = fs_dict['label']
    acc_fs = fs_dict['ACC']
    bvp_fs = fs_dict['BVP']

    window_len = label_fs * WINDOW_IN_SECONDS
    n_windows = len(labels) // window_len

    all_window = []

    for i in range(n_windows):
        # Window bounds in label-timeline samples.
        start = i * window_len
        end = start + window_len

        # Majority vote over the labels of interest. The mask is vectorised;
        # Counter is kept so ties are still broken by first occurrence.
        label_window = labels[start:end]
        valid_labels = label_window[np.isin(label_window, (1, 2, 3))]
        if valid_labels.size == 0:
            continue
        label = Counter(valid_labels.tolist()).most_common(1)[0][0]
        focus = 1 if label == 2 else 0

        # ACC window: rescale label indices to the ACC sampling rate.
        acc_window = acc[start * acc_fs // label_fs: end * acc_fs // label_fs, :]
        acc_x = acc_window[:, 0]
        acc_y = acc_window[:, 1]
        acc_z = acc_window[:, 2]
        net_acc = np.sqrt(acc_x**2 + acc_y**2 + acc_z**2)

        acc_features = {
            'ACC_x_mean': np.mean(acc_x),
            'ACC_y_mean': np.mean(acc_y),
            'ACC_z_mean': np.mean(acc_z),
            'net_acc_mean': np.mean(net_acc),
            'net_acc_std': np.std(net_acc)
        }

        # BVP window: rescale label indices to the BVP sampling rate.
        bvp_window = bvp[start * bvp_fs // label_fs: end * bvp_fs // label_fs]
        hrv_df = bvp_to_hrv(bvp_window, bvp_fs)
        if hrv_df.empty:
            continue
        hrv_mean = hrv_df[['HR', 'IBI', 'HRV']].mean()

        all_window.append({
            **acc_features,
            'HR': hrv_mean['HR'],
            'IBI': hrv_mean['IBI'],
            'HRV': hrv_mean['HRV'],
            'label': focus,
            'subject': subject_id
        })

    df = pd.DataFrame(all_window)

    # to_csv does not create missing directories, so make sure it exists.
    os.makedirs(save_path, exist_ok=True)
    df.to_csv(f'{save_path}/S{subject_id}_apple_watch.csv', index=False)

    print(f'Subject {subject_id} processed with {len(df)} samples.')
    return df


In [15]:
# Process subjects 2-11 and 13-17 (IDs 1 and 12 are not in the list).
for subject_id in [*range(2, 12), *range(13, 18)]:
    apple_watch_format(subject_id)

Subject 2 processed with 74 samples.
Subject 3 processed with 76 samples.
Subject 4 processed with 75 samples.
Subject 5 processed with 76 samples.
Subject 6 processed with 76 samples.
Subject 7 processed with 76 samples.
Subject 8 processed with 77 samples.
Subject 9 processed with 75 samples.
Subject 10 processed with 78 samples.
Subject 11 processed with 79 samples.
Subject 13 processed with 78 samples.
Subject 14 processed with 78 samples.
Subject 15 processed with 78 samples.
Subject 16 processed with 76 samples.
Subject 17 processed with 78 samples.
