## Main script to perform heart rate estimation from wearable PPG

This script uses both PPG and accelerometer data and performs the following steps:
1. Loading all metadata of PPG and IMU
2. Query on data availability + synchronization
3. Loading relevant segment sensor data using tsdf wrapper (start of the for loop over synchronized segment indices)
4. Synchronize the data (correct indices, etc.)
5. Data preprocessing
6. Feature extraction
7. Classification


## Architecture overview
The script implements the following steps:
 - Step 1: IMU and PPG preprocessing
 - Step 2: IMU and PPG feature extraction
 - Step 3: Signal quality assessment

In [1]:
# Automatically reload modules
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path
import json
import os

import tsdf
import dbpd

# Import your tsdf handling library, assumed to be written by you
# from tsdf_handler import tsdf_scan_meta, load_tsdf_metadata_from_path, tsdf_values_idx, load_ndarray_from_binary, save_tsdf_data


In [2]:
# Module methods

def tsdf_scan_meta(tsdf_data_full_path):
    """
    Collect the key metadata fields of every TSDF metadata file under a directory.

    The directory is searched recursively for files named ``*_meta.json``.
    Files are visited in sorted path order so that the position of each entry
    in the returned list is reproducible between runs and platforms.

    Parameters:
    - tsdf_data_full_path: Full path to the directory containing TSDF metadata files.

    Returns:
    - tsdf: List of dictionaries, one per metadata file, each with the keys
      'tsdf_meta_fullpath', 'subject_id', 'start_iso8601' and 'end_iso8601'.
      The list is empty when no metadata file is found.

    Raises:
    - KeyError: If a metadata file lacks 'subject_id', 'start_iso8601' or 'end_iso8601'.
    - json.JSONDecodeError: If a metadata file does not contain valid JSON.
    """
    tsdf = []

    # rglob yields matches in filesystem order; sort so list indices are stable.
    meta_list = sorted(Path(tsdf_data_full_path).rglob('*_meta.json'))
    for meta_file in meta_list:
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(meta_file, 'r', encoding='utf-8') as file:
            json_obj = json.load(file)

        # The file is already closed here; only the parsed object is needed.
        tsdf.append({
            'tsdf_meta_fullpath': str(meta_file),
            'subject_id': json_obj['subject_id'],
            'start_iso8601': json_obj['start_iso8601'],
            'end_iso8601': json_obj['end_iso8601'],
        })

    return tsdf

In [3]:
# Constants
UNIX_TICKS_MS = 1000.0  # factor that scales datetime.timestamp() seconds to milliseconds
FS_PPG, FS_IMU = 30, 100  # sampling rates for PPG and IMU (presumably in Hz -- confirm)

# Paths
raw_data_root = '../../../tests/data/1.sensor_data/'
# One sub-folder per sensor type below the raw data root.
ppp_data_path_ppg, ppp_data_path_imu = (
    os.path.join(raw_data_root, sensor_dir) for sensor_dir in ('PPG', 'IMU')
)

In [None]:

# End-to-end pipeline cell: scan metadata, pick one synchronized PPG/IMU
# segment, load and align its samples, preprocess, extract features, classify
# and save the results. Several helpers called here are not defined in this
# notebook (see the list at the end of the cell).

# 1. Loading all metadata of PPG and IMU
meta_ppg = tsdf_scan_meta(ppp_data_path_ppg)
meta_imu = tsdf_scan_meta(ppp_data_path_imu)

# 2. Query on data availability + synchronization
segment_ppg, segment_imu = synchronization(meta_ppg, meta_imu)  # Define `synchronization`

# 3. Loading relevant segment sensor data
# Python sequences are 0-based, so the first synchronized segment is index 0.
n = 0  # Assuming we're only looking at the first synchronized segment
# tsdf_scan_meta returns plain dictionaries, so the path is read by key.
meta_path_ppg = meta_ppg[segment_ppg[n]]['tsdf_meta_fullpath']
meta_path_imu = meta_imu[segment_imu[n]]['tsdf_meta_fullpath']

metadata_list_ppg, data_list_ppg = load_tsdf_metadata_from_path(meta_path_ppg)
metadata_list_imu, data_list_imu = load_tsdf_metadata_from_path(meta_path_imu)

# Extract indices for time and samples
time_idx_ppg = tsdf_values_idx(metadata_list_ppg, 'time')
time_idx_imu = tsdf_values_idx(metadata_list_imu, 'time')
values_idx_ppg = tsdf_values_idx(metadata_list_ppg, 'samples')
values_idx_imu = tsdf_values_idx(metadata_list_imu, 'samples')

# Process time data
# NOTE(review): the field is named `start_iso8601` but is parsed with a
# 'dd-Mon-yyyy HH:MM:SS TZ' pattern -- confirm the format of the stored strings.
# NOTE(review): strptime with %Z returns a naive datetime, so .timestamp()
# below interprets it in the machine's local time zone. PPG and IMU are handled
# identically, so their relative offset is unaffected -- confirm whether
# absolute timestamps matter downstream.
datetime_ppg = datetime.strptime(metadata_list_ppg[time_idx_ppg]['start_iso8601'], '%d-%b-%Y %H:%M:%S %Z')
datetime_imu = datetime.strptime(metadata_list_imu[time_idx_imu]['start_iso8601'], '%d-%b-%Y %H:%M:%S %Z')

# Segment start as integer milliseconds since the Unix epoch
ts_ppg = int(datetime_ppg.timestamp() * UNIX_TICKS_MS)
ts_imu = int(datetime_imu.timestamp() * UNIX_TICKS_MS)

# Calculating continuous time vectors
# The cumulative sum treats the stored time values as increments
# (presumably in milliseconds, to match the start timestamp -- confirm).
t_ppg = np.cumsum(data_list_ppg[time_idx_ppg]) + ts_ppg
t_imu = np.cumsum(data_list_imu[time_idx_imu]) + ts_imu

# 4. Data synchronization on right indices
ppg_indices, imu_indices = extract_overlapping_segments(t_ppg, t_imu)  # Define this function

# Update data vectors based on synchronized indices
# Each index pair is used as a half-open [start, stop) slice.
v_ppg = data_list_ppg[values_idx_ppg][ppg_indices[0]:ppg_indices[1]]
v_imu = data_list_imu[values_idx_imu][imu_indices[0]:imu_indices[1]]

# 5. Data preprocessing
# Implement `preprocessing_ppg` and `preprocessing_imu` to suit your data format
v_ppg_pre, tr_ppg_pre = preprocessing_ppg(v_ppg, FS_PPG)
v_imu_pre, tr_imu_pre = preprocessing_imu(v_imu, FS_IMU)

# Save preprocessed data
# NOTE(review): this path climbs two directory levels while `raw_data_root`
# climbs three -- confirm which relative depth is intended.
location = "../../tests/data/2.preprocessed_data/ppg"
save_preprocessed_data(v_ppg_pre, tr_ppg_pre, v_imu_pre, tr_imu_pre, location)

# Feature extraction and Classification
# Assume feature extraction and classification functions are implemented
features_ppg, features_imu = extract_features(v_ppg_pre, v_imu_pre)
classification_results = classify_signals(features_ppg, features_imu)

# Save the classification results
save_classification_data(classification_results, location)

# You need to implement:
# - load_tsdf_metadata_from_path, tsdf_values_idx: TSDF loading helpers used above (their import from `tsdf_handler` is commented out)
# - synchronization: to find overlapping segments between PPG and IMU data based on metadata
# - extract_overlapping_segments: to calculate the correct indices for synchronized data segments
# - preprocessing_ppg, preprocessing_imu: functions to preprocess the raw PPG and IMU data
# - extract_features: to extract relevant features from the preprocessed data
# - classify_signals: to perform the classification on the extracted features
# - save_preprocessed_data, save_classification_data: functions to save data to files in a suitable format
