## Main script to perform heart rate estimation from wearable PPG and IMU data

This script uses both PPG and accelerometer and performs the following steps:
1. Loading all metadata of PPG and IMU
2. Query on data availability + synchronization
3. Loading relevant segment sensor data using tsdf wrapper (start for loop over synchronized segment indices)
4. Synchronize the data (correct indices etc)
5. Data preprocessing
6. Feature extraction
7. Classification


## Architecture overview
The script implements the following steps:
 - Step 1: IMU and PPG preprocessing
 - Step 2: IMU and PPG feature extraction
 - Step 3: Signal quality assessment

In [1]:
# Automatically reload modules
%load_ext autoreload
%autoreload 2

import json
import os
from datetime import datetime
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd
import tsdf

import dbpd
from dbpd import DataColumns
from dbpd.constants import TimeUnit
from dbpd.ppg_preprocessing import tsdf_scan_meta, synchronization, extract_overlapping_segments


In [2]:
# Cell has the tag 'parameters'
# Root directory of the test data set, given relative to the working directory.
path_to_data =  '../../../tests/data'

# Directories holding the raw IMU and PPG recordings (read by tsdf_scan_meta).
input_path_imu = os.path.join(path_to_data, '1.sensor_data', 'imu')
input_path_ppg = os.path.join(path_to_data, '1.sensor_data', 'ppg')
# Directory intended for the preprocessed PPG output (not written to yet).
output_path = os.path.join(path_to_data, '2.preprocessed_data', 'ppg')

In [3]:
# Sensor identifiers; each one is the prefix of that sensor's TSDF file names
sensor_imu, sensor_ppg = 'IMU', 'PPG'

# TSDF metadata files
imu_meta_filename, ppg_meta_filename = (
    f'{sensor_imu}_meta.json',
    f'{sensor_ppg}_meta.json',
)
# Binary files holding the sample values
imu_values_filename, ppg_values_filename = (
    f'{sensor_imu}_samples.bin',
    f'{sensor_ppg}_samples.bin',
)
# Binary files holding the time stamps
imu_time_filename, ppg_time_filename = (
    f'{sensor_imu}_time.bin',
    f'{sensor_ppg}_time.bin',
)

# Unit labels assigned to the channels
acceleration_units = 'm/s^2'
no_units = 'none'

# Unit label of every IMU channel used downstream (the three accelerometer axes)
d_channels_units_imu = dict.fromkeys(
    (
        DataColumns.ACCELEROMETER_X,
        DataColumns.ACCELEROMETER_Y,
        DataColumns.ACCELEROMETER_Z,
    ),
    acceleration_units,
)

# Unit label of the single PPG channel
d_channels_units_ppg = dict.fromkeys((DataColumns.PPG,), no_units)

# Filter settings (currently disabled)
# sampling_frequency = 100
# lower_cutoff_frequency = 0.3
# filter_order = 4

# Sampling rates of the PPG and IMU sensors (presumably in Hz — TODO confirm)
FS_PPG, FS_IMU = 30, 100

# Time conversion factor (presumably milliseconds per second — TODO confirm)
UNIX_TICKS_MS = 1000.0

## 1. Loading all metadata of PPG and IMU

In [4]:
# Scan each sensor directory for TSDF metadata; the resulting entries are later
# indexed per segment and expose a 'tsdf_meta_fullpath' key.
meta_ppg, meta_imu = (
    tsdf_scan_meta(sensor_dir) for sensor_dir in (input_path_ppg, input_path_imu)
)

## 2. Query on data availability + synchronization

Calculate PPG and IMU segments that describe the same data

In [5]:
# Find the PPG and IMU segments that describe the same data. The two results are
# used in parallel: segment_ppg[n] indexes meta_ppg and segment_imu[n] indexes meta_imu.
# NOTE(review): the recorded run of this cell raised a ValueError because the start
# time '2021-06-27T16:52:20Z' did not match the format '%d-%b-%Y %H:%M:%S %Z';
# that parsing happens outside this notebook and is not resolved here.
segment_ppg, segment_imu = synchronization(meta_ppg, meta_imu)

ValueError: time data '2021-06-27T16:52:20Z' does not match format '%d-%b-%Y %H:%M:%S %Z'

## 3. Loading relevant segment sensor data

In [None]:
# TODO: loop over all synchronized segment pairs; for now only the first pair is used.
n = 0

# PPG: look up the metadata file of the selected segment and load it
ppg_segment_entry = meta_ppg[segment_ppg[n]]
meta_path_ppg = ppg_segment_entry['tsdf_meta_fullpath']
metadata_list_ppg = tsdf.load_metadata_from_path(meta_path_ppg)

# IMU: same lookup for the matching segment
imu_segment_entry = meta_imu[segment_imu[n]]
meta_path_imu = imu_segment_entry['tsdf_meta_fullpath']
metadata_list_imu = tsdf.load_metadata_from_path(meta_path_imu)

In [None]:
# Time and sample binaries of one sensor are combined with column-wise concatenation
concat_as_columns = tsdf.constants.ConcatenationType.columns

# PPG: pick the metadata of the time and sample binaries, then load both into one frame
metadata_time_ppg, metadata_samples_ppg = (
    metadata_list_ppg[ppg_time_filename],
    metadata_list_ppg[ppg_values_filename],
)
df_ppg = tsdf.load_dataframe_from_binaries(
    [metadata_time_ppg, metadata_samples_ppg],
    concat_as_columns,
)

# IMU: same procedure
metadata_time_imu, metadata_samples_imu = (
    metadata_list_imu[imu_time_filename],
    metadata_list_imu[imu_values_filename],
)
df_imu = tsdf.load_dataframe_from_binaries(
    [metadata_time_imu, metadata_samples_imu],
    concat_as_columns,
)

In [None]:
# Preview the first two rows of the PPG frame as loaded from the binaries
df_ppg.head(2)

Unnamed: 0,time,green
0,0.0,649511
1,9.959961,648214


In [None]:
# Keep only the accelerometer: drop the gyroscope columns (names starting with
# 'rotation_') and rename 'acceleration_*' to 'accelerometer_*'.
# The result is rebound to df_imu instead of mutating the loaded frame in place,
# which keeps the step chainable and avoids hidden in-place state changes.
cols_to_drop = df_imu.filter(regex='^rotation_').columns
df_imu = (
    df_imu
    .drop(columns=cols_to_drop)
    .rename(columns={f'acceleration_{a}': f'accelerometer_{a}' for a in ['x', 'y', 'z']})
)

df_imu.head(2)

Unnamed: 0,time,accelerometer_x,accelerometer_y,accelerometer_z
0,0.0,-1151,1200,-572
1,10.040039,-1120,1303,-532


In [None]:
def _start_time_to_timestamp(start_iso8601: str) -> float:
    """Convert a TSDF ``start_iso8601`` string into a Unix timestamp in seconds.

    ISO 8601 strings such as '2021-06-27T16:52:20Z' are tried first; a trailing
    'Z' is read as UTC. Strings in the older '%d-%b-%Y %H:%M:%S %Z' layout are
    still accepted and, exactly as before, are turned into a timestamp from the
    naive datetime that ``strptime`` returns.

    Raises:
        ValueError: if the string matches neither layout.
    """
    iso_text = start_iso8601
    if iso_text.endswith('Z'):
        # datetime.fromisoformat() only understands a literal 'Z' from Python 3.11 on
        iso_text = iso_text[:-1] + '+00:00'
    try:
        return datetime.fromisoformat(iso_text).timestamp()
    except ValueError:
        return datetime.strptime(start_iso8601, '%d-%b-%Y %H:%M:%S %Z').timestamp()


# Metadata of the time and sample binaries
# (TSDF should support extracting data based on the channel names)
time_idx_ppg = metadata_list_ppg[ppg_time_filename]
time_idx_imu = metadata_list_imu[imu_time_filename]
values_idx_ppg = metadata_list_ppg[ppg_values_filename]
values_idx_imu = metadata_list_imu[imu_values_filename]

start_time_ppg = _start_time_to_timestamp(time_idx_ppg.start_iso8601)
start_time_imu = _start_time_to_timestamp(time_idx_imu.start_iso8601)

# Convert the time columns from differences to absolute time. Each sensor is
# offset by its OWN recording start (these were previously swapped).
# NOTE: this overwrites the time columns, so the cell must not be run twice on
# the same frames without reloading them first.
df_imu[DataColumns.TIME] = dbpd.imu_preprocessing.transform_time_array(
    time_array=df_imu[DataColumns.TIME],
    scale_factor=1000,
    input_unit_type=TimeUnit.difference_ms,
    output_unit_type=TimeUnit.absolute_ms,
    start_time=start_time_imu)

df_ppg[DataColumns.TIME] = dbpd.imu_preprocessing.transform_time_array(
    time_array=df_ppg[DataColumns.TIME],
    scale_factor=1000,
    input_unit_type=TimeUnit.difference_ms,
    output_unit_type=TimeUnit.absolute_ms,
    start_time=start_time_ppg)

df_imu.tail(2)

Unnamed: 0,time,accelerometer_x,accelerometer_y,accelerometer_z
72945,1624806000.0,-502,-1075,-1689
72946,1624806000.0,-509,-1068,-1689


## 4. Synchronize the data on the correct indices

In [None]:
# Report the frame sizes before trimming
shapes_before = (df_ppg.shape, df_imu.shape)
print("Shape of the original data:", *shapes_before)

# Extract overlapping segments
overlapping_frames = extract_overlapping_segments(df_ppg, df_imu)
df_ppg_overlapping, df_imu_overlapping = overlapping_frames

# Report the frame sizes after trimming
shapes_after = (df_ppg_overlapping.shape, df_imu_overlapping.shape)
print("Shape of the overlapping segments:", *shapes_after)

Shape of the original data: (64775, 2) (72947, 4)
Shape of the overlapping segments: (64775, 2) (64361, 4)


## 5. Data preprocessing

In [13]:
# Resample the overlapping PPG and IMU segments at their own sampling rates.
# NOTE(review): the recorded run of this cell ended in
# "TypeError: unsupported operand type(s) for -: 'float' and 'type'" raised from
# inside resample_data; the changes below do not claim to resolve that error.
#
# The earlier call that passed `time_abs_array` / `values_unscaled` was removed:
# it used a different keyword set than the two calls below and its result was
# overwritten by the second IMU call anyway.
ppg_channels = list(d_channels_units_ppg.keys())
imu_channels = list(d_channels_units_imu.keys())

# PPG is scaled with its own scale factors (previously the IMU factors were passed).
# metadata_samples_ppg.scale_factors - the data specifies 1, but it is not an obligatory
# tsdf field, maybe it should be an optional parameter in `resample_data`
# NOTE(review): both calls use the IMU start time as reference, as before —
# confirm this is the intended common time origin.
df_ppg_proc = dbpd.imu_preprocessing.resample_data(
    df=df_ppg_overlapping,
    time_column=DataColumns.TIME,
    time_unit_type=TimeUnit.absolute_ms,
    unscaled_column_names=ppg_channels,
    scale_factors=metadata_samples_ppg.scale_factors,
    resampling_frequency=FS_PPG,
    start_time=start_time_imu,
)

# Only the accelerometer channels are resampled, so only as many scale factors as
# there are channels are passed. This assumes the accelerometer factors come first
# in the metadata, as the removed call did with [0:3] — TODO confirm.
df_imu_proc = dbpd.imu_preprocessing.resample_data(
    df=df_imu_overlapping,
    time_column=DataColumns.TIME,
    time_unit_type=TimeUnit.absolute_ms,
    unscaled_column_names=imu_channels,
    scale_factors=metadata_samples_imu.scale_factors[0:len(imu_channels)],
    resampling_frequency=FS_IMU,
    start_time=start_time_imu,
)

# Still TODO
# use the same sampling frequency for both PPG and IMU to test whether the resampling works (the shape should be the same)
# v_ppg_pre, tr_ppg_pre = preprocessing_ppg(v_ppg, FS_PPG)
# v_imu_pre, tr_imu_pre = preprocessing_imu(v_imu, FS_IMU)

# TODO: Save preprocessed data and compare with the expected results
# location = "../../tests/data/2.preprocessed_data/ppg"
# save_preprocessed_data(v_ppg_pre, tr_ppg_pre, v_imu_pre, tr_imu_pre, location)

Type of unscaled_column_names:  <class 'list'>


TypeError: unsupported operand type(s) for -: 'float' and 'type'

In [None]:
#  Feature extraction and Classification
# Assume feature extraction and classification functions are implemented
# features_ppg, features_imu = extract_features(v_ppg_pre, v_imu_pre)
# classification_results = classify_signals(features_ppg, features_imu)

# Save the classification results
# save_classification_data(classification_results, location)

# We need to implement:
# - synchronization: to find overlapping segments between PPG and IMU data based on metadata
# - extract_overlapping_segments: to calculate the correct indices for synchronized data segments
# - preprocessing_ppg, preprocessing_imu: functions to preprocess the raw PPG and IMU data
# - extract_features: to extract relevant features from the preprocessed data
# - classify_signals: to perform the classification on the extracted features
# - save_preprocessed_data, save_classification_data: functions to save data to files in a suitable format
