# Pre-process the IEEEPPG Dataset

In [40]:
import os
import sys
from pathlib import Path

# Make the parent directory importable so that the `utils` package used by
# later cells (`from utils.ts_load import ...`) can be resolved.
parent_dir = os.path.abspath(os.pardir)
sys.path.append(parent_dir)

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

In [37]:
from utils.ts_load import load_from_tsfile_to_dataframe
import numpy as np
# from scipy import signal
from scipy.stats import zscore
from sklearn.model_selection import train_test_split
import torch

## Load IEEEPPG dataset

In [11]:
# Load the training split: `x_train` holds the input series, `y_train` the
# regression targets.  The meaning of the second positional argument is
# defined by the loader in utils.ts_load (not shown here).
train_ts_path = '../Dataset/ieeeppg/IEEEPPG_TRAIN.ts'
x_train, y_train = load_from_tsfile_to_dataframe(train_ts_path, True)

1791it [00:02, 742.56it/s]


In [12]:
# Convert the nested dataframe (one pandas Series per cell) into a dense float
# array of shape (n_samples, n_dims, series_len).
#
# The shape is derived from the data instead of being hard-coded: np.empty
# returns uninitialised memory, so a hard-coded shape that is larger than the
# dataframe would silently leave garbage rows in the array, and one that is
# smaller would raise IndexError.
# Assumes every series has the same length as the first one; a mismatching
# series raises ValueError on assignment.
n_train, n_train_dims = x_train.shape
train_len = len(x_train.iloc[0, 0]) if n_train and n_train_dims else 0
x_train_numpy = np.empty((n_train, n_train_dims, train_len))

for i in range(n_train):
    for j in range(n_train_dims):
        x_train_numpy[i, j] = x_train.iloc[i, j].to_numpy()

In [13]:
# Load the test split: `x_test` holds the input series, `y_test` the
# regression targets.  The meaning of the second positional argument is
# defined by the loader in utils.ts_load (not shown here).
test_ts_path = '../Dataset/ieeeppg/IEEEPPG_TEST.ts'
x_test, y_test = load_from_tsfile_to_dataframe(test_ts_path, True)

1351it [00:01, 835.90it/s]


In [14]:
# Convert the nested dataframe (one pandas Series per cell) into a dense float
# array of shape (n_samples, n_dims, series_len).
#
# The shape is derived from the data instead of being hard-coded: np.empty
# returns uninitialised memory, so a hard-coded shape that is larger than the
# dataframe would silently leave garbage rows in the array, and one that is
# smaller would raise IndexError.
# Assumes every series has the same length as the first one; a mismatching
# series raises ValueError on assignment.
n_test, n_test_dims = x_test.shape
test_len = len(x_test.iloc[0, 0]) if n_test and n_test_dims else 0
x_test_numpy = np.empty((n_test, n_test_dims, test_len))

for i in range(n_test):
    for j in range(n_test_dims):
        x_test_numpy[i, j] = x_test.iloc[i, j].to_numpy()

In [15]:
# Visual sanity check: plot all five dimensions of one training sample and
# print its target value.
# NOTE(review): the sample is selected by label (`.loc`) while the target is
# indexed positionally; these agree only if x_train has a default 0..n-1
# index — confirm against the loader.
index = 1760

sample = x_train.loc[index].to_numpy()
print("sample label:", y_train[index])

traces = [go.Scatter(y=sample[dim], name=f'dim_{dim}') for dim in range(5)]
# Keep the figure as the cell's final expression so the notebook renders it.
go.Figure().add_traces(traces)

sample label: 163.7214


## Get only the PPG signal

In [29]:
# Keep only channel 0 (the PPG signal, per the section heading) while
# preserving the channel axis, then standardise each series independently
# along its time axis (axis 2).
ppg_channel = slice(0, 1)
time_axis = 2

x_train_ppg = zscore(x_train_numpy[:, ppg_channel, :], axis=time_axis)
x_test_ppg = zscore(x_test_numpy[:, ppg_channel, :], axis=time_axis)

In [30]:
# Linearly resample every series from 1000 points to 2880 points over the
# same [0, 8] span (presumably seconds: 2880 / 8 = 360, which matches the
# "ieeeppg-360hz" output directory used when saving).
span_end = 8
source_len = 1000
target_len = 2880

xp_time = np.linspace(0, span_end, source_len)  # original sample positions
x_time = np.linspace(0, span_end, target_len)   # resampled positions

# Only channel 0 is filled below; the inputs carry a single channel because
# they were produced by the 0:1 slice in the previous cell.
x_train_interp = np.empty(x_train_ppg.shape[:2] + (target_len,))
for row, trace in enumerate(x_train_ppg[:, 0]):
    x_train_interp[row, 0] = np.interp(x_time, xp_time, trace)

x_test_interp = np.empty(x_test_ppg.shape[:2] + (target_len,))
for row, trace in enumerate(x_test_ppg[:, 0]):
    x_test_interp[row, 0] = np.interp(x_time, xp_time, trace)

In [31]:
# Sanity check: display the shapes of the resampled test and train arrays.
x_test_interp.shape, x_train_interp.shape

((1328, 1, 2880), (1768, 1, 2880))

In [35]:
# Hold out 20% of the resampled training data as a validation set; the fixed
# random_state makes the split reproducible.
split_parts = train_test_split(
    x_train_interp,
    y_train,
    test_size=0.2,
    random_state=1234,
)
x_train_sample, x_val_sample, y_train_sample, y_val_sample = split_parts

In [36]:
# Sanity check: display the sample/label shapes of the train and validation splits.
x_train_sample.shape, y_train_sample.shape, x_val_sample.shape, y_val_sample.shape

((1414, 1, 2880), (1414,), (354, 1, 2880), (354,))

In [38]:
# Sanity check: the test samples and labels should have the same first dimension.
x_test_interp.shape, y_test.shape

((1328, 1, 2880), (1328,))

In [41]:
# Write each split to its own .pt file inside the output directory as a dict
# with 'samples' and 'labels' entries.
temp_dir = Path("ieeeppg-360hz")
temp_dir.mkdir(parents=True, exist_ok=True)

splits = {
    'train.pt': (x_train_sample, y_train_sample),
    'val.pt': (x_val_sample, y_val_sample),
    'test.pt': (x_test_interp, y_test),
}
for filename, (samples, labels) in splits.items():
    torch.save({'samples': samples, 'labels': labels}, temp_dir / filename)