# ECG Prediction using PPG signals

In [None]:
import os
import sys
import torch
from torch import nn
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Add the project root (the parent directory of 'scripts' and 'notebooks') to sys.path so that the `scripts` package can be imported
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(project_root)

# Import the function
from scripts.basic_functions import *
from scripts.m1 import *

In [None]:
# Pick the compute device: prefer CUDA, then Apple MPS, and fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

In [None]:
## Data Loader
def data_loader(subject, action, data_dir='../data/Finger/csv', fs=500,
                lowcut=0.4, highcut=10, order=4):
    '''
    Read one recording from CSV and band-pass filter its signal columns.

    The file is read from ``<data_dir>/s<subject>_<action>.csv``; the first
    row is used as the header and 'NA' / empty fields are treated as NaN.

    Parameters
    ----------
    subject : int or str
        Subject identifier used in the file name.
    action : str
        Activity label used in the file name (e.g. 'sit').
    data_dir : str, optional
        Directory that contains the CSV files.
    fs : float, optional
        Sampling frequency handed to ``bandpass_filter``.
    lowcut, highcut : float, optional
        Lower / upper cut-off handed to ``bandpass_filter``.
    order : int, optional
        Filter order handed to ``bandpass_filter``.

    Returns
    -------
    pandas.DataFrame
        Columns 'ecg', 'pleth_4', 'pleth_5' and 'pleth_6' holding the
        filtered signals.
    '''
    df_data = pd.read_csv(
        f"{data_dir}/s{subject}_{action}.csv",
        sep=',',               # field delimiter
        header=0,              # first row holds the column names
        na_values=['NA', ''],  # values that should be read as NaN
    )

    # NOTE(review): the default pass band was originally described as the
    # range "for PPG", but the same band is applied to the ECG column as
    # well - confirm that this is intended.
    signal_columns = ('ecg', 'pleth_4', 'pleth_5', 'pleth_6')

    df_filtered = pd.DataFrame()
    for column in signal_columns:
        df_filtered[column] = bandpass_filter(
            df_data[column], lowcut, highcut, fs, order=order
        )
    return df_filtered

In [None]:
# Load the filtered recording of subject 10 for the 'sit' activity and show it.
df_filtered = data_loader(action='sit', subject=10)
print(df_filtered)

In [None]:
# Scale predictors and target independently into the range [0, 1].
# NOTE(review): both scalers are fitted on the whole recording, i.e. also on
# the samples that later end up in the validation split.
scaler_input = MinMaxScaler(feature_range=(0, 1))
scaler_target = MinMaxScaler(feature_range=(0, 1))

# Fit and transform predictors (pleth_4, pleth_5, pleth_6)
input_columns = ['pleth_4', 'pleth_5', 'pleth_6']
x_normalized = scaler_input.fit_transform(df_filtered[input_columns])

# Fit and transform target (ecg)
y_normalized = scaler_target.fit_transform(df_filtered[['ecg']])

# Convert to PyTorch tensors
x_data = torch.tensor(x_normalized, dtype=torch.float32)  # Shape: [samples, 3]
y_data = torch.tensor(y_normalized, dtype=torch.float32)  # Shape: [samples, 1]

# Build sliding windows for sequence input.
# Consecutive windows are shifted by one sample; a larger step size
# (5, 10, 20?) is an open idea that has not been implemented.
sequence_length = 100
num_sequences = len(df_filtered) - sequence_length + 1
subset = 0.001  # fraction of all possible windows that is actually used

# Number of windows actually built. At least two are required, otherwise the
# 80/20 split below leaves the training set empty.
num_used = int(num_sequences * subset)
if num_used < 2:
    raise ValueError(
        f"Need at least 2 sequences for a train/validation split, got {num_used} "
        f"(recording length {len(df_filtered)}, sequence_length {sequence_length}, "
        f"subset {subset})."
    )

window_starts = range(num_used)
x_sequences = torch.stack([x_data[i:i + sequence_length] for i in window_starts])  # [num_used, seq_length, 3]
y_sequences = torch.stack([y_data[i:i + sequence_length] for i in window_starts])  # [num_used, seq_length, 1]

# Chronological split: the first 80 % of the windows train the model, the
# remaining windows are used for validation.
train_ratio = 0.8
train_size = int(train_ratio * x_sequences.size(0))  # Number of training samples
val_size = x_sequences.size(0) - train_size          # Number of validation samples

# Slicing of the ratio
X_train, X_val = x_sequences[:train_size], x_sequences[train_size:]
y_train, y_val = y_sequences[:train_size], y_sequences[train_size:]

# Print shapes for verification
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")

In [None]:
# Show the shape of the validation targets.
y_val.shape

In [None]:
# Model initialization
d_model = 32  # Embedding dimension
input_dim = 3  # 3 PPG signals (red, green, IR)
output_dim = 1  # 1 ECG target per time step
nhead = 4  # Attention heads
num_layers = 4  # Number of transformer layers
batch_size = 32  # Batch size

# NOTE(review): a `device` is selected earlier in the notebook, but neither the
# model nor the tensors are moved to it here, so it has no effect on training.
# Confirm that TransformerTimeSeries is device-agnostic before adding `.to(device)`.
model = TransformerTimeSeries(input_dim=input_dim, output_dim=output_dim, d_model=d_model, nhead=nhead, num_layers=num_layers)

# Sanity-check forward pass over all sequences. The result is not used for
# training, so no autograd graph is built for it.
with torch.no_grad():
    output = model(x_sequences)

# Loss function: Mean Squared Error for regression tasks
loss_fn = nn.MSELoss()

# Optimizer: Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 5  # Number of epochs to train

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set model to training mode

    # Sum of per-sequence losses accumulated over the epoch
    running_loss = 0.0

    # Iterate through the training data in batches
    for start in range(0, len(X_train), batch_size):
        batch_X = X_train[start:start + batch_size]
        batch_y = y_train[start:start + batch_size]

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass through the model
        predictions = model(batch_X)

        # Calculate loss (MSE between predicted ECG and actual ECG)
        loss = loss_fn(predictions, batch_y)

        # Backward pass (compute gradients)
        loss.backward()

        # Update the weights
        optimizer.step()

        # `loss` is the mean over the batch. Weight it by the batch size so
        # that dividing by len(X_train) below yields the true mean over all
        # training sequences, even when the last batch is smaller.
        running_loss += loss.item() * batch_X.size(0)

    # Average training loss (MSE) per sequence for this epoch
    avg_loss = running_loss / len(X_train)

    # Validation metrics: MSE over the whole validation set, reported as RMSE
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():
        val_predictions = model(X_val)
        val_loss = loss_fn(val_predictions, y_val).item()
        val_rmse = torch.sqrt(torch.tensor(val_loss))

    print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_loss:.4f} | Val RMSE: {val_rmse:.4f}")

In [None]:
# Persist the trained weights. The target directory is created first because
# torch.save does not create missing parent directories.
model_path = '../models/transformer_m1_ecg_model.pth'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(model.state_dict(), model_path)

In [None]:
# Map the normalised validation predictions back to the original ECG scale.
val_predictions = val_predictions.squeeze(-1)
print(val_predictions.size())
# scaler_target was fitted on a single column, so the values are handed over
# as one column and the window layout is restored afterwards.
predictions_original_scale = scaler_target.inverse_transform(
    val_predictions.numpy().reshape(-1, 1)
).reshape(tuple(val_predictions.shape))
print(predictions_original_scale.shape)

# Map the validation targets (the true ECG) back to the original scale as well.
# The result keeps the name `inputs_original_scale` because later cells use it.
y_val = y_val.squeeze(-1)
print(y_val.size())
inputs_original_scale = scaler_target.inverse_transform(
    y_val.numpy().reshape(-1, 1)
).reshape(tuple(y_val.shape))
print(inputs_original_scale.shape)

In [None]:
# Number of time steps in the first validation sequence.
len(inputs_original_scale[0])

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Draw one validation sequence at random and compare target and prediction.
random_index = np.random.randint(0, len(val_predictions))

actual_ecg_random = y_val[random_index].numpy()                # true ECG window
predicted_ecg_random = val_predictions[random_index].numpy()   # model output for that window

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(actual_ecg_random, label='Actual ECG')
ax.plot(predicted_ecg_random, label='Predicted ECG')
ax.set_title(f"ECG Prediction vs Actual (Sequence {random_index})")
ax.set_xlabel('Time Step')
ax.set_ylabel('ECG Signal')
ax.legend()
plt.show()


In [None]:
# How many randomly chosen validation sequences are overlaid in one figure
num_sequences = 5

fig, ax = plt.subplots(figsize=(10, 6))

for _ in range(num_sequences):
    # Indices are drawn independently, so the same sequence may appear twice.
    random_index = np.random.randint(0, len(val_predictions))

    actual_ecg_random = y_val[random_index].numpy()
    predicted_ecg_random = val_predictions[random_index].numpy()

    # Solid line: target, dashed line: prediction
    ax.plot(actual_ecg_random, label=f'Actual ECG {random_index}')
    ax.plot(predicted_ecg_random, label=f'Predicted ECG {random_index}', linestyle='dashed')

ax.set_title("ECG Predictions vs Actual for Random Sequences")
ax.set_xlabel('Time Step')
ax.set_ylabel('ECG Signal')
ax.legend()
plt.show()


In [None]:
# Mean over all validation sequences, computed per time step, for both the
# targets and the predictions.
average_actual_ecg = y_val.numpy().mean(axis=0)
average_predicted_ecg = val_predictions.numpy().mean(axis=0)

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(average_actual_ecg, label='Average Actual ECG')
ax.plot(average_predicted_ecg, label='Average Predicted ECG')
ax.set_title("Average ECG Prediction vs Actual")
ax.set_xlabel('Time Step')
ax.set_ylabel('ECG Signal')
ax.legend()
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error

# One MSE value per validation sequence (target vs. prediction).
mse_per_sequence = [
    mean_squared_error(target.numpy(), prediction.numpy())
    for target, prediction in zip(y_val, val_predictions)
]

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(mse_per_sequence, label='MSE per Sequence')
ax.set_title("MSE per Sequence in the Validation Set")
ax.set_xlabel('Sequence Index')
ax.set_ylabel('Mean Squared Error')
ax.legend()
plt.show()


In [None]:
# Running average: cumulative sum of the per-sequence MSE divided by the
# number of sequences seen so far (1, 2, 3, ...).
sequence_counts = np.arange(1, len(mse_per_sequence) + 1)
running_mse = np.cumsum(mse_per_sequence) / sequence_counts

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(running_mse, label='Running Average of MSE')
ax.set_title("Running Average of MSE Across Sequences")
ax.set_xlabel('Sequence Index')
ax.set_ylabel('Running Average MSE')
ax.legend()
plt.show()


In [None]:
import numpy as np


def _stitch_windows(windows):
    """Rebuild one continuous signal from sliding windows shifted by one sample.

    `windows` has shape (n_windows, window_length). Samples covered by several
    windows are averaged, so the result has n_windows + window_length - 1
    samples. A 1-D input is returned unchanged.
    """
    windows = np.asarray(windows, dtype=float)
    if windows.ndim == 1 or windows.size == 0:
        return windows.ravel()
    n_windows, window_length = windows.shape
    total = np.zeros(n_windows + window_length - 1)
    counts = np.zeros_like(total)
    for start, window in enumerate(windows):
        total[start:start + window_length] += window
        counts[start:start + window_length] += 1
    return total / counts


# Plot the continuous signal.
# The validation arrays hold overlapping windows (one row per window). Passing
# them to plt.plot directly would draw one curve per window position, so the
# windows are stitched back into a single signal first.
actual_continuous = _stitch_windows(inputs_original_scale)
predicted_continuous = _stitch_windows(predictions_original_scale)

plt.figure(figsize=(12, 6))
plt.plot(actual_continuous, label='Actual ECG', alpha=0.7, linestyle='-', linewidth=1.5)
plt.plot(predicted_continuous, label='Predicted ECG', alpha=0.7, linestyle='--', linewidth=1.5)
plt.title('ECG Signal: Actual vs Predicted (Continuous)')
plt.xlabel('Time Steps')
plt.ylabel('Amplitude')
plt.legend()
plt.grid(True)
plt.show()