In [None]:
import os
import io
import sys
import psutil
import random
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torchinfo
from torchinfo import summary
from torch.utils.checkpoint import checkpoint
import torch.profiler
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import euclidean
from scipy.stats import pearsonr, spearmanr
from scipy.spatial.distance import euclidean

# Add the project root (the parent directory that contains both 'scripts' and 'notebooks') to sys.path
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(project_root)

# Import classes and functions
from scripts.m1_functions import *
from scripts.m1_classes import *

In [None]:
# Choose the compute device through the project helper made available by the
# star imports above. NOTE(review): presumably returns a torch.device or a
# device string accepted by `.to(...)` — confirm against the helper.
device = select_device()

In [None]:
# --- Data -------------------------------------------------------------------
# Filtered recording for one subject/activity, fetched via the project loader.
df_filtered = data_loader_filtered_single(subject=10, action='sit')
print(df_filtered.shape)

# Predictor channels, in the column order that is fed to the model.
input_columns = ['red ppg', 'ir ppg', 'green ppg']

# Independent MinMax scalers (target range [-1, 1]) for inputs and target so
# the ECG can be mapped back to its original scale on its own later.
scaler_input = MinMaxScaler(feature_range=(-1, 1))
scaler_target = MinMaxScaler(feature_range=(-1, 1))
x_normalized = scaler_input.fit_transform(df_filtered[input_columns])
y_normalized = scaler_target.fit_transform(df_filtered[['ecg']])

# Tensors: inputs are [samples, 3], target is [samples, 1].
x_data = torch.tensor(x_normalized, dtype=torch.float32)
y_data = torch.tensor(y_normalized, dtype=torch.float32)

# --- Sliding windows --------------------------------------------------------
sequence_length = 100      # samples per window
sequence_step_size = 10    # stride between consecutive window starts
num_sequences = len(df_filtered) - sequence_length + 1
subset = 1                 # fraction of the candidate window starts to use

# Both stacks share the same start offsets, so window k of x lines up with
# window k of y.
window_starts = range(0, int(num_sequences*subset), int(sequence_step_size))
x_sequences = torch.stack([x_data[s:s + sequence_length] for s in window_starts])  # [num_sequences, seq_length, 3]
y_sequences = torch.stack([y_data[s:s + sequence_length] for s in window_starts])  # [num_sequences, seq_length, 1]

# --- Chronological train/validation split -----------------------------------
train_ratio = 0.8
n_windows = x_sequences.size(0)
train_size = int(train_ratio * n_windows)  # number of training windows
val_size = n_windows - train_size          # number of validation windows

X_train, y_train = x_sequences[:train_size], y_sequences[:train_size]
X_val, y_val = x_sequences[train_size:], y_sequences[train_size:]

# Shape check
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")

# --- Model, loss, optimiser -------------------------------------------------
d_model = 48     # embedding dimension
input_dim = 3    # red, IR and green PPG channels
output_dim = 1   # one ECG value per time step
nhead = 6        # attention heads
num_layers = 4   # transformer layers
batch_size = 16  # windows per optimisation step

model = EncoderTransformerTimeSeries(
    input_dim=input_dim,
    output_dim=output_dim,
    d_model=d_model,
    nhead=nhead,
    num_layers=num_layers,
).to(device)

# Mean squared error between predicted and reference ECG.
loss_fn = nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 10  # passes over the training windows

# Release cached GPU memory before training starts.
torch.cuda.empty_cache()


### Training
# Manual mini-batch training loop: X_train / y_train are sliced in order (no
# shuffling) and each slice is moved to `device` right before it is used.
# After every epoch the model is evaluated on X_val / y_val the same way.
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0 # Sum of per-batch MSE, weighted by the batch's window count

    # Iterate through the training data in batches
    for i in range(0, len(X_train), batch_size):
        # Get the current batch (the last slice may hold fewer than batch_size windows)
        batch_X = X_train[i:i+batch_size].to(device)
        batch_y = y_train[i:i+batch_size].to(device)
        
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass through the model
        predictions = model(batch_X)

        # Calculate loss (MSE between predicted ECG and actual ECG)
        loss = loss_fn(predictions, batch_y)

        # Backward pass (compute gradients)
        loss.backward()

        # Update the weights
        optimizer.step()

        # nn.MSELoss() averages within the batch, so weight by the batch's
        # window count; dividing by len(X_train) below then gives the mean
        # over all windows even when the last batch is shorter.
        running_loss += loss.item() * batch_X.size(0)

    # Epoch-level training MSE, reported as RMSE (metric only; the optimiser saw MSE)
    avg_train_loss = running_loss / len(X_train)
    train_rmse = torch.sqrt(torch.tensor(avg_train_loss))
    
    # Validation metrics with batching
    model.eval()  # Set model to evaluation mode
    total_val_loss = 0

    with torch.no_grad():
        for j in range(0, len(X_val), batch_size):
            # Get the current validation batch
            batch_X_val = X_val[j:j + batch_size].to(device)
            batch_y_val = y_val[j:j + batch_size].to(device)

            # Forward pass
            val_predictions = model(batch_X_val)

            # Calculate loss for this batch
            val_loss = loss_fn(val_predictions, batch_y_val)

            # Same batch-size weighting as in the training loop above, for the
            # same reason (per-batch mean -> per-window mean).
            total_val_loss += val_loss.item() * batch_X_val.size(0)

    # Average validation loss over all windows, reported as RMSE
    avg_val_loss = total_val_loss / len(X_val) 
    val_rmse = torch.sqrt(torch.tensor(avg_val_loss))

    # Clear any residual memory before start of new epoch
    torch.cuda.empty_cache()

    # psutil reports host RAM usage here, not GPU memory
    print(f"Memory usage: {psutil.virtual_memory().percent}%")
    print(f"Epoch {epoch+1}/{num_epochs} | Train RMSE Loss: {train_rmse:.4f} | Val RMSE: {val_rmse:.4f}")


# Save the weights of the model as it is after the final epoch.
# NOTE(review): the file name reads 'enoder' (probably meant 'encoder') and the
# '../models' directory must already exist — confirm before renaming, since
# other code may load this exact path.
torch.save(model.state_dict(), '../models/enoder_m2_ecg_model_all_data.pth')


### Validation
# Run the trained model over the validation windows and collect predictions,
# reference ECG and the PPG inputs on the CPU for the scaler / NumPy steps below.
ecg_predictions = []
ecg_actuals = []
ppg = []

model.eval()  # Ensure the model is in evaluation mode
with torch.no_grad():
    for j in range(0, len(X_val), batch_size):
        # Current validation batch
        batch_X_val = X_val[j:j + batch_size].to(device)
        batch_y_val = y_val[j:j + batch_size].to(device)

        # Forward pass to get predictions
        batch_predictions = model(batch_X_val)

        # Keep everything on the CPU so it can be handed to NumPy / sklearn
        ecg_predictions.append(batch_predictions.cpu())
        ecg_actuals.append(batch_y_val.cpu())
        ppg.append(batch_X_val.cpu())

# Concatenate all batches along the window axis
ecg_predictions = torch.cat(ecg_predictions, dim=0)
ecg_actuals = torch.cat(ecg_actuals, dim=0)
ppg = torch.cat(ppg, dim=0)

# Drop the trailing singleton feature axis: [windows, seq_length, 1] -> [windows, seq_length]
ecg_predictions = ecg_predictions.squeeze(-1)
ecg_actuals = ecg_actuals.squeeze(-1)

# scaler_target was fitted on a single 'ecg' column. Feed it a one-column view
# and restore the window layout afterwards, instead of relying on the scaler
# silently broadcasting its single feature over seq_length columns.
ecg_predictions_original_scale = scaler_target.inverse_transform(
    ecg_predictions.numpy().reshape(-1, 1)
).reshape(tuple(ecg_predictions.shape))
ecg_actuals_original_scale = scaler_target.inverse_transform(
    ecg_actuals.numpy().reshape(-1, 1)
).reshape(tuple(ecg_actuals.shape))

# Reverse transform the PPG inputs as well. scaler_input was fitted on the
# columns in `input_columns`, so flatten to [windows * seq_length, channels]
# for the scaler and restore the window layout afterwards. `ppg` itself stays
# in normalised form.
ppg_original_scale = scaler_input.inverse_transform(
    ppg.numpy().reshape(-1, ppg.shape[-1])
).reshape(tuple(ppg.shape))


### Evaluation metrics
# All metrics are computed on the original (de-normalised) ECG scale, with every
# validation window concatenated into one long 1-D signal.
ecg_predictions_original_scale_flattened = ecg_predictions_original_scale.flatten()
ecg_actuals_original_scale_flattened = ecg_actuals_original_scale.flatten()

# Peak-to-peak range (max - min) of the reference signal, used to normalise RMSE / MAE
actual_range = np.ptp(ecg_actuals_original_scale)

# Euclidean Distance
euclidean_distance = euclidean(ecg_predictions_original_scale_flattened, ecg_actuals_original_scale_flattened)

# Dynamic Time Warping (DTW)
# The DTW settings get their own names so the training hyperparameter
# `batch_size` is not silently overwritten by an unrelated value.
downsampling_factor_dtw = 10
batch_size_dtw = 10
dtw_distance = compute_batched_dtw(
    ecg_predictions_original_scale_flattened,
    ecg_actuals_original_scale_flattened,
    batch_size_dtw,
    downsampling_factor_dtw,
)

# Pearson Correlation (the p-value is discarded)
pearson_corr, _ = pearsonr(ecg_predictions_original_scale_flattened, ecg_actuals_original_scale_flattened)

# Spearman Correlation (the p-value is discarded)
spearman_corr, _ = spearmanr(ecg_predictions_original_scale_flattened, ecg_actuals_original_scale_flattened)

# Mean Squared Error (MSE)
mse = np.mean((ecg_predictions_original_scale_flattened - ecg_actuals_original_scale_flattened) ** 2)

# Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)

# Mean Absolute Error (MAE)
mae = np.mean(np.abs(ecg_predictions_original_scale_flattened - ecg_actuals_original_scale_flattened))

# Normalized Root Mean Squared Error (NRMSE)
nrmse = rmse / actual_range

# Normalized Mean Absolute Error (NMAE)
nmae = mae / actual_range

# Print metrics
metrics = {
    "Euclidean Distance": euclidean_distance,
    "DTW Distance": dtw_distance,
    "Pearson Correlation": pearson_corr,
    "Spearman Correlation": spearman_corr,
    "MSE": mse,
    "RMSE": rmse,
    "MAE": mae,
    "NRMSE": nrmse,
    "NMAE": nmae,
}

for metric, value in metrics.items():
    print(f"{metric}: {value:.4f}")


In [None]:
### Creating graphs
# Pick one validation window at random and compare prediction with reference.
random_index = np.random.randint(0, len(ecg_predictions_original_scale))

# ECG traces of the selected window (original scale)
ecg_predictions_random = ecg_predictions_original_scale[random_index]  # Predicted ECG signal
ecg_actuals_random = ecg_actuals_original_scale[random_index]  # Actual ECG signal

# `ppg` holds the normalised validation inputs as a CPU tensor of shape
# [windows, seq_length, channels]; undo the input scaling for the selected
# window so `ppg_random` is defined before it is plotted.
ppg_random = scaler_input.inverse_transform(ppg[random_index].numpy())

# Plot the actual and predicted ECG together with the PPG channels
plt.figure(figsize=(10, 5))
plt.plot(ecg_actuals_random, label='Actual ECG')
plt.plot(ecg_predictions_random, label='Predicted ECG')
for channel_idx, channel_name in enumerate(input_columns):
    # One line per PPG channel so every trace gets its own legend entry
    plt.plot(ppg_random[:, channel_idx], label=channel_name, alpha=0.3)
plt.title(f"ECG Prediction vs Actual (Sequence {random_index})")
plt.xlabel('Time Step')
plt.ylabel('ECG Signal')
plt.legend()
plt.show()



In [3]:
# Load the data frames for the second experiment through the project loaders.
# NOTE: this rebinds `df_filtered`, which the first experiment used for a
# single-subject frame.
df_filtered, df_original = data_loader_filtered(), data_loader_original()

# Single-recording variants for the same subject/activity as in the first experiment.
df_filtered_single, df_original_single = (
    data_loader_filtered_single(subject=10, action='sit'),
    data_loader_original_single(subject=10, action='sit'),
)

In [None]:
# Display the frame returned by data_loader_filtered() for inspection.
df_filtered

In [None]:
# Selection criteria: which subjects and which activity to keep
selected_subjects = [1,2,3,4,5,6]  # Replace with desired subject IDs
selected_action = 'sit'    # Replace with the desired action

# Keep only rows of the chosen subjects performing the chosen action, and
# renumber the rows from 0 so positional and label indexing agree.
df_custom = df_filtered.loc[
    df_filtered['subject'].isin(selected_subjects) & (df_filtered['action'] == selected_action)
].reset_index(drop=True)

# Quick check of the result
print(df_custom.head())
print(f"New DataFrame shape: {df_custom.shape}")
df_custom

In [4]:
# Row window [start, stop) shown by the plotting cells below.
# NOTE(review): these are hard-coded positions; if the plotted frame has fewer
# than `stop` rows the plots will be truncated or empty.
start = 253000
stop = 255000

In [None]:
# Use the row index of df_custom as the x-axis. The index was reset when the
# frame was built, so it is a plain row counter.
time = df_custom.index

# Plot the ECG of the selected window. df_custom has not been passed through
# normalization_group_action yet, so the labels must not claim "Normalized".
plt.figure(figsize=(10, 6))
plt.plot(time[start:stop], df_custom['ecg'].iloc[start:stop], label="ECG Signal", color='blue')
plt.title("ECG Time-Series")
plt.xlabel("Time")
plt.ylabel("Value")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
plt.figure(figsize=(12, 8))

# Top panel: the three PPG channels of df_custom over the selected row window
plt.subplot(2, 1, 1)
for column, legend_label, line_color in (
    ('red ppg', " Red PPG", 'red'),
    ('green ppg', " Green PPG", 'green'),
    ('ir ppg', " IR PPG", 'purple'),
):
    plt.plot(time[start:stop], df_custom[column][start:stop], label=legend_label, color=line_color)
plt.title(" PPG Signals")
plt.xlabel("Time")
plt.ylabel(" Value")
plt.legend()

# Bottom panel: the ECG over the same window
plt.subplot(2, 1, 2)
plt.plot(time[start:stop], df_custom['ecg'][start:stop], label=" ECG", color='blue')
plt.title(" ECG Signal")
plt.xlabel("Time")
plt.ylabel(" Value")
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Normalise df_custom with the project helper. It returns the normalised frame
# plus `scalers`, which later cells index as
# scalers[(subject, action)]['input_scaler' | 'target_scaler'].
# NOTE(review): presumably one scaler pair is fitted per (subject, action)
# group — confirm against the helper's implementation.
df_custom_normalized, scalers = normalization_group_action(df_custom)
df_custom_normalized

In [None]:
plt.figure(figsize=(12, 8))

# Top panel: the three normalised PPG channels over the selected row window
plt.subplot(2, 1, 1)
for column, legend_label, line_color in (
    ('red ppg', "Normalized Red PPG", 'red'),
    ('green ppg', "Normalized Green PPG", 'green'),
    ('ir ppg', "Normalized IR PPG", 'purple'),
):
    plt.plot(time[start:stop], df_custom_normalized[column][start:stop], label=legend_label, color=line_color)
plt.title("Normalized PPG Signals")
plt.xlabel("Time")
plt.ylabel("Normalized Value")
plt.legend()

# Bottom panel: the normalised ECG over the same window
plt.subplot(2, 1, 2)
plt.plot(time[start:stop], df_custom_normalized['ecg'][start:stop], label="Normalized ECG", color='blue')
plt.title("Normalized ECG Signal")
plt.xlabel("Time")
plt.ylabel("Normalized Value")
plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Use the row index of the normalised frame as the x-axis.
time = df_custom_normalized.index

# Plot the normalised ECG over the same [start, stop) window as the other
# plots, reusing the shared window variables instead of repeating the literals.
plt.figure(figsize=(10, 6))
plt.plot(
    time[start:stop],
    df_custom_normalized['ecg'].iloc[start:stop],
    label="Normalized ECG Signal",
    color='blue',
)
plt.title("Normalized ECG Time-Series")
plt.xlabel("Time")
plt.ylabel("Normalized Value")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Fractions of the windows assigned to each split. `test_ratio` is
# informational only: the test split simply receives whatever is left.
train_ratio, val_ratio, test_ratio = 0.7, 0.2, 0.1

# Windowing parameters handed to the project helper `sequences`
sequence_length = 1000
sequence_step_size = 100
subset = 1

# Build the input / target windows from the normalised frame
x_data, y_data = sequences(df_custom_normalized, sequence_length, sequence_step_size, subset)

# Split sizes, counted in windows
total_samples = x_data.size(0)
train_size = int(train_ratio * total_samples)
val_size = int(val_ratio * total_samples)
test_size = total_samples - train_size - val_size  # remainder goes to the test set

# Ordered split: the first block is train, the next validation, the tail test
val_end = train_size + val_size
X_train, y_train = x_data[:train_size], y_data[:train_size]
X_val, y_val = x_data[train_size:val_end], y_data[train_size:val_end]
X_test, y_test = x_data[val_end:], y_data[val_end:]


# Shape check
print(f"x_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"x_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"x_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

In [None]:
# Display the shape of the training inputs.
X_train.shape

In [None]:
# Model initialization
d_model = 64  # Embedding dimension
input_dim = 3  # 3 PPG signals (red, green, IR)
output_dim = 1  # 1 ECG target per time step
nhead = 2  # Attention heads
num_layers = 2  # Number of transformer layers
batch_size = 8  # Batch size

seed = 42

# Wrap the pre-built window tensors in Datasets
train_dataset = PreprocessedDataset(X_train, y_train)
val_dataset = PreprocessedDataset(X_val, y_val)

# Seeded generator that drives the shuffling order of the training loader.
# It must live on the CPU: the DataLoader's random sampler draws its
# permutation with a CPU generator, so one created on a CUDA/MPS `device`
# makes iteration fail. The batches themselves are still moved to `device`
# inside the training loop.
gen = torch.Generator()
gen.manual_seed(seed)

# Training batches are shuffled reproducibly; validation keeps its order
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, generator=gen)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

# Initialize the Transformer model
model = EncoderTransformerTimeSeries(input_dim=input_dim, output_dim=output_dim, d_model=d_model, nhead=nhead, num_layers=num_layers).to(device) 

# One-window sample used only to trace the model for the summary below
X_train_sample = X_train[:1]
y_train_sample = y_train[:1]

# Call the torchinfo summary method
summary_txt = summary(model, input_data=X_train_sample, depth=1, device=device)
print(summary_txt)

In [None]:
# Walk once through the training loader to check that batches can be moved to
# `device`. Afterwards `c` is the number of batches and `batch_X` / `batch_y`
# hold the last batch.
c = 0
for c, (batch_X, batch_y) in enumerate(train_loader, start=1):
    batch_X = batch_X.to(device)
    batch_y = batch_y.to(device)

print(batch_X.shape)
print(c)

In [None]:
# Display the shape of the most recently loaded training batch.
batch_X.shape

In [None]:
# Loss function: Mean Squared Error for regression tasks
loss_fn = nn.MSELoss()

# Optimizer: AdamW (Adam with decoupled weight decay)
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001,betas=(0.9,0.999),eps=1e-08,weight_decay=0.01,amsgrad=False)

# Number of epochs to train
num_epochs = 10

# Halve the learning rate when the validation loss has not improved for 5 epochs
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, min_lr=1e-6)


# Clear any residual memory before training
torch.cuda.empty_cache()

# Per-epoch history. The two loss arrays store RMSE values; `best_models`
# records the epochs at which the validation loss improved.
training_loss = np.array([])
validation_loss = np.array([])
epochs = np.array([])
best_models = np.array([])

# Early stopping parameters. Note that `patience` equals `num_epochs` here, so
# early stopping cannot trigger before the loop ends on its own.
patience = 10
min_delta = 1e-4
best_val_loss = float('inf')
early_stop_counter = 0


### Training
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0  # Sum of per-batch MSE, weighted by batch size

    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass through the model
        predictions = model(batch_X)

        # Calculate loss (MSE between predicted ECG and actual ECG)
        loss = loss_fn(predictions, batch_y)

        # Backward pass (compute gradients)
        loss.backward()

        # Update the weights
        optimizer.step()

        # nn.MSELoss averages within the batch, so weight by the batch size to
        # get a correct per-window mean even when the last batch is shorter.
        running_loss += loss.item() * batch_X.size(0)

    # Epoch-level training MSE, stored and reported as RMSE
    avg_train_loss = running_loss / len(X_train)
    train_rmse = torch.sqrt(torch.tensor(avg_train_loss))
    training_loss = np.append(training_loss, train_rmse.cpu())

    print(f"Training of epoch {epoch+1} done, starting validation!")

    # Validation pass. Only the forward passes need no_grad; the bookkeeping
    # below runs outside that context.
    model.eval()  # Set model to evaluation mode
    total_val_loss = 0

    with torch.no_grad():
        for batch_X_val, batch_y_val in val_loader:
            batch_X_val = batch_X_val.to(device)
            batch_y_val = batch_y_val.to(device)

            # Forward pass
            val_predictions = model(batch_X_val)

            # Calculate loss for this batch
            val_loss = loss_fn(val_predictions, batch_y_val)

            # Accumulate total validation loss, weighted by batch size
            total_val_loss += val_loss.item() * batch_X_val.size(0)

    # Average validation loss over all windows, stored and reported as RMSE
    avg_val_loss = total_val_loss / len(X_val)
    val_rmse = torch.sqrt(torch.tensor(avg_val_loss))
    validation_loss = np.append(validation_loss, val_rmse.cpu())

    # Step the learning rate scheduler with the validation MSE
    scheduler.step(avg_val_loss)

    # Every epoch is recorded exactly once, whether or not it improved
    epochs = np.append(epochs, int(epoch+1))

    # Early stopping bookkeeping
    if avg_val_loss < best_val_loss - min_delta:
        best_val_loss = avg_val_loss
        early_stop_counter = 0

        # Checkpointing is currently disabled; re-enable the lines below once
        # the referenced folders and names are defined.
        #checkpoint_path = f"{checkpoints_folder}/epoch{epoch+1}.pth"
        #save_checkpoint(model, optimizer, epoch + 1, avg_val_loss, checkpoint_path)
        #torch.save(model.state_dict(), f"../models/{model_family}{model_name}_trained_model_epoch{epoch+1}.pth")

        # Remember the epoch of this best model
        best_models = np.append(best_models, int(epoch+1))

        # Nothing is written to disk, so do not claim that a checkpoint was saved
        print(f"New best validation loss at epoch {epoch + 1} (checkpoint saving is disabled).")
    else:
        early_stop_counter += 1

    if early_stop_counter >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        break

    #print(f"Memory usage: {psutil.virtual_memory().percent}%")
    print(f"Epoch {epoch + 1}/{num_epochs} | Train RMSE: {train_rmse:.4f} | Val RMSE: {val_rmse:.4f} | Current LR: {optimizer.param_groups[0]['lr']:.6f}")

    # Clear any residual memory before start of new epoch
    torch.cuda.empty_cache()

In [18]:
# `df_test` is another name for the same df_custom object (no copy is made).
# The evaluation cell below reads its 'subject' and 'action' columns.
df_test = df_custom

In [None]:
# Display df_custom for inspection.
df_custom

In [None]:
# Display df_test for inspection (same object as df_custom).
df_test

In [None]:
# Display the unfiltered multi-subject frame for comparison.
df_filtered

In [None]:
# Display the scalers returned by normalization_group_action.
scalers

In [None]:
### Validation (evaluation on the held-out test split)
# Storage for the aggregated predictions, reference values and inputs
ecg_predictions = []
ecg_actuals = []
ppg = []
subjects = []  # Subject of every test window, in loader order
actions = []   # Action of every test window, in loader order

# Loss function: Mean Squared Error for regression tasks
loss_fn = nn.MSELoss()

# Test loss
test_loss = np.array([])
running_test_loss = 0

# Wrap the test tensors in a Dataset / DataLoader (order preserved)
test_dataset = PreprocessedDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

# df_test has one row per signal sample, whereas the loader yields windows.
# Test window k is window (test_offset + k) of the full window list, and its
# first sample is taken to sit at row (test_offset + k) * sequence_step_size.
# NOTE(review): this assumes `sequences` windows the frame contiguously from
# row 0 with stride `sequence_step_size` (subset == 1) and that df_test is
# row-aligned with the normalised frame — confirm against the helper. Using
# the bare window position, as before, always read the first rows of df_test
# and therefore selected the wrong (subject, action) scalers.
test_offset = len(X_train) + len(X_val)
subject_by_row = df_test['subject'].values
action_by_row = df_test['action'].values

# Iterate over the test set in batches
model.eval()  # Ensure the model is in evaluation mode
with torch.no_grad():
    for batch_idx, (batch_X_test, batch_y_test) in enumerate(test_loader):
        # Move the batch data to the device (GPU or CPU)
        batch_X_test = batch_X_test.to(device)
        batch_y_test = batch_y_test.to(device)

        # Positions of this batch's windows inside the test split
        start_idx = batch_idx * batch_size
        end_idx = start_idx + len(batch_X_test)

        # Row in df_test at which each of these windows starts
        window_start_rows = (test_offset + np.arange(start_idx, end_idx)) * sequence_step_size

        # (subject, action) of each window, taken from its first sample
        batch_subjects = subject_by_row[window_start_rows]
        batch_actions = action_by_row[window_start_rows]

        # Forward pass to get predictions
        batch_predictions = model(batch_X_test)

        # Calculate loss for this batch
        loss = loss_fn(batch_predictions, batch_y_test)

        # Accumulate total test loss, weighted by batch size
        running_test_loss += loss.item() * batch_X_test.size(0)

        # Store predictions, actuals, subjects, and actions
        ecg_predictions.append(batch_predictions.cpu())  # Move to CPU for numpy/scaler operations
        ecg_actuals.append(batch_y_test.cpu())
        ppg.append(batch_X_test.cpu())
        subjects.extend(batch_subjects)
        actions.extend(batch_actions)

# Average the test loss over all windows and report it as RMSE
avg_test_loss = running_test_loss / len(X_test)
test_rmse = torch.sqrt(torch.tensor(avg_test_loss))
test_loss = np.append(test_loss, test_rmse.cpu())

# Merge the per-batch tensors into one tensor per quantity (window axis first)
ecg_predictions = torch.cat(ecg_predictions, dim=0)
ecg_actuals = torch.cat(ecg_actuals, dim=0)
ppg = torch.cat(ppg, dim=0)

# Per-window results on the original signal scale
ecg_predictions_original_scale = []
ecg_actuals_original_scale = []
ppg_original_scale = []

# Undo the normalisation window by window, because each window is paired with
# the scalers registered for its own (subject, action) key.
for i, prediction_window in enumerate(ecg_predictions):
    subject = subjects[i]
    action = actions[i]

    # Scalers registered for this window's (subject, action) pair
    group_scalers = scalers[(subject, action)]
    scaler_input = group_scalers['input_scaler']
    scaler_target = group_scalers['target_scaler']

    # The scalers work on 2-D input: one column for ECG, one column per PPG channel
    ecg_pred = prediction_window.squeeze(-1).numpy()  # Shape: [sequence_length]
    ecg_act = ecg_actuals[i].squeeze(-1).numpy()      # Shape: [sequence_length]
    ppg_seq = ppg[i].numpy()                          # Shape: [sequence_length, 3]

    ecg_predictions_original_scale.append(scaler_target.inverse_transform(ecg_pred[:, None]).ravel())
    ecg_actuals_original_scale.append(scaler_target.inverse_transform(ecg_act[:, None]).ravel())
    ppg_original_scale.append(scaler_input.inverse_transform(ppg_seq))

# Stack the per-window results into arrays
ecg_predictions_original_scale = np.array(ecg_predictions_original_scale)
ecg_actuals_original_scale = np.array(ecg_actuals_original_scale)
ppg_original_scale = np.array(ppg_original_scale)

# Split the PPG channels along the last axis, commented as red, IR, green
red_ppg, ir_ppg, green_ppg = (ppg_original_scale[:, :, channel] for channel in range(3))


### Normalized Evaluation metrics
# Metrics on the normalised scale, with all test windows concatenated into one
# long 1-D signal.
ecg_predictions_arr = np.array(ecg_predictions).flatten()
ecg_actuals_arr = np.array(ecg_actuals).flatten()

# Peak-to-peak range (max - min) of the reference signal. Computed on the
# NumPy array rather than on the torch tensor so the result (and NRMSE / NMAE
# derived from it) is a plain NumPy scalar.
actual_range_normalized = np.ptp(ecg_actuals_arr)

# Euclidean Distance
euclidean_distance_normalized = euclidean(ecg_predictions_arr, ecg_actuals_arr)

# Dynamic Time Warping (DTW)
downsampling_factor_dtw = 10
batch_size_dtw = 10 
dtw_distance_normalized = compute_batched_dtw(ecg_predictions_arr, ecg_actuals_arr, batch_size_dtw, downsampling_factor_dtw)

# Pearson Correlation (the p-value is discarded)
pearson_corr_normalized, _ = pearsonr(ecg_predictions_arr, ecg_actuals_arr)

# Spearman Correlation (the p-value is discarded)
spearman_corr_normalized, _ = spearmanr(ecg_predictions_arr, ecg_actuals_arr)

# Mean Squared Error (MSE)
mse_normalized = np.mean((ecg_predictions_arr - ecg_actuals_arr) ** 2)

# Mean Absolute Error (MAE)
mae_normalized = np.mean(np.abs(ecg_predictions_arr - ecg_actuals_arr))

# Root Mean Squared Error (RMSE)
rmse_normalized = np.sqrt(mse_normalized)

# Normalized Root Mean Squared Error (NRMSE)
nrmse_normalized = rmse_normalized / actual_range_normalized

# Normalized Mean Absolute Error (NMAE)
nmae_normalized = mae_normalized / actual_range_normalized

# Collect everything for reporting. The first four entries are arrays (loss
# history), the rest are scalars.
metrics_normalized = {
    "Training_loss": training_loss,
    "Validation_loss": validation_loss,
    "Test_loss": test_loss,
    "Epochs": epochs, 
    "Euclidean Distance": euclidean_distance_normalized,
    "DTW Distance": dtw_distance_normalized,
    "Pearson Correlation": pearson_corr_normalized,
    "Spearman Correlation": spearman_corr_normalized,
    "MSE": mse_normalized,
    "MAE": mae_normalized,
    "RMSE": rmse_normalized,
    "NRMSE": nrmse_normalized,
    "NMAE": nmae_normalized,
    # "Parameters": config['parameters'],  # Add config file entries
    # "General": config['general'],
    # "Output": config['output'],
}

# Values are printed without a float format spec because some entries are arrays
for metric, value in metrics_normalized.items():
    print(f"{metric}: {value}")

In [None]:
# Display the de-normalised test predictions (one row per window).
ecg_predictions_original_scale


In [None]:
### Plots
# Figure 1: training vs. validation loss per epoch, on a log scale.
# Both histories store RMSE values, so the axis is labelled accordingly.
plt.figure(figsize=(10, 6))
plt.plot(epochs, training_loss,  label='Training Loss')
plt.plot(epochs, validation_loss, label='Validation Loss')
plt.yscale('log')
plt.title(f"Training and Validation Loss")
plt.xlabel('Epochs')
plt.ylabel('RMSE Loss')
#plt.xticks(epochs)
plt.legend()

#plt.savefig(f"{results_folder}/{model_name}_loss_functions.png")

# Figure 2: the same histories plus the single test loss, repeated across all
# epochs so it shows up as a horizontal reference line.
test_losses = [test_loss] * len(epochs)

plt.figure(figsize=(10, 6))
plt.plot(epochs, training_loss, label="Training Loss", marker='o')
plt.plot(epochs, validation_loss, label="Validation Loss", marker='o')
plt.plot(epochs, test_losses, label="Test Loss", linestyle='--', color='red')

# Add labels, title, legend
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training, Validation, and Test Loss")
plt.legend()

#plt.savefig(f"{results_folder}/{model_name}_test_loss.png")

# Window to inspect. Fixed to window 1 for comparability; switch to the
# commented line for a random window.
#random_index = np.random.randint(0, len(ecg_predictions_original_scale))
random_index = 1
ppg_scaling_factor = 1  # Multiplier applied to the PPG traces before overlaying them

# Select the corresponding actual and predicted ECG signals
ecg_predictions_random = ecg_predictions_original_scale[random_index]  # Predicted ECG signal
ecg_actuals_random = ecg_actuals_original_scale[random_index]  # Actual ECG signal

# Opacity of the PPG traces so they do not hide the ECG curves
alpha = 0.3

# Figure 3: actual vs. predicted ECG for the selected window
plt.figure(figsize=(10, 5))
plt.plot(ecg_actuals_random, label='Actual ECG')
plt.plot(ecg_predictions_random, label='Predicted ECG')
plt.title(f"ECG Prediction vs Actual (Sequence {random_index})")
plt.xlabel('Time Step')
plt.ylabel('ECG Signal')
plt.legend()

#plt.savefig(f"{results_folder}/{model_name}_random_seq.png")

# Figure 4: the same comparison with the PPG input channels overlaid
plt.figure(figsize=(10, 5))
plt.plot(ecg_actuals_random, label='Actual ECG')
plt.plot(ecg_predictions_random, label='Predicted ECG')
plt.plot(ppg_scaling_factor*red_ppg[random_index], label="Red PPG", alpha=alpha)
plt.plot(ppg_scaling_factor*ir_ppg[random_index], label="IR PPG", alpha=alpha)
plt.plot(ppg_scaling_factor*green_ppg[random_index], label="Green PPG", alpha=alpha)
plt.title(f"ECG Prediction vs Actual (Sequence {random_index}) with PPG signals")
plt.xlabel('Time Step')
plt.ylabel('ECG Signal')
plt.legend()

#plt.savefig(f"{results_folder}/{model_name}_random_seq_ppg.png")

print("Evaluation finished!")