In [1]:
import numpy as np
import pandas as pd
import os

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.utils import to_categorical

# Biosppy for signal processing
from biosppy.signals import ecg

# Constants
SAMPLING_RATE = 500  # Hz
ecg_folder = "../../../../Datasets/12-lead electrocardiogram database/ECGData"
diagnostics_file = "../../../../Datasets/12-lead electrocardiogram database/Diagnostics.xlsx"

# Label mapping
rhythm_mapping = {
    'AFIB': 'AFIB',
    'AF': 'AFIB',
    'SVT': 'GSVT',
    'AT': 'GSVT',
    'SAAWR': 'GSVT',
    'ST': 'GSVT',
    'AVNRT': 'GSVT',
    'AVRT': 'GSVT',
    'SB': 'SB',
    'SR': 'SR',
    'SA': 'SR'
}


def extract_comprehensive_features(ecg_signal):
    """
    Extract comprehensive features from entire ECG signal
    """
    # Process ECG signal
    try:
        ecg_processed = ecg.ecg(signal=ecg_signal, sampling_rate=SAMPLING_RATE, show=False)

        # R-peak related features
        rpeaks = ecg_processed['rpeaks']
        heart_rate = ecg_processed['heart_rate']

        # Time domain features
        features = [
            # Statistical features of raw signal
            np.mean(ecg_signal),
            np.std(ecg_signal),
            np.median(ecg_signal),
            np.min(ecg_signal),
            np.max(ecg_signal),

            # R-peak related features
            len(rpeaks),  # Number of R peaks
            np.mean(heart_rate) if len(heart_rate) > 0 else 0,
            np.std(heart_rate) if len(heart_rate) > 0 else 0,

            # R-R interval features
            np.mean(np.diff(rpeaks)) if len(rpeaks) > 1 else 0,
            np.std(np.diff(rpeaks)) if len(rpeaks) > 1 else 0,

            # Additional signal characteristics
            np.percentile(ecg_signal, 25),
            np.percentile(ecg_signal, 75),
        ]

        return features

    except Exception as e:
        print(f"Error processing signal: {e}")
        return [0] * 12


def prepare_dataset(ecg_folder, diagnostics_df):
    """
    Prepare dataset by processing whole ECG signals
    """
    signals = []
    signal_features = []
    signal_labels = []

    for _, row in diagnostics_df.iterrows():
        file_name = row['FileName']
        rhythm_label = row['Rhythm']

        # Skip if rhythm label is invalid
        if pd.isnull(rhythm_label) or rhythm_label not in rhythm_mapping.values():
            continue

        # Load ECG file (Lead 2)
        ecg_file = os.path.join(ecg_folder, f"{file_name}.csv")
        if not os.path.exists(ecg_file):
            continue

        # Load entire signal
        ecg_data = pd.read_csv(ecg_file, header=0).iloc[:, 1].values
        ecg_data = ecg_data.astype(float)

        # Extract features from whole signal
        features = extract_comprehensive_features(ecg_data)

        signals.append(ecg_data)
        signal_features.append(features)
        signal_labels.append(rhythm_label)

    return (np.array(signals),
            np.array(signal_features),
            np.array(signal_labels))


# Load diagnostics
diagnostics_df = pd.read_excel(diagnostics_file)
diagnostics_df['Rhythm'] = diagnostics_df['Rhythm'].map(rhythm_mapping)

# Prepare dataset
raw_signals, signal_features, signal_labels = prepare_dataset(ecg_folder, diagnostics_df)

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(signal_labels)
onehot_labels = to_categorical(encoded_labels)

# Scale features
feature_scaler = StandardScaler()
scaled_features = feature_scaler.fit_transform(signal_features)

# Pad signals for neural network input
max_length = max(len(signal) for signal in raw_signals)
padded_signals = np.array([np.pad(signal, (0, max_length - len(signal))) for signal in raw_signals])


# Create model with multiple architectures
def create_hybrid_model(input_shape, num_classes):
    model = Sequential([
        # 1D Convolutional layers
        Conv1D(64, 3, activation='relu', input_shape=input_shape),
        MaxPooling1D(2),
        Conv1D(32, 3, activation='relu'),
        MaxPooling1D(2),

        # LSTM layer
        LSTM(50, return_sequences=False),

        # Dense layers with features
        Dense(64, activation='relu', input_shape=(scaled_features.shape[1],)),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dropout(0.2),

        # Output layer
        Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model


# Stratified K-Fold Cross-Validation
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = []

for train_index, test_index in skf.split(padded_signals, signal_labels):
    # Split data
    X_train_signals = padded_signals[train_index]
    X_train_features = scaled_features[train_index]
    y_train = onehot_labels[train_index]

    X_test_signals = padded_signals[test_index]
    X_test_features = scaled_features[test_index]
    y_test = onehot_labels[test_index]

    # Create and train model
    model = create_hybrid_model(
        (X_train_signals.shape[1], 1),
        onehot_labels.shape[1]
    )

    # Training
    history = model.fit(
        [X_train_signals.reshape(-1, X_train_signals.shape[1], 1), X_train_features],
        y_train,
        epochs=50,
        batch_size=32,
        validation_split=0.2,
        verbose=1
    )

    # Evaluate
    y_pred = model.predict([
        X_test_signals.reshape(-1, X_test_signals.shape[1], 1),
        X_test_features
    ])
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_test_classes = np.argmax(y_test, axis=1)

    # Compute accuracy
    accuracy = np.mean(y_pred_classes == y_test_classes)
    cv_scores.append(accuracy)

    print("\nClassification Report:")
    print(classification_report(
        y_test_classes,
        y_pred_classes,
        target_names=label_encoder.classes_
    ))

# Print cross-validation results
print("\nCross-Validation Scores:", cv_scores)
print("Mean CV Accuracy:", np.mean(cv_scores))

2024-11-28 14:13:42.899166: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-28 14:13:42.963162: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-28 14:13:42.980816: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-28 14:13:43.106099: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regu

[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step


W0000 00:00:1732782355.362919   19010 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782355.363539   19010 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782355.364113   19010 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782355.364682   19010 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782355.365237   19010 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782355.365830   19010 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782355.366384   19010 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782355.366936   19010 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782355.367493   19010 gp


Classification Report:
              precision    recall  f1-score   support

        AFIB       0.71      0.89      0.79       445
        GSVT       0.86      0.67      0.75       462
          SB       0.94      0.91      0.93       778
          SR       0.80      0.82      0.81       445

    accuracy                           0.84      2130
   macro avg       0.83      0.82      0.82      2130
weighted avg       0.85      0.84      0.84      2130



W0000 00:00:1732782363.313446   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782363.314087   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782363.314721   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782363.315346   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782363.315983   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782363.316629   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782363.317337   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782363.317995   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782363.318700   19016 gp

[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step

Classification Report:
              precision    recall  f1-score   support

        AFIB       0.69      0.93      0.79       445
        GSVT       0.90      0.62      0.74       461
          SB       0.92      0.98      0.95       778
          SR       0.93      0.81      0.87       445

    accuracy                           0.86      2129
   macro avg       0.86      0.84      0.84      2129
weighted avg       0.87      0.86      0.85      2129



W0000 00:00:1732782754.892481   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782754.893072   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782754.893623   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782754.894184   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782754.894756   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782754.895310   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782754.895853   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782754.896413   19016 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1732782754.896973   19016 gp

[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step

Classification Report:
              precision    recall  f1-score   support

        AFIB       0.71      0.93      0.80       445
        GSVT       0.87      0.64      0.74       461
          SB       0.93      0.97      0.95       778
          SR       0.92      0.80      0.86       445

    accuracy                           0.86      2129
   macro avg       0.86      0.84      0.84      2129
weighted avg       0.87      0.86      0.85      2129



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step

Classification Report:
              precision    recall  f1-score   support

        AFIB       0.72      0.93      0.81       445
        GSVT       0.93      0.62      0.75       461
          SB       0.91      0.99      0.95       778
          SR       0.89      0.81      0.85       445

    accuracy                           0.86      2129
   macro avg       0.86      0.84      0.84      2129
weighted avg       0.87      0.86      0.86      2129



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step

Classification Report:
              precision    recall  f1-score   support

        AFIB       0.71      0.93      0.80       445
        GSVT       0.88      0.66      0.75       462
          SB       0.96      0.93      0.95       777
          SR       0.87      0.88      0.88       445

    accuracy                           0.86      2129
   macro avg       0.85      0.85      0.84      2129
weighted avg       0.87      0.86      0.86      2129


Cross-Validation Scores: [0.8370892018779342, 0.8562705495537811, 0.8558008454673556, 0.8604978863316111, 0.85955847815876]
Mean CV Accuracy: 0.8538433922778885
