In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
from biosppy.signals import ecg
from biosppy.features import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten

# Random seed
# Seed Python's `random`, NumPy and TensorFlow together. Seeding only
# TensorFlow is not enough for reproducible Keras runs: Keras 3 derives the
# default seeds of initializers and dropout from Python's `random` module.
tf.keras.utils.set_random_seed(6950)

# Constants
SAMPLING_RATE = 500  # Hz

# Rhythm mapping: raw rhythm annotation -> merged class label.
# Eleven raw annotations collapse into four classes (AFIB, GSVT, SB, SR).
rhythm_mapping = {
    'AFIB': 'AFIB',
    'AF': 'AFIB',
    'SVT': 'GSVT',
    'AT': 'GSVT',
    'SAAWR': 'GSVT',
    'ST': 'GSVT',
    'AVNRT': 'GSVT',
    'AVRT': 'GSVT',
    'SB': 'SB',
    'SR': 'SR',
    'SA': 'SR'
}


# --- Step 1: Load and Preprocess ECG Data ---
def prepare_dataset(ecg_folder, diagnostics_df):
    """Load, filter and label one ECG signal per usable diagnostics row.

    A row is skipped when its ``Rhythm`` value is null, when it is not one
    of the merged class labels in ``rhythm_mapping``, or when the matching
    ``<FileName>.csv`` file does not exist in ``ecg_folder``.

    Args:
        ecg_folder: Directory containing one CSV file per recording.
        diagnostics_df: DataFrame with ``FileName`` and ``Rhythm`` columns;
            ``Rhythm`` is compared against the values of ``rhythm_mapping``,
            so it must already hold merged class labels.

    Returns:
        A tuple ``(signals, labels, metadata)``: an array of BioSPPy-filtered
        signals, an array of the corresponding rhythm labels, and a list of
        per-signal dicts carrying the ``sampling_rate`` used for filtering.
    """
    signals, signal_labels, ecg_metadata = [], [], []

    for _, row in diagnostics_df.iterrows():
        rhythm_label = row['Rhythm']

        # Null check first: unmapped rhythms become NaN after `.map(...)`.
        if pd.isnull(rhythm_label) or rhythm_label not in rhythm_mapping.values():
            continue

        ecg_file = os.path.join(ecg_folder, f"{row['FileName']}.csv")
        if not os.path.exists(ecg_file):
            continue

        # Read the CSV and take its second column (treated as lead II)
        lead_ii = pd.read_csv(ecg_file, header=0).iloc[:, 1].values

        # Clean the signal using BioSPPy
        cleaned = ecg.ecg(signal=lead_ii, sampling_rate=SAMPLING_RATE, show=False)
        signals.append(cleaned['filtered'])
        signal_labels.append(rhythm_label)
        # Record the same rate that was used for filtering, rather than a
        # duplicated literal, so downstream feature extraction cannot drift
        # out of sync with SAMPLING_RATE.
        ecg_metadata.append({'sampling_rate': SAMPLING_RATE})

    return np.array(signals), np.array(signal_labels), ecg_metadata


# --- Step 2: Extract Time-Domain Features ---
def extract_time_features(signals, metadata):
    """Compute BioSPPy time-domain features for each signal.

    Signals and metadata entries are paired positionally; each metadata
    entry must provide the ``"sampling_rate"`` for its signal.

    Returns:
        A NumPy array with one row of feature values per signal.
    """
    feature_rows = [
        # The ReturnTuple yields its feature values when converted to a list
        list(time.time(signal=sig, sampling_rate=meta["sampling_rate"]))
        for sig, meta in zip(signals, metadata)
    ]
    return np.array(feature_rows)


# --- Step 3: Build and Train Models ---
def build_mlp(input_shape, num_classes=4):
    """Build and compile a dense (MLP) classifier over a flat feature vector.

    Args:
        input_shape: Number of input features per sample (an int; it is
            wrapped into the 1-tuple shape ``(input_shape,)``).
        num_classes: Width of the softmax output layer. Defaults to 4,
            the previously hard-coded value.

    Returns:
        A compiled Sequential model using the Adam optimizer, categorical
        cross-entropy loss (so targets must be one-hot encoded) and the
        accuracy metric.
    """
    model = Sequential([
        # Explicit Input layer: Keras 3 warns when `input_shape=` is passed
        # to the first layer of a Sequential model.
        tf.keras.Input(shape=(input_shape,)),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


def build_cnn(input_shape, num_classes=4):
    """Build and compile a 1-D convolutional classifier.

    Args:
        input_shape: Per-sample input shape passed straight to the input
            layer, e.g. ``(steps, channels)``.
        num_classes: Width of the softmax output layer. Defaults to 4,
            the previously hard-coded value.

    Returns:
        A compiled Sequential model using the Adam optimizer, categorical
        cross-entropy loss (so targets must be one-hot encoded) and the
        accuracy metric.
    """
    model = Sequential([
        # Explicit Input layer: Keras 3 warns when `input_shape=` is passed
        # to the first layer of a Sequential model.
        tf.keras.Input(shape=input_shape),
        Conv1D(64, 3, activation='relu'),
        MaxPooling1D(2),
        Conv1D(128, 3, activation='relu'),
        MaxPooling1D(2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# --- Main Pipeline ---
# Load diagnostics and collapse raw rhythm annotations into merged classes.
# Annotations missing from rhythm_mapping become NaN and are skipped later.
diagnostics_file = "../../../../Datasets/12-lead electrocardiogram database/Diagnostics.xlsx"
ecg_folder = "../../../../Datasets/12-lead electrocardiogram database/ECGData"
diagnostics_df = pd.read_excel(diagnostics_file)
diagnostics_df['Rhythm'] = diagnostics_df['Rhythm'].map(rhythm_mapping)

# Prepare dataset
ecg_signals, ecg_labels, ecg_metadata = prepare_dataset(ecg_folder, diagnostics_df)
features = extract_time_features(ecg_signals, ecg_metadata)

# Encode labels as integer class ids
label_encoder = LabelEncoder()
ecg_labels_encoded = label_encoder.fit_transform(ecg_labels)

# Split dataset BEFORE scaling, so that the scaler never sees test samples
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, ecg_labels_encoded, test_size=0.2, random_state=42
)

# Standardize features: fit on the training partition only, then apply the
# same transform to the test partition (avoids train/test leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Keep the full scaled matrix available under its original name
features_scaled = scaler.transform(features)

# One-hot encode labels (the models use categorical cross-entropy)
y_train_oh = tf.keras.utils.to_categorical(y_train, num_classes=4)
y_test_oh = tf.keras.utils.to_categorical(y_test, num_classes=4)

# Train MLP
mlp = build_mlp(X_train.shape[1])
mlp.fit(X_train, y_train_oh, epochs=100, batch_size=32, verbose=1)
mlp_eval = mlp.evaluate(X_test, y_test_oh, verbose=0)
print(f"MLP Accuracy: {mlp_eval[1]}")
mlp_predictions = np.argmax(mlp.predict(X_test), axis=1)
print(classification_report(y_test, mlp_predictions, target_names=label_encoder.classes_))

# Train CNN
# Add a trailing channel axis: (samples, features) -> (samples, features, 1)
X_train_cnn = np.expand_dims(X_train, axis=2)
X_test_cnn = np.expand_dims(X_test, axis=2)
cnn = build_cnn(X_train_cnn.shape[1:])
cnn.fit(X_train_cnn, y_train_oh, epochs=100, batch_size=32, verbose=1)
cnn_eval = cnn.evaluate(X_test_cnn, y_test_oh, verbose=0)
print(f"CNN Accuracy: {cnn_eval[1]}")
cnn_predictions = np.argmax(cnn.predict(X_test_cnn), axis=1)
print(classification_report(y_test, cnn_predictions, target_names=label_encoder.classes_))

2024-12-01 23:15:16.491075: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-01 23:15:16.501849: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-01 23:15:16.505274: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-01 23:15:16.514259: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
