In [2]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from scipy.signal import find_peaks, butter, filtfilt

# Constants
SAMPLING_RATE = 500  # Hz
ECG_FOLDER = "../../../Datasets/12-lead electrocardiogram database/ECGData"
DIAGNOSTICS_FILE = "../../../Datasets/12-lead electrocardiogram database/Diagnostics.xlsx"

# Rhythm Mapping
RHYTHM_MAPPING = {
    'AFIB': 'AFIB',
    'AF': 'AFIB',
    'SVT': 'GSVT',
    'AT': 'GSVT',
    'SAAWR': 'GSVT',
    'ST': 'GSVT',
    'AVNRT': 'GSVT',
    'AVRT': 'GSVT',
    'SB': 'SB',
    'SR': 'SR',
    'SA': 'SR'
}

# Hamilton-Tompkins QRS Detection
def preprocess_ecg(ecg_signal, sampling_rate=SAMPLING_RATE):
    """Preprocess ECG data: Bandpass filter and detect R-peaks."""
    # Bandpass filter (0.5 - 50 Hz)
    lowcut = 0.5
    highcut = 50.0
    nyquist = 0.5 * sampling_rate
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(1, [low, high], btype='band')
    filtered_ecg = filtfilt(b, a, ecg_signal)

    # Derivative
    derivative = np.diff(filtered_ecg)

    # Squaring
    squared = derivative ** 2

    # Moving window integration
    window_size = int(0.150 * sampling_rate)  # 150ms
    integrated = np.convolve(squared, np.ones(window_size) / window_size, mode='same')

    # Find R-peaks
    r_peaks, _ = find_peaks(integrated, distance=sampling_rate * 0.6, height=np.mean(integrated))
    return r_peaks, filtered_ecg

def extract_features(ecg_data, r_peaks):
    """Extract features from ECG data and R-peaks."""
    if len(r_peaks) > 1:
        rr_intervals = np.diff(r_peaks) / SAMPLING_RATE * 1000  # in ms
        rr_mean = np.mean(rr_intervals)
        rr_std = np.std(rr_intervals)
    else:
        rr_mean = 0
        rr_std = 0

    ventricular_rate = 60000 / rr_mean if rr_mean > 0 else 0
    qrs_count = len(r_peaks)
    p2p_amplitude = np.max(ecg_data) - np.min(ecg_data)
    signal_energy = np.sum(ecg_data**2)

    return {
        "RRMean": rr_mean,
        "RRStd": rr_std,
        "VentricularRate": ventricular_rate,
        "QRSCount": qrs_count,
        "P2PAmplitude": p2p_amplitude,
        "SignalEnergy": signal_energy
    }

# Load and preprocess data
diagnostics_df = pd.read_excel(DIAGNOSTICS_FILE)
diagnostics_df['MappedRhythm'] = diagnostics_df['Rhythm'].map(RHYTHM_MAPPING)

features = []
labels = []

for idx, row in diagnostics_df.iterrows():
    try:
        ecg_file = os.path.join(ECG_FOLDER, row['FileName'] + '.csv')  # Add .csv extension
        if os.path.exists(ecg_file):
            ecg_data = pd.read_csv(ecg_file, skiprows=1, usecols=[1]).to_numpy().flatten()
            r_peaks, filtered_ecg = preprocess_ecg(ecg_data)
            feature = extract_features(filtered_ecg, r_peaks)
            features.append(feature)
            labels.append(row['MappedRhythm'])
    except Exception as e:
        print(f"Error processing file {row['FileName']}: {e}")

# Convert to DataFrame
features_df = pd.DataFrame(features)
features_df['Label'] = labels

# Encode labels
label_encoder = LabelEncoder()
features_df['EncodedLabel'] = label_encoder.fit_transform(features_df['Label'])

# Split data
X = features_df.drop(['Label', 'EncodedLabel'], axis=1).values
y = features_df['EncodedLabel'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=6950, stratify=y)

# Normalize features for MLP
X_train_norm = (X_train - np.mean(X_train, axis=0)) / np.std(X_train, axis=0)
X_test_norm = (X_test - np.mean(X_train, axis=0)) / np.std(X_train, axis=0)

# Decision Tree Classifier
dt_classifier = DecisionTreeClassifier(random_state=6950)
dt_classifier.fit(X_train, y_train)
dt_preds = dt_classifier.predict(X_test)
print("Decision Tree Classification Report:")
print(classification_report(y_test, dt_preds, target_names=label_encoder.classes_, digits=5))

# MLP Classifier
mlp_model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')
])

mlp_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
mlp_model.fit(X_train_norm, y_train, epochs=200, batch_size=128, validation_split=0.2)

mlp_preds = np.argmax(mlp_model.predict(X_test_norm), axis=1)
print("MLP Classification Report:")
print(classification_report(y_test, mlp_preds, target_names=label_encoder.classes_, digits=5))

Decision Tree Classification Report:
              precision    recall  f1-score   support

        AFIB    0.54371   0.57303   0.55799       445
        GSVT    0.59570   0.59957   0.59763       462
          SB    0.96144   0.96144   0.96144       778
          SR    0.86124   0.80899   0.83430       445

    accuracy                        0.76995      2130
   macro avg    0.74052   0.73576   0.73784      2130
weighted avg    0.77390   0.76995   0.77168      2130

Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - accuracy: 0.5287 - loss: 1.2187 - val_accuracy: 0.7025 - val_loss: 0.7503
Epoch 2/200
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7173 - loss: 0.6862 - val_accuracy: 0.7477 - val_loss: 0.5681
Epoch 3/200
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7507 - loss: 0.5860 - val_accuracy: 0.7594 - val_loss: 0.5294
Epoch 4/200
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7709 - loss: 0.5401 - val_accuracy: 0.7576 - val_loss: 0.5129
Epoch 5/200
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7748 - loss: 0.5315 - val_accuracy: 0.7670 - val_loss: 0.5016
Epoch 6/200
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7758 - loss: 0.5179 - val_accuracy: 0.7752 - val_loss: 0.4933
Epoch 7/200
[1m54/54[0m [32m━━━━━━━━━━━━━━