In [1]:
import numpy as np
import pandas as pd
import os

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from tqdm import tqdm
import tensorflow as tf
# noinspection PyUnresolvedReferences
from tensorflow.keras.models import Sequential
# noinspection PyUnresolvedReferences
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten, BatchNormalization

# Constants
SAMPLING_RATE = 500  # Hz
WINDOW_SIZE = 2 * SAMPLING_RATE
STEP_SIZE = 1 * SAMPLING_RATE
ecg_folder = "../../../../Datasets/12-lead electrocardiogram database/ECGData"
diagnostics_file = "../../../../Datasets/12-lead electrocardiogram database/Diagnostics.xlsx"

# Label mapping
rhythm_mapping = {
    'AFIB': 'AFIB',
    'AF': 'AFIB',
    'SVT': 'GSVT',
    'AT': 'GSVT',
    'SAAWR': 'GSVT',
    'ST': 'GSVT',
    'AVNRT': 'GSVT',
    'AVRT': 'GSVT',
    'SB': 'SB',
    'SR': 'SR',
    'SA': 'SR'
}

# Load diagnostics data
diagnostics_df = pd.read_excel(diagnostics_file)
diagnostics_df['Rhythm'] = diagnostics_df['Rhythm'].map(rhythm_mapping)

2024-11-28 07:39:52.528758: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-28 07:39:52.590734: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-28 07:39:52.608775: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-28 07:39:52.720743: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
diagnostics_df

Unnamed: 0,FileName,Rhythm,Beat,PatientAge,Gender,VentricularRate,AtrialRate,QRSDuration,QTInterval,QTCorrected,RAxis,TAxis,QRSCount,QOnset,QOffset,TOffset
0,MUSE_20180113_171327_27000,AFIB,RBBB TWC,85,MALE,117,234,114,356,496,81,-27,19,208,265,386
1,MUSE_20180112_073319_29000,SB,TWC,59,FEMALE,52,52,92,432,401,76,42,8,215,261,431
2,MUSE_20180111_165520_97000,SR,NONE,20,FEMALE,67,67,82,382,403,88,20,11,224,265,415
3,MUSE_20180113_121940_44000,SB,NONE,66,MALE,53,53,96,456,427,34,3,9,219,267,447
4,MUSE_20180112_122850_57000,AFIB,STDD STTC,73,FEMALE,162,162,114,252,413,68,-40,26,228,285,354
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10641,MUSE_20181222_204306_99000,GSVT,NONE,80,FEMALE,196,73,168,284,513,258,244,32,177,261,319
10642,MUSE_20181222_204309_22000,GSVT,NONE,81,FEMALE,162,81,162,294,482,110,-75,27,173,254,320
10643,MUSE_20181222_204310_31000,GSVT,NONE,39,MALE,152,92,152,340,540,250,38,25,208,284,378
10644,MUSE_20181222_204312_58000,GSVT,NONE,76,MALE,175,178,128,310,529,98,-83,29,205,269,360


In [3]:
def preprocess_ecg_data(ecg_folder, diagnostics_df):
    segments = []
    segment_labels = []

    for _, row in tqdm(diagnostics_df.iterrows(), total=diagnostics_df.shape[0]):
        file_name = row['FileName']
        rhythm_label = row['Rhythm']

        # Skip if rhythm label is invalid
        if pd.isnull(rhythm_label) or rhythm_label not in rhythm_mapping.values():
            print("Invalid rhythm label", rhythm_label)
            continue

        # Load ECG file
        ecg_file = os.path.join(ecg_folder, f"{file_name}.csv")
        if not os.path.exists(ecg_file):
            print("File not found", ecg_file)
            continue

        ecg_data = pd.read_csv(ecg_file, header=0).iloc[:, 0:12].values  # Shape: (n_samples, 12)
        ecg_data = ecg_data.astype(float)

        for start in range(0, len(ecg_data) - WINDOW_SIZE + 1, STEP_SIZE):
            segment = ecg_data[start:start + WINDOW_SIZE, :]  # Shape: (WINDOW_SIZE, 12)
            flat_segment = segment.flatten()  # Need a flat one for MLP etc
            segments.append(flat_segment)
            segment_labels.append(rhythm_label)

    return np.array(segments), np.array(segment_labels)


# Preprocess data
segments, segment_labels = preprocess_ecg_data(ecg_folder, diagnostics_df)

100%|██████████| 10646/10646 [01:03<00:00, 167.26it/s]


In [4]:
print(segments.shape)

(95814, 12000)


In [5]:
# Encode labels
label_encoder = LabelEncoder()
segment_labels_encoded = label_encoder.fit_transform(segment_labels)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(segments, segment_labels_encoded, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(76651, 12000) (19163, 12000) (76651,) (19163,)


In [6]:
# MLP
mlp = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])
mlp.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
mlp.fit(X_train, y_train, epochs=100, batch_size=1024, validation_data=(X_test, y_test))
y_pred_mlp = np.argmax(mlp.predict(X_test), axis=1)
print("MLP Classification Report:")
print(classification_report(y_test, y_pred_mlp, target_names=label_encoder.classes_))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1732758063.901434  250551 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732758064.031253  250551 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732758064.035045  250551 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1732758064.03998

Epoch 1/100


I0000 00:00:1732758101.993084  251397 service.cc:146] XLA service 0x7cf73c00ccc0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1732758101.993916  251397 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3070, Compute Capability 8.6
2024-11-28 07:41:42.095739: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-11-28 07:41:42.291527: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907




[1m16/75[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 11ms/step - accuracy: 0.2699 - loss: 2.1959

I0000 00:00:1732758103.869407  251397 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.3549 - loss: 1.7770

2024-11-28 07:41:50.527959: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 919824000 exceeds 10% of free system memory.
2024-11-28 07:41:52.541396: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 919824000 exceeds 10% of free system memory.


[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 131ms/step - accuracy: 0.3558 - loss: 1.7732 - val_accuracy: 0.6350 - val_loss: 0.9726
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.5456 - loss: 1.1168 - val_accuracy: 0.7079 - val_loss: 0.7749
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.6132 - loss: 0.9535 - val_accuracy: 0.7197 - val_loss: 0.7312
Epoch 4/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.6423 - loss: 0.8833 - val_accuracy: 0.7275 - val_loss: 0.7048
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.6655 - loss: 0.8403 - val_accuracy: 0.7350 - val_loss: 0.6888
Epoch 6/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6784 - loss: 0.8055 - val_accuracy: 0.7388 - val_loss: 0.6739
Epoch 7/100
[1m75/75[0m [32m━━━━━━━

2024-11-28 07:43:41.730520: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 919824000 exceeds 10% of free system memory.


[1m599/599[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
MLP Classification Report:
              precision    recall  f1-score   support

        AFIB       0.64      0.68      0.66      3963
        GSVT       0.82      0.78      0.80      4051
          SB       0.91      0.93      0.92      7037
          SR       0.81      0.77      0.79      4112

    accuracy                           0.81     19163
   macro avg       0.80      0.79      0.79     19163
weighted avg       0.82      0.81      0.81     19163



In [7]:
# Decision Tree
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train.reshape(X_train.shape[0], -1), y_train)
y_pred_dt = dt.predict(X_test.reshape(X_test.shape[0], -1))
print("Decision Tree Classification Report:")
print(classification_report(y_test, y_pred_dt, target_names=label_encoder.classes_))
print("Depth of the tree", dt.get_depth())
print("Leaf nodes of the tree", dt.get_n_leaves())

Decision Tree Classification Report:
              precision    recall  f1-score   support

        AFIB       0.35      0.35      0.35      3963
        GSVT       0.53      0.52      0.52      4051
          SB       0.73      0.74      0.73      7037
          SR       0.47      0.48      0.47      4112

    accuracy                           0.55     19163
   macro avg       0.52      0.52      0.52     19163
weighted avg       0.55      0.55      0.55     19163

Depth of the tree 95
Leaf nodes of the tree 9614


In [8]:
# Standardize input shape for CNN
X_train_cnn = X_train[..., np.newaxis]
X_test_cnn = X_test[..., np.newaxis]

In [9]:
print(X_train_cnn.shape, X_test_cnn.shape)

(76651, 12000, 1) (19163, 12000, 1)


In [10]:
# CNN
cnn = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(128, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])
cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn.fit(X_train_cnn, y_train, epochs=50, batch_size=64, validation_data=(X_test_cnn, y_test))
y_pred_cnn = np.argmax(cnn.predict(X_test_cnn), axis=1)
print("CNN Classification Report:")
print(classification_report(y_test, y_pred_cnn, target_names=label_encoder.classes_))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1198/1198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.3493 - loss: 155.0322

2024-11-28 07:59:23.030230: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 206.40MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-11-28 07:59:23.030291: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 206.40MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-11-28 07:59:23.030306: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 206.40MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-11-28 07:59:23.030318: W external/local_tsl/tsl/framewor

[1m1198/1198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 56ms/step - accuracy: 0.3493 - loss: 154.9248 - val_accuracy: 0.3673 - val_loss: 1.2814
Epoch 2/50


2024-11-28 07:59:36.151961: W external/local_tsl/tsl/framework/bfc_allocator.cc:482] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.05GiB (rounded to 1130251520)requested by op 
2024-11-28 07:59:36.159893: I external/local_tsl/tsl/framework/bfc_allocator.cc:1039] BFCAllocator dump for GPU_0_bfc
2024-11-28 07:59:36.159941: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (256): 	Total Chunks: 4531, Chunks in use: 4531. 1.11MiB allocated for chunks. 1.11MiB in use in bin. 21.5KiB client-requested in use in bin.
2024-11-28 07:59:36.159954: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (512): 	Total Chunks: 21, Chunks in use: 21. 11.2KiB allocated for chunks. 11.2KiB in use in bin. 11.2KiB client-requested in use in bin.
2024-11-28 07:59:36.159961: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (1024): 	Total Chunks: 4, Chunks in use: 4. 4.2KiB allocated for chunks. 4.2KiB in use in bin. 4.0KiB client-requested in use in bin.
2024-1

ResourceExhaustedError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/runpy.py", line 86, in _run_code

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 701, in start

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 534, in dispatch_queue

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 523, in process_one

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 429, in dispatch_shell

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 767, in execute_request

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 429, in do_execute

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3075, in run_cell

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3130, in _run_cell

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code

  File "/tmp/ipykernel_250551/2600956276.py", line 13, in <module>

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 318, in fit

  File "/home/denuvo-drm/miniconda3/envs/CompositeADLRecognition/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

Out of memory while trying to allocate 1130251416 bytes.
	 [[{{node StatefulPartitionedCall}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_one_step_on_iterator_49546]