In [None]:
import os
import numpy as np
import wfdb
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split

# Number of samples in each fixed-length window cut from a record's signal.
segment_length = 1280
# Directory scanned for WFDB header (.hea) files by load_labeled_segments.
data_folder = "training/"

def get_segment_label(ann, start_idx, end_idx):
    """Return 1 if the window holds any annotation other than 'N', else 0.

    Args:
        ann: Annotation object exposing parallel ``sample`` (annotation
            positions) and ``symbol`` (annotation codes) sequences.
        start_idx: First sample index of the window (inclusive).
        end_idx: Sample index one past the end of the window (exclusive).

    Returns:
        1 when at least one annotation positioned in
        ``[start_idx, end_idx)`` has a symbol different from ``'N'``;
        0 otherwise (including when the window holds no annotation at all).
    """
    # NOTE(review): every symbol other than 'N' counts as abnormal; if the
    # annotation files also carry non-beat markers these would be flagged
    # too -- confirm that is intended.
    has_abnormal = any(
        start_idx <= position < end_idx and ann.symbol[idx] != 'N'
        for idx, position in enumerate(ann.sample)
    )
    return 1 if has_abnormal else 0

def load_labeled_segments(folder_path, segment_length=1280):
    """Load every WFDB record in a folder and cut it into labelled windows.

    For each ``.hea`` file in ``folder_path`` the matching record and its
    ``atr`` annotation are read, the first signal channel is split into
    consecutive non-overlapping windows of ``segment_length`` samples, and
    each window is labelled with ``get_segment_label``. A trailing partial
    window (shorter than ``segment_length``) is discarded.

    Args:
        folder_path: Directory containing the WFDB record files.
        segment_length: Number of samples per window.

    Returns:
        A ``(segments, labels)`` tuple of NumPy arrays holding one entry per
        window, in matching order.
    """
    segments = []
    labels = []
    # Sorted so the segment order (and any seeded split made from it) does
    # not depend on the arbitrary order returned by os.listdir.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(".hea"):
            continue
        record_path = os.path.join(folder_path, os.path.splitext(file)[0])
        try:
            record = wfdb.rdrecord(record_path)
            ann = wfdb.rdann(record_path, 'atr')
            signal = record.p_signal[:, 0]

            # "+ 1" keeps the final window when the signal length is an
            # exact multiple of segment_length; without it that last full
            # window was silently dropped.
            last_start = len(signal) - segment_length
            for start in range(0, last_start + 1, segment_length):
                end = start + segment_length
                segment = signal[start:end]
                label = get_segment_label(ann, start, end)
                segments.append(segment)
                labels.append(label)
        except Exception as e:
            # Deliberately best-effort: report the unreadable record and
            # carry on with the remaining ones.
            print(f"Could not load {record_path}: {e}")
    return np.array(segments), np.array(labels)

def preprocess_signal(segments):
    """Z-score normalise each segment and append a trailing channel axis.

    Args:
        segments: 2-D array-like of shape ``(n_segments, segment_length)``.

    Returns:
        Array of shape ``(n_segments, segment_length, 1)`` in which every
        segment has had its own mean subtracted and been divided by its own
        standard deviation. A constant segment (zero standard deviation) is
        returned as all zeros instead of NaN/inf.
    """
    segments = np.asarray(segments)
    mean = np.mean(segments, axis=1, keepdims=True)
    std = np.std(segments, axis=1, keepdims=True)
    # A flat-line segment has std == 0; dividing by it would yield NaN and
    # poison training, so divide by 1 there (the centred values are all 0).
    safe_std = np.where(std == 0, 1.0, std)
    normalized = (segments - mean) / safe_std
    return normalized[..., np.newaxis]

def build_cnn_model(input_shape):
    """Build and compile a small 1-D CNN for binary segment classification.

    The network is two Conv1D + MaxPooling1D stages, a Flatten, a 64-unit
    dense layer and a single sigmoid output unit. It is compiled with the
    Adam optimizer, binary cross-entropy loss and an accuracy metric.

    Args:
        input_shape: Shape of one input sample, excluding the batch axis
            (the caller passes ``X_train.shape[1:]``).

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer rather than passing
        # input_shape= to the first Conv1D, which Keras warns against.
        layers.Input(shape=input_shape),
        layers.Conv1D(32, 5, activation='relu'),
        layers.MaxPooling1D(2),
        layers.Conv1D(64, 5, activation='relu'),
        layers.MaxPooling1D(2),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


# Build the dataset: fixed-length windows, normalised per segment, with one
# binary label per window reshaped to a column vector.
segments, labels = load_labeled_segments(data_folder, segment_length)
X = preprocess_signal(segments)
y = labels.reshape(-1, 1)

# Seeded 80/20 split, stratified on the label so both parts keep the same
# class proportions.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Create the output directory before training so a missing or unwritable
# folder is detected up front instead of after the whole training run.
model_path = "models/ekg-anomaly-classifier.keras"
os.makedirs(os.path.dirname(model_path), exist_ok=True)

model = build_cnn_model(X_train.shape[1:])
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))


model.save(model_path)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
609/609 ━━━━━━━━━━━━━━━━━━━━ 29s 46ms/step - accuracy: 0.7250 - loss: 0.5236 - val_accuracy: 0.8728 - val_loss: 0.3162
Epoch 2/10
609/609 ━━━━━━━━━━━━━━━━━━━━ 22s 36ms/step - accuracy: 0.8883 - loss: 0.2869 - val_accuracy: 0.8913 - val_loss: 0.2781
Epoch 3/10
609/609 ━━━━━━━━━━━━━━━━━━━━ 22s 35ms/step - accuracy: 0.9216 - loss: 0.2010 - val_accuracy: 0.9084 - val_loss: 0.2438
Epoch 4/10
609/609 ━━━━━━━━━━━━━━━━━━━━ 22s 36ms/step - accuracy: 0.9505 - loss: 0.1285 - val_accuracy: 0.9088 - val_loss: 0.2625
Epoch 5/10
609/609 ━━━━━━━━━━━━━━━━━━━━ 34s 56ms/step - accuracy: 0.9712 - loss: 0.0776 - val_accuracy: 0.8950 - val_loss: 0.3292
Epoch 6/10
609/609 ━━━━━━━━━━━━━━━━━━━━ 29s 48ms/step - accuracy: 0.9846 - loss: 0.0450 - val_accuracy: 0.9057 - val_loss: 0.3504
Epoch 7/10
[1m6