## **ECG Diagnosis Code**

This code is based on the code developed here: https://doi.org/10.1038/s41467-020-15432-4

## Current Model

**Define Libraries**

In [20]:
from tensorflow.keras.layers import (
    Input, Conv1D, MaxPooling1D, Dropout, BatchNormalization, Activation, Add, Flatten, Dense)
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import (ModelCheckpoint, TensorBoard, ReduceLROnPlateau,
                                        CSVLogger, EarlyStopping)
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import numpy as np
import h5py
import math
import pandas as pd
from tensorflow.keras.utils import Sequence
import numpy as np
import os

In [2]:
cwd = os.getcwd()

**Load the data**

In [5]:
path_to_hdf5 = cwd + '\\data\\ecg_tracings.hdf5'
dataset_name = 'tracings'
path_to_csv = cwd + '\\data\\gold_standard.csv'

labels = pd.read_csv(path_to_csv).values
f = h5py.File(path_to_hdf5, "r")
tracings = f[dataset_name].value
f.close()

  tracings = f[dataset_name].value


**Define the NN model**

In [6]:
class ResidualUnit(object):
    def __init__(self, n_samples_out, n_filters_out, kernel_initializer='he_normal',
                 dropout_keep_prob=0.8, kernel_size=17, preactivation=True,
                 postactivation_bn=False, activation_function='relu'):
        self.n_samples_out = n_samples_out
        self.n_filters_out = n_filters_out
        self.kernel_initializer = kernel_initializer
        self.dropout_rate = 1 - dropout_keep_prob
        self.kernel_size = kernel_size
        self.preactivation = preactivation
        self.postactivation_bn = postactivation_bn
        self.activation_function = activation_function

    def _skip_connection(self, y, downsample, n_filters_in):
        """Implement skip connection."""
        # Deal with downsampling
        if downsample > 1:
            y = MaxPooling1D(downsample, strides=downsample, padding='same')(y)
        elif downsample == 1:
            y = y
        else:
            raise ValueError("Number of samples should always decrease.")
        # Deal with n_filters dimension increase
        if n_filters_in != self.n_filters_out:
            # This is one of the two alternatives presented in ResNet paper
            # Other option is to just fill the matrix with zeros.
            y = Conv1D(self.n_filters_out, 1, padding='same',
                       use_bias=False, kernel_initializer=self.kernel_initializer)(y)
        return y

    def _batch_norm_plus_activation(self, x):
        if self.postactivation_bn:
            x = Activation(self.activation_function)(x)
            x = BatchNormalization(center=False, scale=False)(x)
        else:
            x = BatchNormalization()(x)
            x = Activation(self.activation_function)(x)
        return x

    def __call__(self, inputs):
        """Residual unit."""
        x, y = inputs
        n_samples_in = y.shape[1]
        downsample = n_samples_in // self.n_samples_out
        n_filters_in = y.shape[2]
        y = self._skip_connection(y, downsample, n_filters_in)
        # 1st layer
        x = Conv1D(self.n_filters_out, self.kernel_size, padding='same',
                   use_bias=False, kernel_initializer=self.kernel_initializer)(x)
        x = self._batch_norm_plus_activation(x)
        if self.dropout_rate > 0:
            x = Dropout(self.dropout_rate)(x)

        # 2nd layer
        x = Conv1D(self.n_filters_out, self.kernel_size, strides=downsample,
                   padding='same', use_bias=False,
                   kernel_initializer=self.kernel_initializer)(x)
        if self.preactivation:
            x = Add()([x, y])  # Sum skip connection and main connection
            y = x
            x = self._batch_norm_plus_activation(x)
            if self.dropout_rate > 0:
                x = Dropout(self.dropout_rate)(x)
        else:
            x = BatchNormalization()(x)
            x = Add()([x, y])  # Sum skip connection and main connection
            x = Activation(self.activation_function)(x)
            if self.dropout_rate > 0:
                x = Dropout(self.dropout_rate)(x)
            y = x
        return [x, y]


def get_model(n_classes, last_layer='sigmoid'):
    kernel_size = 16
    kernel_initializer = 'he_normal'
    signal = Input(shape=(4096, 12), dtype=np.float32, name='signal')
    x = signal
    x = Conv1D(64, kernel_size, padding='same', use_bias=False,
               kernel_initializer=kernel_initializer)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x, y = ResidualUnit(1024, 128, kernel_size=kernel_size,
                        kernel_initializer=kernel_initializer)([x, x])
    x, y = ResidualUnit(256, 196, kernel_size=kernel_size,
                        kernel_initializer=kernel_initializer)([x, y])
    x, y = ResidualUnit(64, 256, kernel_size=kernel_size,
                        kernel_initializer=kernel_initializer)([x, y])
    x, _ = ResidualUnit(16, 320, kernel_size=kernel_size,
                        kernel_initializer=kernel_initializer)([x, y])
    x = Flatten()(x)
    diagn = Dense(n_classes, activation=last_layer, kernel_initializer=kernel_initializer)(x)
    model = Model(signal, diagn)
    return model


**Load Parameters**

Loading the parameters for the model that were found in the paper. We will call this our first model

In [9]:
#There are 11 so need to figure out which one to use
path_to_model = cwd + '\\model\\model.hdf5'

loss = 'binary_crossentropy'
lr = 0.001
batch_size = 64
opt = Adam(lr)

model_1 = load_model(path_to_model, compile=False)
model_1.compile(loss=loss, optimizer=opt)

**Split Data (Make My own for my purposes)**

Don't have acess to all data (can only access 15%), so will break the data had into a train and validation set for better comparison with the simplified model

In [28]:
class ECGSequence(Sequence):
    @classmethod
    def get_seq(cls, tracings: np.ndarray, labels: np.ndarray=None, batch_size=8):
        train_seq = cls(tracings, labels, batch_size)
        return train_seq

    def __init__(self, tracings:np.ndarray, labels:np.ndarray=None, batch_size:int=8):
        if labels is None:
            self.y = None
        else:
            self.y = labels
        # Get tracings
        self.x = tracings
        self.batch_size = batch_size
        self.sample_count = tracings.shape[0]

    @property
    def n_classes(self):
        return self.y.shape[1]

    def __getitem__(self, idx):
        start = idx * self.batch_size
        end = start + self.batch_size
        if self.y is None:
            return np.array(self.x[start:end, :, :])
        else:
            return np.array(self.x[start:end, :, :]), np.array(self.y[start:end])

    def __len__(self):
        return math.ceil(self.sample_count / self.batch_size)

#Make a new sequence that just takes in data in an array and puts it into a sequence
#This way I can kfold on the arrays and make sequences each time

**Train Model**

We will also train the model with the data accessible for better comparison with the simplified model. We will call this the second model

In [27]:
def TrainModel2(train_seq, val_seq, loss, opt, callbacks):
    model_2 = get_model(train_seq.n_classes)
    model_2.compile(loss=loss, optimizer=opt)
    # Train neural network
    print('Training the second model')
    model_2.fit(train_seq,
        epochs=70,
        initial_epoch=0,  # If you are continuing a interrupted section change here
        callbacks=callbacks,
        validation_data=val_seq,
        verbose=1)
        
    return model_2

# Optimization settings
callbacks = [ReduceLROnPlateau(monitor='val_loss',
                                factor=0.1,
                                patience=7,
                                min_lr=lr / 100),
                EarlyStopping(monitor='val_loss', 
                            patience=9,  # Patience should be larger than the one in ReduceLROnPlateau
                            min_delta=0.00001)]
# Save the BEST and LAST model
'''callbacks += [ModelCheckpoint('./backup_model_last.hdf5'),
                ModelCheckpoint('./backup_model_best.hdf5', save_best_only=True)]'''


"callbacks += [ModelCheckpoint('./backup_model_last.hdf5'),\n                ModelCheckpoint('./backup_model_best.hdf5', save_best_only=True)]"

**Test Models**

Here will test both the old model and the newly trained model

## Simplified Model

Need to choose what model I want

## K-Fold

K-fold procedure for validation of the models

In [29]:
kf = KFold(n_splits=3, shuffle=True)

for train_index, test_index in kf.split(X = tracings[:,1,1], y = labels):

        X_train, X_test = tracings[train_index,:,:], tracings[test_index,:,:]
        y_train, y_test = labels[train_index], labels[test_index]

        train_seq = ECGSequence.get_seq(
                X_train, y_train, batch_size=64)

        test_seq = ECGSequence.get_seq(
                X_test, y_test, batch_size=64)

        #Train models
        model_2 = TrainModel2(train_seq, test_seq, loss, opt, callbacks)

        #Test models
        model_1_predict = model_1.predict(test_seq,  verbose=1)
        model_2_predict = model_2.predict(test_seq,  verbose=1)
        model_3_predict = 1

        #Find metric (use what they use)


Training the second model
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
1/9 [==>...........................] - ETA: 0s - loss: 0.0132

KeyboardInterrupt: 