In [4]:
import os
import datetime
import pandas as pd 
import numpy as np
import shutil
import posixpath
import wfdb
import pywt
import seaborn
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

In [5]:
def denoise(data):
    """Suppress noise in a 1-D signal by thresholding its wavelet coefficients.

    The signal is decomposed with a 9-level 'db5' wavelet transform, giving
    ``[cA9, cD9, cD8, ..., cD2, cD1]``. The two finest detail bands (cD2 and
    cD1) are zeroed, the remaining detail bands (cD9..cD3) are passed through
    ``pywt.threshold`` (library default mode), and the approximation band cA9
    is left untouched. The signal is then rebuilt from the modified bands.

    Args:
        data: 1-D sequence of samples accepted by ``pywt.wavedec``.

    Returns:
        The reconstructed signal returned by ``pywt.waverec``.
    """
    bands = pywt.wavedec(data=data, wavelet='db5', level=9)
    finest = bands[-1]  # cD1

    # The threshold is estimated from cD1, so compute it before cD1 is zeroed:
    # (median(|cD1|) / 0.6745) * sqrt(2 * ln(len(cD1)))
    sigma = np.median(np.abs(finest)) / 0.6745
    threshold = sigma * np.sqrt(2 * np.log(len(finest)))

    # Discard the two highest-frequency detail bands entirely.
    for band in bands[-2:]:
        band.fill(0)

    # Threshold every other detail band; index 0 (the approximation) is kept.
    bands[1:-2] = [pywt.threshold(band, threshold) for band in bands[1:-2]]

    return pywt.waverec(coeffs=bands, wavelet='db5')


In [6]:
# Read ECG data and corresponding labels,
# and perform wavelet denoising on the data
def getDataSet(number, X_data, Y_data,
               data_dir='/content/drive/MyDrive/Colab Notebooks/ECG_rec/ecg_data/'):
    """Read one ECG record, denoise it and append its labelled beat windows.

    The 'MLII' channel of the record is loaded with ``wfdb.rdrecord``,
    flattened and passed through ``denoise``. The 'atr' annotation file
    supplies a sample position and a symbol for each annotated beat. For every
    beat whose symbol is one of N, A, V, L, R, a 300-sample window (99 samples
    before the annotated position, 201 samples from it onward) is appended to
    ``X_data`` and the class index (N, A, V, L, R -> 0, 1, 2, 3, 4) is appended
    to ``Y_data``.

    The first 10 and last 5 annotations are skipped, and so is any beat whose
    window would extend past either end of the signal, so that every appended
    window has exactly 300 samples.

    Args:
        number: Record name as a string (e.g. ``'100'``).
        X_data: List that receives the beat windows (mutated in place).
        Y_data: List that receives the integer class labels (mutated in place).
        data_dir: Directory containing the record and annotation files.

    Returns:
        None. Results are delivered through ``X_data`` and ``Y_data``.
    """
    ecgClassSet = ['N', 'A', 'V', 'L', 'R']
    samples_before = 99
    samples_after = 201
    record_path = os.path.join(data_dir, number)

    # Reading ECG data records
    print("Being read" + number + " No. ECG data")
    record = wfdb.rdrecord(record_path, channel_names=['MLII'])
    data = record.p_signal.flatten()
    rdata = denoise(data=data)

    # Obtain the position of the R wave and the corresponding label
    annotation = wfdb.rdann(record_path, 'atr')
    Rlocation = annotation.sample
    Rclass = annotation.symbol

    # Skip the unstable stretches at the beginning and end of the record
    start = 10
    end = 5

    for i in range(start, len(Rclass) - end):
        symbol = Rclass[i]
        if symbol not in ecgClassSet:
            # Only the five N/A/V/L/R beat types are kept.
            continue

        lo = Rlocation[i] - samples_before
        hi = Rlocation[i] + samples_after
        if lo < 0 or hi > len(rdata):
            # A window clipped by the signal boundary would not be 300 samples
            # long (a negative start would even wrap around), which would
            # corrupt the later reshape to (-1, 300).
            continue

        X_data.append(rdata[lo:hi])
        Y_data.append(ecgClassSet.index(symbol))
    return


In [7]:
# Load the dataset and pre-process it
def loadData(ratio=None):
    """Load every listed record, shuffle the beats and split train/test.

    Each record in the built-in list is processed by ``getDataSet``; the
    collected windows and labels are stacked, shuffled and split.

    Args:
        ratio: Fraction of the samples assigned to the test split. When
            omitted, the module-level ``RATIO`` is used, so existing
            ``loadData()`` calls behave as before.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)``. The X arrays have shape
        ``(n, 300, 1)``. The Y arrays are 1-D; labels are stored as floats
        because they are stacked next to the signal values before shuffling.
    """
    if ratio is None:
        # Fall back to the notebook-level setting (looked up at call time).
        ratio = RATIO

    numberSet = ['100', '101', '103', '105', '106', '107', '108', '109', '111', '112', '113', '114', '115',
                 '116', '117', '119', '121', '122', '123', '124', '200', '201', '202', '203', '205', '208',
                 '210', '212', '213', '214', '215', '217', '219', '220', '221', '222', '223', '228', '230',
                 '231', '232', '233', '234']
    dataSet = []
    lableSet = []
    for n in numberSet:
        getDataSet(n, dataSet, lableSet)

    # Convert to numpy arrays and shuffle windows together with their labels:
    # the label is appended as column 300 so each row moves as a unit.
    dataSet = np.array(dataSet).reshape(-1, 300)
    lableSet = np.array(lableSet).reshape(-1, 1)
    train_ds = np.hstack((dataSet, lableSet))
    np.random.shuffle(train_ds)

    # Separate the signal columns (with a trailing channel axis) from the labels
    X = train_ds[:, :300].reshape(-1, 300, 1)
    Y = train_ds[:, 300]

    # Random split: the first `ratio` share of a permutation is the test set
    shuffle_index = np.random.permutation(len(X))
    test_length = int(ratio * len(shuffle_index))
    test_index = shuffle_index[:test_length]
    train_index = shuffle_index[test_length:]
    X_test, Y_test = X[test_index], Y[test_index]
    X_train, Y_train = X[train_index], Y[train_index]
    return X_train, Y_train, X_test, Y_test



In [8]:
# Building CNN models
def buildModel():
    """Assemble the uncompiled 1-D CNN for 300-sample, single-channel beats.

    Layer order: four Conv1D stages (4, 16, 32, 64 filters with kernel sizes
    21, 23, 25, 27; stride 1, 'SAME' padding, ReLU). The first two are each
    followed by max pooling and the third by average pooling (window 3,
    stride 2, 'SAME' padding); the fourth has no pooling. Then Flatten,
    Dense(128, ReLU), Dropout(0.2) and a 5-unit softmax output.

    Returns:
        A ``tf.keras.models.Sequential`` model; compiling is left to the caller.
    """
    layers = tf.keras.layers

    # (filters, kernel_size, pooling layer that follows; None = no pooling)
    conv_stages = [
        (4, 21, layers.MaxPool1D),
        (16, 23, layers.MaxPool1D),
        (32, 25, layers.AvgPool1D),
        (64, 27, None),
    ]

    stack = [layers.InputLayer(input_shape=(300, 1))]
    for n_filters, kernel_width, pool_cls in conv_stages:
        stack.append(layers.Conv1D(filters=n_filters, kernel_size=kernel_width, strides=1,
                                   padding='SAME', activation='relu'))
        if pool_cls is not None:
            # Pooling window of 3 with stride 2
            stack.append(pool_cls(pool_size=3, strides=2, padding='SAME'))

    # Classifier head: flatten, one hidden dense layer, dropout, 5-way softmax
    stack.append(layers.Flatten())
    stack.append(layers.Dense(128, activation='relu'))
    stack.append(layers.Dropout(rate=0.2))
    stack.append(layers.Dense(5, activation='softmax'))

    return tf.keras.models.Sequential(stack)


In [9]:
# Confusion Matrix
def plotHeatMap(Y_test, Y_pred):
    """Draw the confusion matrix of true vs. predicted labels as a heat map.

    The matrix holds raw counts (it is not normalised). Each cell is
    annotated with its count.

    Args:
        Y_test: True class labels.
        Y_pred: Predicted class labels.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    con_mat = confusion_matrix(Y_test, Y_pred)

    plt.figure(figsize=(8, 8))
    seaborn.heatmap(con_mat, annot=True, fmt='.20g', cmap='Blues')
    # Set the y-range explicitly from the matrix size rather than a fixed 5,
    # so the plot stays correct if fewer or more classes are present.
    # NOTE(review): presumably this works around clipped top/bottom rows in
    # some matplotlib versions - confirm whether it is still needed.
    plt.ylim(0, con_mat.shape[0])
    plt.xlabel('Predicted labels')
    plt.ylabel('True labels')
    plt.show()

In [None]:
# Output locations for TensorBoard logs and the saved model.
# NOTE(review): these are Windows-style paths while the data loader reads from
# a Colab drive path - confirm which environment this notebook targets.
project_path = "D:\\python\\mit-bih_ecg_recognition\\"
log_dir = project_path + "logs\\" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
model_path = project_path + "ecg_model.h5"

# Share of samples held out as the test set by loadData(); the same value is
# reused below as the validation share during training.
RATIO = 0.3

# X_train, Y_train are the training split and its labels
# X_test, Y_test are the held-out test split and its labels
X_train, Y_train, X_test, Y_test = loadData()

if os.path.exists(model_path):
    # Reuse a previously trained model instead of training again.
    model = tf.keras.models.load_model(filepath=model_path)
else:
    model = buildModel()
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
    model.fit(X_train, Y_train, epochs=30,
              batch_size=128,
              validation_split=RATIO,
              callbacks=[tensorboard_callback])
    model.save(filepath=model_path)

# Sequential.predict_classes() no longer exists in current TensorFlow releases;
# take the argmax over the softmax outputs to get the predicted class index.
Y_pred = np.argmax(model.predict(X_test), axis=-1)
plotHeatMap(Y_test, Y_pred)

Being read100 No. ECG data
Being read101 No. ECG data
Being read103 No. ECG data
Being read105 No. ECG data
Being read106 No. ECG data
Being read107 No. ECG data
Being read108 No. ECG data
Being read109 No. ECG data
Being read111 No. ECG data
Being read112 No. ECG data
Being read113 No. ECG data
Being read114 No. ECG data
Being read115 No. ECG data
Being read116 No. ECG data
Being read117 No. ECG data
Being read119 No. ECG data
Being read121 No. ECG data
Being read122 No. ECG data
Being read123 No. ECG data
Being read124 No. ECG data
Being read200 No. ECG data
Being read201 No. ECG data
Being read202 No. ECG data
Being read203 No. ECG data
Being read205 No. ECG data
Being read208 No. ECG data
Being read210 No. ECG data
Being read212 No. ECG data
Being read213 No. ECG data
Being read214 No. ECG data
Being read215 No. ECG data
Being read217 No. ECG data
Being read219 No. ECG data
Being read220 No. ECG data
Being read221 No. ECG data
Being read222 No. ECG data
Being read223 No. ECG data
B