In [1]:
import numpy as np
import sys, os
import wfdb
import pywt
import matplotlib.pyplot as plt
import pickle as pk
from collections import Counter
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score 
import keras
import tensorflow as tf
import keras.backend as K
from keras.models import Sequential
from keras.layers import *
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam


Using TensorFlow backend.


In [2]:
data_names = ['100', '101', '102', '103', '104', '105', '106', '107', 
              '108', '109', '111', '112', '113', '114', '115', '116', 
              '117', '118', '119', '121', '122', '123', '124', '200', 
              '201', '202', '203', '205', '207', '208', '209', '210', 
              '212', '213', '214', '215', '217', '219', '220', '221', 
              '222', '223', '228', '230', '231', '232', '233', '234']



In [3]:
#tamnoho de las ventanas derecha e izquierdas
widb = 99
wida = 160
n_samples = 650000 #<650000


In [4]:
labels = ['N', 'S', 'V', 'F', 'Q']
sub_labels = ['N', 'L', 'R', 'e', 'j', 'A', 'a', 'J', 'S', 'V', 'E', 'F', '/', 'f', 'Q']
sub = {'N':'N', 'L':'N', 'R':'N', 'e':'N', 'j':'N', 
       'A':'S', 'a':'S', 'J':'S', 'S':'S',
       'V':'V', 'E':'V',
       'F':'F',
       '/':'Q', 'f':'Q', 'Q':'Q'}
X = []
Y = []
for d in data_names:
    r=wfdb.rdrecord('./data/'+d)
    ann=wfdb.rdann('./data/'+d, 'atr', return_label_elements=['label_store', 'symbol'])
    if d!='114':
        sig = np.array(r.p_signal[:,0])
    else:
        sig = np.array(r.p_signal[:,1])
    sig_len = len(sig)
    sym = ann.symbol
    pos = ann.sample
    beat_len = len(sym)
    for i in range(beat_len):
        if sym[i] in labels and pos[i]-widb>=0 and pos[i]+wida+1<=sig_len:
            a = sig[pos[i]-widb:pos[i]+wida+1]
            if len(a) != 260:
                print("Length error")
                continue
            X.append(a)
            Y.append(labels.index(sub[sym[i]]))


In [5]:
X = np.array(X)
Y = np.array(Y)
print(X.shape)
print(Y.shape)
count = Counter(Y)
print(count)


(82986, 260)
(82986,)
Counter({0: 75020, 2: 7129, 3: 802, 4: 33, 1: 2})


In [6]:
seed_ = 200 #se puede cambiar para variar los datos y reproducir los resultados
data_len = len(X)
np.random.seed(seed_)
idx = list(range(data_len))


In [7]:
np.random.shuffle(idx)

train_len = int(data_len*0.6) # 60%
valid_len = int(data_len*0.2) # 20%
test_len = data_len-train_len-valid_len # 20%


In [8]:

X_train = X[idx][:train_len]
X_valid = X[idx][train_len:train_len+valid_len]
X_test = X[idx][train_len+valid_len:]
Y_train = Y[idx][:train_len]
Y_valid = Y[idx][train_len:train_len+valid_len]
Y_test = Y[idx][train_len+valid_len:]



print(X_train.shape)
print(X_valid.shape)
print(X_test.shape)
print(Counter(Y_train))
print(Counter(Y_valid))
print(Counter(Y_test))

(49791, 260)
(16597, 260)
(16598, 260)
Counter({0: 44982, 2: 4298, 3: 490, 4: 20, 1: 1})
Counter({0: 15027, 2: 1395, 3: 167, 4: 8})
Counter({0: 15011, 2: 1436, 3: 145, 4: 5, 1: 1})


In [9]:
X_train = np.expand_dims(X_train, axis=-1)
X_valid = np.expand_dims(X_valid, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)


print(X_train.shape)
print(X_valid.shape)
print(X_test.shape)

(49791, 260, 1)
(16597, 260, 1)
(16598, 260, 1)


In [10]:
f_size = X_train.shape[1]
class_num = 5

lr = 0.01
batch_size=32

print(Y_train)
Y_train = keras.utils.to_categorical(Y_train, num_classes=class_num)
print(Y_train.shape)

[0 0 2 ... 0 0 0]
(49791, 5)


In [11]:
def make_model(model_type):
    model = Sequential()
    if model_type == '1D':
        model.add(Conv1D(10, 3, activation='relu', input_shape=(f_size,1)))
        model.add(MaxPooling1D(2))
        model.add(Conv1D(10, 3, activation='relu'))
        model.add(MaxPooling1D(2))
        model.add(Flatten())
        model.add(Dense(100, activation='relu'))
    elif model_type == '1D-large':
        model.add(Conv1D(50, 13, activation='relu', input_shape=(f_size,1)))
        model.add(MaxPooling1D(2))
        model.add(Conv1D(50, 13, activation='relu'))
        model.add(MaxPooling1D(2))
        model.add(Flatten())
        model.add(Dense(100, activation='relu'))
    elif model_type == 'LSTM':
        model.add(LSTM(64, return_sequences=True, dropout=0.1, input_shape=(f_size, 1)))
        model.add(LSTM(32, return_sequences=True, dropout=0.1))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.2))
    elif model_type == 'BiLSTM':
        model.add(Bidirectional(LSTM(64, return_sequences=True, dropout=0.1), merge_mode='sum', input_shape=(f_size, 1)))
        model.add(Bidirectional(LSTM(32, return_sequences=True, dropout=0.1), merge_mode='sum'))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.2))
    elif model_type == 'ConvLSTM':
        model = Sequential()
        model.add(Conv1D(10, 7, activation='relu', input_shape=(260,1)))
        model.add(MaxPooling1D(3))
        model.add(Conv1D(10 ,7, activation='relu'))
        model.add(MaxPooling1D(3))
        model.add(LSTM(32, return_sequences=True,recurrent_dropout=0.25))
        model.add(Flatten())
        model.add(Dense(32, activation='relu'))
        model.add(Dropout(0.2))


    model.add(Dense(class_num, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=lr), metrics=['accuracy'])
    return model
 

In [14]:
model = make_model('LSTM')
#model = make_model_cnn_lstm()
best_SE = 0
best_ACC = 0
patience = 30
pcnt = 0

best_model = make_model('LSTM')

bin_label = lambda x: min(1,x)
#model.build()
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 260, 64)           16896     
_________________________________________________________________
lstm_2 (LSTM)                (None, 260, 32)           12416     
_________________________________________________________________
flatten_3 (Flatten)          (None, 8320)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               1065088   
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 5)                 645       
Total params: 1,095,045
Trainable params: 1,095,045
Non-trainable params: 0
____________________________________________

In [None]:
save = 'best_train'
for e in range(1, 300+1):

    model.fit(X_train, Y_train, batch_size=batch_size, epochs=1, verbose=0)

    y_pred = model.predict(X_valid)
    y_pred = np.argmax(y_pred, axis=1)
    acc = np.sum(y_pred==Y_valid)/len(Y_valid)

    y_true = list(map(bin_label, Y_valid))
    y_pred = list(map(bin_label, y_pred))
    auc = roc_auc_score(y_true, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    SE = tp/(tp+fn)
    SP = tn/(fp+tn)

    if SE+acc > best_SE+best_ACC:
        best_SE, best_ACC = SE, acc
        best_model.set_weights(model.get_weights())
        pcnt = 0
    else:
        pcnt += 1
    
    print("Epoch: %d | SE: %.4f | Best SE: %.4f | ACC: %.4f | Best ACC: %.4f | AUC: %.4f | SP: %.4f" %(e, SE, best_SE, acc, best_ACC, auc, SP))
    if pcnt==patience:
        y_pred = best_model.predict(X_test)
        y_pred = np.argmax(y_pred, axis=1)
        acc = np.sum(y_pred==Y_test)/len(Y_test)
        y_true = list(map(bin_label, Y_test))
        y_pred = list(map(bin_label, y_pred))
        auc = roc_auc_score(y_true, y_pred)
        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
        SE = tp/(tp+fn)
        SP = tn/(fp+tn)
        print("LSTM Test | SE: %.4f | ACC: %.4f | AUC: %.4f | SP: %.4f | valid SE: %.4f | valid ACC: %.4f" %(SE, acc, auc, SP, best_SE, best_ACC))
        with open("./result/"+save, "a") as fw:
            fw.write("SE: %.4f | ACC: %.4f | AUC: %.4f | SP: %.4f | valid SE: %.4f | valid ACC: %.4f\n" %(SE, acc, auc, SP, best_SE, best_ACC))
        break

model.save('LSTM_trained')


Epoch: 1 | SE: 0.9369 | Best SE: 0.9369 | ACC: 0.9820 | Best ACC: 0.9820 | AUC: 0.9645 | SP: 0.9921
Epoch: 2 | SE: 0.9146 | Best SE: 0.9369 | ACC: 0.9886 | Best ACC: 0.9820 | AUC: 0.9565 | SP: 0.9984
Epoch: 3 | SE: 0.9153 | Best SE: 0.9369 | ACC: 0.9889 | Best ACC: 0.9820 | AUC: 0.9569 | SP: 0.9986
Epoch: 4 | SE: 0.9013 | Best SE: 0.9369 | ACC: 0.9881 | Best ACC: 0.9820 | AUC: 0.9501 | SP: 0.9990
Epoch: 5 | SE: 0.9185 | Best SE: 0.9369 | ACC: 0.9892 | Best ACC: 0.9820 | AUC: 0.9584 | SP: 0.9983
Epoch: 6 | SE: 0.9102 | Best SE: 0.9369 | ACC: 0.9895 | Best ACC: 0.9820 | AUC: 0.9547 | SP: 0.9993
