In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from biosppy.signals import ecg 
import math
import keras
from keras.models import Model, Sequential
from keras.layers import *
from keras.callbacks import *
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [22]:

################ read data
# Labels are taken from the second column of y_train.csv (the first column is skipped).
y_traindata = pd.read_csv("y_train.csv", header=0)
y_train = y_traindata.iloc[:, 1].values

# Every row of X_train.csv is parsed as integers after dropping its first field.
# Rows with fewer than 17813 values are right-padded with zeros so that all
# rows stack into one rectangular array.
x_train = []
with open("X_train.csv") as f_train:
    next(f_train, None)  # skip the header line
    for row_text in f_train:
        samples = [int(field) for field in row_text.split(',')[1:]]
        # A non-positive repeat count yields [], so full-length rows are left untouched.
        samples += [0] * (17813 - len(samples))
        x_train.append(samples)
x_train = np.array(x_train)


################# split data
# 60/40 split with a fixed random_state so the partition is repeatable.
x_ktrain, x_ktest, y_ktrain, y_ktest = train_test_split(
    x_train, y_train, test_size=0.4, random_state=0
)


################## downsampling
# Randomly discard class-0 training rows; the number removed is
# (#class 0 - #class 2), which leaves as many class-0 rows as class-2 rows.
class0_idx = np.flatnonzero(y_ktrain == 0)

np.random.seed(0)
n_class0_to_drop = len(class0_idx) - np.count_nonzero(y_ktrain == 2)
del_class0_idx = np.random.choice(class0_idx, size=n_class0_to_drop, replace=False)
print('del_class0_idx:', del_class0_idx[:5])
x_ktrain = np.delete(x_ktrain, del_class0_idx, axis=0)
y_ktrain = np.delete(y_ktrain, del_class0_idx, axis=0)
class_counts = [np.count_nonzero(y_ktrain == label) for label in range(4)]
print('downsampling shape:', *class_counts)



In [73]:
# ts (array) – Signal time axis reference (seconds).
# filtered (array) – Filtered ECG signal.
# rpeaks (array) – R-peak location indices.
# templates_ts (array) – Templates time axis reference (seconds).
# templates (array) – Extracted heartbeat templates.
# heart_rate_ts (array) – Heart rate time axis reference (seconds).
# heart_rate (array) – Instantaneous heart rate (bpm).
def feature_extraction(x, sampling_rate=300.0, tol=0.1):
    """Build a per-beat feature matrix from one ECG record.

    The record is processed with ``ecg.ecg`` to obtain heartbeat templates and
    R-peak indices; the R-peaks are then re-aligned on the raw signal with
    ``ecg.correct_rpeaks``. Each output row is one heartbeat template followed
    by four RR-interval features.

    Parameters
    ----------
    x : 1-D array
        Raw ECG samples of a single record.
    sampling_rate : float, optional
        Sampling frequency in Hz, used both for the biosppy calls and to
        convert R-peak index differences to seconds. Defaults to 300.0.
    tol : float, optional
        Tolerance passed to ``ecg.correct_rpeaks``. Defaults to 0.1.

    Returns
    -------
    extracted : ndarray, shape (n_beats, template_len + 4)
        Columns ``[:template_len]`` hold the templates. The last four columns
        are: log of the RR interval preceding the beat (0 for the first beat),
        log of the RR interval following the beat (0 for the last beat), the
        record's mean RR interval in seconds, and the variance of ``1 / RR``.
    n_beats : int
        Number of rows in ``extracted``.

    Raises
    ------
    ValueError
        If fewer than two beats are available, because no RR interval can be
        computed.

    Notes
    -----
    Templates and corrected R-peaks are paired by position. The corrected
    peak list can be shorter than the template list, so both are cut to their
    common length; otherwise trailing rows would keep zeros in the RR columns
    and the returned count would not match the number of rows.
    """
    _, _, rpeaks, _, temp, _, _ = ecg.ecg(
        signal=x, sampling_rate=sampling_rate, show=False
    )
    rpeaks = ecg.correct_rpeaks(
        signal=x, rpeaks=rpeaks, sampling_rate=sampling_rate, tol=tol
    )[0]

    # Keep only beats that have both a template and a corrected R-peak.
    n_beats = min(len(rpeaks), temp.shape[0])
    if n_beats < 2:
        raise ValueError(
            "feature_extraction needs at least 2 heartbeats to compute RR "
            "intervals, got %d" % n_beats
        )
    rpeaks = rpeaks[:n_beats]
    temp = temp[:n_beats]

    template_len = temp.shape[1]
    rr_interval = np.diff(rpeaks) / sampling_rate  # seconds between consecutive beats
    log_rr = np.log(rr_interval)

    extracted = np.zeros((n_beats, template_len + 4))
    extracted[:, :template_len] = temp
    extracted[1:, -4] = log_rr    # interval before each beat; first beat has none
    extracted[:-1, -3] = log_rr   # interval after each beat; last beat has none
    extracted[:, -2] = np.mean(rr_interval)
    extracted[:, -1] = np.var(1 / rr_interval)
    return extracted, n_beats


In [85]:
# Scratch check: run ecg.ecg on the first training record so the returned
# pieces (filtered signal, templates, ...) can be inspected in the cells below.
[ts, filtered_sig, rpeaks, temp_ts, temp, hr_ts, heart_rate]  = ecg.ecg(signal = x_ktrain[0], sampling_rate=300.0, show=False)

In [None]:
# Mean of `temp` along axis 0, shown as a plain list.
list(np.mean(temp, axis=0))

In [94]:
# Shape of the filtered signal returned by ecg.ecg for the inspected record.
filtered_sig.shape

(17813,)

In [74]:
# feature_extraction returns a (2-D feature array, beat count) pair per record.
# Passing it through np.apply_along_axis relies on NumPy implicitly packing
# that ragged pair into an object array, which NumPy >= 1.24 rejects with a
# ValueError. Build the (n_records, 2) object array explicitly instead:
# column 0 holds the feature matrix, column 1 the beat count.
train_data = np.empty((x_ktrain.shape[0], 2), dtype=object)
for rec_idx, record in enumerate(x_ktrain):
    train_data[rec_idx, 0], train_data[rec_idx, 1] = feature_extraction(record)

In [75]:
# feature_extraction returns a (2-D feature array, beat count) pair per record.
# Passing it through np.apply_along_axis relies on NumPy implicitly packing
# that ragged pair into an object array, which NumPy >= 1.24 rejects with a
# ValueError. Build the (n_records, 2) object array explicitly instead:
# column 0 holds the feature matrix, column 1 the beat count.
test_data = np.empty((x_ktest.shape[0], 2), dtype=object)
for rec_idx, record in enumerate(x_ktest):
    test_data[rec_idx, 0], test_data[rec_idx, 1] = feature_extraction(record)

In [76]:
# Column 1 of train_data / test_data is the per-record count returned by
# feature_extraction; the smallest count over both splits is the number of
# leading beats kept from every record so the matrices stack into a 3-D array.
min_num = np.min((train_data[:, 1].min(), test_data[:, 1].min()))

# Column 0 holds each record's per-beat feature matrix.
nn_input = np.array([beat_matrix[:min_num] for beat_matrix in train_data[:, 0]])
nn_test = np.array([beat_matrix[:min_num] for beat_matrix in test_data[:, 0]])

In [77]:
print('train:', nn_input.shape)
print('test:', nn_test.shape)
nn_stand_input = []
nn_stand_test = []
scaler = StandardScaler()

# StandardScaler works on 2-D input, so flatten (records, beats, features) to
# (records * beats, features), scale, then restore the 3-D layout.
# The scaler is fitted on the training tensor only.
train_records, train_features = nn_input.shape[0], nn_input.shape[2]
stand_input = scaler.fit_transform(
    nn_input.reshape((train_records * min_num, train_features))
)
stand_input = np.reshape(stand_input, (train_records, min_num, train_features))
print('stand_train:', stand_input.shape)

# The test tensor reuses the statistics learned from the training tensor.
test_records, test_features = nn_test.shape[0], nn_test.shape[2]
stand_test = scaler.transform(
    nn_test.reshape((test_records * min_num, test_features))
)
stand_test = np.reshape(stand_test, (test_records, min_num, test_features))
print('stand_test:', stand_test.shape)

train: (2157, 7, 184)
test: (2047, 7, 184)
stand_train: (2157, 7, 184)
stand_test: (2047, 7, 184)


In [81]:
############# model construction
model = Sequential()
# Take the (beats, features) input shape from the standardized training tensor
# rather than hard-coding (7, 184): the beat dimension is min_num, which
# depends on the data and on the train/test split.
model.add(InputLayer(stand_input.shape[1:]))
# NOTE(review): activation=None makes the LSTM output activation and both
# hidden Dense layers linear, so consecutive Dense layers collapse into one
# linear map. Left unchanged so results stay comparable; confirm it is intended.
model.add(LSTM(500, activation = None, return_sequences= False, return_state= False))
model.add(Dense(150, activation=None, kernel_initializer='random_uniform'))
model.add(Dense(50, activation=None, kernel_initializer='random_uniform'))
# 4-way softmax output, one unit per class.
model.add(Dense(4, activation='softmax'))
optim = keras.optimizers.Adadelta()
model.compile(optimizer=optim,
          loss='categorical_crossentropy',
          metrics=['accuracy'])
model.summary()





_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_14 (LSTM)               (None, 500)               1370000   
_________________________________________________________________
dense_39 (Dense)             (None, 150)               75150     
_________________________________________________________________
dense_40 (Dense)             (None, 50)                7550      
_________________________________________________________________
dense_41 (Dense)             (None, 4)                 204       
Total params: 1,452,904
Trainable params: 1,452,904
Non-trainable params: 0
_________________________________________________________________


In [99]:
# One-hot encode the training labels into 4 classes for categorical cross-entropy.
y_ktrain_encodeing = keras.utils.to_categorical(y_ktrain, 4)
print(y_ktrain[0])

# Train for up to 100 epochs; stop early once the training loss has not
# improved for 6 epochs.
model.fit(
    stand_input,
    y_ktrain_encodeing,
    batch_size=40,
    epochs=100,
    verbose=1,
    callbacks=[EarlyStopping(monitor='loss', patience=6)],
)

# Predicted class = index of the largest softmax output per record.
class_probabilities = model.predict(stand_test)
y_kpred = np.argmax(class_probabilities, axis=1)

# Micro-averaged F1 on the held-out split.
score = f1_score(y_ktest, y_kpred, average='micro')
print(score)

In [102]:
def feature_extraction(x):
    """Summarise one ECG record as a flat vector of hand-crafted statistics.

    This redefines ``feature_extraction``; once this cell has run, it replaces
    the earlier per-beat version of the same name in the kernel namespace.

    Parameters
    ----------
    x : 1-D ndarray
        Raw ECG samples of a single record (processed at 300 Hz).

    Returns
    -------
    ndarray, shape (17 + 3 * template_len,)
        In order:
        * mean / min / max of the raw signal at the corrected R-peaks,
        * mean / min / max of the R-peak index differences (in samples),
        * mean / min / max of the heart rate,
        * mean / min / max of the heart-rate first differences,
        * mean / min / max / variance of the heart-rate time-axis differences,
        * sum of the filtered signal,
        * element-wise mean, min and max of the heartbeat templates.
    """
    _, filtered_sig, rpeaks, _, temp, hr_ts, heart_rate = ecg.ecg(
        signal=x, sampling_rate=300, show=False
    )
    # correct_rpeaks returns a one-element result; take the index array itself
    # (as the earlier definition in this notebook does) instead of carrying the
    # wrapper into x[...] and np.diff, where it only worked because NumPy
    # treats a 1-tuple as an index and diffs a (1, n) array along its last axis.
    rpeaks = ecg.correct_rpeaks(signal=x, rpeaks=rpeaks, sampling_rate=300, tol=0.1)[0]

    peaks = x[rpeaks]
    # With fewer than two heart-rate samples the differences below would be
    # empty and min/max would fail; substitute a fixed two-point placeholder.
    if len(heart_rate) < 2:
        heart_rate = [0, 1]
    if len(hr_ts) < 2:
        hr_ts = [0, 1]

    # Each difference series is used several times; compute it once.
    rr_samples = np.diff(rpeaks)
    hr_delta = np.diff(heart_rate)
    hr_ts_delta = np.diff(hr_ts)

    scalar_features = np.array([
        np.mean(peaks), np.min(peaks), np.max(peaks),
        np.mean(rr_samples), np.min(rr_samples), np.max(rr_samples),
        np.mean(heart_rate), np.min(heart_rate), np.max(heart_rate),
        np.mean(hr_delta), np.min(hr_delta), np.max(hr_delta),
        np.mean(hr_ts_delta), np.min(hr_ts_delta), np.max(hr_ts_delta),
        np.var(hr_ts_delta),
        np.sum(filtered_sig),
    ])

    return np.concatenate([
        scalar_features,
        np.mean(temp, axis=0),
        np.min(temp, axis=0),
        np.max(temp, axis=0),
    ])



In [103]:
# Apply the summary-statistics extractor to every record (one row each) and
# stack the resulting 1-D feature vectors into a 2-D matrix per split.
train_data = np.stack([feature_extraction(record) for record in x_ktrain])
test_data = np.stack([feature_extraction(record) for record in x_ktest])

In [106]:
# Sanity check: shapes of the raw training and held-out signal matrices.
print(x_ktrain.shape)
print(x_ktest.shape)

(2157, 17813)
(2047, 17813)
