In [1]:
import wfdb
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, GRU, Conv1D, MaxPooling1D, Flatten, Dense, GlobalAveragePooling1D, BatchNormalization, Activation, Add, Input
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.optimizers import Adam
import xgboost as xgb
import lightgbm as lgb
import matplotlib.pyplot as plt
import pickle

In [2]:
normal_classes = ['N', 'L', 'R', 'e', 'j']
abnormal_classes = ['A', 'a', 'J', 'S']

beat_length = 50

mapping = {'N': 0, 'L': 0, 'R': 0, 'e': 0, 'j': 0,
        'A': 1, 'a': 1, 'J': 1, 'S': 1}

samples = []
sample_labels = []

for j in range(100,235):
    path = "../dataset/MIT-BIH/{}".format(j)
    try:
        signals, fields = wfdb.rdsamp(path)

        annotation = wfdb.rdann(path, 'atr')

        signal = signals[:, 0]  
        labels = annotation.symbol

        for i in range(len(labels)):
            if labels[i] in normal_classes or labels[i] in abnormal_classes:
                
                beat_start = annotation.sample[i]
                beat_end = annotation.sample[i+1] if i+1 < len(annotation.sample) else len(signal)
                beat = signal[beat_start:beat_end]

                
                if len(beat) < beat_length:
                    padded_beat = np.pad(beat, (0, beat_length - len(beat)), mode='constant')
                    samples.append(padded_beat)
                else:
                    truncated_beat = beat[:beat_length]
                    samples.append(truncated_beat)
                sample_labels.append(mapping[labels[i]])

        # for i in range(len(labels)):
        #     if labels[i] in normal_classes or labels[i] in abnormal_classes:
            
        #         beat_start = annotation.sample[i]-int(beat_length/2)
        #         beat_end = annotation.sample[i]+int(beat_length/2)
        #         beat = signal[beat_start:beat_end]
        #         samples.append(beat)
        #         sample_labels.append(mapping[labels[i]])    
        
    
    except:
        continue


X = np.array(samples)
y = np.array(sample_labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [3]:
samples[0]

array([ 0.84 ,  0.765,  0.52 ,  0.17 , -0.165, -0.365, -0.435, -0.425,
       -0.37 , -0.33 , -0.325, -0.335, -0.345, -0.33 , -0.325, -0.315,
       -0.31 , -0.32 , -0.335, -0.34 , -0.325, -0.345, -0.335, -0.33 ,
       -0.335, -0.33 , -0.325, -0.33 , -0.33 , -0.345, -0.355, -0.335,
       -0.325, -0.305, -0.32 , -0.32 , -0.33 , -0.34 , -0.335, -0.34 ,
       -0.345, -0.355, -0.355, -0.34 , -0.33 , -0.33 , -0.33 , -0.34 ,
       -0.35 , -0.325])

In [4]:
len(sample_labels)

93412

### Random Forest

In [5]:
clf = RandomForestClassifier(n_estimators=100, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print(classification_report(y_test, y_pred))
# pickle.dump(rf, open('../model/randomForest', "wb"))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99     18150
           1       0.90      0.53      0.67       533

    accuracy                           0.98     18683
   macro avg       0.94      0.77      0.83     18683
weighted avg       0.98      0.98      0.98     18683



### CNN

In [36]:
X_train_cnn = np.expand_dims(X_train, axis=-1).astype(np.float32)
X_test_cnn = np.expand_dims(X_test, axis=-1).astype(np.float32)
# X_train_cnn = np.asarray(X_train_cnn).astype(np.float32)
# X_test_cnn = np.asarray(X_test_cnn).astype(np.float32)

model_cnn = Sequential()
model_cnn.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(beat_length, 1)))
model_cnn.add(MaxPooling1D(pool_size=2))
model_cnn.add(Flatten())
model_cnn.add(Dense(units=64, activation='relu'))
model_cnn.add(Dense(units=1, activation='sigmoid'))


model_cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


model_cnn.fit(X_train_cnn, y_train, batch_size=32, epochs=10, validation_data=(X_test_cnn, y_test))

# _, accuracy = model_cnn.evaluate(X_test_cnn, y_test)
# print("CNN Accuracy:", accuracy)
y_pred = model_cnn.predict(X_test_cnn)
y_pred_final = [int(i) if i<0.5 else 1 for i in y_pred] 
print(classification_report(y_test, y_pred_final))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
              precision    recall  f1-score   support

           0       0.99      1.00      0.99     18150
           1       0.82      0.53      0.64       533

    accuracy                           0.98     18683
   macro avg       0.90      0.76      0.82     18683
weighted avg       0.98      0.98      0.98     18683



### GRU

In [37]:
X_train_gru = np.expand_dims(X_train, axis=-1)
X_test_gru = np.expand_dims(X_test, axis=-1)


model_gru = Sequential()
model_gru.add(GRU(units=64, input_shape=X_train_gru[0].shape))
model_gru.add(Dense(units=1, activation='sigmoid'))


model_gru.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model_gru.fit(X_train_gru, y_train, batch_size=32, epochs=10, validation_data=(X_test_gru, y_test))

# _, accuracy = model_gru.evaluate(X_test_gru, y_test)
# print("GRU Accuracy:", accuracy)
y_pred = model_gru.predict(X_test_gru)
y_pred_final = [int(i) if i<0.5 else 1 for i in y_pred] 
print(classification_report(y_test, y_pred_final))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
              precision    recall  f1-score   support

           0       0.99      1.00      0.99     18150
           1       0.80      0.55      0.65       533

    accuracy                           0.98     18683
   macro avg       0.90      0.77      0.82     18683
weighted avg       0.98      0.98      0.98     18683



### SVM

In [13]:

X_train_svm = X_train.reshape(X_train.shape[0], -1)
X_test_svm = X_test.reshape(X_test.shape[0], -1)

svm = SVC()
svm.fit(X_train_svm, y_train)

y_pred_svm = svm.predict(X_test_svm)

print(classification_report(y_test, y_pred_svm))


              precision    recall  f1-score   support

           0       0.98      1.00      0.99     18150
           1       0.84      0.42      0.56       533

    accuracy                           0.98     18683
   macro avg       0.91      0.71      0.78     18683
weighted avg       0.98      0.98      0.98     18683



### XGBoost

In [14]:
X_train_xgb = X_train.reshape(X_train.shape[0], -1)
X_test_xgb = X_test.reshape(X_test.shape[0], -1)

xgboost = xgb.XGBClassifier()
xgboost.fit(X_train_xgb, y_train)

y_pred_xgb = xgboost.predict(X_test_xgb)

print(classification_report(y_test, y_pred_xgb))


              precision    recall  f1-score   support

           0       0.99      1.00      0.99     18150
           1       0.85      0.55      0.67       533

    accuracy                           0.98     18683
   macro avg       0.92      0.77      0.83     18683
weighted avg       0.98      0.98      0.98     18683



### LightGBM

In [15]:
X_train_lgb = X_train.reshape(X_train.shape[0], -1)
X_test_lgb = X_test.reshape(X_test.shape[0], -1)

lgbm = lgb.LGBMClassifier()
lgbm.fit(X_train_lgb, y_train)

y_pred_lgb = lgbm.predict(X_test_lgb)

print(classification_report(y_test, y_pred_lgb))


              precision    recall  f1-score   support

           0       0.99      1.00      0.99     18150
           1       0.85      0.53      0.65       533

    accuracy                           0.98     18683
   macro avg       0.92      0.77      0.82     18683
weighted avg       0.98      0.98      0.98     18683



### ResNet

In [38]:
def residual_block(x, filters, kernel_size, dilation_rate):
    y = Conv1D(filters=filters, kernel_size=kernel_size, dilation_rate=dilation_rate, padding='same')(x)
    y = BatchNormalization()(y)
    y = Activation('relu')(y)

    y = Conv1D(filters=filters, kernel_size=kernel_size, dilation_rate=dilation_rate, padding='same')(y)
    y = BatchNormalization()(y)

    if x.shape[-1] != filters:
        x = Conv1D(filters=filters, kernel_size=1, padding='same')(x)
    y = Add()([x, y])
    y = Activation('relu')(y)
    
    return y

def build_resnet(input_shape, num_classes):
    inputs = Input(shape=input_shape)

    x = Conv1D(filters=64, kernel_size=7, padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = residual_block(x, filters=64, kernel_size=3, dilation_rate=1)
    x = residual_block(x, filters=64, kernel_size=3, dilation_rate=2)
    x = residual_block(x, filters=64, kernel_size=3, dilation_rate=4)

    x = GlobalAveragePooling1D()(x)
    outputs = Dense(units=num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model

X_train_resnet = np.expand_dims(X_train, axis=-1)
X_test_resnet = np.expand_dims(X_test, axis=-1)
input_shape = X_train_resnet.shape[1:]

model_resnet = build_resnet(input_shape, num_classes=2)


model_resnet.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model_resnet.fit(X_train_resnet, y_train, batch_size=32, epochs=10, validation_data=(X_test_resnet, y_test))

# _, accuracy = model_resnet.evaluate(X_test_resnet, y_test)
# print("ResNet Accuracy:", accuracy)
y_pred = model_resnet.predict(X_test_resnet)
# y_pred = y_pred.reshape(1,-1)
# y_pred_ok = y_pred[0]
# y_pred_ok
# y_pred_final = [int(i) if i<0.5 else 1 for i in y_pred_ok] 
# print(classification_report(y_test, y_pred_final))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


ValueError: Found input variables with inconsistent numbers of samples: [18683, 37366]

In [57]:
# y_pred_ok = y_pred
# y_pred_ok
# y_pred_final = [int(i) if i<0.5 else 1 for i in y_pred_ok] 
# print(classification_report(y_test, y_pred_final))
# y_pred = model_resnet.predict(X_test_resnet)
# y_pred.shape[0]
# len(y_test)

18683

In [60]:
# y_pred_0 = y_pred[:,0]
# y_pred_final = [int(i) if i<0.5 else 1 for i in y_pred_0] 
# print(classification_report(y_test, y_pred_final))
y_pred_1 = y_pred[:,1]
y_pred_final = [int(i) if i<0.5 else 1 for i in y_pred_1] 
print(classification_report(y_test, y_pred_final))

              precision    recall  f1-score   support

           0       0.28      0.01      0.02     18150
           1       0.01      0.30      0.02       533

    accuracy                           0.02     18683
   macro avg       0.14      0.16      0.02     18683
weighted avg       0.27      0.02      0.02     18683

              precision    recall  f1-score   support

           0       0.99      0.99      0.99     18150
           1       0.72      0.70      0.71       533

    accuracy                           0.98     18683
   macro avg       0.86      0.84      0.85     18683
weighted avg       0.98      0.98      0.98     18683



In [None]:

print(classification_report(y_test, y_pred_final))

### LSTM

In [61]:

X_train_lstm = np.expand_dims(X_train, axis=-1)
X_test_lstm = np.expand_dims(X_test, axis=-1)

model_lstm = Sequential()
model_lstm.add(LSTM(units=64, input_shape=X_train_lstm[0].shape))
model_lstm.add(Dense(units=1, activation='sigmoid'))

model_lstm.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model_lstm.fit(X_train_lstm, y_train, batch_size=32, epochs=10, validation_data=(X_test_lstm, y_test))

# _, accuracy = model_lstm.evaluate(X_test_lstm, y_test)
# print("LSTM Accuracy:", accuracy)
y_pred = model_lstm.predict(X_test_lstm)
y_pred_final = [int(i) if i<0.5 else 1 for i in y_pred] 
print(classification_report(y_test, y_pred_final))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
              precision    recall  f1-score   support

           0       0.99      1.00      0.99     18150
           1       0.77      0.53      0.63       533

    accuracy                           0.98     18683
   macro avg       0.88      0.76      0.81     18683
weighted avg       0.98      0.98      0.98     18683

