In [1]:
# import the necessary modules
import pandas as pd
import numpy as np
import os
# wfdb (WaveForm DataBase): a library of tools for reading, writing, and processing WFDB signals and annotations.
import wfdb 

%matplotlib inline
import matplotlib as mlp
import matplotlib.pyplot as plt

mlp.rc("xtick",labelsize=12)
mlp.rc("ytick",labelsize=12)
mlp.rc("axes",labelsize=14)

# Project locations. MAIN_PATH is the only machine-specific value; the data and
# figure folders are derived from it (the resulting strings are unchanged).
MAIN_PATH = "/home/moon/gitWorkspace/MachineLearning/ECG-Arrythmia"
DATA_PATH = os.path.join(MAIN_PATH, "mit-database")
FIG_PATH = MAIN_PATH
FIGURE_PATH = os.path.join(FIG_PATH, "saved_figures")
# Create the folder save_fig() actually writes to. The previous relative
# "saved_figures" was created in whatever the current directory happened to be.
os.makedirs(FIGURE_PATH, exist_ok=True)
# The wfdb readers below are called with bare record names, so the working
# directory must be the folder that holds the records.
os.chdir(DATA_PATH)

#a function to save plotted figures
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under FIGURE_PATH.

    Parameters
    ----------
    fig_id : str
        File name without extension.
    tight_layout : bool
        When true, ``plt.tight_layout()`` is applied before saving.
    fig_extension : str
        File extension, also passed to ``savefig`` as the output format.
    resolution : int
        Passed to ``savefig`` as ``dpi``.
    """
    path = os.path.join(FIGURE_PATH, "".join((fig_id, ".", fig_extension)))
    print("Saving figure", fig_id)
    # Make sure the destination exists; savefig does not create directories.
    os.makedirs(FIGURE_PATH, exist_ok=True)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)


In [2]:
# Read record "100" by bare name (the working directory was changed to DATA_PATH above).
# The next cell indexes sample[0] as a 2-D array and takes its first two columns.
sample = wfdb.io.rdsamp("100")


In [3]:
# Column 0 and column 1 of the signal array of record "100".
# NOTE(review): the names assume the channels are MLII and V5 - confirm against the record's metadata.
sample_MLII = sample[0][:,0]
sample_V5 = sample[0][:,1]


## Importing and Preparing Data

In [4]:
# The RECORDS file in DATA_PATH lists the record names, one per patient recording.
# They are loaded as strings because they are later used as file names.
records_file = os.path.join(DATA_PATH, "RECORDS")
records = np.loadtxt(records_file, dtype=str)
print(records)
print(len(records))

['100' '101' '102' '103' '104' '105' '106' '107' '108' '109' '111' '112'
 '113' '114' '115' '116' '117' '118' '119' '121' '122' '123' '124' '200'
 '201' '202' '203' '205' '207' '208' '209' '210' '212' '213' '214' '215'
 '217' '219' '220' '221' '222' '223' '228' '230' '231' '232' '233' '234']
48


In [5]:
def read_data():
    """Return a list with the ``p_signal`` array of every record in ``records``.

    Records are read by bare name with ``wfdb.rdrecord``, in the order they
    appear in the module-level ``records`` array.
    """
    return [wfdb.rdrecord(name).p_signal for name in records]

In [6]:
# Load the signal arrays of all records.
# NOTE(review): `dataset` is not referenced again in the visible part of the notebook;
# the segmentation cell re-reads every record itself.
dataset = read_data()


## Valid and Invalid beats 

In [7]:
# Annotation symbols that are not kept as beat classes.
# NOTE(review): invalid_beat is not referenced anywhere else in the visible part of
# the notebook; beat selection is driven by label_beats only.
invalid_beat = [
    "[", "!", "]", "x", "(", ")", "p", "t", 
    "u", "`", "'", "^", "|", "~", "+", "s", 
    "T", "*", "D", "=",'"', "@", "B", "a", "J", "S",
    "r", "F", "e", "j", "n", "f", "Q", "?"
]

# The seven annotation symbols kept for classification; classify() returns 1 only for these.
label_beats=[
    "A", "L", "/", "V", "R", "E", "N"
]

## Beat Segmentation

In [8]:
def classify(symbol):
    """Return 1 when ``symbol`` is one of ``label_beats``, otherwise 0."""
    return int(symbol in label_beats)
    
def segment(signal_MLII, beat_loc, window=180):
    """Cut a window of up to ``2 * window`` samples centred on ``beat_loc``.

    Parameters
    ----------
    signal_MLII : sequence or 1-D array
        The signal to cut from.
    beat_loc : int
        Sample index of the beat annotation.
    window : int, optional
        Number of samples taken on each side of ``beat_loc`` (default 180).

    Returns
    -------
    The slice ``signal_MLII[beat_loc - window : beat_loc + window]``. It is
    shorter than ``2 * window`` when the beat is closer than ``window``
    samples to either end of the signal, so callers can drop incomplete
    windows with a length check.
    """
    # Clamp the start at 0: a negative start index would otherwise be read as
    # "counted from the end" and select samples from the tail of the signal.
    start = max(beat_loc - window, 0)
    stop = beat_loc + window
    return signal_MLII[start:stop]

# Collect one 360-sample window per kept beat, together with its annotation symbol.
all_signals=[]
all_labels=[]
for record in records:
    record_data=wfdb.rdrecord(record)
    annotation=wfdb.rdann(record, 'atr')
    # Only the first signal column is segmented.
    signal_MLII=record_data.p_signal[:,0]
    for beat_loc, symbol in zip(annotation.sample, annotation.symbol):
        # Skip symbols outside label_beats before doing any slicing.
        if classify(symbol)!=1:
            continue
        segmentation=segment(signal_MLII, beat_loc)
        # Beats too close to either end of the recording give a short window; drop them.
        if len(segmentation)==360:
            all_signals.append(segmentation)
            all_labels.append(symbol)


In [9]:
# np.vstack stacks the equal-length beat windows row-wise into one 2-D array
# (one row per beat, one column per sample).
all_signals_fin=np.vstack(all_signals)

In [10]:
# Expected shape is (number of kept beats, 360): only 360-sample windows were appended.
print(all_signals_fin.shape)

(107141, 360)


## Visualizing Segmented Data

In [11]:
# Beat symbols as a pandas Series, in the same order as the rows of all_signals_fin.
labels_fin=pd.Series(all_labels)

## Check distribution of types of data

In [12]:
# Number of kept beats per annotation symbol.
labels_fin.value_counts()

N    75011
L     8071
R     7255
V     7129
/     7023
A     2546
E      106
dtype: int64

## Resampling

In [13]:
# NumPy copy of the beat symbols, aligned row-for-row with all_signals_fin.
# (The bare `len(all_signals_fin)` that used to precede this line had no effect:
# it was not the last expression of the cell, so nothing was displayed.)
labels_array=np.array(all_labels)

In [14]:
# One array of beat windows per class; the symbol order matches df_0 .. df_6.
df_0, df_1, df_2, df_3, df_4, df_5, df_6 = (
    all_signals_fin[labels_fin == symbol]
    for symbol in ('/', 'A', 'E', 'L', 'N', 'R', 'V')
)
# Labels of the '/' class, used as a sanity check on the mask.
y_df0 = labels_array[labels_fin == '/']
print(len(df_0))
print(y_df0)
len(y_df0)

7023
['/' '/' '/' ... '/' '/' '/']


7023

In [15]:
from sklearn.utils import resample

In [16]:
# Draw 20000 beats with replacement from every class so all classes end up the
# same size. The same random_state is used for each class.
# NOTE(review): this oversampling runs before the train/test split further down,
# so copies of the same beat can land in both partitions - consider splitting first.
(df0_sampled, df1_sampled, df2_sampled, df3_sampled,
 df4_sampled, df5_sampled, df6_sampled) = (
    resample(class_beats, replace=True, n_samples=20000, random_state=42)
    for class_beats in (df_0, df_1, df_2, df_3, df_4, df_5, df_6)
)


In [17]:
# One label list per resampled class: the class symbol repeated once per sampled beat.
y_0, y_1, y_2, y_3, y_4, y_5, y_6 = (
    [symbol] * len(sampled)
    for symbol, sampled in zip(
        '/AELNRV',
        (df0_sampled, df1_sampled, df2_sampled, df3_sampled,
         df4_sampled, df5_sampled, df6_sampled),
    )
)
    

In [18]:
# Flatten the seven resampled classes into one list of beat windows (class order 0..6).
X_final = [
    beat
    for sampled in (df0_sampled, df1_sampled, df2_sampled, df3_sampled,
                    df4_sampled, df5_sampled, df6_sampled)
    for beat in sampled
]

In [19]:
# Labels in the same class order as X_final, so X_final[i] is labelled y_final[i].
y_final = y_0 + y_1 + y_2 + y_3 + y_4 + y_5 + y_6

## Standardization of the data

In [20]:
from sklearn.preprocessing import StandardScaler
# Scaler used to standardize the beat windows (fitted in the next cell).
scaler=StandardScaler()

In [21]:
# Fit the scaler on every resampled beat and transform them in one step.
# NOTE(review): the scaler is fitted before the train/test split below, so its
# statistics include the beats that later form the test set - consider fitting on
# the training partition only.
scaled=scaler.fit_transform(X_final)

In [22]:
def check_strat(y):
    """Print how many samples each label in ``y`` has, and its share in percent.

    ``y`` is any one-dimensional collection of labels. The table has one row per
    distinct label, with columns "Counts" and "Percent" (rounded to two decimals).
    Nothing is returned.
    """
    labels = pd.DataFrame(y, columns=["Labels"])["Labels"]
    print("Value distribution:\n")
    summary = pd.concat(
        [
            labels.value_counts(),
            labels.value_counts(normalize=True).mul(100).round(2),
        ],
        axis=1,
        keys=["Counts", "Percent"],
    )
    print(summary)

## Splitting Data into Train and Test Set

In [23]:
from sklearn.model_selection import StratifiedShuffleSplit
# Only the first split of each split() call is consumed (via next()), so a single
# split is requested. test_size=0.1 spells out the hold-out fraction explicitly,
# and the fixed random_state makes the partition reproducible across kernel restarts.
strad=StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)

In [24]:
# Split iterator over the scaled beats, stratified on the beat symbols in y_final.
assin_strad=strad.split(scaled, y_final)

In [25]:
# Use the first split produced by the iterator as the train/test partition.
train_index, test_index=next(assin_strad)

In [26]:
# Rows of `scaled` that belong to the training partition (validation beats still included).
train_data_scaled=scaled[train_index]

In [27]:
from sklearn.preprocessing import LabelEncoder
# Encoder that maps the beat symbols to integer class codes.
lab=LabelEncoder()

In [28]:
# Encode all labels at once, so train_index / test_index can index labels_final directly.
labels_final=lab.fit_transform(y_final)

In [29]:
# The position of a symbol in classes_ is the integer code it was encoded to.
lab.classes_

array(['/', 'A', 'E', 'L', 'N', 'R', 'V'], dtype='<U1')

In [30]:
# Integer labels of the training partition, row-aligned with train_data_scaled.
train_label=labels_final[train_index]

In [31]:
# Second stratified split, this time inside the training partition, to carve out a validation set.
assin_val=strad.split(train_data_scaled, train_label)

In [32]:
# These indices are positions within train_data_scaled / train_label, not within `scaled`.
train_index_fin, val_index=next(assin_val)

In [33]:
# Validation inputs.
X_val=train_data_scaled[val_index]

In [34]:
# Final training inputs: the training partition with the validation beats removed.
train_data_scaled_fin=train_data_scaled[train_index_fin]

In [35]:
# Final training labels, row-aligned with train_data_scaled_fin.
train_labels_fin=train_label[train_index_fin]

In [36]:
# Validation labels, row-aligned with X_val.
y_val=train_label[val_index]

In [37]:
# Held-out test inputs from the first split.
# NOTE(review): no cell in the visible part of the notebook evaluates the model on them.
test_data_scaled=scaled[test_index]

In [38]:
# Held-out test labels, row-aligned with test_data_scaled.
test_labels=labels_final[test_index]

In [39]:
# Class balance of the test partition.
check_strat(test_labels)

Value distribution:

   Counts  Percent
0    2000    14.29
1    2000    14.29
2    2000    14.29
3    2000    14.29
4    2000    14.29
5    2000    14.29
6    2000    14.29


In [40]:
# Class balance of the training partition.
# NOTE(review): train_label still contains the validation beats; the model is
# fitted on train_labels_fin.
check_strat(train_label)

Value distribution:

   Counts  Percent
0   18000    14.29
1   18000    14.29
2   18000    14.29
3   18000    14.29
4   18000    14.29
5   18000    14.29
6   18000    14.29


In [41]:
# Class balance of the validation set.
check_strat(y_val)

Value distribution:

   Counts  Percent
0    1800    14.29
1    1800    14.29
2    1800    14.29
3    1800    14.29
4    1800    14.29
5    1800    14.29
6    1800    14.29


# CNN


In [42]:
import os
# Folder save_model() writes to.
MODEL_PATH = os.path.join(FIG_PATH,"saved_models")
# Create that exact folder. The previous relative "saved_models" was resolved
# against the working directory, which the first cell changed to DATA_PATH.
os.makedirs(MODEL_PATH,exist_ok=True)

import pickle

# a function to save trained models in pickle object
def save_model(name,model,extension=".pickle"):
    """Pickle ``model`` to ``MODEL_PATH/<name><extension>``.

    Parameters
    ----------
    name : str
        File name without extension.
    model : object
        Any picklable object.
    extension : str, optional
        File extension including the leading dot (default ".pickle").

    Errors raised by ``pickle.dump`` are propagated to the caller.
    NOTE(review): whether a Keras model can be pickled depends on the installed
    TensorFlow version - confirm, or use the model's own save method.
    """
    path = os.path.join(MODEL_PATH,name+extension)
    print("Saving Model : ",name)
    # Make sure the destination exists; open() does not create directories.
    os.makedirs(MODEL_PATH, exist_ok=True)
    # The context manager closes the file even when pickling fails.
    with open(path,"wb") as file:
        pickle.dump(model,file)

In [43]:
from tensorflow import keras
# Conv1D expects (samples, steps, channels): add a trailing channel axis of size 1
# to the 2-D training and validation arrays.
CNN_X_train = train_data_scaled_fin.reshape(train_data_scaled_fin.shape + (1,))
CNN_val = X_val.reshape(X_val.shape + (1,))


2021-08-05 17:29:55.351489: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-08-05 17:29:55.351508: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [44]:
# Build, compile and train a small 1-D CNN on the 360-sample beat windows.
# Two Conv1D + MaxPool1D stages feed a dense layer and a 7-way softmax
# (one output per beat class).
CNN_model = keras.Sequential()
CNN_model.add(keras.layers.Conv1D(64,kernel_size=3,input_shape=(360,1),activation="relu"))
CNN_model.add(keras.layers.MaxPool1D(pool_size=2))
CNN_model.add(keras.layers.Conv1D(filters=32,kernel_size=3,activation="relu"))
CNN_model.add(keras.layers.MaxPool1D(pool_size=2))
CNN_model.add(keras.layers.Flatten())
CNN_model.add(keras.layers.Dense(100,activation="relu"))
CNN_model.add(keras.layers.Dense(7,activation="softmax"))
# Labels are integer codes and the last layer already applies softmax,
# hence the sparse loss with from_logits=False.
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False)
# `learning_rate` replaces the deprecated `lr` keyword.
opt=keras.optimizers.Adam(learning_rate=0.0001)
CNN_model.compile(loss=loss, optimizer=opt, metrics=['accuracy'])
CNN_model.summary()
history=CNN_model.fit(CNN_X_train, train_labels_fin, epochs=25, batch_size=50, validation_data=(CNN_val, y_val))

2021-08-05 17:30:17.601706: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-08-05 17:30:17.601744: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2021-08-05 17:30:17.601773: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ann): /proc/driver/nvidia/version does not exist
2021-08-05 17:30:17.602084: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 358, 64)           256       
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 179, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 177, 32)           6176      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 88, 32)            0         
_________________________________________________________________
flatten (Flatten)            (None, 2816)              0         
_________________________________________________________________
dense (Dense)                (None, 100)               281700    
_________________________________________________________________
dense_1 (Dense)              (None, 7)                 7

2021-08-05 17:30:18.037916: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2021-08-05 17:30:18.059762: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 1689600000 Hz


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [45]:
# Sequential.predict_classes() was deprecated and later removed from Keras; for a
# softmax output the predicted class is the argmax of the predicted probabilities.
# NOTE(review): these are predictions for the training inputs, not the test set.
cnn_prediction = np.argmax(CNN_model.predict(CNN_X_train), axis=-1)



In [46]:
# Inspect one reshaped training sample (shape (360, 1)).
# NOTE(review): this displays all 360 values; a slice such as CNN_X_train[1][:5] keeps the output short.
CNN_X_train[1]

array([[-0.97539726],
       [-0.98010043],
       [-0.96508511],
       [-0.97103048],
       [-0.96730948],
       [-0.95337551],
       [-0.94940223],
       [-0.95536214],
       [-0.96117884],
       [-0.98827934],
       [-1.00615669],
       [-1.02589844],
       [-1.05716442],
       [-1.05447685],
       [-1.04010362],
       [-1.03693001],
       [-1.05788588],
       [-1.03376843],
       [-1.0569993 ],
       [-1.06728947],
       [-1.05193735],
       [-1.09636502],
       [-1.08905121],
       [-1.11897313],
       [-1.10948465],
       [-1.09872274],
       [-1.09959558],
       [-1.08522865],
       [-1.08258919],
       [-1.04062554],
       [-1.05030912],
       [-1.04811701],
       [-1.03246393],
       [-1.04255001],
       [-1.0645488 ],
       [-1.04790026],
       [-1.0562636 ],
       [-1.06640986],
       [-1.0400016 ],
       [-1.04251477],
       [-1.03291229],
       [-1.03679995],
       [-1.0144592 ],
       [-1.05998594],
       [-1.02183697],
       [-1

In [47]:
# Predicted class codes for the first ten training samples; a code is an index into lab.classes_.
print(cnn_prediction[0:10])

[3 5 4 5 5 6 2 2 2 0]
