In [1]:
##Loading data
import os
os.environ["PTB_FOLDER_PATH"] = "/home/jupyter/data/files/ptbdb/1.0.0/"
import sys
sys.path.insert(0,"/home/jupyter/Cappy/ptd_data_handler")
sys.path.insert(0,"/home/jupyter/Cappy/signal_processing")
from ptb_data_formatter import *
from feature_extraction import *
from custom_processing import *
all_patient_data = get_formatted_ptb_data()
from ptb_xl_data_formatter import *
os.environ["PTB_XL_FOLDER_PATH"] = "/home/jupyter/data/physionet.org/files/ptb-xl/1.0.1"


KeyboardInterrupt: 

In [None]:
np.random.seed(42)

In [None]:
import pandas as pd

In [None]:
# Create individual df for each class       
norm_df = pd.DataFrame()
mi_df = pd.DataFrame()
for patient in all_patient_data.keys():
    for recording in all_patient_data[patient].keys():
        label = all_patient_data[patient][recording]["label"][0]
        signal = all_patient_data[patient][recording]["i"]
        
        # Signal processing has to be done here, as signals are diff lengths
        signal = clean_ecg_signal(signal, old_fs=1000)
        
        signal = pd.DataFrame(signal)
        if label == "Myocardial infarction":
            mi_df[patient+"_"+recording] = signal
        else:
            norm_df[patient+"_"+recording] = signal

In [None]:
norm_df

In [None]:
mi_df

In [None]:
# Delete any column in which any reading is NaN
norm_df = norm_df.dropna(axis=1)
mi_df = mi_df.dropna(axis=1)

In [None]:
norm_df

In [None]:
mi_df

In [None]:
# Shuffle the order of the ecg recordings
import random
norm_cols = norm_df.columns.to_list()
random.shuffle(norm_cols)
mi_cols = mi_df.columns.to_list()
random.shuffle(mi_cols)
norm_df = norm_df[norm_cols]
mi_df = mi_df[mi_cols]

In [None]:
print("Number of norm cases: ")
print(len(norm_df.columns))
print("Number of MI cases: ")
print(len(mi_df.columns))

In [None]:
beat_len = 512

mi_beats = []
for ecg in mi_df.columns:
    _mi_beats = []
    sig = mi_df[ecg].to_numpy()
    peaks = apply_pan_tompkins(sig, n_beats=8, standardize=True)
    for peak in peaks.keys():
        p = peaks[peak]
        _mi_beats.append(p)
        
    _mi_beats = np.array(_mi_beats)
    
    if np.shape(_mi_beats)[0] == 8:
        # Shuffle sequence of beats
        df = pd.DataFrame(np.reshape(_mi_beats, [8,beat_len]))
        df = df.sample(frac=1).reset_index(drop=True)
        _mi_beats = df.to_numpy()
        _mi_beats = np.reshape(_mi_beats, [8,beat_len])
        mi_beats.append(_mi_beats)

norm_beats = []
for ecg in norm_df.columns:
    _norm_beats = []
    sig = norm_df[ecg].to_numpy()
    peaks = apply_pan_tompkins(sig, n_beats=8, standardize=True)
    for peak in peaks.keys():
        p = peaks[peak]
        _norm_beats.append(p)
    
    _norm_beats = np.array(_norm_beats)
    
    if np.shape(_norm_beats)[0] == 8:
        # Shuffle sequence of beats
        df = pd.DataFrame(np.reshape(_norm_beats, [8,beat_len]))
        df = df.sample(frac=1).reset_index(drop=True)
        _norm_beats = df.to_numpy()
        _norm_beats = np.reshape(_norm_beats, [8,beat_len,1])
        norm_beats.append(_norm_beats)
    
mi_beats = np.reshape(mi_beats, [-1,512,1])
norm_beats = np.reshape(norm_beats, [-1,512,1])

print(np.shape(mi_beats))
print(np.shape(norm_beats))

In [None]:
# Dataset for model
from sklearn.utils import shuffle

# Separate into train and test set with balanced class labels
model_4_data_train = np.concatenate((mi_beats[0:int(0.8*len(mi_beats))], norm_beats[0:int(0.8*len(norm_beats))]))
model_4_data_test = np.concatenate((mi_beats[int(0.8*len(mi_beats)):], norm_beats[int(0.8*len(norm_beats)):len(norm_beats)]))

# Class labels: MI - 1, norm - 0
mi_label_model_4 = np.ones((2792,1))
norm_label_model_4 = np.zeros((1136,1))

model_4_labels_train = np.concatenate((mi_label_model_4[0:int(0.8*len(mi_beats))], norm_label_model_4[0:int(0.8*len(norm_beats))]))
model_4_labels_test = np.concatenate((mi_label_model_4[int(0.8*len(mi_beats)):], norm_label_model_4[int(0.8*len(norm_beats)):len(norm_beats)]))

# Shuffle across the first index using the same logic for both the label and the data
model_4_data_train, model_4_labels_train = shuffle(model_4_data_train, model_4_labels_train)
model_4_data_test, model_4_labels_test = shuffle(model_4_data_test, model_4_labels_test)

print(np.shape(model_4_data_train))
print(np.shape(model_4_labels_train))

print(np.shape(model_4_data_test))
print(np.shape(model_4_labels_test))

In [None]:
model_4_labels_test[1]

In [None]:
import tensorflow as tf

beat_length = 512
num_feats = 1

## Layer 0 - input
input = tf.keras.Input(shape=(beat_length,num_feats))

## Layers 1-6 - convolutional block
x = tf.keras.layers.Conv1D(32, 3,  padding='same', activation='relu')(input)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Conv1D(32, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.MaxPool1D(pool_size=2)(x)
x = tf.keras.layers.Dropout(0.5)(x)

## Layers 7-12 - convolutional block
x = tf.keras.layers.Conv1D(32, 3,  padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Conv1D(32, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.MaxPool1D(pool_size=2)(x)
x = tf.keras.layers.Dropout(0.5)(x)

## Layers 13-19 - convolutional block
x = tf.keras.layers.Conv1D(32, 3,  padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Conv1D(32, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.MaxPool1D(pool_size=2)(x)
x = tf.keras.layers.Dropout(0.5)(x)

## Layers 19-24 - convolutional block
x = tf.keras.layers.Conv1D(32, 3,  padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Conv1D(32, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.MaxPool1D(pool_size=2)(x)
x = tf.keras.layers.Dropout(0.5)(x)

## Layer 25 - flatten
x = tf.keras.layers.Flatten()(x)

## Layer 26 - dense
x = tf.keras.layers.Dense(32, activation='relu')(x)

## Layer 27 - batch norm
x = tf.keras.layers.BatchNormalization()(x)

## Layer 28 - dropout
x = tf.keras.layers.Dropout(0.5)(x)

## Layer 29 - dense
x = tf.keras.layers.Dense(32, activation='relu')(x)

## Layer 30 - batch norm
x = tf.keras.layers.BatchNormalization()(x)

## Layer 31 - dropout
x = tf.keras.layers.Dropout(0.5)(x)

## Layer 32 - dense
x = tf.keras.layers.Dense(16, activation='relu')(x)

## Layer 33 - batch norm
x = tf.keras.layers.BatchNormalization()(x)

## Layer 34 - dropout
x = tf.keras.layers.Dropout(0.5)(x)

## Layer 35 - output
output = tf.keras.layers.Dense(1, activation='softmax')(x)

cnn_model = tf.keras.Model(input, output, name="cnn_model")
cnn_model.summary()

In [None]:
cnn_model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-3), loss=tf.keras.losses.BinaryCrossentropy(), metrics=[tf.keras.metrics.Accuracy()])



In [None]:
import datetime

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)


In [None]:
%load_ext tensorboard
%tensorboard --logdir logs

In [None]:
cnn_model.fit(x=model_4_data_train, y=model_4_labels_train, validation_data=[model_4_data_test, model_4_labels_test], batch_size=32, epochs=10, verbose=True, callbacks=[callback, tensorboard_callback])



In [25]:
from sklearn.metrics import confusion_matrix
y_pred = cnn_model.predict(model_4_data_test)
cf_matrix = confusion_matrix(model_4_labels_test, y_pred)
print(cf_matrix)

[[  0 228]
 [  0 559]]
