In [1]:
from os import listdir, mkdir, system
from os.path import isfile, isdir, join, exists
import pandas as pd
import numpy as np
import keras
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from keras import Sequential
from keras.models import Model
from keras.layers import Conv1D, BatchNormalization, Dropout, MaxPooling1D, Flatten, Dense, LSTM, Input, Activation
from keras.callbacks import EarlyStopping
import itertools

# Fix random seeds for reproducibility.
# Seeding NumPy alone leaves the Keras/TensorFlow generators (weight
# initialisers, dropout masks) unseeded, so runs would still differ.
# keras.utils.set_random_seed seeds Python's `random`, NumPy and the backend.
np.random.seed(7)
keras.utils.set_random_seed(7)
fs = 300  # presumably the sampling frequency in Hz -- TODO confirm; unused in the cells shown here
input_dir = 'One_Hot_Data/'  # directory containing the input CSV
f = '3L_3.csv'  # input file name; f[0] is reused below as the prefix of every saved artifact

2023-06-22 02:15:02.455739: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-22 02:15:02.522654: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-22 02:15:02.523498: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Load Data

In [2]:
# Read the header-less CSV; each row is one sample.
df = pd.read_csv(join(input_dir, f), header=None)
data = df.values

# The trailing 4 columns are the targets (presumably one-hot class labels,
# given the 'One_Hot_Data/' directory -- confirm); the rest is the signal.
signal_columns = data[:, :-4]
y = data[:, -4:]

# Add a trailing channel axis so the array is (samples, steps, 1) for Conv1D.
X = signal_columns.reshape(-1, signal_columns.shape[1], 1)
print(X.shape, y.shape, sep='\n')

# Hold out 10% for testing, then 10% of the remainder for validation.
X_rest, X_test, y_rest, y_test = train_test_split(
    X, y, test_size=0.1, random_state=1, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(
    X_rest, y_rest, test_size=0.1, random_state=1, shuffle=True)


(3339, 899, 1)
(3339, 4)


# Modelling

## CNN
## CRNN
## ResNet

In [3]:
def build_cnn_model(Input_Shape, Output_Shape):
    """Build and compile a three-stage 1-D CNN classifier.

    Every stage is Conv1D (relu, 'same' padding, he_normal init) ->
    BatchNormalization -> Dropout(0.5) -> MaxPooling1D(2, stride 2). The
    stages use (filters, kernel_size) of (64, 180), (16, 90) and (4, 45) and
    are followed by Flatten -> Dense(128, relu) -> Dense(Output_Shape, softmax).

    Args:
        Input_Shape: number of time steps per sample; the network input is
            (Input_Shape, 1).
        Output_Shape: number of units in the softmax output layer.

    Returns:
        The Sequential model compiled with Adam, categorical cross-entropy
        and the accuracy metric. The model summary is printed as a side effect.
    """
    # (filters, kernel_size) for each convolutional stage, in order.
    stage_specs = [(64, 180), (16, 90), (4, 45)]

    model = Sequential()
    for stage_idx, (n_filters, kernel_width) in enumerate(stage_specs):
        # Only the very first layer declares the input shape.
        first_layer_args = {'input_shape': (Input_Shape, 1)} if stage_idx == 0 else {}
        model.add(Conv1D(filters=n_filters, kernel_size=kernel_width, activation='relu',
                         padding='same', strides=1, kernel_initializer='he_normal',
                         **first_layer_args))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        model.add(MaxPooling1D(pool_size=2, strides=2))

    # Classification head
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(Output_Shape, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

In [4]:
def build_crnn_model(Input_Shape, Output_Shape):
    """Build and compile a small convolutional-recurrent classifier.

    Architecture: Conv1D(5 filters, kernel 15, relu, 'same' padding) ->
    BatchNormalization -> Dropout(0.5) -> MaxPooling1D(2, stride 2) ->
    LSTM(5) -> Dense(5, relu) -> Dense(Output_Shape, softmax).

    Args:
        Input_Shape: number of time steps per sample; the network input is
            (Input_Shape, 1).
        Output_Shape: number of units in the softmax output layer.

    Returns:
        The Sequential model compiled with Adam, categorical cross-entropy
        and the accuracy metric. The model summary is printed as a side effect.
    """
    model = Sequential()

    # Convolutional front end: local feature extraction, then 2x downsampling
    model.add(Conv1D(filters=5, kernel_size=15, activation='relu', input_shape=(Input_Shape, 1),
                     padding='same', strides=1, kernel_initializer='he_normal'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2, strides=2))

    # Recurrent stage: only the last LSTM output is passed on
    model.add(LSTM(5))

    # Classification head
    model.add(Dense(5, activation='relu'))
    model.add(Dense(Output_Shape, activation='softmax'))

    # Categorical cross-entropy is the loss that matches a softmax output. It
    # also agrees with the other builders in this notebook and with the loss
    # the reloaded model is recompiled with (this was 'mse' before).
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    model.summary()
    return model


In [5]:
def build_resnet_model(Input_Shape, Output_Shape, layer):
    """Build and compile a 1-D residual CNN classifier.

    Layout:
      * stem: Conv1D -> BatchNormalization -> relu;
      * one initial residual block (Conv -> BN -> relu -> Dropout -> Conv ->
        MaxPool on the main path, MaxPool on the shortcut);
      * `layer` pre-activation residual blocks, each applying
        (BN -> relu -> Dropout -> Conv) twice on the main path;
      * head: BN -> relu -> Flatten -> Dense(Output_Shape, softmax).

    In the main loop, odd-numbered blocks (1, 3, 5, ...) max-pool both
    branches by 2. On every fourth block (4, 8, ...) the filter count grows
    by another 64 and the shortcut goes through a 1x1 convolution so its
    channel count matches the main path.

    Args:
        Input_Shape: number of time steps per sample; the network input is
            (Input_Shape, 1).
        Output_Shape: number of units in the softmax output layer.
        layer: number of residual blocks built by the main loop (on top of
            the initial residual block).

    Returns:
        The functional Model compiled with Adam, categorical cross-entropy
        and the accuracy metric. The model summary is printed as a side effect.
    """
    base_filters = 64
    kernel_width = 16
    conv_stride = 1
    pool_size = 2
    pool_stride = 2
    dropout_rate = 0.5

    def wide_conv(n_filters):
        # All full-width convolutions in the network share these settings.
        return Conv1D(filters=n_filters, kernel_size=kernel_width, padding='same',
                      strides=conv_stride, kernel_initializer='he_normal')

    def halving_pool():
        return MaxPooling1D(pool_size=pool_size, strides=pool_stride)

    input1 = Input(shape=(Input_Shape, 1), name='input')

    # Stem: conv, BN, relu
    x = wide_conv(base_filters)(input1)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Initial residual block -- main path first, then the pooled shortcut
    main = wide_conv(base_filters)(x)
    main = BatchNormalization()(main)
    main = Activation('relu')(main)
    main = Dropout(dropout_rate)(main)
    main = wide_conv(base_filters)(main)
    main = halving_pool()(main)
    shortcut = halving_pool()(x)
    x = keras.layers.add([main, shortcut])

    # Residual blocks
    for block_idx in range(layer):
        # Pooling alternates off/on, starting with "off" for block 0.
        pool_this_block = block_idx % 2 == 1
        # The width multiplier goes up by one on blocks 4, 8, 12, ...
        n_filters = base_filters * (1 + block_idx // 4)

        if block_idx > 0 and block_idx % 4 == 0:
            # Channel count changes here: project the shortcut with a 1x1 conv.
            shortcut = Conv1D(filters=n_filters, kernel_size=1)(x)
        else:
            shortcut = x

        # Main path: two pre-activation units (BN, relu, dropout, conv)
        main = x
        for _ in range(2):
            main = BatchNormalization()(main)
            main = Activation('relu')(main)
            main = Dropout(dropout_rate)(main)
            main = wide_conv(n_filters)(main)

        if pool_this_block:
            # Downsample both branches so they can still be summed.
            main = halving_pool()(main)
            shortcut = halving_pool()(shortcut)

        x = keras.layers.add([main, shortcut])

    # Head
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Flatten()(x)
    out = Dense(Output_Shape, activation='softmax')(x)

    model = Model(inputs=input1, outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

# Build and Plot Model

In [7]:
# Pick exactly one architecture; `Name` is used in every output file name.

# Name='CNN'
# model = build_cnn_model(X_train.shape[1], y_train.shape[1])

Name='CRNN'
model = build_crnn_model(X_train.shape[1], y_train.shape[1])

# Name='ResNet'
# model = build_resnet_model(X_train.shape[1], y_train.shape[1], 1)

# This is the first cell that writes into Saved_Model/. Create the directory
# if it is missing so the notebook runs on a fresh checkout.
if not isdir('Saved_Model'):
    mkdir('Saved_Model')

# Render the architecture diagram next to the saved model files.
keras.utils.plot_model(model, to_file='Saved_Model/'+f[0]+'_'+Name+'.pdf')

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_1 (Conv1D)           (None, 899, 5)            80        
                                                                 
 batch_normalization_1 (Batc  (None, 899, 5)           20        
 hNormalization)                                                 
                                                                 
 dropout_1 (Dropout)         (None, 899, 5)            0         
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 449, 5)           0         
 1D)                                                             
                                                                 
 lstm_1 (LSTM)               (None, 5)                 220       
                                                                 
 dense_2 (Dense)             (None, 5)                

2023-06-22 02:15:54.001348: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-22 02:15:54.003701: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-22 02:15:54.005934: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

# Train Model

In [8]:
# Stop early once the validation loss has gone 2 epochs without improving.
early_stop = EarlyStopping(monitor='val_loss', patience=2, verbose=1)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=5,
    batch_size=128,
    verbose=1,
    callbacks=[early_stop],
)

Epoch 1/5


2023-06-22 02:16:11.792000: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-22 02:16:11.794344: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-22 02:16:11.796234: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2023-06-22 02:16:19.970969: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-22 02:16:19.975339: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-22 02:16:19.977305: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


# Save model

In [9]:
# Both artifacts share the same path stem: Saved_Model/<f[0]>_<Name>
model_stem = 'Saved_Model/'+f[0]+"_"+Name

# Architecture -> JSON
model_json = model.to_json()
with open(model_stem+".json", "w") as json_file:
    json_file.write(model_json)

# Weights -> HDF5
model.save_weights(model_stem+".h5")

print("Saved model to disk")

Saved model to disk


# Load Model

In [10]:
# Import from the same `keras` package the model was built with, rather than
# mixing in `tensorflow.keras`.
from keras.models import model_from_json

# Rebuild the architecture from its JSON description. The context manager
# closes the file even if reading or parsing raises.
with open('Saved_Model/'+f[0]+"_"+Name+".json", 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Restore the trained weights into the rebuilt model
loaded_model.load_weights('Saved_Model/'+f[0]+"_"+Name+".h5")
print("Loaded model from disk")

# model_from_json returns an uncompiled model; compile it before use.
loaded_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

Loaded model from disk


2023-06-22 02:16:56.286120: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-22 02:16:56.288439: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-22 02:16:56.290326: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

# Predict test samples


In [11]:
# Softmax outputs for the held-out test set, one row per sample
new_y_pred = loaded_model.predict(X_test)

# Take the arg-max of the raw values. Rounding first (as this cell used to)
# turns every row whose largest probability is <= 0.5 into all zeros, and
# argmax of an all-zero row is 0, so uncertain samples were always counted
# as class 0.
y_test_arg = np.argmax(y_test, axis=1)
y_pred_arg = np.argmax(new_y_pred, axis=1)

# One-hot buffer for the predictions; it is filled in by a later cell.
y_pred = np.zeros((new_y_pred.shape[0], new_y_pred.shape[1]))

2023-06-22 02:16:57.964936: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-22 02:16:57.967907: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-22 02:16:57.970068: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



# Plot model training curves


In [12]:
# This is the first cell that writes into Stats/. Create the directory if it
# is missing so the notebook runs on a fresh checkout.
if not isdir('Stats'):
    mkdir('Stats')

# Per-epoch metrics recorded by model.fit
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(accuracy))

# One entry per figure:
# (training values, validation values, training legend label,
#  validation legend label, title, y-axis label, output file suffix)
curve_specs = [
    (accuracy, val_accuracy, 'Training accuracy', 'Validation accuracy',
     'Training and validation accuracy', 'Accuracy', '_acc.png'),
    (loss, val_loss, 'Training loss', 'Validation loss',
     'Training and validation loss', 'Loss', '_loss.png'),
]

for train_values, val_values, train_label, val_label, curve_title, y_label, file_suffix in curve_specs:
    plt.figure(figsize=(8,4))
    plt.plot(epochs, train_values, label=train_label)
    plt.plot(epochs, val_values, label=val_label)
    plt.title(curve_title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.savefig('Stats/'+f[0]+'_'+Name+file_suffix,dpi=200)
    # Close the figure so it is not also rendered inline or kept in memory.
    plt.close()


# Plot Confusion Matrix

In [13]:
def plot_confusion_matrix(cm, name, Name, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix on the current figure and save it as a PNG.

    The figure is written to 'Stats/<name>_<Name>_confmat.png' at 250 dpi and
    then closed.

    Args:
        cm: square confusion matrix, indexed as cm[true, predicted].
        name: first component of the output file name.
        Name: second component of the output file name.
        classes: tick labels, one per row/column of `cm`.
        normalize: when True, divide each row by its sum so that both the
            heat map and the cell text show per-true-class ratios.
        title: figure title.
        cmap: matplotlib colormap for the heat map.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so the colours and the cell text show the same
    # quantity (the image used to be drawn from the raw counts).
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class with no true samples keeps an all-zero row instead of NaN.
        cm = cm.astype('float') / np.where(row_totals == 0, 1, row_totals)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=0)
    plt.yticks(tick_marks, classes)

    # Use white text on dark cells and black text on light cells.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Ratios are shown with two decimals; raw counts are shown as they are.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.savefig('Stats/'+name+'_'+Name+'_confmat.png',dpi=250)
    plt.close()

In [14]:
class_names = ['Normal','AFiB','Other', 'Noisy']

# One-hot encode the predicted class indices in one vectorised assignment.
y_pred[np.arange(y_pred.shape[0]), y_pred_arg] = 1

# Pin the label set so the matrix always has one row/column per class name.
# Without `labels`, a class that is absent from both the test labels and the
# predictions is dropped and the tick labels no longer line up with the cells.
cnf_matrix = confusion_matrix(y_test_arg, y_pred_arg,
                              labels=list(range(len(class_names))))

# Plot non-normalized confusion matrix
plt.figure(figsize=(8,5))
plot_confusion_matrix(cnf_matrix, name = f[0], Name=Name, classes=class_names, title='Confusion matrix')

In [20]:
# !pip install pycm
# print (classification_report(y_test, y_pred))


In [19]:
# # Plot confusion matrix
# from pycm import *
# cm = ConfusionMatrix(actual_vector=y_test_arg, predict_vector=y_pred_arg)
# totalt = cm.__dict__

# TP = totalt['TP']
# FP = totalt['FP']
# TN = totalt['TN']
# FN = totalt['FN']

# PPV = totalt['PPV']
# ACC = totalt['ACC']
# SEN = totalt['TPR']
# SPE = totalt['TNR']
# F1S = totalt['F1']
# AUC = totalt['AUC']

# # print ('TP,FP,TN,FN,Precision,Accuracy,Sensitivity,Specificity,F1Score,AUC')
# # print (TP[0], ',', FP[0], ',', TN[0], ',', FN[0], ',', '{:.2f}%'.format(PPV[0]*100), ',', '{:.2f}%'.format(ACC[0]*100), ',', '{:.2f}%'.format(SEN[0]*100), ',','{:.2f}%'.format(SPE[0]*100), ',','{:.2f}%'.format(F1S[0]*100), ',','{:.2f}%'.format(AUC[0]*100))
# # print (TP[1], ',', FP[1], ',', TN[1], ',', FN[1], ',', '{:.2f}%'.format(PPV[1]*100), ',','{:.2f}%'.format(ACC[1]*100), ',','{:.2f}%'.format(SEN[1]*100), ',','{:.2f}%'.format(SPE[1]*100), ',','{:.2f}%'.format(F1S[1]*100), ',','{:.2f}%'.format(AUC[1]*100))
# # print (TP[2], ',', FP[2], ',', TN[2], ',', FN[2], ',', '{:.2f}%'.format(PPV[2]*100), ',','{:.2f}%'.format(ACC[2]*100), ',','{:.2f}%'.format(SEN[2]*100), ',','{:.2f}%'.format(SPE[2]*100), ',','{:.2f}%'.format(F1S[2]*100), ',','{:.2f}%'.format(AUC[2]*100))




In [None]:
# model.add(Flatten())

# model.add(Dense(128, activation='relu'))
# model.add(Dense(y.shape[1], activation='softmax'))


# model.add(Conv2D(512, 3, strides=(32, 4), padding='same', input_shape=(1, 32, None)))
# model.add(Reshape((512, -1)))
# model.add(Permute((2, 1)))
# model = Sequential()
# model.add(LSTM(200, activation='relu', input_shape=(X.shape[1],1)))

In [None]:
# 81% recall. That means the model correctly identified 81% of the total bad loans. That’s pretty great. 
# But is this actually representative of how the model will perform? To find out, I’ll calculate the accuracy 
# and recall for the model on the test dataset I created initially.
# By oversampling before splitting into training and validation datasets, I “bleed” information from the
# validation set into the training of the model.

# To see how this works, think about the case of simple oversampling (where I just duplicate observations). 
# If I upsample a dataset before splitting it into a train and validation set, I could end up with the same 
# observation in both datasets. As a result, a complex enough model will be able to perfectly predict the value 
# for those observations when predicting on the validation set, inflating the accuracy and recall.

# When upsampling using SMOTE, I don’t create duplicate observations. However, because the SMOTE algorithm uses
# the nearest neighbors of observations to create synthetic data, it still bleeds information. If the nearest 
# neighbors of minority class observations in the training set end up in the validation set, their information 
# is partially captured by the synthetic data in the training set. Since I’m splitting the data randomly, we’d
# expect to have this happen. As a result, the model will be better able to predict validation set values than
# completely new data.
