In [1]:
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout, GRU, BatchNormalization
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
from tensorflow.keras.optimizers import Adam

import load_functions as f

# The parent directory must be on sys.path before the `Notebooks` package
# can be imported, so this statement has to stay ahead of the import below.
sys.path.append('..')
from Notebooks import ecg_cleaning as c

In [None]:
# File Path
path = '../data/physionet.org/files/ptb-xl/1.0.3/'
metadata = pd.read_csv('../data/cleaned_metadata.csv')

# Import data
full_data = f.load_signal(path, metadata)

In [None]:
# Data shape
full_data[0].shape, full_data[1].shape

In [None]:
# Only looking at Lead II

X = full_data[0][:,:,1]

y = full_data[1]

X.shape, y.shape

In [None]:
# Encode the target as consecutive integers 0..n_classes-1, the format that
# SparseCategoricalCrossentropy / SparseCategoricalAccuracy expect.
label = LabelEncoder()

# The previous version of this cell fitted and transformed `y_sample`, a name
# that is never defined in this notebook (NameError on a fresh kernel), and the
# encoded result never reached `y`, which is what the split and training cells
# actually use. Encode `y` itself instead.
# NOTE: re-running this cell re-fits the encoder on the already-encoded
# integers, so `label.classes_` only holds the original class names after the
# first run following the data-loading cells.
y = label.fit_transform(y)

In [None]:
# Low sampling frequency
sig_len = 1000
sampling_frequency = 100
# Sample positions divided by the sampling frequency (a time axis per record).
time = np.arange(0, sig_len) / sampling_frequency

# Baseline and PLI removal
#
# Each ROW of X is one record (the later reshape to (-1, 1000, 1) relies on
# that layout), so the filters must be applied row-wise. DataFrame.apply
# defaults to axis=0, which would hand each filter a single time-step across
# all records instead of one record's trace; axis=1 passes one record at a
# time and result_type='expand' turns the returned sequence back into columns.
# NOTE(review): assumes c.baseline_removal / c.high_freq_removal take and
# return one 1-D signal of unchanged length -- confirm against ecg_cleaning.
signal_bl = pd.DataFrame(X).apply(
    lambda row: c.baseline_removal(row, freq_start=0.1, freq_stop=1.5),
    axis=1,
    result_type='expand',
)
signal_pli = signal_bl.apply(
    lambda row: c.high_freq_removal(row, freq_start=45),
    axis=1,
    result_type='expand',
)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(signal_pli, y, test_size=0.2, stratify=y, random_state=42)

X_train.shape, y_train.shape

In [None]:
# Two stacked GRU layers followed by a small dense head.
rnn_model = keras.Sequential([
    # Declare the input shape up front so the model is built immediately and
    # rnn_model.summary() can report output shapes and parameter counts before
    # fit() is called. (1000, 1) matches the reshape applied to the training
    # data: 1000 time steps with a single feature each.
    keras.Input(shape=(1000, 1)),

    # the intermediate recurrent layers should return full sequences
    GRU(16, activation='relu', return_sequences=True),
    BatchNormalization(),
    Dropout(0.15),

    # the last recurrent layer only returns the final output
    GRU(16, activation='relu', return_sequences=False),
    BatchNormalization(),
    Dropout(0.15),

    # hidden dense layer
    Dense(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.15),

    # output layer: softmax over 10 units
    # NOTE(review): presumably one unit per target class -- confirm that the
    # encoded labels take exactly 10 distinct values.
    Dense(10, activation='softmax'),
])

In [None]:
# Compile our model with an optimizer, loss function and metric to look at
rnn_model.compile(
    # Optimizer
    optimizer=keras.optimizers.Adam(learning_rate=0.01),  # learning rate can be adjusted here
    # Loss function to minimize
    loss=keras.losses.SparseCategoricalCrossentropy(),
    # Metric used to evaluate model
    metrics=[keras.metrics.SparseCategoricalAccuracy()]
)

In [None]:
# Print the layer-by-layer overview of the network defined above.
rnn_model.summary()

In [None]:
# X_train_seq = X_train.values.reshape(-1, 1000, 1).astype("float")

X_train_seq = X_train.values.reshape(-1, 1000, 1).astype("float")
X_test_seq = X_test.values.reshape(-1, 1000, 1).astype("float")

In [None]:
# Fit our model, adding a validation set as well 
history = rnn_model.fit(X_train_seq, 
                    y_train, 
                    batch_size=32,
                    epochs=30, 
                    validation_split=0.1,
                    callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss',
                                                             patience=2)]
)

In [None]:
# Compare the last-epoch training accuracy with the accuracy on the test set.
train_accuracy = history.history["sparse_categorical_accuracy"][-1]

# evaluate() returns [loss, sparse_categorical_accuracy] for the compiled
# model. The previous line had an unbalanced parenthesis (SyntaxError) that
# passed y_test and verbose to np.real instead of evaluate(); np.real is
# unnecessary because X_test_seq was already cast to float when it was built.
result = rnn_model.evaluate(X_test_seq, y_test, verbose=0)

print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {result[1]:.4f}")

In [None]:
# Plotting loss function 
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'])
plt.show()

In [None]:
# Predicted class for each test record: the index of the largest softmax
# probability.
predict_probas = rnn_model.predict(X_test_seq)
y_predict = np.argmax(predict_probas, axis=1)

# Confusion matrix of raw counts (rows: true class, columns: predicted class).
# `confusion_matrix` comes from sklearn.metrics; it was previously used here
# without being imported. Pass normalize="true" to get per-class fractions
# instead of counts (and then drop fmt='d' below).
conf_mat = confusion_matrix(y_test, y_predict)

plt.figure(figsize=(9, 7))
sns.heatmap(conf_mat,
            annot=True,
            fmt='d',  # integer counts; the default '.2g' abbreviates large cells to e-notation
            cbar=False,
            cmap="rocket_r",
            linewidths=1
           )
plt.title('Confusion Matrix', size=25, y=1.01)
plt.xlabel("Predicted Label", size=20)
plt.ylabel("True Label", size=20)
plt.show()