In [1]:
#!/usr/bin/env python

import numpy as np, os, sys
from scipy.io import loadmat
import joblib
from run_12ECG_classifier import load_12ECG_model, run_12ECG_classifier
from driver import get_classes, load_challenge_data
from get_12ECG_features import get_12ECG_features
sys.path.append(os.path.abspath("./datasets"))
sys.path.append(os.path.abspath("./evaluation-2020/"))
sys.path.append(os.path.abspath("./models/"))
sys.path.append(os.path.abspath("./util/"))

from helper import translate_x, translate_y, get_data_from_physionet2020Dataset
from physionet2020 import PhysioNet2020Dataset
from torch.utils.data import DataLoader
import time
import tensorflow as tf
import evaluate_12ECG_score
import argparse
import json
import keras
import random
import util
import tqdm
from datetime import datetime

Using TensorFlow backend.


In [2]:
# Restrict TensorFlow to a single GPU. This has to be set before TensorFlow
# first initialises its GPU devices, otherwise it has no effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # use id from $ nvidia-smi

# `tf.test.is_gpu_available()` is deprecated; the deprecation notice itself
# recommends `tf.config.list_physical_devices('GPU')`. Show True when at least
# one GPU is visible to TensorFlow.
len(tf.config.list_physical_devices('GPU')) > 0

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

In [3]:
# for tensorboard
%load_ext tensorboard
from tensorflow import keras

logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

file_writer = tf.summary.create_file_writer(logdir + "/metrics")
file_writer.set_as_default()

In [4]:
# Split the record names of "Training_WFDB" into a training and a validation
# list (presumably 5 cross-validation folds, using fold 0 -- confirm against
# PhysioNet2020Dataset.split_names_cv).
train_records, val_records = PhysioNet2020Dataset.split_names_cv("Training_WFDB", 5, 0)

# Validation data is loaded before training data, as in the original cell.
dev_x, dev_y = get_data_from_physionet2020Dataset(val_records)
train_x, train_y = get_data_from_physionet2020Dataset(train_records)

# Remove label column 3 from both label matrices (9 -> 8 columns).
# NOTE(review): column 3 is presumably the "Normal" class -- confirm against
# the label order used by the dataset helper.
train_y = np.hstack((train_y[:, :3], train_y[:, 4:]))
dev_y = np.hstack((dev_y[:, :3], dev_y[:, 4:]))

X:  (3497, 4096, 12)
Y:  (3497, 9)
X:  (13946, 4096, 12)
Y:  (13946, 9)


In [5]:
# 1dAVb, RBBB, LBBB, SB, atrial fibrillation (AF), sinus tachycardia (ST).

In [6]:
# ("AF", "I-AVB", "LBBB", "Normal", "PAC", "PVC", "RBBB", "STD", "STE")

In [7]:
# brazil = [0, 1, 2, 3, 4, 5]
# physin = [1, 6, 2, 4/5, 0, 7/8]


In [8]:
# Sanity check: label matrix shapes, training set first, one per line.
print(train_y.shape, dev_y.shape, sep="\n")


(13946, 8)
(3497, 8)


In [9]:
def _with_normal_column(indicators):
    """Append a column that is 1 where a row has no positive entry, else 0.

    Args:
        indicators: 2-D array of 0/1 values, one row per recording and one
            column per class.

    Returns:
        A new array with one extra trailing column; the input is not modified.
    """
    totals = indicators.sum(axis=1, keepdims=True)
    normal = (totals == 0).astype(totals.dtype)
    return np.concatenate((indicators, normal), axis=1)


def epoch_evaluation(probs, labels, threshold=0.2):
    """Score predictions with the `evaluate_12ECG_score` metrics.

    The predictions are binarised with ``threshold``; both predictions and
    labels then get an extra trailing column that flags rows in which no
    class is set, and the metrics are computed over all columns.

    Args:
        probs: 2-D array of predicted scores, one row per recording and one
            column per class. It is NOT modified (the previous implementation
            overwrote it in place with 0/1 values).
        labels: 2-D array of 0/1 ground-truth labels with the same shape as
            ``probs``.
        threshold: A score strictly greater than this counts as a positive
            prediction. Defaults to 0.2, the value previously hard-coded.

    Returns:
        dict with the keys 'auroc', 'auprc', 'accuracy', 'f_measure',
        'f_beta' and 'g_beta'. The list of the same six values is also
        printed.
    """
    # Make the evaluation module importable, but add the directory only once
    # so that sys.path does not grow by one entry per evaluated epoch.
    eval_dir = os.path.abspath("../physionet-challenge-2020/evaluation-2020/")
    if eval_dir not in sys.path:
        sys.path.append(eval_dir)
    import evaluate_12ECG_score

    probs = np.asarray(probs)
    if not np.issubdtype(probs.dtype, np.floating):
        # 0.99 / 0.01 below must be representable.
        probs = probs.astype(float)
    labels = np.asarray(labels)

    # Hard 0/1 decisions in a NEW array, plus the "no class predicted" column.
    decisions = _with_normal_column((probs > threshold).astype(probs.dtype))
    labels = _with_normal_column(labels)
    num_classes = labels.shape[1]  # original classes + the added column

    # NOTE(review): AUROC/AUPRC are computed on the binarised decisions
    # (encoded as 0.99 / 0.01), not on the raw scores, exactly as before.
    scores = np.where(decisions > 0, 0.99, 0.01).astype(decisions.dtype)
    auroc, auprc = evaluate_12ECG_score.compute_auc(labels, scores, num_classes)

    accuracy, f_measure, f_beta, g_beta = evaluate_12ECG_score.compute_beta_score(
        labels, decisions, 2, num_classes
    )

    print([auroc, auprc, accuracy, f_measure, f_beta, g_beta])

    return {
        'auroc': auroc,
        'auprc': auprc,
        'accuracy': accuracy,
        'f_measure': f_measure,
        'f_beta': f_beta,
        'g_beta': g_beta
    }

class validationDataCallback(tf.keras.callbacks.Callback):
    """Keras callback that evaluates the model on held-out data after each epoch.

    ``valid_data`` is an indexable pair: element 0 holds the model inputs and
    element 1 the matching labels.
    """

    def __init__(self, valid_data):
        super().__init__()
        self.valid_data = valid_data

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the held-out inputs and log every metric as a summary scalar."""
        inputs, targets = self.valid_data[0], self.valid_data[1]
        predicted = self.model.predict(inputs, batch_size=32, verbose=1)
        # One scalar per metric returned by epoch_evaluation, keyed by its name.
        for name, score in epoch_evaluation(predicted, targets).items():
            tf.summary.scalar(name, data=score, step=epoch)


In [10]:
from tensorflow.keras.models import load_model
from automodel import model
from tensorflow.keras.callbacks import (ModelCheckpoint,
                             TensorBoard, ReduceLROnPlateau,
                             CSVLogger, EarlyStopping)
from tensorflow.keras.optimizers import Adam
# model_6labels = load_model('../automatic-ecg-diagnosis/model.hdf5', compile=False)

# weight = model_6labels.get_weights()

# weight[-2] = np.hstack([weight[-2], np.random.randint(2, size=[weight[-2].shape[0], 2])])
# bias = np.zeros(8)
# bias[:6] = weight[-1]
# weight[-1] = bias

# model.set_weights(weight)

softmax 8


In [11]:
# Training hyper-parameters.
loss = 'binary_crossentropy'
lr = 0.001
batch_size = 64
opt = Adam(lr)

callbacks = [
    # Multiply the learning rate by 0.1 after 7 epochs without val_loss
    # improvement, but never go below lr / 100.
    ReduceLROnPlateau(monitor='val_loss',
                      factor=0.1,
                      patience=7,
                      min_lr=lr / 100),
    # Patience should be larger than the one in ReduceLROnPlateau
    EarlyStopping(patience=9,
                  min_delta=0.00001),
    # Challenge metrics on the dev split after every epoch.
    validationDataCallback([dev_x, dev_y]),
    # The TensorBoard callback was created in the TensorBoard setup cell but
    # never handed to `fit`; include it so loss / val_loss are logged to the
    # same run directory as the custom metric scalars.
    tensorboard_callback,
]

In [12]:
# Train neural network
# Use the loss and learning rate configured in the hyper-parameter cell rather
# than repeating literals, so the optimizer stays consistent with
# ReduceLROnPlateau(min_lr=lr / 100). A fresh Adam instance is created here so
# that re-running this cell does not reuse optimizer state (or a reduced
# learning rate) left over from a previous run.
model.compile(loss=loss, optimizer=Adam(lr))

# Keep the History object so the per-epoch losses can be inspected afterwards.
history = model.fit(train_x, train_y,
        batch_size=batch_size,
        epochs=70,
        initial_epoch=0,  # If you are continuing an interrupted session, change here
        validation_split=0.2,
        # 'batch' shuffles in batch-sized chunks; Keras provides it for HDF5
        # inputs. NOTE(review): the data loaded above appear to be in-memory
        # arrays, so shuffle=True may be preferable -- left unchanged here.
        shuffle='batch',
        callbacks=callbacks,
        verbose=1)

Train on 11156 samples, validate on 2790 samples
Epoch 1/70
[0.4987007361869732, 0.11930319808496065, 0.7555863845810831, 0.07575707615536843, 0.1240094086545103, 0.04576723669271357]
Epoch 2/70
[0.4972550756488689, 0.11891116661353368, 0.7614659201638343, 0.07555166335691102, 0.1209154644354942, 0.04474075612038317]
Epoch 3/70
[0.4920154989279649, 0.11777467858435256, 0.7684354820063088, 0.07064429407881173, 0.11113844525406079, 0.041148607473816136]
Epoch 4/70
[0.49699459622652753, 0.11891766124774314, 0.7534889098085847, 0.0787610202527067, 0.1249398621267107, 0.04605429511821141]
Epoch 5/70
[0.49418042218951985, 0.11826641002732201, 0.7539788711071475, 0.08685147790843889, 0.1306306504219256, 0.048144664399230805]
Epoch 6/70
[0.5002785622061957, 0.11985590162351928, 0.8231790230502019, 0.05723173035125403, 0.0771730963844553, 0.030390718271065147]
Epoch 7/70
[0.5007894883636345, 0.12006751124848826, 0.8249792179336846, 0.05448280826025928, 0.07546791742267546, 0.029772636192042692]

<tensorflow.python.keras.callbacks.History at 0x7fa13c748828>

In [13]:
from sklearn.metrics import (confusion_matrix,
                             precision_score, recall_score, f1_score,
                             precision_recall_curve, average_precision_score)

def get_optimal_precision_recall(y_true, y_score):
    """Find precision and recall values that maximize f1 score.

    Each column (class) is processed independently.

    Args:
        y_true: 2-D array of binary ground-truth labels, one column per class.
        y_score: 2-D array of predicted scores with the same shape.

    Returns:
        Tuple ``(opt_precision, opt_recall, opt_threshold)`` of 1-D arrays with
        one entry per class. The arrays are empty when there are no classes.
        The threshold is the one to use with a strict ``score > threshold``
        comparison.
    """
    n_classes = np.shape(y_true)[1]
    opt_precision = []
    opt_recall = []
    opt_threshold = []
    for k in range(n_classes):
        # Get precision-recall curve
        precision, recall, threshold = precision_recall_curve(y_true[:, k], y_score[:, k])
        # F1 for each point on the curve; 0/0 (precision + recall == 0) yields
        # nan, which nan_to_num maps to 0, so the warning is silenced.
        with np.errstate(divide='ignore', invalid='ignore'):
            f1 = np.nan_to_num(2 * precision * recall / (precision + recall))
        # Select threshold that maximize f1 score
        best = np.argmax(f1)
        opt_precision.append(precision[best])
        opt_recall.append(recall[best])
        # Per the scikit-learn docs, precision[i] / recall[i] describe
        # predictions with score >= threshold[i]. For a strict `>` comparison
        # the equivalent cut-off is the previous threshold, or a value just
        # below the smallest one when best == 0.
        t = threshold[best - 1] if best != 0 else threshold[0] - 1e-10
        opt_threshold.append(t)
    return np.array(opt_precision), np.array(opt_recall), np.array(opt_threshold)

In [14]:
# y_score = model.predict(dev_x, batch_size=32, verbose=1)
# y_score

In [16]:
# opt_threshold = [0.15503691, 0.10315302, 0.02896456, 0.10009797, 0.07985777, 0.26683953, 0.12415724, 0.03354117]
# concatednate = []
# for threshold_index in range(len(opt_threshold)):
#     temp_y = y_score[:, threshold_index]
#     print (temp_y[temp_y > opt_threshold[threshold_index]].shape)
    
#     temp_y[temp_y > opt_threshold[threshold_index]] = 0.99
#     temp_y[temp_y <= opt_threshold[threshold_index]] = 0.01
#     concatednate.append(temp_y.T)

In [None]:
# `concatednate` is only assigned inside commented-out code, so evaluating it
# on a fresh kernel raises NameError. Disabled until that code is restored.
# concatednate