In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
import h5py
import pdb
import time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
from scipy.io import loadmat

sys.path.insert(0, '../../../')
from ecg_AAAI.parse_dataset.readECG import loadECG
from ecg_AAAI.models.ecg_utils import get_all_adjacent_beats
from ecg_AAAI.models.supervised.ecg_fi_model_keras import build_fi_model 
from ecg_AAAI.models.supervised.ecg_fc import build_fc_model

from ecg_AAAI.models.gpu_utils import restrict_GPU_keras
from ecg_AAAI.models.supervised.eval import evaluate_AUC, evaluate_HR, risk_scores
from ecg_AAAI.models.supervised.ablation_helpers import *
import tftables
# Configure GPU visibility for Keras via the project helper, ahead of any model
# construction further down the notebook.
# NOTE(review): "3" is presumably a device index on the original machine —
# confirm against ecg_AAAI.models.gpu_utils before running elsewhere.
restrict_GPU_keras("3")
# --- Experiment configuration ---------------------------------------------
# Outcome being modelled; labels are read from the HDF5 dataset y_mode + "_labels".
y_mode = "mi"
# Tag stored in the 'model' column of the results table.
# NOTE(review): the model cell below builds a CNN via build_cnn, so "fc" may be stale.
model_name = "fc"

# Cross-validation splits live in directories named <split_dir_prefix><split id>.
split_dir_prefix = "/home/divyas/ecg_AAAI/datasets/splits/split_"
splits = [str(split_idx) for split_idx in range(5)]

# Initial value only: the data-loading cell reassigns batch_size to the number
# of positive training patients.
batch_size = 60
# Threshold handed to the label helpers (get_labels / thresh_labels).
day_thresh = 90
# Length of the model input; the CNN is built with shape (input_dim, 1).
input_dim = 256

# HDF5 handles, filled in by the data-loading cell. Starting at None lets that
# cell tell a first run apart from a re-run that must close old handles.
train_file = test_file = None

Using TensorFlow backend.
  matplotlib.use('Agg')


In [33]:
# Set up directory structure in case it's not there
fig_dir = "/home/divyas/ecg_AAAI/models/supervised/figs"
# Actually create it (and any missing parents); exist_ok makes re-running this
# cell a no-op instead of raising FileExistsError.
os.makedirs(fig_dir, exist_ok=True)

# Data

In [17]:
# Run bookkeeping.
timer = 0
n_results = 1        # number of outer-loop passes made by the training cell
result_dicts = []    # one summary dict is appended per evaluated pass
block_size = 1500    # rows requested from get_block per block
split_num = "0"

# On a re-run of this cell, release the handles left over from the previous run.
# Each handle is checked on its own so that a half-initialised pair (e.g. only
# train_file was opened before an error) cannot raise on the other one.
for stale_file in (train_file, test_file):
    if stale_file:
        stale_file.close()
split_dir = split_dir_prefix + split_num

train_file = h5py.File(split_dir + "/train.h5", "r")
test_file = h5py.File(split_dir + "/test.h5", "r")
print("Opened file")
train_y = get_labels(train_file, y_mode, day_thresh)
test_y = get_labels(test_file, y_mode, day_thresh)
print("Loaded labels")

# Positive examples are kept in memory for the whole run and appended to every
# training batch by the training cell.
train_pos_idxs = np.where(train_y == 1)[0]
n_train_pos = len(train_pos_idxs)
if n_train_pos == 0:
    # Without this guard the batch arithmetic below fails with an opaque
    # ZeroDivisionError.
    raise ValueError("No positive '" + y_mode + "' labels found in " + split_dir
                     + "/train.h5; cannot size training batches")

x_train_pos = train_file['adjacent_beats'][list(train_pos_idxs)]
x_train_pos = reshape_X(x_train_pos)
y_train_pos = train_file[y_mode + '_labels'][list(train_pos_idxs)]
y_train_pos = thresh_labels(y_train_pos, day_thresh)
# Repeat each patient-level label once per training instance.
# NOTE(review): 3600 is presumably the number of instances reshape_X yields per
# patient — confirm against ablation_helpers.
y_train_pos = np.repeat(y_train_pos, 3600)

# One batch pairs the positives with an equally sized slice of a block.
batch_size = n_train_pos
n_batches = block_size // batch_size
# Floor division: rows past the last full block are not visited by the training loop.
n_blocks = len(train_y) // block_size
print("N blocks: ", n_blocks)
print("N batches: ", n_batches)

Opened file
Loaded labels
N blocks:  2
N batches:  7


# Model

In [15]:
from ecg_AAAI.models.supervised.ecg_cnn import build_cnn

# Single-channel input of length input_dim.
cnn_input_shape = (input_dim, 1)
m = build_cnn(cnn_input_shape)

# Binary outcome: cross-entropy loss, Adam optimiser, accuracy reported during fit.
m.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
m.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 256, 1)            0         
_________________________________________________________________
conv1d_23 (Conv1D)           (None, 128, 2)            258       
_________________________________________________________________
max_pooling1d_23 (MaxPooling (None, 32, 2)             0         
_________________________________________________________________
conv1d_24 (Conv1D)           (None, 16, 2)             258       
_________________________________________________________________
max_pooling1d_24 (MaxPooling (None, 4, 2)              0         
_________________________________________________________________
flatten_8 (Flatten)          (None, 8)                 0         
_________________________________________________________________
softmax (Dense)              (None, 1)                 9         
Total para

# Training

In [18]:
def _save_score_hist(scores, color, title, out_path):
    """Draw a 20-bin histogram of `scores` on the x-range [0, 1], save it to
    `out_path`, then clear the current figure so the next plot starts empty."""
    plt.hist(scores, color=color, alpha=.5, bins=20)
    plt.title(title)
    plt.xlim(0, 1)
    plt.savefig(out_path)
    plt.clf()


for i in range(n_results):
    for j in range(n_blocks):
        # Load one block (block_size rows) into memory at a time. The block is
        # selected by the block counter j, so successive iterations read
        # different blocks rather than re-reading the one indexed by the pass counter.
        x_train_block, y_train_block = get_block(train_file, j, block_size, y_mode, day_thresh)
        print("Finished loading Block #", j)
        for k in range(n_batches):
            # k-th slice of the block; the in-memory positives are appended to it
            # so that every batch contains all positive examples.
            x_train_neg, y_train_neg = get_block_batch(x_train_block, y_train_block, batch_size, k)
            print("Done getting batch")
            x_train_batch = np.concatenate([x_train_neg, x_train_pos])
            y_train_batch = np.concatenate([y_train_neg, y_train_pos])
            m.fit(x=x_train_batch, y=y_train_batch, epochs=1, verbose=True, batch_size=80000)

    # Evaluate and plot after every pass.
    print("Starting to plot: ")
    py_pred = get_preds(m, test_file)
    print("Finished test predictions")

    # Ground truth is the thresholded test labels loaded by the data cell.
    # Work on a local view so test_y itself is never truncated in place.
    patient_y = test_y
    if len(patient_y) != len(py_pred):
        # Align lengths when predictions and labels disagree by trimming both
        # to the shorter one.
        fixed_length = min(len(patient_y), len(py_pred))
        patient_y = patient_y[:fixed_length]
        py_pred = py_pred[:fixed_length]

    auc_score = roc_auc_score(patient_y, py_pred)
    fig_path = get_fig_path(y_mode, day_thresh, split_num)
    # savefig does not create missing directories.
    os.makedirs(fig_path, exist_ok=True)

    is_positive = patient_y == 1
    title_tail = ("] distribution of risk scores (" + str(day_thresh)
                  + " days) AUC = " + str(auc_score))
    _save_score_hist(py_pred[is_positive], 'red',
                     "[" + y_mode + " positive" + title_tail,
                     fig_path + "/epoch_" + str(i) + "_positive")
    _save_score_hist(py_pred[~is_positive], 'green',
                     "[" + y_mode + " negative" + title_tail,
                     fig_path + "/epoch_" + str(i) + "_negative")
    print("Finished plotting")

    # Persist the running results table after each pass so partial runs are kept.
    result_dict = {'y_mode': y_mode, 'epoch': i, 'model': model_name, 'pauc': auc_score,
                   'day_thresh': day_thresh, 'split_num': split_num}
    result_dicts.append(result_dict)
    pd.DataFrame(result_dicts).to_csv("results_df")

Finished loading Block # 0
Done getting batch


NameError: name 'class_weight' is not defined

# Evaluation

In [15]:
# Score the test split with the current model `m`.
# NOTE(review): get_preds is not imported by name; presumably it arrives through
# the star import of ablation_helpers — confirm.
py_pred = get_preds(m, test_file)


In [17]:
# Read the full, un-thresholded label dataset for the test split into memory
# (no thresh_labels call here, unlike the training labels).
orig_test_y = test_file[y_mode + "_labels"][:]

In [29]:
# Cut-off for the binary risk flag: predictions above the 75th percentile are "high risk".
thresh = np.percentile(py_pred, 75)

# One record per (label, prediction) pair:
#   duration - the raw label value
#   observed - 1 when the raw label is > 0, else 0
#   risk     - 1 when the prediction exceeds the cut-off, else 0
dicts = [
    {
        'duration': label,
        'observed': 1 if label > 0 else 0,
        'risk': 1 if score > thresh else 0,
    }
    for label, score in zip(orig_test_y, py_pred)
]
data = pd.DataFrame(dicts)

In [30]:
from lifelines import NelsonAalenFitter

# Durations and event indicators taken from the survival frame built above.
T, E = data["duration"], data["observed"]

# Fit the Nelson-Aalen estimator; the fitted object is the cell's displayed output.
naf = NelsonAalenFitter()
naf.fit(T, event_observed=E)

<lifelines.NelsonAalenFitter: fitted with 1247 observations, 1149 censored>

In [31]:
from lifelines import CoxPHFitter
# Cox proportional-hazards fit on the survival frame: 'duration' and 'observed'
# are the time/event columns, which leaves the binary 'risk' flag as the only
# other column in `data`.
cph = CoxPHFitter()
# show_progress=True prints the per-iteration convergence log seen in the output.
cph.fit(data, duration_col='duration', event_col='observed', show_progress=True)


Iteration 1: norm_delta = 0.00495, step_size = 0.95000, ll = -354.53909, seconds_since_start = 0.0
Iteration 2: norm_delta = 0.00023, step_size = 0.95000, ll = -354.53778, seconds_since_start = 0.1
Iteration 3: norm_delta = 0.00001, step_size = 0.95000, ll = -354.53778, seconds_since_start = 0.1
Convergence completed after 3 iterations.


<lifelines.CoxPHFitter: fitted with 1247 observations, 1149 censored>

In [37]:
# Fitted value stored for the 'risk' column of the Cox model.
# NOTE(review): `hazards_` is the attribute exposed by the lifelines version this
# was run with; newer releases appear to expose `params_` instead — confirm
# before upgrading.
cph.hazards_['risk'][0]

0.01200816962966578

In [5]:
input_dim = 256
num_fc_0 = 2

# Hand-crafted kernel of shape (input_dim, num_fc_0): the first half of the
# input rows are weighted +1 and the second half -1, for every output unit.
half_dims = (input_dim // 2, num_fc_0)
positive_half = np.ones(half_dims)
negative_half = -np.ones(half_dims)
init_weights = np.concatenate((positive_half, negative_half), axis=0)

In [6]:
# Sanity check: should equal (input_dim, num_fc_0).
init_weights.shape

(256, 2)

In [10]:
# Replace the kernel of layer 2 with the hand-crafted init_weights while keeping
# the layer's current bias. The same model handle `m` is used for both the read
# and the write; `model` is not defined anywhere in this notebook.
target_layer = m.layers[2]
current_weights = target_layer.get_weights()
# get_weights() is expected to return [kernel, bias]; fail with a clear message
# when the layer has no weights or its kernel cannot take init_weights.
if len(current_weights) != 2 or current_weights[0].shape != init_weights.shape:
    raise ValueError("m.layers[2] does not hold a kernel of shape "
                     + str(init_weights.shape)
                     + " plus a bias; this cell expects a dense layer at index 2")
rand_bias = current_weights[1]
target_layer.set_weights([init_weights, rand_bias])

In [48]:
# Reduced training set: the first n_small and the last n_small rows of the most
# recent batch. The batch is built as [block slice, positives], so the tail rows
# come from the positive examples.
n_small = 320000
small_x, small_y = (
    np.concatenate([batch_part[:n_small], batch_part[-n_small:]], axis=0)
    for batch_part in (x_train_batch, y_train_batch)
)

In [49]:
# Quick fit on the reduced subset with Keras' default epochs/batch size
# (the recorded output shows a single epoch over 640000 rows).
m.fit(small_x, small_y)

Epoch 1/1
129728/640000 [=====>........................] - ETA: 2:26 - loss: 0.5069 - acc: 0.7749

KeyboardInterrupt: 

In [47]:
# Number of rows in the most recently built training batch.
x_train_batch.shape[0]

1360800