In [98]:
%load_ext autoreload
%autoreload 2
import os
import sys
import h5py
import pdb
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.metrics import roc_auc_score
from keras.optimizers import Adam

sys.path.insert(0, '../../../')
from ecg_AAAI.models.supervised.ecg_fc import build_fc_model
from ecg_AAAI.models.gpu_utils import restrict_GPU_keras
from ecg_AAAI.models.supervised.eval import evaluate_AUC, evaluate_HR, risk_scores
from ecg_AAAI.models.supervised.ablation_helpers import *
from ecg_AAAI.models.supervised.deepsets_model import create_phi, create_rho
# Select GPU "3" for this kernel through the project helper.
restrict_GPU_keras("3")

# Prefix of the directory holding the train/test splits; the split number is
# appended to it in the data-loading cell.
# NOTE(review): absolute, user-specific path — consider making it configurable.
split_dir_prefix = "/home/divyas/ecg_AAAI/datasets/splits/split_"
model_name = "deepset"

# Selects the label dataset: later cells read `y_mode + '_labels'` from the HDF5 files.
y_mode = "cvd"
n_fc_units = 2
# NOTE(review): defined as a list here, but the data-loading cell reassigns it to
# the string "0" before concatenating it onto `split_dir_prefix`.
split_num = ["0"]
# NOTE(review): the data-loading cell overwrites this with the number of positive
# training examples, so 60 is not the batch size used during training.
batch_size = 60
# Passed to get_labels / thresh_labels; presumably a cutoff in days — confirm.
day_thresh = 90
input_dim = 256
# Handles start as None so the data-loading cell can tell whether a previous run
# left files open that need closing.
train_file = None
test_file = None

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Using GPU:3 with 0% of the memory


In [118]:
# Build and compile the deep-sets model: `phi` is applied per set member and
# `rho` is the full model that is trained and evaluated below.
# NOTE(review): `n_sets` and `set_dims` are not referenced in the visible cells.
n_sets = 3000 # training set size
# Members per set; equals the n_beats = 3 used when slicing data in later cells.
# NOTE(review): the previous comment here said "first 1000 beats", which does not
# match the value 3.
n_members = 3
# Length of each member vector; the original note describes a member as a pair
# of adjacent beats — confirm against the dataset.
member_dim = 256

set_dims = (n_members, member_dim)
# Each member is fed to the network with a trailing axis of size 1.
member_dims = (member_dim,1)

phi = create_phi(member_dims)
rho = create_rho(member_dims, phi, n_members)
adam = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

# NOTE(review): mean squared error with an accuracy metric is used although the
# model is later scored with ROC AUC against thresholded labels — confirm this
# loss is intentional.
rho.compile(optimizer=adam, loss='mean_squared_error', metrics=['accuracy'])

In [119]:
# Print the layer-by-layer architecture of the compiled model.
rho.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_31 (InputLayer)           (None, 256, 1)       0                                            
__________________________________________________________________________________________________
input_32 (InputLayer)           (None, 256, 1)       0                                            
__________________________________________________________________________________________________
input_33 (InputLayer)           (None, 256, 1)       0                                            
__________________________________________________________________________________________________
model_11 (Model)                (None, 2)            1030        input_31[0][0]                   
                                                                 input_32[0][0]                   
          

# Data

In [120]:
# Data-loading configuration.
n_results = 3        # number of full passes made by the training loop
n_beats = 3          # beats kept per patient (see the `:n_beats` slices below)
result_dicts = []
block_size = 1500    # patients loaded into memory per block during training
split_num = "0"

# Close handles left open by a previous run of this cell. Each handle is checked
# on its own so that a half-initialised state (one file open, the other still
# None or already closed) cannot raise.
for stale_file in (train_file, test_file):
    if stale_file:
        stale_file.close()
split_dir = split_dir_prefix + split_num

train_file = h5py.File(split_dir + "/train.h5", "r")
test_file = h5py.File(split_dir + "/test.h5", "r")
print("Opened file")
train_y = get_labels(train_file, y_mode, day_thresh)
test_y = get_labels(test_file, y_mode, day_thresh)
print("Loaded labels")

# The positive training examples are read once here and appended to every
# training batch by the training loop.
train_pos_idxs = np.where(train_y == 1)[0]
x_train_pos = train_file['adjacent_beats'][list(train_pos_idxs), :n_beats]
y_train_pos = train_file[y_mode + '_labels'][list(train_pos_idxs)]
y_train_pos = thresh_labels(y_train_pos, day_thresh)

n_train_pos = len(train_pos_idxs)
if n_train_pos == 0:
    # Batches are sized by the positive count; fail with a clear message
    # instead of a ZeroDivisionError further down.
    raise ValueError("No positive training examples found; cannot size batches")

# Each batch draws as many examples from the block as there are positives.
batch_size = n_train_pos
# Integer ceiling division: the previous `int(a / b + 1)` scheduled one extra,
# empty batch/block whenever `a` was an exact multiple of `b`.
n_batches = (block_size + batch_size - 1) // batch_size
n_blocks = (len(train_y) + block_size - 1) // block_size
print("N blocks: ", n_blocks)
print("N batches: ", n_batches)

Opened file
Loaded labels
N blocks:  3
N batches:  20


# Training

In [121]:
# Train `rho` for `n_results` passes over the training file. Data is streamed in
# blocks; every batch is the k-th slice of the current block concatenated with
# all positive examples loaded earlier (`x_train_pos`, `y_train_pos`).
for i in range(n_results):
    for j in range(n_blocks):
        # Load one block of `block_size` patients into memory at a time
        x_train_block, y_train_block = get_block(train_file, j, block_size,
                                                 y_mode, day_thresh, n_beats=n_beats)
        print("Finished loading Block #", j)
        for k in range(n_batches):
            # k-th batch from the block (presumably the negatives — confirm in
            # get_block_batch_deepsets).
            x_train_neg, y_train_neg = get_block_batch_deepsets(x_train_block, y_train_block,
                                                                batch_size, k, n_beats=n_beats)
            x_train_batch = np.concatenate([x_train_neg, x_train_pos])
            y_train_batch = np.concatenate([y_train_neg, y_train_pos])
            # `rho` takes one input array per set member: bring the member axis
            # to the front, add a trailing axis of size 1, then split along the
            # member axis into a list.
            swapped = np.swapaxes(x_train_batch, 0, 1)
            swapped = np.expand_dims(swapped, 3)
            rho_input = list(swapped)
            # NOTE(review): batch_size=80000 presumably exceeds any batch built
            # here, making each fit call a single full-batch step — confirm.
            rho.fit(x=rho_input, y=y_train_batch, epochs=1,
                    verbose=0, batch_size=80000)
    # Reported after the round's blocks are processed; it was previously printed
    # before any work for the round had started.
    print("Finished round: ", i)


Finished round:  0
Finished loading Block # 0
Finished loading Block # 1
Finished loading Block # 2
Finished round:  1
Finished loading Block # 0
Finished loading Block # 1
Finished loading Block # 2
Finished round:  2
Finished loading Block # 0
Finished loading Block # 1
Finished loading Block # 2


# Evaluation

In [85]:
# Read the first `n_beats` beats of every test patient and threshold the
# matching labels with `day_thresh`.
py_pred = []
x_test = test_file['adjacent_beats'][:, :n_beats]
y_test = thresh_labels(test_file[y_mode + '_labels'][:], day_thresh)

# Features and labels may differ in length; keep only their common prefix.
if x_test.shape[0] != len(y_test):
    dim = min(len(y_test), x_test.shape[0])
    x_test, y_test = x_test[:dim], y_test[:dim]

In [87]:
# Convert the test array into the model's input format: member axis first, a
# trailing axis of size 1 appended, then one array per set member in a list.
swapped = np.expand_dims(np.swapaxes(x_test, 0, 1), axis=3)
rho_test_input = list(swapped)

In [88]:

# Sanity check: number of per-member inputs, shape of one input, and label shape.
len(rho_test_input), rho_test_input[0].shape, y_test.shape

(3, (1247, 256, 1), (1247,))

In [122]:
# Run the trained model on the prepared test inputs.
# NOTE(review): execution counts show the test inputs were built (In [85]-[88])
# before the model was rebuilt and retrained (In [118]-[121]); restart and run
# all cells in order to confirm the result reproduces.
y_preds = rho.predict(rho_test_input)

In [123]:
# ROC AUC of the model's predictions against the thresholded test labels.
roc_auc_score(y_test, y_preds)

0.6698699167936248