In [5]:
import numpy as np
import pandas as pd
import os
import json
import scipy.io as sio
import tqdm
import keras

In [2]:
TRAIN_DATA_PATH = "/home/jovyan/ecg/examples/cinc17/train.json"
DEV_DATA_PATH = "/home/jovyan/ecg/examples/cinc17/dev.json"

In [3]:
STEP = 256

In [4]:
def load_ecg(record):
    if os.path.splitext(record)[1] == '.npy':
        ecg = np.load(record)
    elif os.path.splitext(record)[1] == '.mat':
        ecg = sio.loadmat(record)['val'].squeeze()
    else:
        with open(record, 'r') as fid:
            ecg = np.fromfile(fid, dtype = np.int16)
    
    trunc_samp = STEP * len(ecg) // STEP    
    return ecg[:trunc_samp]

In [6]:
def load_dataset(data_json):
    with open(data_json, 'r') as fid:
        data = [json.loads(l) for l in fid]
    labels = []; ecgs = []
    
    for d in tqdm.tqdm(data):
        labels.append(d['labels'])
        ecgs.append(load_ecg(d['ecg']))
    
    return ecgs, labels

In [7]:
def compute_mean_std(x):
    x = np.hstack(x)
    return np.mean(x).astype(np.float32), np.std(x).astype(np.float32)

In [8]:
def get_data_info(ecg, labels):
    mean, std = compute_mean_std(ecg)
    classes = sorted(set(l for label in labels for l in label))
    int_to_class = dict(zip(range(len(classes)), classes))
    class_to_int = {c : i for i, c in int_to_class.items()}
    return mean, std, int_to_class, class_to_int

In [9]:
def pad(x, val = 0, dtype = np.float32):
    max_len = max(len(i) for i in x)
    padded = np.full((len(x), max_len), val, dtype = dtype)
    for e, i in enumerate(x):
        padded[e, : len(i)] = i
    return padded

In [10]:
def process(x, y, mean, std, class_to_int):
    # process x
    x = pad(x)
    x = (x - mean) / std
    x = x[:, :, None]
    
    # process y
    y = pad([[class_to_int[c] for c in s] for s in y], val = 3, dtype=np.int32)
    y = keras.utils.np_utils.to_categorical(y, num_classes = len(class_to_int))
    
    return x, y

In [11]:
ecgs_train, labels_train = load_dataset(TRAIN_DATA_PATH)

100%|██████████| 7676/7676 [00:13<00:00, 566.84it/s]


In [12]:
mean, std, int_to_class, class_to_int = get_data_info(ecgs_train, labels_train)

In [13]:
train_x, train_y = process(ecgs_train, labels_train, mean, std, class_to_int)

In [14]:
train_x.shape, train_y.shape

((7676, 18286, 1), (7676, 71, 4))

In [15]:
ecgs_dev, labels_dev = load_dataset(DEV_DATA_PATH)

100%|██████████| 852/852 [00:01<00:00, 562.69it/s]


In [16]:
mean, std, int_to_class, class_to_int = get_data_info(ecgs_dev, labels_dev)

In [17]:
dev_x, dev_y = process(ecgs_dev, labels_dev, mean, std, class_to_int)

In [18]:
dev_x.shape, dev_y.shape

((852, 18258, 1), (852, 71, 4))

## build model

In [19]:
from keras.models import Model
from keras.layers import Input
from keras.layers import Conv1D
from keras.layers import BatchNormalization
from keras.layers import Activation
from keras.layers import Add
from keras.layers import MaxPooling1D
from keras.layers.core import Lambda
from keras.layers import Dropout
from keras.layers.core import Dense, Activation
from keras.layers.wrappers import TimeDistributed
from keras import backend as K
from keras.optimizers import Adam

In [20]:
def zeropad(x):
    y = K.zeros_like(x)
    return K.concatenate([x, y], axis = 2)

def zeropad_output_shape(input_shape):
    shape = list(input_shape)
    assert len(shape) == 3
    shape[2] *= 2
    return tuple(shape)

def resnet_block(layer, num_filters, subsample_length, block_index, conv_increase_channels_at, conv_num_skip):
    shortcut = MaxPooling1D(pool_size = subsample_length)(layer)
    zero_pad = (block_index % conv_increase_channels_at) == 0 and block_index > 0
    
    if zero_pad is True:
        shortcut = Lambda(zeropad, output_shape = zeropad_output_shape)(shortcut)
        
    for i in range(conv_num_skip):
        if not (block_index == 0 and i == 0):
            layer = BatchNormalization()(layer)
            layer = Activation('relu')(layer)
            layer = Dropout(0.2)(layer)
        
        layer = Conv1D(filters = num_filters,
                      kernel_size = 16,
                      strides = subsample_length if i == 0 else 1,
                      padding = 'same',
                      kernel_initializer = 'he_normal')(layer)
    layer = Add()([shortcut, layer])
    return layer

In [21]:
def build_model():
    inputs = Input(shape = [None, 1], dtype = 'float32', name = 'inputs')
    
    # add resnet layer
    layer = Conv1D(filters = 32,
                  kernel_size = 16,
                  strides = 1 ,
                  padding = 'same',
                  kernel_initializer = 'he_normal')(inputs)
    
    layer = BatchNormalization()(layer)
    layer = Activation('relu')(layer)
    
    conv_subsample_lengths = [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
    for index, subsample_length in enumerate(conv_subsample_lengths): 
        num_filters = 2 ** (index // 4) * 32 # start conv's filter num is 32, and increase filter num every 4 layers
        layer = resnet_block(layer, num_filters, subsample_length, index, 4, 2) # 4 is increase channel step, 2 is conv layer num of one resnet block
        
    layer = BatchNormalization()(layer)
    layer = Activation('relu')(layer)
    
    # add output layer
    layer = TimeDistributed(Dense(4))(layer)
    output = Activation('softmax')(layer)
    
    model = Model(inputs = [inputs], outputs = [output])
    
    return model

In [23]:
model = build_model()

In [24]:
#model.summary()

In [28]:
optimizer = Adam(lr = 0.001, clipnorm = 1)
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer, metrics = ['accuracy'])

In [25]:
stopping = keras.callbacks.EarlyStopping(patience = 8)

In [26]:
reduce_lr = keras.callbacks.ReduceLROnPlateau(factor = 0.1, patience = 2, min_lr = 0.001 * 0.001)

In [27]:
checkpointer = keras.callbacks.ModelCheckpoint('save', save_best_only = False)

In [None]:
model.fit(train_x[:10], train_y[:10], batch_size = 32, epochs = 10, 
          validation_data = (dev_x[:10], dev_y[:10]), callbacks = [reduce_lr, stopping])

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 10 samples, validate on 10 samples
Epoch 1/10
