<a href="https://colab.research.google.com/github/hadaev8/physionet_2017_rcrnn/blob/master/tf_keras_RCNN_physionet_2017_cross_val.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Fetch the challenge-2017 validation and training directories from physionet.org into
# the current working directory.
# Redirection order matters: `2>&1 >/dev/null` discards rsync's stdout (the file listing)
# while stderr (errors) still reaches the cell output.
!rsync -Cav physionet.org::challenge-2017/validation ./ 2>&1 >/dev/null
!rsync -Cav physionet.org::challenge-2017/training ./ 2>&1 >/dev/null

In [0]:
# Imports, logging verbosity and random seeds for the whole notebook.
import numpy as np
import pandas as pd
# Seed NumPy's global random generator.
np.random.seed(1488)
import os
import tensorflow as tf
# Only show TensorFlow log messages at ERROR level.
tf.logging.set_verbosity('ERROR')
# Print the TensorFlow version the notebook is running on.
print(tf.VERSION)
import tensorflow.keras as keras
import tensorflow.keras.backend as K
# Seed TensorFlow (TF 1.x API) with the same value as NumPy.
tf.set_random_seed(1488)

import scipy.io
from shutil import copy
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import StratifiedKFold

# Batch size shared by make_dataset() and the steps-per-epoch computations.
batch_size = 128

# Read the training reference file (record name, label) and encode the string labels
# as integer class ids.
le = LabelEncoder()
data = pd.read_csv('training/REFERENCE-v3.csv', header=None, names=['name', 'label'])
y = le.fit_transform(data.label.values)
# Pass `classes` and `y` by keyword: recent scikit-learn releases make them keyword-only,
# and older releases accept the keyword form as well.
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y), y=y)
# Column vector: one integer label per record.
y = y.reshape(-1, 1)

def get_data(file):
    """Load one record from a MATLAB ``.mat`` file.

    Reads the ``'val'`` variable stored in `file`, keeps only its first row and
    returns that row converted to a float array.
    """
    first_row = scipy.io.loadmat(file)['val'][0]
    return first_row.astype(float)

# Records may differ in length (they are padded to a common length afterwards), so keep
# them in a plain list: wrapping ragged rows in np.array() raises ValueError on recent
# NumPy releases, and pad_sequences accepts a list of sequences directly.
X = [get_data('training/{}.mat'.format(i)) for i in data.name.values]

1.13.1


In [0]:
# Pad every record to a common length (pad_sequences defaults: zeros, length of the
# longest record), then add a trailing axis of size 1 for the single input channel.
padded = keras.preprocessing.sequence.pad_sequences(X, dtype='float32')
X = padded.reshape(-1, padded.shape[1], 1)

In [0]:
# Show which original label each integer class id decodes to.
class_ids = [0, 1, 2, 3]
print('labels encoding for 0 1 2 3:', le.inverse_transform(class_ids))

labels encoding for 0 1 2 3: ['A' 'N' 'O' '~']


In [0]:
def conv(i, filters=16, kernel_size=9, strides=1):
    """Basic convolutional unit: Conv1D -> BatchNormalization -> LeakyReLU -> SpatialDropout1D.

    Args:
        i: input tensor fed to the Conv1D layer.
        filters: number of Conv1D filters.
        kernel_size: Conv1D kernel size.
        strides: Conv1D stride.

    Returns:
        The output tensor of the final SpatialDropout1D(0.1) layer.
    """
    # 'same' padding keeps the time axis length unchanged when strides == 1.
    convolved = keras.layers.Conv1D(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='same',
    )(i)
    normalized = keras.layers.BatchNormalization()(convolved)
    activated = keras.layers.LeakyReLU()(normalized)
    return keras.layers.SpatialDropout1D(0.1)(activated)

def residual_unit(x, filters, layers=3):
    """Run `x` through `layers` stacked conv() units and add the original input back.

    The element-wise add needs matching shapes, so `x` is expected to already have
    `filters` channels (conv_block applies one conv() with `filters` before calling this).
    """
    shortcut = x
    out = x
    for _ in range(layers):
        out = conv(out, filters)
    return keras.layers.add([out, shortcut])

def conv_block(x, filters, strides):
    """One stage of the network: conv() -> residual_unit() -> optional average pooling.

    Args:
        x: input tensor.
        filters: number of filters used by every convolution in the block.
        strides: pooling factor; when greater than 1 the output is average-pooled
            with this value as both pool size and stride.

    Returns:
        The block's output tensor.
    """
    out = residual_unit(conv(x, filters), filters)
    if strides <= 1:
        return out
    return keras.layers.AveragePooling1D(pool_size=strides, strides=strides)(out)

def get_model(input_length=None):
    """Build the (uncompiled) convolutional + GRU classifier.

    Args:
        input_length: number of timesteps per input sequence. When omitted, the padded
            length of the module-level training array ``X`` (``X.shape[1]``) is used,
            which is the original behaviour.

    Returns:
        A ``keras.models.Model`` taking float32 inputs of shape (input_length, 1) and
        producing a 4-way softmax.
    """
    if input_length is None:
        input_length = X.shape[1]
    inp = keras.layers.Input(shape=(input_length, 1), dtype=tf.float32)

    x = inp
    # Four conv blocks; each ends with 4x average pooling of the time axis.
    for filters in (16, 16, 32, 32):
        x = conv_block(x, filters, 4)
    # NOTE(review): the zero padding has already passed through the conv blocks here,
    # so timesteps are unlikely to be exactly 0 any more -- confirm this mask has any effect.
    x = keras.layers.Masking(mask_value=0)(x)
    x = keras.layers.GRU(32, recurrent_dropout=0.1)(x)
    x = keras.layers.Dense(4, activation='softmax')(x)

    return keras.models.Model(inp, x)

def make_dataset(X, y):
    """Wrap `(X, y)` in a shuffled, repeating, batched ``tf.data`` pipeline.

    The shuffle buffer covers all `len(y)` examples, batches hold the module-level
    `batch_size` examples (the remainder is kept), and two batches are prefetched.
    Because the dataset repeats, consumers have to bound it with an explicit step count.
    """
    shuffle_repeat = tf.data.experimental.shuffle_and_repeat(buffer_size=len(y))
    return (
        tf.data.Dataset.from_tensor_slices((X, y))
        .apply(shuffle_repeat)
        .batch(batch_size, drop_remainder=False)
        .prefetch(2)
    )

In [0]:
# 5-fold stratified cross-validation: for every fold a fresh model is trained, saved to
# disk and scored on the held-out part of the training set.
folds = list(StratifiedKFold(n_splits=5, shuffle=True, random_state=1).split(X, y))

# One entry per fold: [accuracy, array of per-class F1 scores].
scores = []
for j, (train_idx, test_idx) in enumerate(folds):
    X_train, y_train, X_test, y_test = X[train_idx], y[train_idx], X[test_idx], y[test_idx]
    print('started {} fold'.format(j))
    # Reset the Keras backend session before building this fold's datasets and model.
    K.clear_session()
    train_data = make_dataset(X_train, y_train)
    # make_dataset also shuffles and repeats the held-out fold, so the validation pass
    # below sees validation_steps random batches rather than one exact sweep.
    test_data = make_dataset(X_test, y_test)
    model = get_model()
    model.compile(loss='sparse_categorical_crossentropy', optimizer=keras.optimizers.Adam(clipnorm=1.), metrics=['accuracy'])

    # `h` (the return value of fit) is not used in the cells visible here.
    h = model.fit(train_data,
              # Floor division: an "epoch" is the number of full batches in the split.
              steps_per_epoch=len(y_train)//batch_size,
              validation_data=test_data,
              validation_steps=len(y_test)//batch_size,
              # Upper bound only; EarlyStopping below normally ends training earlier.
              epochs=1000,
              verbose=0,
              # NOTE(review): class_weights is the NumPy array returned by
              # compute_class_weight, while Keras documents `class_weight` as a dict
              # {class index: weight}. Confirm this TF version actually applies the
              # array form; dict(enumerate(class_weights)) would be the documented one.
              class_weight=class_weights,
              callbacks=[
                keras.callbacks.TerminateOnNaN(),
                # NOTE(review): no restore_best_weights is requested, so the model saved
                # below presumably holds the last epoch's weights -- confirm intended.
                keras.callbacks.EarlyStopping(patience=50, verbose=1),
                keras.callbacks.ReduceLROnPlateau(factor=0.9, patience=2, verbose=0, min_lr=1e-6),
              ])
    # Saved per fold; the validation cell loads these files again for the ensemble.
    model.save('final_model_{}.h5'.format(j))
    #copy('final_model_{}.h5'.format(j), 'drive/My Drive')
    # Hard predictions as a column vector so they line up with y_test's (n, 1) shape.
    y_pred = np.argmax(model.predict(X_test), axis=-1).reshape(-1, 1)
    print(np.equal(y_test, y_pred).mean())
    print(classification_report(y_test, y_pred))
    scores.append([np.equal(y_test, y_pred).mean(), f1_score(y_test, y_pred, average=None)])

started 0 fold
Epoch 00107: early stopping
0.8588166373755126
              precision    recall  f1-score   support

           0       0.75      0.80      0.78       152
           1       0.90      0.93      0.92      1016
           2       0.81      0.76      0.78       483
           3       0.69      0.61      0.65        56

    accuracy                           0.86      1707
   macro avg       0.79      0.77      0.78      1707
weighted avg       0.86      0.86      0.86      1707

started 1 fold
Epoch 00154: early stopping
0.8511137162954279
              precision    recall  f1-score   support

           0       0.77      0.86      0.81       152
           1       0.90      0.92      0.91      1015
           2       0.79      0.75      0.77       483
           3       0.62      0.50      0.55        56

    accuracy                           0.85      1706
   macro avg       0.77      0.76      0.76      1706
weighted avg       0.85      0.85      0.85      1706

starte

In [0]:
# Summarise the cross-validation results collected in `scores`, where every entry is
# [accuracy, per-class F1 array] for one fold.
# Accuracies and F1 vectors are stacked separately: putting the mixed [float, ndarray]
# rows into a single np.array() needs an object array, which recent NumPy refuses to
# build implicitly.
fold_accuracy = np.array([fold_scores[0] for fold_scores in scores], dtype=float)
fold_f1 = np.array([fold_scores[1] for fold_scores in scores], dtype=float)  # (n_folds, n_classes)
f1_per_class = fold_f1.mean(axis=0)

print('train set cross val metrics')
print('accuracy:', fold_accuracy.mean())
print('F1 per class:', f1_per_class)
# The reported mean F1 leaves out the last class (index 3, printed above as '~').
print('F1 mean:', f1_per_class[:-1].mean())

train set cross val metrics
accuracy: 0.8597593078126808
F1 per class: [0.79611215 0.9169862  0.77750201 0.63253624]
F1 mean: 0.8302001192185081


In [0]:
# Evaluate on the separate validation directory, reusing the label encoder fitted on the
# training labels.
data = pd.read_csv('validation/REFERENCE-v3.csv', header=None, names=['name', 'label'])
y_val = le.transform(data.label.values).reshape(-1, 1)
# Keep the records in a list: np.array() over variable-length rows raises ValueError on
# recent NumPy, and pad_sequences accepts a list of sequences.
X_val = [get_data('validation/{}.mat'.format(i)) for i in data.name.values]
# Pad (or truncate) to the training length so the inputs match the model's input shape.
X_val = keras.preprocessing.sequence.pad_sequences(X_val, maxlen=X.shape[1], dtype='float32')
X_val = X_val.reshape(-1, X_val.shape[1], 1)

# Ensemble of the five fold models: sum their class probabilities, then take the argmax.
fold_probabilities = []
model = get_model()
for i in range(5):
    model.load_weights('final_model_{}.h5'.format(i))
    fold_probabilities.append(model.predict(X_val))
y_val_pred = np.argmax(np.sum(np.array(fold_probabilities), axis=0), axis=-1).reshape(-1, 1)

val_f1_per_class = f1_score(y_val, y_val_pred, average=None)
print('validation set metrics:')
print('accuracy:', np.equal(y_val, y_val_pred).mean())
# Mean F1 over all classes except the last one, matching the cross-validation summary.
print('F1 mean:', val_f1_per_class[:-1].mean())
print('classification report:\n', classification_report(y_val, y_val_pred))

validation set metrics:
accuracy: 0.9266666666666666
F1 mean: 0.9139330718278087
classification report:
               precision    recall  f1-score   support

           0       0.94      0.96      0.95        47
           1       0.95      0.95      0.95       148
           2       0.84      0.86      0.85        65
           3       1.00      0.93      0.96        40

    accuracy                           0.93       300
   macro avg       0.93      0.92      0.93       300
weighted avg       0.93      0.93      0.93       300

