# Classification Analysis for ECG Time-Series

> Copyright 2019 Dave Fernandes. All Rights Reserved.
> 
> Licensed under the Apache License, Version 2.0 (the "License");
> you may not use this file except in compliance with the License.
> You may obtain a copy of the License at
>
> http://www.apache.org/licenses/LICENSE-2.0
>  
> Unless required by applicable law or agreed to in writing, software
> distributed under the License is distributed on an "AS IS" BASIS,
> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> See the License for the specific language governing permissions and
> limitations under the License.

## Overview
This notebook trains and evaluates classification models that assign each ECG time-series to one of five classes.
- Data for this analysis should be prepared using the `PreprocessECG.ipynb` notebook from this project.
- Original data is from: https://www.kaggle.com/coni57/model-from-arxiv-1805-00794

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as keras
import matplotlib.pyplot as plt
import pickle

# Switch TensorFlow to eager execution for the rest of the notebook.
tf.enable_eager_execution()

# Locations of the pickled training and test sets.
TRAIN_SET = './Data/train_balanced.pickle'
TEST_SET = './Data/test_set.pickle'

# Each pickle is read as a mapping with the inputs under 'x' and the labels under 'y'.
# NOTE: pickle.load can execute arbitrary code from the file; only load pickles you trust.
with open(TEST_SET, 'rb') as file:
    test_set = pickle.load(file)
    x_test = test_set['x']
    y_test = test_set['y']

with open(TRAIN_SET, 'rb') as file:
    train_set = pickle.load(file)
    x_train = train_set['x']
    y_train = train_set['y']

### Input functions for Estimator

In [2]:
def combined_dataset(features, labels):
    """Build a `tf.data.Dataset` of (feature-dict, label) pairs.

    Args:
        features: Array with one example per row (its `.shape[0]` is the
            number of examples).
        labels: Array with one label per example.

    Returns:
        A dataset whose elements are `({'time_series': row}, label)`.

    Raises:
        ValueError: If `features` and `labels` differ in their first dimension.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if features.shape[0] != labels.shape[0]:
        raise ValueError(
            'features and labels must contain the same number of examples, '
            'got {} and {}'.format(features.shape[0], labels.shape[0]))
    dataset = tf.data.Dataset.from_tensor_slices(({'time_series': features}, labels))
    return dataset

def class_for_element(features, labels):
    """Return the label part of a (features, labels) pair.

    The `features` argument is accepted but ignored.
    """
    return labels

# For training
def train_input_fn():
    """Input function for training.

    Returns:
        The training dataset repeated indefinitely, shuffled with a
        500000-element buffer, grouped into batches of 200 and prefetched.
    """
    examples = combined_dataset(x_train, y_train)
    examples = examples.repeat()
    examples = examples.shuffle(500000)
    examples = examples.batch(200)
    return examples.prefetch(1)

# For evaluation and plotting predictions
def eval_input_fn():
    """Input function for evaluation and prediction.

    Returns:
        A single, unshuffled pass over the test set in batches of 1000,
        prefetched one batch ahead.
    """
    batched = combined_dataset(x_test, y_test).batch(1000)
    return batched.prefetch(1)

### Define the models
* The first model is taken from: https://arxiv.org/pdf/1805.00794.pdf

In [3]:
# Model directory for the CNN from the paper (presumably meant to be used as MODEL_DIR when conv_model is selected).
CNN_MODEL_DIR = './Models/CNN-Paper'

def conv_unit(unit, input_layer):
    """Append one residual convolution unit to `input_layer`.

    The unit applies two 32-filter, width-5, 'same'-padded convolutions (ReLU
    after the first only), adds the unit's own input back onto the result,
    applies ReLU, and finishes with max pooling (pool size 5, stride 2).

    Args:
        unit: Index of the unit; only used as a suffix for the layer names.
        input_layer: Tensor fed into the unit. It is added to the output of the
            second convolution, so it presumably must already have 32
            channels - confirm against the caller.

    Returns:
        The output tensor of the pooling layer.
    """
    suffix = '_' + str(unit)
    conv_args = dict(filters=32, kernel_size=5, strides=1, padding='same')

    residual = keras.Conv1D(name='Conv1' + suffix, activation='relu', **conv_args)(input_layer)
    residual = keras.Conv1D(name='Conv2' + suffix, activation=None, **conv_args)(residual)

    # Skip connection: sum the convolved signal with the unit's input.
    summed = keras.Add(name='ResidualSum' + suffix)([residual, input_layer])
    activated = keras.Activation("relu", name='Act' + suffix)(summed)
    return keras.MaxPooling1D(name='MaxPool' + suffix, pool_size=5, strides=2)(activated)

def conv_model(input_layer, mode, params):
    """Build the convolutional classifier and return its logits.

    The network is an initial 32-filter convolution, five residual units
    (`conv_unit` 1 through 5), a flatten, two 32-unit dense layers (ReLU on the
    first only) and a linear 5-unit output layer.

    Args:
        input_layer: Input tensor for the first convolution.
        mode: Unused; present so all model functions share one signature.
        params: Unused; present so all model functions share one signature.

    Returns:
        The output of the final 5-unit dense layer (no activation applied).
    """
    net = keras.Conv1D(filters=32, kernel_size=5, strides=1)(input_layer)

    for unit in range(1, 6):
        net = conv_unit(unit, net)

    net = keras.Flatten()(net)
    net = keras.Dense(32, name='FC1', activation='relu')(net)
    net = keras.Dense(32, name='FC2')(net)
    return keras.Dense(5, name='Output')(net)

- RNN model

In [4]:
# Model directory for the stacked-GRU model.
RNN_MODEL_DIR = './Models/GRU'
# Units of each stacked recurrent layer, in order; rnn_model reads this list when it is called.
# NOTE: a later cell assigns both of these names again with different values.
RNN_OUTPUT_UNITS = [128, 256]

def rnn_model(input_layer, mode, params):
    """Build the stacked-GRU classifier and return its logits.

    The input is masked where it equals 0, passed through one GRU layer per
    entry of `RNN_OUTPUT_UNITS`, then through two ReLU dense layers (64 and 16
    units) and a linear 5-unit output layer.

    Args:
        input_layer: Input tensor; the masking layer declares an input shape
            of (187, 1) per example.
        mode: A `tf.estimator.ModeKeys` value; dropout is only enabled for
            `TRAIN`.
        params: Unused; present so all model functions share one signature.

    Returns:
        The unactivated output of the final 5-unit dense layer.
    """
    # Keras layers called inside a custom Estimator model_fn are not told the
    # mode on their own, so pass it explicitly; otherwise the GRU dropout
    # falls back to the default (inference) behaviour during training.
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    current_layer = tf.keras.layers.Masking(mask_value=0., input_shape=(187, 1), name='Masked')(input_layer)

    for i, size in enumerate(RNN_OUTPUT_UNITS):
        # Every layer but the last must emit the full sequence for the next GRU.
        not_last = i + 1 < len(RNN_OUTPUT_UNITS)
        gru = tf.keras.layers.GRU(size, return_sequences=not_last, dropout=0.2, recurrent_dropout=0.0, name='GRU' + str(i + 1))
        current_layer = gru(current_layer, training=is_training)

    current_layer = keras.Dense(64, name='Dense1', activation='relu')(current_layer)
    current_layer = keras.Dense(16, name='Dense2', activation='relu')(current_layer)

    # The output stays linear: these values are used as logits for softmax
    # cross-entropy, and a ReLU here would clamp them to be non-negative.
    logits = keras.Dense(5, name='Output')(current_layer)

    return logits

In [None]:
# NOTE: these two assignments replace the RNN_MODEL_DIR / RNN_OUTPUT_UNITS values set for the
# GRU model in an earlier cell. rnn_model reads RNN_OUTPUT_UNITS when it is called, so after
# this cell has run it uses these layer sizes as well.
RNN_MODEL_DIR = './Models/RNN'
RNN_OUTPUT_UNITS = [64, 128]

def birnn_model(input_layer, mode, params):
    """Build the bidirectional-GRU classifier and return its logits.

    The input is masked where it equals 0, passed through one bidirectional
    GRU layer per entry of `RNN_OUTPUT_UNITS`, then through two ReLU dense
    layers (64 and 16 units) and a linear 5-unit output layer.

    Args:
        input_layer: Input tensor; the masking layer declares an input shape
            of (187, 1) per example.
        mode: A `tf.estimator.ModeKeys` value; dropout is only enabled for
            `TRAIN`.
        params: Unused; present so all model functions share one signature.

    Returns:
        The unactivated output of the final 5-unit dense layer.
    """
    # Keras layers called inside a custom Estimator model_fn are not told the
    # mode on their own, so pass it explicitly; otherwise the GRU dropout
    # falls back to the default (inference) behaviour during training.
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    current_layer = tf.keras.layers.Masking(mask_value=0., input_shape=(187, 1), name='Masked')(input_layer)

    for i, size in enumerate(RNN_OUTPUT_UNITS):
        # Every layer but the last must emit the full sequence for the next layer.
        not_last = i + 1 < len(RNN_OUTPUT_UNITS)
        gru = tf.keras.layers.GRU(size, return_sequences=not_last, dropout=0.2, name='GRU' + str(i + 1))
        bidirectional = keras.Bidirectional(gru, name='BiRNN' + str(i + 1))
        current_layer = bidirectional(current_layer, training=is_training)

    current_layer = keras.Dense(64, name='Dense1', activation='relu')(current_layer)
    current_layer = keras.Dense(16, name='Dense2', activation='relu')(current_layer)

    # The output stays linear: these values are used as logits for softmax
    # cross-entropy, and a ReLU here would clamp them to be non-negative.
    logits = keras.Dense(5, name='Output')(current_layer)

    return logits

### Estimator setup

In [None]:
# Initial learning rate
INITIAL_LEARNING_RATE = 0.001

# Learning rate decay per LR_DECAY_STEPS steps (1.0 = no decay)
LR_DECAY_RATE = 0.5

# Number of steps for LR to decay by LR_DECAY_RATE
LR_DECAY_STEPS = 4000

# Threshold for gradient clipping
# NOTE: no active code in this notebook section reads this value, so gradients are currently not clipped.
GRADIENT_NORM_THRESH = 10.0

# Select model to train
# MODEL_FN is the function classifier_fn calls to produce logits; MODEL_DIR is passed to the
# Estimator as its model_dir. Keep the two in step when switching models.
MODEL_DIR = RNN_MODEL_DIR
MODEL_FN = birnn_model

def classifier_fn(features, labels, mode, params):
    """Estimator `model_fn` wrapping the network selected by `MODEL_FN`.

    Args:
        features: Feature dict produced by the input function.
        labels: Class labels for the batch; not used in PREDICT mode.
        mode: A `tf.estimator.ModeKeys` value.
        params: Dict that must provide the 'feature_columns' entry.

    Returns:
        A `tf.estimator.EstimatorSpec` for the requested mode:
        predictions ('class_ids', 'probabilities', 'logits') for PREDICT,
        loss and the accuracy metric for EVAL, loss and train op for TRAIN.
    """
    input_layer = tf.feature_column.input_layer(features, params['feature_columns'])
    # Append a trailing axis of size 1 before handing the batch to the model.
    input_layer = tf.expand_dims(input_layer, -1)

    logits = MODEL_FN(input_layer, mode, params)

    # For prediction, exit here
    predicted_classes = tf.argmax(logits, 1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': predicted_classes[:, tf.newaxis],
            'probabilities': tf.nn.softmax(logits),
            'logits': logits,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # For training and evaluation, compute the loss (softmax cross-entropy)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # tf.metrics.accuracy returns a (value, update_op) pair; the update op is
    # what gets written to the summary.
    accuracy = tf.metrics.accuracy(labels=labels, predictions=predicted_classes, name='acc_op')
    metrics = {'accuracy': accuracy}
    tf.summary.scalar('accuracy', accuracy[1])

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

    # For training: Adam with an exponentially decaying learning rate.
    # NOTE: gradient clipping is not applied here.
    global_step = tf.train.get_global_step()
    learning_rate = tf.train.exponential_decay(INITIAL_LEARNING_RATE, global_step, LR_DECAY_STEPS, LR_DECAY_RATE)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, global_step=global_step)
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

### Train model

In [None]:
# A single numeric feature named 'time_series' with 187 values per example.
feature_columns = [tf.feature_column.numeric_column('time_series', [187])]

# NOTE: despite its name, `regressor` is built on classifier_fn and is used as a classifier.
regressor = tf.estimator.Estimator(
    model_fn=classifier_fn,
    model_dir=MODEL_DIR,
    params={
        'feature_columns': feature_columns,
    })

# Train for 2000 steps on the batches from train_input_fn.
regressor.train(train_input_fn, steps=2000)

# Evaluate on the test set; `info` holds whatever evaluate() returns.
info = regressor.evaluate(input_fn=eval_input_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './Models/RNN', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0xb3d3ca748>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was final

### Compute metrics

In [None]:
import sklearn.metrics as skm

dataset_fn = eval_input_fn

# Drain the prediction generator, keeping only the predicted class id of each example.
predictions = regressor.predict(input_fn=dataset_fn)
y_predicted = [prediction['class_ids'] for prediction in predictions]
del predictions

# Reshape the labels to one column so each row holds a single label, like each 'class_ids' entry.
y_test = np.reshape(y_test, (len(y_test), 1))

# Classification report
report = skm.classification_report(y_test, y_predicted)
print(report)

# Confusion matrix
conf = skm.confusion_matrix(y_test, y_predicted)
print(conf)