# Keras RNN Sequence Classification 

This data set is made available by MIT. It contains data about 9,026 heartbeat measurements. Each row represents a single measurement (captured on a timeline). There are a total of 187 data points (columns) for each measurement. Multiclass classification task: predict whether the measurement represents a normal heartbeat or other anomalies. 

## Goal

Use the data set **hearbeat.csv** to predict the column called **Target**. The input variables are columns labeled as **T1 to T187**. 

# Read and Prepare the Data

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd

data = pd.read_csv("heartbeat.csv")

data.shape

(9026, 188)

In [2]:
data.head()

Unnamed: 0,T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,...,T179,T180,T181,T182,T183,T184,T185,T186,T187,Target
0,0.987,0.892,0.461,0.113,0.149,0.19,0.165,0.162,0.147,0.138,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.918,0.621,0.133,0.105,0.125,0.117,0.0898,0.0703,0.0781,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.751,0.143,0.104,0.0961,0.0519,0.0442,0.0416,0.0364,0.0857,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.74,0.235,0.0464,0.0722,0.0567,0.0103,0.0155,0.0284,0.0155,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.626,0.276,0.325,0.431,0.39,0.394,0.358,0.374,0.362,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Separate Target

In [3]:
y = data['Target']
x = data.drop('Target', axis=1)

### Split into Train/Test

In [4]:
from sklearn.model_selection import train_test_split
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3)

### Determine Baseline Accuracy

In [5]:
# Find the majority class:
train_y.value_counts()

0.0    3655
4.0    1140
2.0    1021
1.0     391
3.0     111
Name: Target, dtype: int64

In [6]:
#Find the percentage of the majority class:
train_y.value_counts()/len(train_y)

0.0    0.578506
4.0    0.180437
2.0    0.161602
1.0    0.061887
3.0    0.017569
Name: Target, dtype: float64

**The baseline accuracy is 57.99% predicting 0 (a Normal heartbeat)**

### Data Transformation

In [7]:
#Convert target to array
train_y = np.array(train_y)
test_y = np.array(test_y)

#Convert target to integers
train_y = train_y.astype(np.int32)
test_y = test_y.astype(np.int32)

train_y[0:10]

array([0, 2, 4, 0, 1, 0, 2, 0, 2, 2])

In [8]:
#Convert input to array
train_x= np.array(train_x)
test_x= np.array(test_x)

#Convert input to float
train_x = train_x.astype(np.float32)
test_x = test_x.astype(np.float32)

train_x[0:10]

array([[1.    , 0.96  , 0.568 , ..., 0.    , 0.    , 0.    ],
       [0.    , 0.0216, 0.0711, ..., 0.    , 0.    , 0.    ],
       [0.693 , 0.592 , 0.483 , ..., 0.    , 0.    , 0.    ],
       ...,
       [1.    , 0.607 , 0.217 , ..., 0.    , 0.    , 0.    ],
       [0.823 , 0.799 , 0.487 , ..., 0.    , 0.    , 0.    ],
       [0.881 , 0.937 , 0.877 , ..., 0.    , 0.    , 0.    ]],
      dtype=float32)

In [9]:
# Add a third dimension to the input arrays
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))

In [10]:
train_x.shape, train_y.shape

((6318, 187, 1), (6318,))

# LSTM Model

Note:

To account for the zero-padding this goes at the input layer:<br>
`tf.keras.layers.Masking(mask_value=0, input_shape=[n_steps, n_inputs])`


In [11]:
np.random.seed(42)
tf.random.set_seed(42)

#define steps and inputs
n_steps = 187
n_inputs = 1

#define model - 3 LSTM layers and a Dense layer
model = keras.models.Sequential([
    
    tf.keras.layers.Masking(mask_value=0, input_shape=[n_steps, n_inputs]),
    keras.layers.LSTM(10, return_sequences=True),
    keras.layers.LSTM(10, return_sequences=True),
    keras.layers.LSTM(10),
    keras.layers.Dense(5, activation='softmax')
])

In [12]:
from tensorflow.keras.callbacks import EarlyStopping

#define early-stopping parameters
earlystop = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')

callback = [earlystop]

In [13]:
#define optimizer and learning rate
optimizer = keras.optimizers.Nadam(lr=0.001)

#compile the model
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

#fit the model
history = model.fit(train_x, train_y, epochs=20,
                   validation_data = (test_x, test_y), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
# evaluate the model
scores = model.evaluate(test_x, test_y, verbose=0)
scores

# first is loss, second is accuracy

[0.6120646595954895, 0.8079763650894165]

In [15]:
# extract the accuracy from model.evaluate
print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

loss: 0.61
accuracy: 80.80%


# GRU Model

In [16]:
#define the model - 4 GRU layers
model = keras.models.Sequential([
    
    tf.keras.layers.Masking(mask_value=0, input_shape=[n_steps, n_inputs]),
    keras.layers.GRU(20, return_sequences=True),
    keras.layers.GRU(20, return_sequences=True),
    keras.layers.GRU(20, return_sequences=True),
    keras.layers.GRU(5, activation='softmax')
])

In [17]:
#define the optimizer and learning rate
optimizer = keras.optimizers.Nadam(lr=0.001)

#compile the  model
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])

#fit the model
history = model.fit(train_x, train_y, epochs=20,
                   validation_data = (test_x, test_y), callbacks=callback)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [18]:
# evaluate the model
scores = model.evaluate(test_x, test_y, verbose=0)
scores

# first is loss, second is accuracy

[0.4070214629173279, 0.8655834794044495]

In [19]:
# extract the accuracy from model.evaluate
print("%s: %.2f" % (model.metrics_names[0], scores[0]))
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

loss: 0.41
accuracy: 86.56%
