# ECG Dataset

---

1. Import Main Packages
2. Loading Data
3. First View on the Data
4. EDA
5. Data Processing
6. Base Model
7. Feature Selection
8. Hyperparameter Tuning
9. Test Final Model on the Test Data
10. Discussion of further improvements

---

## 1. Import Main Packages

In [1]:
# Colab only: mount Google Drive under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Shows plots in jupyter notebook
%matplotlib inline

## 2. Loading Data

In [None]:
# Load the training split. The file has no header row, so columns are numbered.
# A raw string keeps the Windows backslashes literal instead of relying on
# invalid escape sequences such as "\P" or "\m" being passed through.
df = pd.read_csv(r"D:\Programming\Python\Kaggle\ECG\mitbih_train.csv\mitbih_train.csv", header=None)

In [None]:
# Preview the first rows of the training frame.
df.head()

## First View on the Data

In [None]:
# Column dtypes, non-null counts and memory footprint.
df.info()

In [None]:
# Summary statistics for every numeric column.
df.describe()

Check for null values.

In [None]:
# Total number of missing cells across the whole frame.
df.isnull().values.sum()

Check the number of cases for each class.

In [None]:
# Column 187 holds the class id; count the rows per class.
df[187].value_counts()

## EDA

In [None]:
# Class id -> display name / colour. Looking these up from the value_counts
# index keeps every slice correctly labelled even if the frequency order of
# the classes changes, instead of hard-coding the labels in frequency order.
class_names = {0: "Normal", 1: "Supraventricular", 2: "Ventricular", 3: "Fusion", 4: "Unknown"}
class_colors = {0: "blue", 1: "red", 2: "green", 3: "yellow", 4: "purple"}
class_counts = df[187].value_counts()
class_ids = [int(class_id) for class_id in class_counts.index]

plt.figure(figsize=(7, 7))
plt.pie(
    class_counts,
    labels=[class_names[class_id] for class_id in class_ids],
    colors=[class_colors[class_id] for class_id in class_ids],
    autopct='%3.1f%%',
)

In [None]:
# Plot colour and display name per class; position i belongs to class id i.
color = ['blue','red','green','yellow','purple']
label = ["Normal","Supraventricular","Ventricular",
         "Fusion","Unknown"]

Showing one case for each class in comparison.

In [None]:
# Six stacked panels: one per class plus a final panel overlaying all classes.
fig, ax = plt.subplots(6, figsize=(10, 10))

for i in range(5):
    # First row of class i without the trailing label column; filtered once
    # and reused for both panels instead of repeating the boolean mask.
    sample = df[df[187] == i].iloc[0][:-1]
    for panel in (ax[i], ax[5]):
        sns.lineplot(data=sample, label=label[i], color=color[i], ax=panel)


The last 50 features are always 0 and could possibly be dropped.

Here, 100 cases of each class are plotted together to show the variance within each class.

In [None]:
# One panel per class, sharing both axes so the classes are directly comparable.
fig, ax = plt.subplots(5, sharex=True, sharey=True, figsize=(10, 10))
for j in range(5):
    # Filter the class once (instead of once per plotted row) and drop the
    # label column. Slicing also copes with classes that have < 100 rows.
    class_rows = df[df[187] == j].iloc[:100, :-1]
    for i in range(len(class_rows)):
        # Low alpha so overlapping beats show up as denser regions.
        ax[j].plot(class_rows.iloc[i], color=color[j], alpha=0.1)
    ax[j].set_title(label[j])
  


It shows that at least for the first 3 classes, the last 50 features do hold value.

## Data Processing

In [None]:
# Row/column count before removing duplicates.
df.shape

Drop duplicates.

In [None]:
# Keep one representative of every group of identical rows. keep=False would
# discard all copies, throwing away valid samples along with the redundancy.
df.drop_duplicates(keep="first", inplace=True)
# Row/column count after de-duplication.
df.shape

Divide the training data into training and validation sets that have the same distribution of classes.

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit
# A single stratified 80/20 shuffle split on the class column (187), so the
# training and validation parts keep the class proportions of the full frame.
split = StratifiedShuffleSplit(test_size=0.2, n_splits=1, random_state=42)
train_index, valid_index = next(split.split(df, df[187]))
X, X_valid = df.iloc[train_index], df.iloc[valid_index]

In [None]:
# Share of each class in the training part.
X[187].value_counts() / len(X)

In [None]:
# Share of each class in the validation part; should match the training part.
X_valid[187].value_counts() / len(X_valid)

Division of features and labels.

In [None]:
# pop() removes the label column (187) in place and returns it, so each frame
# is left holding features only.
y = X.pop(187)
y_valid = X_valid.pop(187)


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import f_classif

Scaling of the data.

In [None]:
# Scaler that is fitted on the training features below and reused for the validation features.
scaler=StandardScaler()

In [None]:
# Learn the scaling statistics from the training features only, then apply the
# same fitted scaler to the validation features. Both results are wrapped back
# into DataFrames (with fresh default row/column labels).
X = pd.DataFrame(scaler.fit_transform(X))
X_valid = pd.DataFrame(scaler.transform(X_valid))


## Base Model

The base model will give a first impression on the accuracy that can be expected regarding this dataset.

In [None]:
import tensorflow 
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras import optimizers
import math


# Baseline network: two 50-unit SELU hidden layers feeding a 5-way softmax.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=X.shape[1:]),
    keras.layers.Dense(50, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(50, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(5, activation="softmax"),
])

# SGD with momentum. The sparse loss is used because the labels are class ids
# in a single column rather than one-hot vectors.
optimizer = keras.optimizers.SGD(momentum=0.9, learning_rate=1e-2)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


In [None]:

import keras.callbacks
# Stop after 10 epochs without improvement of the monitored metric and roll
# the model back to its best weights.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,
                                                  restore_best_weights=True)

# Keras documents `callbacks` as a list of Callback instances, so the single
# callback is wrapped in a list rather than passed bare.
history = model.fit(X, y, epochs=100,
                    validation_data=(X_valid, y_valid),
                    callbacks=[early_stopping_cb], batch_size=32)

In [None]:
# Per-class scores from the final softmax layer for every validation row.
predictions = model.predict(X_valid)

In [None]:
# Collapse the per-class scores to the id of the highest-scoring class.
predictions=np.argmax(predictions, axis=1)
predictions

In [None]:
from sklearn.metrics import classification_report
# Per-class report comparing the validation labels with the predicted class ids.
print(classification_report(y_valid, predictions))

In [None]:
# Loss and accuracy (the compiled metric) on the validation set.
model.evaluate(X_valid, y_valid)

The accuracy is already quite good at 97.57%, but it can be improved with feature selection and parameter tuning.

## Feature Selection

For feature selection I use a PCA that keeps 97% of the variance in the dataset.

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.decomposition import PCA

class PCA_97_Selector(BaseEstimator, TransformerMixin):
    """Pipeline step that projects data with the module-level fitted PCA.

    The step learns nothing itself: ``fit`` is a no-op and ``transform``
    delegates to the global ``pca_fit``, which therefore has to exist before
    ``transform`` is called.
    """

    def fit(self, X, y=None):
        """Return the selector unchanged; there is nothing to fit here."""
        return self

    def transform(self, X):
        """Return the ``pca_fit`` projection of ``X`` as a DataFrame."""
        return pd.DataFrame(pca_fit.transform(X))

In [None]:
# Fit the PCA on the scaled training features; n_components=0.97 is meant to
# keep 97% of the variance (see the sklearn PCA docs for the float form).
pca = PCA(n_components=0.97)
# pca_fit is the global that PCA_97_Selector.transform reads.
pca_fit = pca.fit(X)

In [None]:
# Two-step preprocessing: standard scaling followed by the PCA projection step.
pipeline =  Pipeline([
        ("scaler", StandardScaler()),
        ("PCA", PCA_97_Selector())
])

In [None]:
# Fit the pipeline on the training features only.
X = pd.DataFrame(pipeline.fit_transform(X))
# Apply the already-fitted steps to the validation features. Calling
# fit_transform here would refit the scaler on validation data, so the two
# sets would no longer share the same transformation.
X_valid = pd.DataFrame(pipeline.transform(X_valid))


In [None]:
# Number of rows and of remaining features after the PCA projection.
X.shape

The number of features was reduced from 187 to only 52.
Let's try these new features! Hopefully our accuracy did not decrease.

In [None]:
# Two 50-unit SELU hidden layers and a 5-way softmax; the input width follows
# the current (PCA-reduced) feature matrix.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=X.shape[1:]),
    keras.layers.Dense(50, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(50, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(5, activation="softmax"),
])

# SGD with momentum and a sparse loss for single-column class-id labels.
optimizer = keras.optimizers.SGD(momentum=0.9, learning_rate=1e-2)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train on the reduced features. `callbacks` is documented as a list of
# Callback instances, so the single callback is wrapped in a list.
history = model.fit(X, y, epochs=100,
                    validation_data=(X_valid, y_valid),
                    callbacks=[early_stopping_cb], batch_size=32)

In [None]:
# Per-class scores for the validation rows, collapsed to the highest-scoring class id.
predictions = model.predict(X_valid)
predictions=np.argmax(predictions, axis=1)


In [None]:
from sklearn.metrics import classification_report
# Per-class report comparing the validation labels with the predicted class ids.
print(classification_report(y_valid, predictions))

In [None]:
# Loss and accuracy (the compiled metric) on the validation set.
model.evaluate(X_valid, y_valid)

The accuracy did improve, but with parameter tuning it could improve even more.

## Hyperparameter Tuning

For the tuning I use the predefined validation set from before instead of cross-validation because of limited processing power.

In [None]:
from sklearn.model_selection import  PredefinedSplit
# One marker per row: -1 for every training row, 0 for every validation row
# (presumably PredefinedSplit's "never in a test fold" / "test fold 0"
# convention — confirm against the sklearn docs).
split_index = [-1]*len(X)+[0]*len(X_valid)
# Stack features and labels in the same order as the markers above.
X_split = pd.concat([X, X_valid], axis=0)
Y_split = pd.concat([y, y_valid], axis=0)
pds = PredefinedSplit(test_fold=split_index)
# Sanity check: how many rows carry each marker.
print(pd.DataFrame(split_index).value_counts())

First I find a good architecture (number of layers and neurons) using a grid search.

In [None]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
# Reset Keras' global state before the search builds its models.
keras.backend.clear_session()


def create_model(neurons, layers):
    """Build and compile a SELU MLP for the architecture search.

    Args:
        neurons: Number of units in every hidden Dense layer.
        layers: Number of hidden Dense layers.

    Returns:
        A compiled Sequential model ending in a 5-way softmax. The input
        shape is taken from the global feature matrix ``X``.
    """
    stack = [keras.layers.Flatten(input_shape=X.shape[1:])]
    for _ in range(layers):
        stack.append(
            keras.layers.Dense(
                neurons,
                activation="selu",
                kernel_initializer="lecun_normal",
            )
        )
    stack.append(keras.layers.Dense(5, activation="softmax"))

    net = Sequential(stack)
    sgd = keras.optimizers.SGD(momentum=0.9, learning_rate=1e-2)
    net.compile(
        optimizer=sgd,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net
 
# Wrap the Keras builder so scikit-learn can fit it per parameter combination.
model = KerasClassifier(model=create_model, epochs=20, verbose=0)
neurons = [50, 75, 100, 150]
layers = [2, 3, 4]

# The "model__" prefix routes each value to the matching create_model argument.
param_grid = dict(model__neurons=neurons, model__layers=layers)
# Score every candidate on the predefined train/validation split (pds) that
# was built for the tuning, instead of running a fresh 3-fold cross-validation.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=pds)
# NOTE(review): early_stopping_cb is created without `monitor`; if that
# defaults to a validation metric it has nothing to watch here, because no
# validation data is passed to fit — confirm.
grid_result = grid.fit(X_split, Y_split, callbacks = early_stopping_cb, batch_size = 32)
print(f"Best: {grid_result.best_score_:f} using {grid_result.best_params_}")
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
# One line per candidate: mean score, score spread and the parameters used.
for mean, stdev, param in zip(means, stds, params):
    print(f"{mean:f} ({stdev:f}) with: {param!r}")

Now I find the right learning rate schedule using randomized search, because there are more variables here and random search is more efficient in such cases.

In [None]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.optimizers import schedules
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from keras.optimizers import schedules
# Reset Keras' global state before the search builds its models.
keras.backend.clear_session()


def create_model(learning_rate, momentum, decay, nesterov):
    """Build and compile the 3x150 SELU MLP for the learning-rate search.

    Args:
        learning_rate: Initial rate handed to the ExponentialDecay schedule.
        momentum: Momentum of the SGD optimizer.
        decay: Decay rate handed to the ExponentialDecay schedule.
        nesterov: Whether SGD uses Nesterov momentum.

    Returns:
        A compiled Sequential model ending in a 5-way softmax. The input
        shape and the decay step count come from the global ``X``.
    """
    stack = [keras.layers.Flatten(input_shape=X.shape[1:])]
    for _ in range(3):
        stack.append(
            keras.layers.Dense(
                150,
                activation="selu",
                kernel_initializer="lecun_normal",
            )
        )
    stack.append(keras.layers.Dense(5, activation="softmax"))
    net = Sequential(stack)

    # Number of 32-row batches in 20 passes over X (rounded down).
    decay_steps = len(X) * 20 // 32
    schedule = keras.optimizers.schedules.ExponentialDecay(
        learning_rate, decay_steps, decay
    )
    sgd = keras.optimizers.SGD(
        learning_rate=schedule, momentum=momentum, nesterov=nesterov
    )
    net.compile(
        optimizer=sgd,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net
 
# Wrap the Keras builder so scikit-learn can fit it per parameter combination.
model = KerasClassifier(model=create_model, epochs=25, verbose=0)
nesterov = [True, False]
learning_rate = [1e-2, 5e-3, 1e-3]
decay = [0.1, 5e-2, 1e-2]
momentum = [0.8, 0.9, 0.95]

# The "model__" prefix routes each value to the matching create_model argument.
param_grid = dict(model__learning_rate=learning_rate, model__momentum=momentum, model__decay=decay, model__nesterov=nesterov)
# Try 15 sampled combinations and score each on the predefined
# train/validation split (pds) instead of a fresh 3-fold cross-validation.
grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_jobs=1, cv=pds, n_iter=15)
# NOTE(review): early_stopping_cb is created without `monitor`; if that
# defaults to a validation metric it has nothing to watch here, because no
# validation data is passed to fit — confirm.
grid_result = grid.fit(X_split, Y_split, callbacks = early_stopping_cb, batch_size =32)
print(f"Best: {grid_result.best_score_:f} using {grid_result.best_params_}")
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
# One line per candidate: mean score, score spread and the parameters used.
for mean, stdev, param in zip(means, stds, params):
    print(f"{mean:f} ({stdev:f}) with: {param!r}")

The loss function, batch size and activation function will stay the same for now, but let's try some alpha dropout.

In [None]:

# Three 150-unit SELU hidden layers, each followed by AlphaDropout, then a
# 5-way softmax.
alpha_layers = [keras.layers.Flatten(input_shape=X.shape[1:])]
for _ in range(3):
    alpha_layers.append(
        keras.layers.Dense(150, activation="selu", kernel_initializer="lecun_normal")
    )
    alpha_layers.append(keras.layers.AlphaDropout(rate=0.1))
alpha_layers.append(keras.layers.Dense(5, activation="softmax"))
model_alpha = keras.models.Sequential(alpha_layers)

# Number of 32-row batches in 20 passes over X (rounded down).
s = len(X) * 20 // 32
learning_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=s, decay_rate=0.1
)
optimizer = keras.optimizers.SGD(
    learning_rate=learning_schedule, nesterov=False, momentum=0.95
)

model_alpha.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


In [None]:
# Train the AlphaDropout variant. `callbacks` is documented as a list of
# Callback instances, so the single callback is wrapped in a list.
history = model_alpha.fit(X, y, epochs=100,
                          validation_data=(X_valid, y_valid),
                          callbacks=[early_stopping_cb], batch_size=32)

In [None]:
# Predicted class ids for the validation rows, a per-class report, and the
# validation loss/accuracy of the AlphaDropout model.
predictions = model_alpha.predict(X_valid)
predictions=np.argmax(predictions, axis=1)
print(classification_report(y_valid, predictions))
model_alpha.evaluate(X_valid, y_valid)

In [None]:

# BatchNormalization on the input and after each of the three 150-unit ELU
# hidden layers, then a 5-way softmax.
batch_layers = [
    keras.layers.Flatten(input_shape=X.shape[1:]),
    keras.layers.BatchNormalization(),
]
for _ in range(3):
    batch_layers.append(
        keras.layers.Dense(150, activation="elu", kernel_initializer="he_normal")
    )
    batch_layers.append(keras.layers.BatchNormalization())
batch_layers.append(keras.layers.Dense(5, activation="softmax"))
model_batch = keras.models.Sequential(batch_layers)

# Number of 32-row batches in 20 passes over X (rounded down).
s = len(X) * 20 // 32
learning_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=s, decay_rate=0.1
)
optimizer = keras.optimizers.SGD(
    learning_rate=learning_schedule, nesterov=False, momentum=0.95
)

model_batch.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


In [None]:
# Train the BatchNormalization variant. `callbacks` is documented as a list
# of Callback instances, so the single callback is wrapped in a list.
history = model_batch.fit(X, y, epochs=100,
                          validation_data=(X_valid, y_valid),
                          callbacks=[early_stopping_cb], batch_size=32)

In [None]:
# Predicted class ids for the validation rows, a per-class report, and the
# validation loss/accuracy of the BatchNormalization model.
predictions = model_batch.predict(X_valid)
predictions=np.argmax(predictions, axis=1)
print(classification_report(y_valid, predictions))
model_batch.evaluate(X_valid, y_valid)

In [None]:
# Start from a clean Keras state, then build the final network: three
# 150-unit SELU hidden layers and a 5-way softmax.
keras.backend.clear_session()
final_layers = [keras.layers.Flatten(input_shape=X.shape[1:])]
for _ in range(3):
    final_layers.append(
        keras.layers.Dense(150, activation="selu", kernel_initializer="lecun_normal")
    )
final_layers.append(keras.layers.Dense(5, activation="softmax"))
model = keras.models.Sequential(final_layers)

# Number of 32-row batches in 20 passes over X (rounded down).
s = len(X) * 20 // 32
learning_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=s, decay_rate=0.1
)
optimizer = keras.optimizers.SGD(
    learning_rate=learning_schedule, nesterov=False, momentum=0.95
)

model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train the final model. `callbacks` is documented as a list of Callback
# instances, so the single callback is wrapped in a list.
history = model.fit(X, y, epochs=30, batch_size=32,
                    callbacks=[early_stopping_cb],
                    validation_data=(X_valid, y_valid))

In [None]:
# Load the held-out test split (no header row). A raw string keeps the
# Windows backslashes literal instead of relying on invalid escape sequences.
df_test = pd.read_csv(r"D:\Programming\Python\Kaggle\ECG\mitbih_test.csv\mitbih_test.csv", header=None)

In [None]:
# Preview the first rows of the test frame.
df_test.head()

In [None]:
# Column 187 is used as the label; every other column is a feature.
y_test = df_test[187]

X_test = df_test.drop(columns=[187])
X_test

In [None]:
# Apply the transformers that were fitted on the training data. Refitting the
# pipeline on the test set would scale it with the test set's own statistics,
# which differ from what the model was trained on. The raw test features also
# need the same initial `scaler` step the training features went through
# before the pipeline was fitted.
X_test = pipeline.transform(scaler.transform(X_test))

In [None]:
# Inspect the transformed test features.
X_test

In [None]:
# Predicted class ids for the test rows, a per-class report, and the test
# loss/accuracy of the final model.
predictions = model.predict(X_test)
predictions=np.argmax(predictions, axis=1)
print(classification_report(y_test, predictions))
model.evaluate(X_test, y_test)

In [None]:
# Show the raw per-class scores (before argmax) for the test rows.
predictions = model.predict(X_test)
predictions