In [82]:
import csv
import os

import biosppy.signals.ecg as ecg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

We define a function which takes the signals and extracts relevant features:

In [None]:

def get_features(sample_data):  
    signal_data = []
    for i in range(len(sample_data)):   
            signal = sample_data[i]

            #initialize variables 
            mean_dist, median_dist, var_dist, max_dist, min_dist = -1000, -1000, -1000, -1000, -1000
            mean_r_amp, median_r_amp, var_r_amp, min_r_amp, max_r_amp, = -1000, -1000, -1000, -1000, -1000
            mean_max, median_max, var_max, min_max, max_max = -1000, -1000, -1000, -1000, -1000
            mean_min, median_min, var_min, min_min, max_min = -1000, -1000, -1000, -1000, -1000
            min_var, max_var, mean_var, median_var, var_var = -1000, -1000, -1000, -1000, -1000


            r_peaks = ecg.engzee_segmenter(signal, sampling_rate=200)['rpeaks']
            num_peaks = len(r_peaks)
            min_sig = np.min(signal)
            max_sig = np.max(signal)
            var_sig = np.std(signal)
            mean_sig = np.mean(signal)
            median_sig = np.median(signal)

        
            if(num_peaks>1):
                #info  on distance between r peaks
                dist = np.diff(r_peaks)
                mean_dist = np.mean(dist)
                median_dist = np.median(dist)
                var_dist = np.std(dist)
                max_dist = np.max(dist)
                min_dist = np.min(dist)

                #info about r peak amplitude
                signal_at_r_peaks = signal[r_peaks]
                mean_r_amp = np.mean(signal_at_r_peaks)
                median_r_amp = np.median(signal_at_r_peaks)
                var_r_amp = np.std(signal_at_r_peaks)
                min_r_amp = np.min(signal_at_r_peaks)
                max_r_amp = np.max(signal_at_r_peaks)

            #seperate signal into heartbeats
            beats = ecg.extract_heartbeats(signal, r_peaks, 200)['templates']
            if(len(beats>2)):
                # info about the max of each beat, over all beats of the signal (should be equal ro r peaks)
                max_beats = np.max(beats, axis=1)
                mean_max = np.mean(max_beats)
                median_max = np.median(max_beats)
                var_max = np.std(max_beats)
                min_max = np.min(max_beats)
                max_max = np.min(max_beats)

                # info about the minimum of each beat, over all beats of the signal
                min_beats = np.min(beats, axis=1)
                mean_min = np.mean(min_beats)
                median_min = np.median(min_beats)
                var_min = np.std(min_beats)
                min_min = np.min(min_beats)
                max_min = np.min(min_beats)

            #info bout the variance between beats
                var_beats = np.std(beats, axis=0)
                min_var = np.min(var_beats)
                max_var = np.max(var_beats)
                mean_var = np.mean(var_beats)
                median_var = np.median(var_beats)
                var_var = np.std(var_beats)

            temp =  [
                num_peaks, 
                min_sig, max_sig, var_sig, mean_sig, median_sig,
                mean_dist, median_dist, var_dist, max_dist, min_dist,
                mean_r_amp, median_r_amp, var_r_amp, min_r_amp, max_r_amp,
                mean_max, median_max, var_max, min_max, max_max,
                mean_min, median_min, var_min, min_min, max_min,
                min_var, max_var, mean_var, median_var, var_var
            ]
            signal_data.append(temp)
            if i%200 == 0: print(i)
            if i%1000 == 0: print(temp)
    return signal_data



We Read in the training and test data. We crop the Signals NaN values and seperate the labels for the training set

In [84]:
#load data from csv
data_train = pd.read_csv('data/train.csv', index_col='id')
data_test = pd.read_csv('data/test.csv', index_col='id')
# labels array and NaN cropped data
labels_array = data_train['y'].to_numpy()

sample_data = []
for i in range(len(data_train)):
    sample_data.append( data_train.loc[i].dropna().to_numpy(dtype='float32'))
    if i%1000 == 0: print(len(sample_data[i]))

test_data = []
for i in range(len(data_test)):
    test_data.append( data_train.loc[i].dropna().to_numpy(dtype='float32'))
    if i%1000 == 0: print(len(test_data[i]))

16323
8781
10004
8654
8207
16545
16323
8781
10004
8654


In [None]:
X = get_features(sample_data)
X_test = get_features(test_data)

X_train, X_val, y_train, y_val = train_test_split(X, labels_array, test_size=0.1, random_state=42)


In [144]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import AUC
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler

# Convert labels to categorical one-hot encoding
y_train_categorical = to_categorical(y_train)
y_val_categorical = to_categorical(y_val)

X_train = np.array(X_train)
X_val = np.array(X_val)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

# Define the model
model = Sequential()
model.add(Dense(64, input_dim=len(X_train[0]), activation='relu'))
model.add(Dense(128, input_dim=len(X_train[0]), activation='sigmoid'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='sigmoid'))
model.add(Dense(16, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(y_train_categorical.shape[1], activation='softmax'))

# Compile the model
# Adjust the learning rate
learning_rate = 0.0005
optimizer = Adam(learning_rate=learning_rate)

model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy', AUC(name='f1_score')])

# Train the model
model.fit(X_train, y_train_categorical, epochs=100, batch_size=64)

# Evaluate the model
y_pred_train = model.predict(X_train)
y_pred_train_classes = np.argmax(y_pred_train, axis=1)
f1_train = f1_score(y_train, y_pred_train_classes, average='micro')

y_pred_val = model.predict(X_val)
y_pred_val_classes = np.argmax(y_pred_val, axis=1)
f1_val = f1_score(y_val, y_pred_val_classes, average='micro')
print('f1 train score: ', f1_train)
print('f1 val score: ', f1_val)
print(f'Validation Loss: {val_loss}')
print(f'Validation Accuracy: {val_accuracy}')

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.1644 - f1_score: 0.3168 - loss: 1.5510
Epoch 2/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.3512 - f1_score: 0.6684 - loss: 1.3682
Epoch 3/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5830 - f1_score: 0.8044 - loss: 1.3395
Epoch 4/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5899 - f1_score: 0.8051 - loss: 1.3193
Epoch 5/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5992 - f1_score: 0.8112 - loss: 1.2982
Epoch 6/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5934 - f1_score: 0.8115 - loss: 1.2810
Epoch 7/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5976 - f1_score: 0.8163 - loss: 1.2619
Epoch 8/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━

In [139]:
labels_categorical = to_categorical(labels_array)
X = np.array(X)

#model.fit(X, labels_categorical, epochs=50, batch_size=64)
y_pred_test = model.predict(np.array(X_test))
y_pred_test_classes = np.argmax(y_pred_test, axis=1)

table = pd.DataFrame({'id': np.arange(0, y_pred_test_classes.shape[0]), 'y': y_pred_test_classes.flatten()})
table.to_csv("./data/y_test_pred.csv", index=False)

[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 811us/step
