In [1]:
#%%
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from keras.models import Sequential
from keras import layers
from datetime import datetime
from pathlib import Path
from scipy import signal
from os import walk
from keras.preprocessing import sequence
from scipy.io import wavfile
import glob
# from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf

from multiprocessing.dummy import Pool as ThreadPool
import keras
pool=ThreadPool(12)


In [2]:
log_dir = "/tf/logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
#%tensorboard

In [3]:
# Load wavefile
def readwav(file:str):
    filepath = Path(file).absolute()
    samplerate, data = wavfile.read((filepath))
    # print(f"samplerate = {samplerate}")
    return data,samplerate

In [4]:
def getTiming(data:np.ndarray,samplerate:int):
    length = data.shape[0] / samplerate
    return np.arange(0,length,1/samplerate)



In [5]:
def filterSignal(data:np.ndarray,t:np.ndarray,plot:bool=True,length=None,filter=True):
    ## normalize input
    sig = data/np.amax(data)
    norm_heart = data/np.amax(data)
    sos = signal.butter(1, [.2,195], 'bp', fs=1000, output='sos')
    filtered_heart = signal.sosfilt(sos, sig)
    ## Removing noise
    noise_heart = signal.signaltools.wiener(filtered_heart,300)
    noise_heart = filtered_heart
    if(not filter):
        noise_heart = norm_heart
    if length:
        resampled,resampledt = signal.resample(noise_heart,33075,t=t)
    if plot:
        if length:
            # _, (ax1, ax2,ax3,ax4) = plt.subplots(4, 1, sharex=True)
            # ax4.plot(resampledt, resampled)
            # ax4.set_title('After Resampling')
            # ax4.set_xlabel('Time [seconds]')
            _, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
            ax1.plot(t, sig)
            ax1.set_title('Original Heart Rate Signal')
            ax2.plot(resampledt, resampled)
            ax2.set_title('After Resampling')
            ax2.set_xlabel('Time [seconds]')
        
        #     _, (ax1, ax2,ax3) = plt.subplots(3, 1, sharex=True)
        # ax1.plot(t, sig)
        # ax1.set_title('Original Heart Rate Signal')
        # ax2.plot(t, norm_heart)
        # ax2.set_title('After Bandpass filter')
        # ax3.plot(t, noise_heart)
        # ax3.set_title('After Noise Filter')
        # ax3.set_xlabel('Time [seconds]')

        # plt.tight_layout()
    plt.show()
    return (resampled,resampledt) if length else (noise_heart,t)
    

In [6]:
lengths = []
def generateSignal(file:str,plot:bool=False,loglevel:str=None):
    data,samplerate = readwav(file)
    length = data.shape[0] / samplerate
    lengths.append(length)
    t = getTiming(data,samplerate)

    sig,t = filterSignal(data,t,plot,length)
    return t,sig,samplerate

In [7]:
trainingFolder = "./heartbeats/classifications"
trainingpath = Path(trainingFolder)
paths = [Path(dir[0]) for dir in walk(trainingpath)][1:]


In [8]:
train_data = []
# test_data = []
plot = False
def processFiles(indexedWave,classification,trainIndex):
    wav,index = indexedWave

    t,d,_ = generateSignal(wav,plot)
    # if(index < trainIndex):
    train_data.append([d,t,classification])
    # else:
        # test_data.append([d,t,classification])


def get_training_data(path:Path):   
    classification = path.name
    wavList = glob.glob(str(path.joinpath("*.wav")))
    trainIndex=int(math.ceil(len(wavList)*.8)) # use 80% of data for training
    pool.map(lambda x: processFiles(x,classification,trainIndex),zip(wavList,range(0,len(wavList))) )
    # for wav in zip(wavList,range(0,len(wavList))):
    #     processFiles(wav,classification,trainIndex)




In [9]:
# Load Data 
#Loading data from this many files is intensive, speeding up w/ multithreading

for path in paths:
    get_training_data(path)
#train_data=np.array(train_data)
#test_data=np.array(test_data)



In [10]:
sig,time,classification = zip(*train_data)
getMaxLength = lambda list: max([len(item) for item in list])
max_length = getMaxLength(sig)


In [11]:
df = pd.DataFrame(train_data,columns=["bpm","ibi","sdnn","sdsd","rmssd","pnn20","pnn50","hr_mad","sd1","sd2","s","sdr","breathingrate","classification"])
# print(df[["signal","time"]].values)

# y = df.iloc
# sequence.pad_sequences(df[["signal","time"]].values,maxlen=max_length,dtype="float64")
# print(df.iloc[2])
df = pd.get_dummies(df,columns=["classification"])


In [12]:
y = df.iloc[:,2:].values
x = np.dstack((xs,xt))
# x = xs
x.shape

(176, 33075, 2)

In [13]:
# Feature extraction
n_steps, n_length = 15, 2205
x = x.reshape((x.shape[0], n_steps,1, n_length, 2))
# x = x.reshape((x.shape[0], n_steps, n_length, 2))

x.shape


(176, 15, 1, 2205, 2)

In [14]:
print(x.shape)
# y=tf.keras.utils.to_categorical(np.array(y),num_classes=5)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

(176, 15, 1, 2205, 2)


In [15]:
# define model
# strategy = tf.distribute.MirroredStrategy()
# with strategy.scope():
model = Sequential()
model.add(layers.ConvLSTM2D(filters=32, kernel_size=(1,3), activation='relu', input_shape=(n_steps, 1, n_length, 2)))
model.add(layers.BatchNormalization())
# model.add(layers.Conv2D(filters=32, kernel_size=(1,3), activation='relu', input_shape=(n_steps, 1, n_length, 2)))
model.add(layers.MaxPooling2D((1, 2)))
model.add(layers.Dropout(.2))

model.add(layers.Conv2D(filters=32, kernel_size=(1,3), activation='relu', input_shape=(n_steps, 1, n_length, 2)))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(filters=32, kernel_size=(1,3), activation='relu', input_shape=(n_steps, 1, n_length, 2)))
model.add(layers.MaxPooling2D((1, 2)))
model.add(layers.Dropout(.2))

model.add(layers.Conv2D(filters=32, kernel_size=(1,3), activation='relu', input_shape=(n_steps, 1, n_length, 2)))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(filters=32, kernel_size=(1,3), activation='relu', input_shape=(n_steps, 1, n_length, 2)))
model.add(layers.MaxPooling2D((1, 2)))
model.add(layers.Dropout(.2))
# model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))

# model.add(ConvLSTM2D(filters=16, kernel_size=(1,3), activation='relu'))

model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(5, activation='softmax'))
model.build(np.shape(x_train))
print(model.summary())
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_lstm2d (ConvLSTM2D)    (None, 1, 2203, 32)       13184     
                                                                 
 batch_normalization (BatchN  (None, 1, 2203, 32)      128       
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 1, 1101, 32)      0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 1, 1101, 32)       0         
                                                                 
 conv2d (Conv2D)             (None, 1, 1099, 32)       3104      
                                                                 
 batch_normalization_1 (Batc  (None, 1, 1099, 32)      1

In [16]:
# # define model
# strategy = tf.distribute.MirroredStrategy()
# with strategy.scope():
#     model = Sequential()
#     # model.add(ConvLSTM2D(filters=32, kernel_size=(1,3), activation='relu', input_shape=(n_steps, 1, n_length, 2)))
#     model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
#     model.add(LSTM(100))

#     # # model.add(ConvLSTM2D(filters=16, kernel_size=(1,3), activation='relu'))

#     # model.add(Dropout(0.5))
#     model.add(Flatten())
#     model.add(Dense(100, activation='relu'))
#     model.add(Dense(5, activation='softmax'))
#     model.build(np.shape(x_train))
#     print(model.summary())
#     model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [17]:
model.fit(x_train, y_train, epochs=10, batch_size=1)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f01fc1f77f0>

In [18]:
scores = model.evaluate(x_test, y_test, verbose=0)
print(scores)
m, s = np.mean(scores), np.std(scores)
print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))

[12.008731842041016, 0.3333333432674408]
Accuracy: 6.171% (+/-5.838)


In [19]:
predictions = model.predict(x_test)