In [19]:
#%%
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2_contingency
import math
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten,Dropout,Conv2D,LSTM, LeakyReLU, MaxPooling2D
from datetime import datetime
from pathlib import Path
from scipy import signal
from os import walk
from keras.preprocessing import sequence
from scikeras.wrappers import KerasClassifier
from scipy.io import wavfile
import glob
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

import tensorflow as tf

from os.path import basename
from multiprocessing.dummy import Pool as ThreadPool

pool=ThreadPool(24)


In [2]:
log_dir = "/tf/logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
#%tensorboard

UsageError: Line magic function `%tensorboard` not found.


In [3]:
# Load wavefile
def readwav(file:str):
    filepath = Path(file).absolute()
    samplerate, data = wavfile.read((filepath))
    # print(f"samplerate = {samplerate}")
    return data,samplerate

In [4]:
def getTiming(data:np.ndarray,samplerate:int):
    length = data.shape[0] / samplerate
    return np.arange(0,length,1/samplerate)



In [5]:
def filterSignal(data:np.ndarray,t:np.ndarray,plot:bool=True):
    ## normalize input
    sig = data/np.amax(data)
    sos = signal.butter(1, [.2,3], 'bp', fs=1000, output='sos')
    filtered_heart = signal.sosfilt(sos, sig)
    norm_heart = filtered_heart/np.amax(filtered_heart)
    ## Removing noise
    norm_heart = signal.signaltools.wiener(filtered_heart,300)
    if plot:
        _, (ax1, ax2,ax3) = plt.subplots(3, 1, sharex=True)
        ax1.plot(t, sig)
        ax1.set_title('Original Heart Rate Signal')
        ax2.plot(t, norm_heart)
        ax2.set_title('After Bandpass filter')
        ax3.plot(t, norm_heart)
        ax3.set_title('After Noise Filter')
        ax3.set_xlabel('Time [seconds]')
        plt.tight_layout()
        plt.show()
    return norm_heart
    

In [6]:
def generateSignal(file:str,plot:bool=False,loglevel:str=None):
    data,samplerate = readwav(file)
    t = getTiming(data,samplerate)
    return t,filterSignal(data,t,plot),samplerate

In [7]:
trainingFolder = "./heartbeats/classifications"
trainingpath = Path(trainingFolder)
paths = [Path(dir[0]) for dir in walk(trainingpath)][1:]


In [8]:
train_data = []
test_data = []
plot = False
def processFiles(indexedWave,classification,trainIndex):
    wav,index = indexedWave
    t,d,_ = generateSignal(wav,plot)
    if(index < trainIndex):
        train_data.append([d,t,classification])
    else:
        test_data.append([d,t,classification])


def get_training_data(path:Path):   
    classification = path.name
    wavList = glob.glob(str(path.joinpath("*.wav")))
    trainIndex=int(math.ceil(len(wavList)*.8)) # use 80% of data for training
    pool.map(lambda x: processFiles(x,classification,trainIndex),zip(wavList,range(0,len(wavList))) )
    # for wav in zip(wavList,range(0,len(wavList))):
    #     processFiles(wav,classification,trainIndex)




In [9]:
# Load Data 
#Loading data from this many files is intensive, speeding up w/ multithreading

for path in paths:
    get_training_data(path)
train_data=np.array(train_data)
test_data=np.array(test_data)



  train_data=np.array(train_data)
  test_data=np.array(test_data)


In [44]:
def padData(series:pd.Series):
    maxLength=0
    for _,value in series.items():
        maxLength=max(maxLength,len(value))
    # for index,value in series.items():
    # print(series)
    # series[index]=sequence.pad_sequences(series[index],maxlen=maxLength,dtype='float64')
    # print(maxLength)
    # print(series)
    mod = sequence.pad_sequences(series,maxlen=maxLength,padding='post',dtype='float64',value=0).tolist()
    # print(mod)
    return mod


In [45]:
train_set = pd.DataFrame(train_data,columns=["signal","timing","classification"])
train_set = pd.get_dummies(train_set,columns=["classification"])
test_set = pd.DataFrame(test_data,columns=["signal","timing","classification"])
test_set = pd.get_dummies(test_set,columns=["classification"])
# print(np.stack(padData(train_set["signal"])))
# print(train_set["signal"])
train_set["signal"] = (padData(train_set["signal"]))
train_set["timing"] = (padData(train_set["timing"]))
test_set["signal"] = (padData(test_set["signal"]))
test_set["timing"] = (padData(test_set["timing"]))

396900
0      [-1.7519662909827494e-07, -1.175316157382523e-...
1      [0.012417078213215934, 0.012940551628650217, 0...
2      [0.0026388929361166207, 0.002623511752097469, ...
3      [-6.868738632576114e-05, -5.080191118800028e-0...
4      [4.827771064303697e-05, 4.891067453619705e-05,...
                             ...                        
138    [0.009117810768986333, 0.009172225133000471, 0...
139    [-0.0017034451589284143, -0.001563755232277169...
140    [0.002309420742175835, 0.002777685485082106, 0...
141    [-0.0023368859321800145, -0.002342523847503692...
142    [0.0077366706612047795, 0.007799375712485868, ...
Name: signal, Length: 143, dtype: object


In [41]:

x_train =  train_set[["signal","timing"]].to_numpy()
y_train = train_set.iloc[:,2:].to_numpy()
# x_train = np.reshape(x_train,(x_train.shape[0],x_train.shape[1],1))


x_test =  test_set[["signal","timing"]].to_numpy()
y_test = test_set.iloc[:,2:].to_numpy()
# x_test = np.reshape(x_test,(x_test.shape[0],x_test.shape[1],1))
print(np.shape(x_train))
print(x_train)

(143, 2)
[[-1.75196629e-07  0.00000000e+00]
 [ 1.24170782e-02  0.00000000e+00]
 [ 2.63889294e-03  0.00000000e+00]
 [-6.86873863e-05  0.00000000e+00]
 [ 4.82777106e-05  0.00000000e+00]
 [-3.63207646e-05  0.00000000e+00]
 [-3.65722487e-05  0.00000000e+00]
 [-1.43582201e-02  0.00000000e+00]
 [ 1.60950306e-06  0.00000000e+00]
 [-2.84769690e-02  0.00000000e+00]
 [-1.80348707e-06  0.00000000e+00]
 [ 2.55965482e-03  0.00000000e+00]
 [-5.97241935e-03  0.00000000e+00]
 [-3.00896645e-02  0.00000000e+00]
 [ 1.32214766e-02  0.00000000e+00]
 [-1.43365779e-04  0.00000000e+00]
 [ 1.48870399e-02  0.00000000e+00]
 [ 4.66192978e-04  0.00000000e+00]
 [ 2.79976048e-03  0.00000000e+00]
 [-5.57533107e-06  0.00000000e+00]
 [-3.90992126e-03  0.00000000e+00]
 [-1.07282432e-03  0.00000000e+00]
 [ 1.61530256e-03  0.00000000e+00]
 [ 1.27494134e-02  0.00000000e+00]
 [-7.86083549e-05  0.00000000e+00]
 [ 2.47518553e-04  0.00000000e+00]
 [-4.51966565e-06  0.00000000e+00]
 [ 1.46019884e-04  0.00000000e+00]
 [ 2.087413

In [35]:
model=Sequential()
model.add(Dense(16, input_shape=(x_train.shape[1],1), activation="relu"))
model.add(Dense(8, activation='tanh'))
model.add(Dense(5, activation='sigmoid'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])



In [17]:

# model = Sequential()
# model.add(Conv1D(filters=512, kernel_size=1000, padding='same', activation='relu'))
# model.add(MaxPooling1D(pool_size=2))
# model.add(LSTM(400,return_sequences=False))

# # model.add(LSTM(400,return_sequences=False,recurrent_activation='selu',kernel_initializer='lecun_normal', input_shape=(x_train.shape[1],1)))

# model.add(Dense(5, activation='softmax'))
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [36]:
model.fit(x_train, y_train, epochs=100, batch_size=512)
model.summary()


Epoch 1/100


ValueError: in user code:

    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 878, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 867, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 860, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 809, in train_step
        loss = self.compiled_loss(
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/usr/local/lib/python3.8/dist-packages/keras/losses.py", line 141, in __call__
        losses = call_fn(y_true, y_pred)
    File "/usr/local/lib/python3.8/dist-packages/keras/losses.py", line 245, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/usr/local/lib/python3.8/dist-packages/keras/losses.py", line 1664, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "/usr/local/lib/python3.8/dist-packages/keras/backend.py", line 4994, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 5) and (None, 2, 5) are incompatible


In [16]:
predictions = model.predict(test_set)

NameError: name 'model' is not defined

In [None]:
scores = model.evaluate(x_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Accuracy: 30.30%
