In [253]:
import keras
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization

%matplotlib inline
pd.plotting.register_matplotlib_converters()

In [254]:
# Function is used to randomly reduce a dataset's sample size
# Returns a df that is reduced_size % the size of the original dataset
# ====================================================================
# reduced_size: A percentage in range [0,1] that represents ratio of reduced size to the original (redice/original)
# df: the df that will be reduced

def reduce_df(reduced_size, df):
    if reduced_size >= 0 and reduced_size <= 1:
        n = df.shape[0]
        num_drop = int(n * (1-reduced_size))
        n_list = [i for i in range(n)]
        np.random.shuffle(n_list)
        drop_list = n_list[0:num_drop]
        return df.drop(drop_list)
    
    else:
        print('NO SAMPLES REMOVED!\nEnter a valid reduction percentage in range [0,1]\n')
        return df

In [255]:
df_train = pd.read_csv('../heartECG_data/mitbih_train.csv')
n_orig = df_train.shape[0]
df_train = reduce_df(0.6,df_train) #shrink df by 60% of it's original size (# samples)
n_new = df_train.shape[0]
n_change = n_orig - n_new
n_change_percent = round(n_new/n_orig,2)

print('Orig # samples:', n_orig)
print('New # samples:', n_new)
print('# samples removed:', n_change)
print('Removed Ratio (should equal reduced_size param):', n_change_percent)

Orig # samples: 87553
New # samples: 52532
# samples removed: 35021
Removed Ratio (should equal reduced_size param): 0.6


In [256]:
cols = [i for i in range(188)]
df_train.columns = cols
df_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,178,179,180,181,182,183,184,185,186,187
0,0.960114,0.863248,0.461538,0.196581,0.094017,0.125356,0.099715,0.088319,0.074074,0.082621,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.967136,1.0,0.830986,0.586854,0.356808,0.248826,0.14554,0.089202,0.117371,0.150235,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.927461,1.0,0.626943,0.193437,0.094991,0.072539,0.043178,0.053541,0.093264,0.189983,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.716814,0.539823,0.283186,0.129794,0.064897,0.076696,0.026549,0.032448,0.00885,0.044248,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,1.0,0.657563,0.178571,0.178571,0.136555,0.035714,0.021008,0.067227,0.159664,0.252101,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [207]:
X = np.array(df_train.iloc[:,0:-1])
y = df_train.iloc[:,-1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print('# training samples:', X_train.shape[0])
print('# test samples:', X_test.shape[0])


n_steps = 187
# n_samples = X_train.shape[0]
# n_features = X_train.shape[1]
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1],n_features)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1],1)
X_train.shape

# training samples: 42025
# test samples: 10507


(42025, 187, 1)

In [250]:
model = keras.models.Sequential()

model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
# model.add(Dense(100, activation='relu'))
# model.add(BatchNormalization())
# model.add(Dense(in_shape[0], activation='softmax'))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])


# model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps, n_features)))
# model.add(Dropout(0.5))
# model.add(MaxPooling1D(pool_size=2))
# model.add(Flatten())
# model.add(Dense(50, activation='relu'))
# model.add(Dense(1))
# model.compile(optimizer='adam', loss='mse')
# model.summary()



model.summary()
model.fit(X_train, y_train, epochs=15, batch_size=100, verbose=1)

Model: "sequential_94"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_122 (Conv1D)          (None, 186, 64)           192       
_________________________________________________________________
dropout_19 (Dropout)         (None, 186, 64)           0         
_________________________________________________________________
max_pooling1d_56 (MaxPooling (None, 93, 64)            0         
_________________________________________________________________
flatten_53 (Flatten)         (None, 5952)              0         
_________________________________________________________________
dense_69 (Dense)             (None, 1)                 5953      
Total params: 6,145
Trainable params: 6,145
Non-trainable params: 0
_________________________________________________________________
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 

<keras.callbacks.callbacks.History at 0x1cdca81cc18>

In [251]:
y_hat = model.predict(X_test)
y_hat

fin = []
for y in y_hat:
    fin.append(np.sqrt(round(y[0]) ** 2))
y_hat = fin

In [252]:
accuracy_score(y_test, fin)

0.8001332445036642