# Importing libraries

In [1]:
import numpy as np
import pandas as pd

from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from keras.layers import Dense,Dropout
from keras.models import Sequential
from keras.utils import to_categorical

# Loading dataset as pandas dataframe

In [2]:
arrhythmia_data=pd.read_csv('./arrhythmia.csv')

In [3]:
arrhythmia_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 451 entries, 0 to 450
Columns: 280 entries, 75 to 8
dtypes: float64(120), int64(155), object(5)
memory usage: 986.7+ KB


# Assigning names to columns

In [4]:
column_names=['age','gender','height','weight','qrs_duration','p-r_Interval','q-t_interval',
              't_interval','p_interval','QRS','T','P','QRST','J',
              'Heart rate','Q_D1','R_D1','S_D1','R\'_D1','S\'_D1','NOD_D1',
              'EOR_R_D1','EOD_R_D1','EOR_P_D1','EOD_P_D1','EOR_T_D1','EOD_T_D1','Q_D2','R_D2',
              'S_D2','R\'_D2','S\'_D2','NOD_D2','EOR_R_D2','EOD_R_D2','EOR_P_D2','EOD_P_D2',
              'EOR_T_D2','EOD_T_D2','Q_D3','R_D3','S_D3','R\'_D3','S\'_D3','NOD_D3',
              'EOR_R_D3','EOD_R_D3','EOR_P_D3','EOD_P_D3','EOR_T_D3','EOD_T_D3','Q_AVR','R_AVR',
              'S_AVR','R\'_AVR','S\'_AVR','NOD_AVR','EOR_R_AVR','EOD_R_AVR','EOR_P_AVR','EOD_P_AVR',
              'EOR_T_AVR','EOD_T_AVR','Q_AVL','R_AVL','S_AVL','R\'_AVL','S\'_AVL','NOD_AVL','EOR_R_AVL',
              'EOD_R_AVL','EOR_P_AVL','EOD_P_AVL','EOR_T_AVL','EOD_T_AVL','Q_AVF','R_AVF','S_AVF','R\'_AVF',
              'S\'_AVF','NOD_AVF','EOR_R_AVF','EOD_R_AVF','EOR_P_AVF','EOD_P_AVF','EOR_T_AVF','EOD_T_AVF','Q_V1',
              'R_V1','S_V1','R\'_V1','S\'_V1','NOD_V1','EOR_R_V1','EOD_R_V1','EOR_P_V1','EOD_P_V1','EOR_T_V1',
              'EOD_T_V1','Q_V2','R_V2','S_V2','R\'_V2','S\'_V2','NOD_V2','EOR_R_V2','EOD_R_V2','EOR_P_V2','EOD_P_V2',
              'EOR_T_V2','EOD_T_V2','Q_V3','R_V3','S_V3','R\'_V3','S\'_V3','NOD_V3','EOR_R_V3','EOD_R_V3','EOR_P_V3',
              'EOD_P_V3','EOR_T_V3','EOD_T_V3','Q_V4','R_V4','S_V4','R\'_V4','S\'_V4','NOD_V4','EOR_R_V4','EOD_R_V4',
              'EOR_P_V4','EOD_P_V4','EOR_T_V4','EOD_T_V4','Q_V5','R_V5','S_V5','R\'_V5','S\'_V5','NOD_V5','EOR_R_V5',
              'EOD_R_V5','EOR_P_V5','EOD_P_V5','EOR_T_V5','EOD_T_V5','Q_V6','R_V6','S_V6','R\'_V6','S\'_V6','NOD_V6',
              'EOR_R_V6','EOD_R_V6','EOR_P_V6','EOD_P_V6','EOR_T_V6','EOD_T_V6','A_JJ_D1','A_Q_D1','A_R_D1','A_S_D1',
              'A_R\'_D1','A_S\'_D1','A_P_D1','A_T_D1','QRSA_D1','QRSTA_D1','A_JJ_D2','A_Q_D2','A_R_D2','A_S_D2','A_R\'_D2',
              'A_S\'_D2','A_P_D2','A_T_D2','QRSA_D2','QRSTA_D2','A_JJ_D3','A_Q_D3','A_R_D3','A_S_D3','A_R\'_D3','A_S\'_D3',
              'A_P_D3','A_T_D3','QRSA_D3','QRSTA_D3','A_JJ_AVR','A_Q_AVR','A_R_AVR','A_S_AVR','A_R\'_AVR','A_S\'_AVR','A_P_AVR'
              ,'A_T_AVR','QRSA_AVR','QRSTA_AVR','A_JJ_AVL','A_Q_AVL','A_R_AVL','A_S_AVL','A_R\'_AVL','A_S\'_AVL','A_P_AVL'
              ,'A_T_AVL','QRSA_AVL','QRSTA_AVL','A_JJ_AVF','A_Q_AVF','A_R_AVF','A_S_AVF','A_R\'_AVF','A_S\'_AVF','A_P_AVF',
              'A_T_AVF','QRSA_AVF','QRSTA_AVF','A_JJ_V1','A_Q_V1','A_R_V1','A_S_V1','A_R\'_V1','A_S\'_V1','A_P_V1','A_T_V1',
              'QRSA_V1','QRSTA_V1','A_JJ_V2','A_Q_V2','A_R_V2','A_S_V2','A_R\'_V2','A_S\'_V2','A_P_V2','A_T_V2','QRSA_V2',
              'QRSTA_V2','A_JJ_V3','A_Q_V3','A_R_V3','A_S_V3','A_R\'_V3','A_S\'_V3','A_P_V3','A_T_V3','QRSA_V1','QRSTA_V3',
              'A_JJ_V4','A_Q_V4','A_R_V4','A_S_V4','A_R\'_V4','A_S\'_V4','A_P_V4','A_T_V4','QRSA_V4','QRSTA_V4','A_JJ_V5',
              'A_Q_V5','A_R_V5','A_S_V5','A_R\'_V5','A_S\'_V5','A_P_V5','A_T_V5','QRSA_V5','QRSTA_V5','A_JJ_V6','A_Q_V6',
              'A_R_V6','A_S_V6','A_R\'_V6','A_S\'_V6','A_P_V6','A_T_V6','QRSA_V6','QRSTA_V6','Class']
arrhythmia_data.columns=column_names
arrhythmia_data.head()

Unnamed: 0,age,gender,height,weight,qrs_duration,p-r_Interval,q-t_interval,t_interval,p_interval,QRS,...,A_Q_V6,A_R_V6,A_S_V6,A_R'_V6,A_S'_V6,A_P_V6,A_T_V6,QRSA_V6,QRSTA_V6,Class
0,56,1,165,64,81,174,401,149,39,25,...,0.0,8.5,0.0,0.0,0.0,0.2,2.1,20.4,38.8,6
1,54,0,172,95,138,163,386,185,102,96,...,0.0,9.5,-2.4,0.0,0.0,0.3,3.4,12.3,49.0,10
2,55,0,175,94,100,202,380,179,143,28,...,0.0,12.2,-2.2,0.0,0.0,0.4,2.6,34.6,61.6,1
3,75,0,190,80,88,181,360,177,103,-16,...,0.0,13.1,-3.6,0.0,0.0,-0.1,3.9,25.4,62.8,7
4,13,0,169,51,100,167,321,174,91,107,...,-0.6,12.2,-2.8,0.0,0.0,0.9,2.2,13.5,31.1,14


# Replacing '?' with np.nan (not a number)

In [5]:
arrhythmia_data.replace({'?': np.nan}, regex=False,inplace=True) #https://stackoverflow.com/questions/50894580/how-to-replace-question-mark-in-value-of-a-panda-frame-as-missing-value/57800409#57800409
null_values=(arrhythmia_data.isna().sum()).sort_values(ascending=False)
null_cols=null_values[null_values!=0].index
arrhythmia_data=arrhythmia_data.astype(float)
print(null_values[null_values!=0])

J             375
P              22
T               8
Heart rate      1
QRST            1
dtype: int64


# Preprocess to replace missing values with the mean of the column 

In [6]:
for col in null_cols:
    mean_of_col= (arrhythmia_data[col][arrhythmia_data[col].notnull()]).astype(float).mean()
    arrhythmia_data[col]=arrhythmia_data[col].fillna(mean_of_col,inplace= False)

# Shuffle to randomize data

In [7]:
arrhythmia_data = shuffle(arrhythmia_data)

# Applying train, validation & test split with ratios of 80, 10, 10 respectively

In [8]:
X = arrhythmia_data.iloc[:, :-1].values
y = arrhythmia_data.iloc[:, -1].values

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=0.5)

temp_y_test = y_test.copy()

print(x_train.shape, x_val.shape, x_test.shape)

(360, 279) (45, 279) (46, 279)


# Converting numeric classes into categorical data

In [9]:
y_train = to_categorical(y_train)
y_val = to_categorical(y_val)
y_test = to_categorical(y_test)

# Avoiding dummy variable trap

In [10]:
y_train = y_train[:,1:]
y_val = y_val[:,1:]
y_test = y_test[:,1:]

# Applying Min-Max-Normalization

In [11]:
mms_x = MinMaxScaler()

pp_x_train = mms_x.fit_transform(x_train)
pp_x_val = mms_x.transform(x_val)
pp_x_test = mms_x.transform(x_test)

In [12]:
print(pp_x_train.shape, pp_x_val.shape, pp_x_test.shape)

(360, 279) (45, 279) (46, 279)


# Creating multilayer perceptron in Keras

In [13]:
model=Sequential()
model.add(Dense(128,input_dim=(279),activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(16,activation='softmax'))

# Loss function, optimizer and accuracy method

In [14]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy','top_k_categorical_accuracy'])

# Fitting the model using train and validation data

In [15]:
model.fit(pp_x_train, y_train, epochs=20, batch_size=8, validation_data=(pp_x_val, y_val))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x24e3dd38518>

# Calculating accuracy of model on test data

In [16]:
_, accuracy, top_k_categorical = model.evaluate(pp_x_test, y_test)
print('Accuracy: %.2f & Top K Categorical : %.2f' % (accuracy*100,top_k_categorical*100))

Accuracy: 47.83 & Top K Categorical : 82.61
