In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
from keras.models import Sequential
from keras.layers import Dense, ReLU
import tensorflow as tf
from statistics import mode

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.mode.chained_assignment = None
np.random.seed(100)

In [4]:
myData = pd.read_csv(r"C:\Users\DEVENDRA PRASAD\heart_disease\heart.csv")
myData.columns = ['age', 'sex', 'chest_pain_type', 'resting_blood_pressure', 'cholesterol', 'fasting_blood_sugar', 'rest_ecg', 'max_heart_rate_achieved', 'exercise_induced_angina', 'st_depression', 'st_slope', 'num_major_vessels', 'thalassemia', 'target']

In [5]:
myData['sex'][myData['sex'] == 0] = 'female'
myData['sex'][myData['sex'] == 1] = 'male'
myData['chest_pain_type'][myData['chest_pain_type'] == 1] = 'typical angina'
myData['chest_pain_type'][myData['chest_pain_type'] == 2] = 'atypical angina'
myData['chest_pain_type'][myData['chest_pain_type'] == 3] = 'non-anginal pain'
myData['chest_pain_type'][myData['chest_pain_type'] == 4] = 'asymptomatic'
myData['fasting_blood_sugar'][myData['fasting_blood_sugar'] == 0] = 'lower than 120mg/ml'
myData['fasting_blood_sugar'][myData['fasting_blood_sugar'] == 1] = 'greater than 120mg/ml'
myData['rest_ecg'][myData['rest_ecg'] == 0] = 'normal'
myData['rest_ecg'][myData['rest_ecg'] == 1] = 'ST-T wave abnormality'
myData['rest_ecg'][myData['rest_ecg'] == 2] = 'left ventricular hypertrophy'
myData['exercise_induced_angina'][myData['exercise_induced_angina'] == 0] = 'no'
myData['exercise_induced_angina'][myData['exercise_induced_angina'] == 1] = 'yes'
myData['st_slope'][myData['st_slope'] == 1] = 'upsloping'
myData['st_slope'][myData['st_slope'] == 2] = 'flat'
myData['st_slope'][myData['st_slope'] == 3] = 'downsloping'
myData['thalassemia'][myData['thalassemia'] == 1] = 'normal'
myData['thalassemia'][myData['thalassemia'] == 2] = 'fixed defect'
myData['thalassemia'][myData['thalassemia'] == 3] = 'reversible defect'

In [6]:
myData = pd.get_dummies(myData, drop_first=True)

In [7]:
myData = (myData - np.min(myData)) / (np.max(myData) - np.min(myData))
x_train, x_test, y_train, y_test = train_test_split(myData.drop('target', axis=1),
                                                    myData['target'], test_size=.2, random_state=0)

In [8]:
lin_model = LogisticRegression(solver='lbfgs')
lin_model.fit(x_train, y_train)
print("Linear Model Accuracy: ", lin_model.score(x_test, y_test))

Linear Model Accuracy:  0.8524590163934426


In [9]:
knn_model = KNeighborsClassifier()
knn_model.fit(x_train, y_train)
print("K Nearest Neighbor Model Accuracy: ", knn_model.score(x_test, y_test))

K Nearest Neighbor Model Accuracy:  0.8360655737704918


In [10]:
svm_model = SVC(gamma='auto')
svm_model.fit(x_train, y_train)
print("Support Vector Machine Model Accuracy: ", svm_model.score(x_test, y_test))

Support Vector Machine Model Accuracy:  0.8524590163934426


In [11]:
nb_model = GaussianNB()
nb_model.fit(x_train, y_train)
print("Naive Bayes Model Accuracy: ", nb_model.score(x_test, y_test))

Naive Bayes Model Accuracy:  0.8688524590163934


In [12]:
tree_model = DecisionTreeClassifier()
tree_model.fit(x_train, y_train)
print("Decision Tree Model Accuracy: ", tree_model.score(x_test, y_test))

Decision Tree Model Accuracy:  0.7540983606557377


In [13]:
forest_model = RandomForestClassifier(n_estimators=100)
forest_model.fit(x_train, y_train)
print("Random Forest Model Accuracy: ", forest_model.score(x_test, y_test))

Random Forest Model Accuracy:  0.8360655737704918


In [14]:
nn_model = Sequential()
nn_model.add(Dense(100, input_shape=(19,)))
nn_model.add(ReLU())
nn_model.add(Dense(100))
nn_model.add(ReLU())
nn_model.add(Dense(10))
nn_model.add(ReLU())
nn_model.add(Dense(1, activation='sigmoid'))

In [15]:
nn_model.compile(optimizer='Adam', loss='categorical_hinge', metrics=['accuracy'])
nn_model.fit(x_train, y_train, epochs=15, verbose=0)

<keras.callbacks.callbacks.History at 0x181687d2b88>

In [16]:
y_predicted = (nn_model.predict(x_test) > 0.5)
conf_mat = confusion_matrix(y_test, y_predicted)
print(conf_mat)
total = sum(sum(conf_mat))
sensitivity = conf_mat[0, 0]/(conf_mat[0, 0] + conf_mat[1, 0])
specificity = conf_mat[1, 1]/(conf_mat[1, 1] + conf_mat[0, 1])
accuracy = (conf_mat[0, 0] + conf_mat[1, 1])/total

[[23  4]
 [ 4 30]]


In [17]:
print('specificity : ', specificity)
print('sensitivity : ', sensitivity)
print('accuracy : ', accuracy)

specificity :  0.8823529411764706
sensitivity :  0.8518518518518519
accuracy :  0.8688524590163934


In [18]:
votes = lin_model.predict(x_test) + svm_model.predict(x_test) + nb_model.predict(x_test) \
        + forest_model.predict(x_test) + tree_model.predict(x_test) + knn_model.predict(x_test) \
        + ((nn_model.predict(x_test)>0.5).T)[0].astype(float)

In [19]:
print(votes >= 4)

[False False False False False False False False False False  True  True
 False  True  True  True False  True False  True  True False False False
  True False False False  True  True  True False  True  True  True  True
 False  True False  True False  True  True False  True  True  True False
 False  True  True  True  True False  True False  True  True  True  True
  True]


In [20]:
print(y_test.values == 1.0)

[False  True False False  True False False False False False  True  True
 False  True  True  True  True  True False  True  True False False False
  True False False False  True  True False False  True  True  True False
 False  True False False  True  True  True False  True  True  True False
 False  True  True  True  True  True  True False  True False  True  True
  True]


In [21]:
conf_mat = confusion_matrix((y_test.values == 1.0),  (votes >= 4))
total = sum(sum(conf_mat))
sensitivity = conf_mat[0, 0]/(conf_mat[0, 0] + conf_mat[1, 0])
specificity = conf_mat[1, 1]/(conf_mat[1, 1] + conf_mat[0, 1])
accuracy = (conf_mat[0, 0] + conf_mat[1, 1])/total

In [22]:
print("Statistics for voting classifier, where simple majority rules:\n")
print(conf_mat)
print('specificity : ', specificity)
print('sensitivity : ', sensitivity)
print('accuracy : ', accuracy)

Statistics for voting classifier, where simple majority rules:

[[23  4]
 [ 5 29]]
specificity :  0.8787878787878788
sensitivity :  0.8214285714285714
accuracy :  0.8524590163934426


In [23]:
conf_mat = confusion_matrix((y_test.values == 1.0),  (votes >= 2))
total = sum(sum(conf_mat))
sensitivity = conf_mat[0, 0]/(conf_mat[0, 0] + conf_mat[1, 0])
specificity = conf_mat[1, 1]/(conf_mat[1, 1] + conf_mat[0, 1])
accuracy = (conf_mat[0, 0] + conf_mat[1, 1])/total

In [24]:
print("Statistics for voting classifier, where it only takes 2 positive votes (out of 7 votes) to declare "
      "a positive result:\n")

Statistics for voting classifier, where it only takes 2 positive votes (out of 7 votes) to declare a positive result:



In [25]:
print(conf_mat)
print('specificity : ', specificity)
print('sensitivity : ', sensitivity)
print('accuracy : ', accuracy)

[[21  6]
 [ 2 32]]
specificity :  0.8421052631578947
sensitivity :  0.9130434782608695
accuracy :  0.8688524590163934
