In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv("heart.csv")
data

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1


In [3]:
data.isnull().sum()

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64

In [4]:
X = data.iloc[:, :11]
y = data.iloc[:, 11]
X

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up
...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat


In [5]:
set(X["ChestPainType"])

{'ASY', 'ATA', 'NAP', 'TA'}

In [6]:
set(X["RestingECG"])

{'LVH', 'Normal', 'ST'}

In [7]:
set(X["ST_Slope"])

{'Down', 'Flat', 'Up'}

In [8]:
set(X["ExerciseAngina"])

{'N', 'Y'}

In [9]:
chest = pd.get_dummies(X["ChestPainType"], drop_first=True)
gender = pd.get_dummies(X["Sex"], drop_first=True)
restingECG = pd.get_dummies(X["RestingECG"], drop_first=True)
exerciseAngina = pd.get_dummies(X["ExerciseAngina"], drop_first=True)
st_slope = pd.get_dummies(X["ST_Slope"], drop_first=True)

X = pd.concat([X, chest, gender, restingECG, exerciseAngina, st_slope], axis=1)
X = X.drop(["ChestPainType", "Sex", "RestingECG", "ExerciseAngina", "ST_Slope"], axis=1)
X

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,ATA,NAP,TA,M,Normal,ST,Y,Flat,Up
0,40,140,289,0,172,0.0,1,0,0,1,1,0,0,0,1
1,49,160,180,0,156,1.0,0,1,0,0,1,0,0,1,0
2,37,130,283,0,98,0.0,1,0,0,1,0,1,0,0,1
3,48,138,214,0,108,1.5,0,0,0,0,1,0,1,1,0
4,54,150,195,0,122,0.0,0,1,0,1,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,110,264,0,132,1.2,0,0,1,1,1,0,0,1,0
914,68,144,193,1,141,3.4,0,0,0,1,1,0,0,1,0
915,57,130,131,0,115,1.2,0,0,0,1,1,0,1,1,0
916,57,130,236,0,174,0.0,1,0,0,0,0,0,0,1,0


In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,ATA,NAP,TA,M,Normal,ST,Y,Flat,Up
795,42,120,240,1,194,0.8,0,1,0,1,1,0,0,0,0
25,36,130,209,0,178,0.0,0,1,0,1,1,0,0,0,1
84,56,150,213,1,125,1.0,0,0,0,1,1,0,1,1,0
10,37,130,211,0,142,0.0,0,1,0,0,1,0,0,0,1
344,51,120,0,1,104,0.0,0,0,0,1,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,48,120,254,0,110,0.0,0,0,0,0,0,1,0,0,1
270,45,120,225,0,140,0.0,0,0,0,1,1,0,0,0,1
860,60,130,253,0,144,1.4,0,0,0,1,1,0,1,0,1
435,60,152,0,0,118,0.0,0,0,0,1,0,1,1,0,1


In [24]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)
X_train[0]

array([-1.24506731, -0.70898547,  0.372803  ,  1.84260945,  2.28435288,
       -0.09706109, -0.48333575,  1.87905939, -0.21350421,  0.54060477,
        0.80817891, -0.48762079, -0.83846064, -1.03325003, -0.84792072])

### Neural Network

In [12]:
tf.random.set_seed(42)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    loss=tf.keras.losses.binary_crossentropy,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics=[
        tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall')
    ]
)

history = model.fit(X_train, y_train, epochs=60)


Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [13]:
model.evaluate(X_test, y_test)



[0.9607263803482056, 0.875, 0.8888888955116272, 0.8971962332725525]

In [17]:
y_pred = model.predict(X_test)
print(y_pred)
pred_1 = [0 if i<0.5 else 1 for i in y_pred]
print(pred_1)
y_pred = (y_pred>0.5)
print(y_pred)


[[5.68502546e-05]
 [9.99993920e-01]
 [1.00000000e+00]
 [9.99996781e-01]
 [3.81260842e-01]
 [9.98195350e-01]
 [1.00000000e+00]
 [2.25255964e-03]
 [1.34427235e-01]
 [9.99972224e-01]
 [1.14203639e-01]
 [9.91832621e-07]
 [9.99569356e-01]
 [4.43070196e-03]
 [9.99998689e-01]
 [9.99999762e-01]
 [1.44806190e-11]
 [1.00000000e+00]
 [9.08604562e-01]
 [7.15457249e-09]
 [8.65933180e-01]
 [9.99813616e-01]
 [2.01427558e-16]
 [6.79375930e-03]
 [9.99527097e-01]
 [9.99240756e-01]
 [7.99190900e-07]
 [1.79470656e-03]
 [9.85670923e-08]
 [1.98588923e-09]
 [9.98932183e-01]
 [8.53908060e-15]
 [9.99258339e-01]
 [9.99991179e-01]
 [1.00000000e+00]
 [1.00000000e+00]
 [1.00000000e+00]
 [1.20543808e-09]
 [9.54921246e-01]
 [9.99996543e-01]
 [9.90864813e-01]
 [7.74987936e-01]
 [9.99999762e-01]
 [5.56677621e-11]
 [4.29472529e-08]
 [5.54163754e-01]
 [9.99997854e-01]
 [9.99999881e-01]
 [1.00000000e+00]
 [4.38037008e-04]
 [7.63367832e-01]
 [2.69515611e-13]
 [9.99608576e-01]
 [9.99998450e-01]
 [6.07855439e-01]
 [5.003746

In [33]:
a = [-1.24506731, -0.70898547,  0.372803  ,  1.84260945,  2.28435288,
       -0.09706109, -0.48333575,  1.87905939, -0.21350421,  0.54060477,
        0.80817891, -0.48762079, -0.83846064, -1.03325003, -0.84792072]
a = np.reshape(a, (1, 15))
b = model.predict(a)
b[0][0], b, a

(8.358645e-12,
 array([[8.358645e-12]], dtype=float32),
 array([[-1.24506731, -0.70898547,  0.372803  ,  1.84260945,  2.28435288,
         -0.09706109, -0.48333575,  1.87905939, -0.21350421,  0.54060477,
          0.80817891, -0.48762079, -0.83846064, -1.03325003, -0.84792072]]))

In [15]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
cm

array([[66, 11],
       [10, 97]], dtype=int64)

In [16]:
score = accuracy_score(y_pred, y_test)
score

0.8858695652173914

### Random Forest

In [17]:
from sklearn.ensemble import RandomForestClassifier

In [18]:
clf1 = RandomForestClassifier()
clf1.fit(X_train, y_train)

RandomForestClassifier()

In [19]:
clf1.score(X_test, y_test)

0.8804347826086957

In [20]:
y_pred1 = clf1.predict(X_test)
y_pred1 = (y_pred1>0.5)

In [21]:
cm1 = confusion_matrix(y_test, y_pred1)
cm1

array([[66, 11],
       [11, 96]], dtype=int64)

### SVM

In [22]:
from sklearn.svm import SVC

In [23]:
clf2 = SVC(C=200, gamma="auto")
clf2.fit(X_train, y_train)

SVC(C=200, gamma='auto')

In [24]:
y_pred2 = clf2.predict(X_test)
y_pred2 = (y_pred2>0.5)

In [25]:
cm2 = confusion_matrix(y_test, y_pred1)
cm2

array([[66, 11],
       [11, 96]], dtype=int64)

In [26]:
clf2.score(X_test, y_test)

0.8260869565217391

In [27]:
# Save model
model.save("my_model.h5")

In [28]:
import pickle
pickle.dump(clf1, open("rf_model.pkl", "wb"))

In [29]:
# !pip freeze > requirements.txt

In [2]:
a = 0.6
a = (a>0.5)
a

True

In [27]:
k = tf.keras.models.load_model("my_model.h5")

In [28]:
k.predict(X_test)

array([[1.05626401e-04],
       [9.99993682e-01],
       [1.00000000e+00],
       [9.99994278e-01],
       [4.08601195e-01],
       [9.97403204e-01],
       [1.00000000e+00],
       [2.22940254e-03],
       [1.03595637e-01],
       [9.99971390e-01],
       [1.24317810e-01],
       [8.88906982e-07],
       [9.99297738e-01],
       [2.38192221e-03],
       [9.99997616e-01],
       [9.99999762e-01],
       [2.14055804e-11],
       [1.00000000e+00],
       [9.36663270e-01],
       [5.94904659e-09],
       [7.91057587e-01],
       [9.99792874e-01],
       [2.85459659e-16],
       [7.30035454e-03],
       [9.99725282e-01],
       [9.99192297e-01],
       [1.10234782e-06],
       [2.60640262e-03],
       [5.71255292e-08],
       [2.04538297e-09],
       [9.99378920e-01],
       [1.19483204e-14],
       [9.99209762e-01],
       [9.99989390e-01],
       [1.00000000e+00],
       [1.00000000e+00],
       [1.00000000e+00],
       [1.28818722e-09],
       [9.46699739e-01],
       [9.99996066e-01],


In [34]:
a = k.predict([[40, 140, 289, 0, 172, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0]])
a[0][0], a

(1.0, array([[1.]], dtype=float32))

In [36]:
m = [[40, 140, 289, 0, 172, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0]]
m[0][0]

40