In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import pickle

# Load the heart-disease dataset from its CSV file
DATA_PATH = "/content/drive/MyDrive/ANN_Heart_Disease/heart_disease_uci.csv"
df = pd.read_csv(DATA_PATH)
# Preview the first few rows
df.head()

Unnamed: 0,id,age,sex,dataset,cp,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal,num
0,1,63,Male,Cleveland,typical angina,145.0,233.0,True,lv hypertrophy,150.0,False,2.3,downsloping,0.0,fixed defect,0
1,2,67,Male,Cleveland,asymptomatic,160.0,286.0,False,lv hypertrophy,108.0,True,1.5,flat,3.0,normal,2
2,3,67,Male,Cleveland,asymptomatic,120.0,229.0,False,lv hypertrophy,129.0,True,2.6,flat,2.0,reversable defect,1
3,4,37,Male,Cleveland,non-anginal,130.0,250.0,False,normal,187.0,False,3.5,downsloping,0.0,normal,0
4,5,41,Female,Cleveland,atypical angina,130.0,204.0,False,lv hypertrophy,172.0,False,1.4,upsloping,0.0,normal,0


In [2]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

In [3]:
# Remove the identifier/bookkeeping columns that are not used as model features.
# errors="ignore" keeps this cell idempotent: because it rebinds `df`, re-running
# it would otherwise raise a KeyError once the columns are already gone.
df = df.drop(columns=['id', 'dataset'], errors="ignore")

In [4]:
# Preview the frame again to confirm that 'id' and 'dataset' were removed
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal,num
0,63,Male,typical angina,145.0,233.0,True,lv hypertrophy,150.0,False,2.3,downsloping,0.0,fixed defect,0
1,67,Male,asymptomatic,160.0,286.0,False,lv hypertrophy,108.0,True,1.5,flat,3.0,normal,2
2,67,Male,asymptomatic,120.0,229.0,False,lv hypertrophy,129.0,True,2.6,flat,2.0,reversable defect,1
3,37,Male,non-anginal,130.0,250.0,False,normal,187.0,False,3.5,downsloping,0.0,normal,0
4,41,Female,atypical angina,130.0,204.0,False,lv hypertrophy,172.0,False,1.4,upsloping,0.0,normal,0


In [16]:
# Discard every row that has at least one missing value in any column
df = df.dropna(axis=0, how="any")

In [17]:
# Create a column transformer:
#   - numeric columns are rescaled with MinMaxScaler
#   - categorical/boolean columns are one-hot encoded; categories not seen during
#     fit are ignored at transform time (handle_unknown="ignore")
columnTransformer = make_column_transformer(
    (MinMaxScaler(), ['age', 'trestbps', 'chol', 'thalch', 'oldpeak', 'ca']),  # Turn all values in these columns between 0 and 1
    (OneHotEncoder(handle_unknown="ignore"), ["sex", "cp", "fbs", "restecg",  "exang", "slope", "thal"])
)

# NOTE(review): at this point the transformer has NOT been fitted yet (fit is
# called later, after the train/test split), so the pickle written here holds an
# unfitted object. It cannot be used for inference unless it is saved again
# after `fit`.
with open('columnTransformer.pkl', 'wb') as f:
    pickle.dump(columnTransformer, f)


In [39]:
# Separate the features from the target
X = df.drop(columns=["num"])
# Collapse the multi-level 'num' column into a binary label: 0 = no disease, 1 = disease
y = df["num"].gt(0).astype("int32")

In [40]:
# Preview the feature frame (the 'num' target column has been removed)
X.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal
0,63,Male,typical angina,145.0,233.0,True,lv hypertrophy,150.0,False,2.3,downsloping,0.0,fixed defect
1,67,Male,asymptomatic,160.0,286.0,False,lv hypertrophy,108.0,True,1.5,flat,3.0,normal
2,67,Male,asymptomatic,120.0,229.0,False,lv hypertrophy,129.0,True,2.6,flat,2.0,reversable defect
3,37,Male,non-anginal,130.0,250.0,False,normal,187.0,False,3.5,downsloping,0.0,normal
4,41,Female,atypical angina,130.0,204.0,False,lv hypertrophy,172.0,False,1.4,upsloping,0.0,normal


In [41]:
# Preview the binary target (0 = no disease, 1 = disease)
y.head()

Unnamed: 0,num
0,0
1,1
2,1
3,0
4,0


In [42]:
# Build our train and test sets (a fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit column transformer on the training data only (fitting on test data would result in data leakage)
columnTransformer.fit(X_train)

# Persist the *fitted* transformer. A transformer pickled before `fit` carries no
# learned min/max values or categories, so it cannot be used to preprocess new
# samples at inference time; saving it here makes the artifact usable.
with open('columnTransformer.pkl', 'wb') as f:
    pickle.dump(columnTransformer, f)

# Transform training and test data with normalization (MinMaxScaler) and one hot encoding (OneHotEncoder)
X_train_normal = columnTransformer.transform(X_train)
X_test_normal = columnTransformer.transform(X_test)

In [43]:
import numpy as np

# Sanity-check the model inputs: NaN/Inf values would break training.
features_have_nan = np.isnan(X_train_normal).any()
features_have_inf = np.isinf(X_train_normal).any()
labels_have_nan = np.isnan(y_train).any()
labels_have_inf = np.isinf(y_train).any()

print("Any NaN in X_train_normal:", features_have_nan)
print("Any Inf in X_train_normal:", features_have_inf)
print("Any NaN in y_train:", labels_have_nan)
print("Any Inf in y_train:", labels_have_inf)

Any NaN in X_train_normal: False
Any Inf in X_train_normal: False
Any NaN in y_train: False
Any Inf in y_train: False


In [44]:
# Display the column transformer object
columnTransformer

In [45]:
# Display the transformed training matrix (scaled numeric columns followed by one-hot columns)
X_train_normal

array([[0.6875    , 0.43396226, 0.36206897, ..., 0.        , 1.        ,
        0.        ],
       [0.70833333, 0.43396226, 0.20474138, ..., 0.        , 1.        ,
        0.        ],
       [0.77083333, 0.62264151, 0.31465517, ..., 1.        , 0.        ,
        0.        ],
       ...,
       [0.58333333, 0.32075472, 0.27801724, ..., 0.        , 0.        ,
        1.        ],
       [0.625     , 0.37735849, 0.22413793, ..., 0.        , 1.        ,
        0.        ],
       [0.875     , 0.1509434 , 0.35560345, ..., 0.        , 1.        ,
        0.        ]])

In [60]:
# 1. Set random seed
tf.random.set_seed(42)

# 2. Build the heart-disease classifier: two ReLU hidden layers (16 and 32 units)
#    followed by a single sigmoid unit that outputs a probability
heart_disease_model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation="relu"),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

# 3. Compile the model: binary cross-entropy loss, Adam optimizer constructed with 1e-3,
#    tracking binary accuracy, precision and recall
heart_disease_model.compile(
    loss="binary_crossentropy",
    optimizer=tf.keras.optimizers.Adam(1e-3),
    metrics=[tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
  )

# 4. Fit the model on the transformed training data for 100 epochs
#    (no validation data is passed, so the logged metrics are training metrics only)
history = heart_disease_model.fit(X_train_normal, y_train, epochs=100, verbose=1)


Epoch 1/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - binary_accuracy: 0.5798 - loss: 0.6810 - precision_1: 0.5717 - recall_1: 0.5357
Epoch 2/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - binary_accuracy: 0.6679 - loss: 0.6595 - precision_1: 0.7282 - recall_1: 0.5033
Epoch 3/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - binary_accuracy: 0.7159 - loss: 0.6403 - precision_1: 0.8658 - recall_1: 0.4901
Epoch 4/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - binary_accuracy: 0.7257 - loss: 0.6205 - precision_1: 0.8963 - recall_1: 0.4910
Epoch 5/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - binary_accuracy: 0.7686 - loss: 0.5998 - precision_1: 0.9374 - recall_1: 0.5597 
Epoch 6/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - binary_accuracy: 0.7571 - loss: 0.5779 - precision_1: 0.8885 - recall_1: 0.5701
Epo

In [61]:
# 5. Evaluate the model on the held-out test set.
#    The returned list is the loss followed by the compiled metrics in order:
#    [loss, binary_accuracy, precision, recall]
heart_disease_model.evaluate(X_test_normal, y_test)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - binary_accuracy: 0.8694 - loss: 0.3226 - precision_1: 0.8575 - recall_1: 0.8111 


[0.33589866757392883,
 0.8666666746139526,
 0.8695651888847351,
 0.800000011920929]

In [66]:
# Show the first transformed test row, wrapped in a list
[X_test_normal[0]]

[array([0.60416667, 0.18867925, 0.46982759, 0.52671756, 0.70967742,
        1.        , 0.        , 1.        , 1.        , 0.        ,
        0.        , 0.        , 1.        , 0.        , 0.        ,
        0.        , 1.        , 1.        , 0.        , 1.        ,
        0.        , 0.        , 1.        , 0.        , 0.        ])]

In [72]:
# Show the model's sigmoid outputs for the test set next to the true 0/1 labels
# (the cell evaluates to a tuple: predictions array, y_test Series)
heart_disease_model.predict(X_test_normal), y_test

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


(array([[0.99922246],
        [0.0482977 ],
        [0.7680614 ],
        [0.9761228 ],
        [0.11408009],
        [0.9918781 ],
        [0.02044041],
        [0.93391716],
        [0.09829424],
        [0.9958762 ],
        [0.24665956],
        [0.00394572],
        [0.02513823],
        [0.9986247 ],
        [0.31805053],
        [0.9520442 ],
        [0.19769162],
        [0.9373998 ],
        [0.00725968],
        [0.18764889],
        [0.880539  ],
        [0.35382488],
        [0.92277837],
        [0.06683342],
        [0.15273347],
        [0.00533038],
        [0.8104254 ],
        [0.00432634],
        [0.00234212],
        [0.08990295],
        [0.36682242],
        [0.00664354],
        [0.9590444 ],
        [0.9961643 ],
        [0.7534373 ],
        [0.15739785],
        [0.31879905],
        [0.1818446 ],
        [0.02804919],
        [0.00172852],
        [0.04463525],
        [0.0451496 ],
        [0.9856213 ],
        [0.9944042 ],
        [0.39726388],
        [0

In [92]:
# Take one raw sample (positional row 24) as a single-row DataFrame and remove the
# target column. Chaining .drop() returns a new frame instead of mutating a slice
# of `df` in place, which avoids pandas' SettingWithCopyWarning.
x_single = df.iloc[24:25].drop(columns="num")
x_single

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_single.drop("num", axis=1, inplace=True)


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal
24,60,Male,asymptomatic,130.0,206.0,False,lv hypertrophy,132.0,True,2.4,flat,2.0,reversable defect


In [93]:
# Apply the same column transformer used for the training data to the single raw sample
x_single_norm = columnTransformer.transform(x_single)
x_single_norm

array([[0.64583333, 0.33962264, 0.22844828, 0.46564885, 0.38709677,
        0.66666667, 0.        , 1.        , 1.        , 0.        ,
        0.        , 0.        , 1.        , 0.        , 1.        ,
        0.        , 0.        , 0.        , 1.        , 0.        ,
        1.        , 0.        , 0.        , 0.        , 1.        ]])

In [94]:
# Predict for the single transformed sample; the output is the sigmoid value for that one row
heart_disease_model.predict(x_single_norm)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step


array([[0.9961643]], dtype=float32)