In [15]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from tensorflow.keras.utils import plot_model

#  Import and read the Heart Disease - FOR ML.csv.
dataset_df = pd.read_csv('Heart Disease - FOR ML.csv')
dataset_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence,33229


In [3]:
# Map string values to integers
mapping = {'Absence': 0, 'Presence': 1}

# Apply mapping to string column
dataset_df['Heart Disease'] = dataset_df['Heart Disease'].map(mapping)

In [4]:
dataset_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0,33229


In [5]:
# Drop the non-beneficial columns 'Patient ID'.
dataset_df = dataset_df.drop(columns =['Patient ID'])
dataset_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0


In [6]:
# Determine the number of unique values in each column.
dataset_df.nunique()

Age                         41
Sex                          2
Chest pain type              4
BP                          47
Cholesterol                144
FBS over 120                 2
EKG results                  3
Max HR                      90
Exercise angina              2
ST depression               39
Slope of ST                  3
Number of vessels fluro      4
Thallium                     3
Heart Disease                2
dtype: int64

In [7]:
# Convert categorical data to numeric with `pd.get_dummies`
dummies = pd.get_dummies(dataset_df)
dummies.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0


In [8]:
# Split our preprocessed data into our features and target arrays
y = dummies['Heart Disease'].values
X = dummies[['Age', 'Sex', 'Chest pain type', 'BP', 'Cholesterol', 'FBS over 120', 'EKG results', 'Exercise angina', 'ST depression', 'Slope of ST', 'Number of vessels fluro', 'Thallium']].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [9]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [14]:
# (FIRST ATTEMPT)
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 8
hidden_nodes_layer2 = 5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 8)                 104       
                                                                 
 dense_4 (Dense)             (None, 5)                 45        
                                                                 
 dense_5 (Dense)             (None, 1)                 6         
                                                                 
Total params: 155 (620.00 Byte)
Trainable params: 155 (620.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [11]:
# Compile the model (FIRST ATTEMPT)
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])




In [12]:
# Train the model (FIRST ATTEMPT)
fit_model = nn.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 

In [13]:
# Evaluate the model using the test data (FIRST ATTEMPT)
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

3/3 - 0s - loss: 0.5053 - accuracy: 0.8382 - 331ms/epoch - 110ms/step
Loss: 0.5052741765975952, Accuracy: 0.8382353186607361


In [17]:
 !pip install keras-tuner



In [17]:
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=100,
        step=2), activation=activation, input_dim=input_features_total))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=100,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [18]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=100,
    hyperband_iterations=2)

Reloading Tuner from .\untitled_project\tuner0.json


In [19]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=100,validation_data=(X_test_scaled,y_test))

Trial 500 Complete [00h 00m 07s]
val_accuracy: 0.8088235259056091

Best val_accuracy So Far: 0.8823529481887817
Total elapsed time: 4d 20h 58m 23s


In [18]:
# (SECOND ATTEMPT)
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 8
hidden_nodes_layer2 = 5

nn_2 = tf.keras.models.Sequential()

# First hidden layer
nn_2.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn_2.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn_2.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_2.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 8)                 104       
                                                                 
 dense_7 (Dense)             (None, 5)                 45        
                                                                 
 dense_8 (Dense)             (None, 1)                 6         
                                                                 
Total params: 155 (620.00 Byte)
Trainable params: 155 (620.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [19]:
# Compile the model (SECOND ATTEMPT)
nn_2.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [20]:
# Train the model (SECOND ATTEMPT)
fit_model = nn_2.fit(X_train_scaled,y_train,epochs=150)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

In [21]:
# Evaluate the model using the test data (SECOND ATTEMPT)
model_loss, model_accuracy = nn_2.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

3/3 - 0s - loss: 0.3501 - accuracy: 0.8529 - 103ms/epoch - 34ms/step
Loss: 0.3500809669494629, Accuracy: 0.8529411554336548


In [22]:
# (THIRD ATTEMPT)
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 16
hidden_nodes_layer2 = 10

nn_3 = tf.keras.models.Sequential()

# First hidden layer
nn_3.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn_3.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn_3.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_3.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 16)                208       
                                                                 
 dense_10 (Dense)            (None, 10)                170       
                                                                 
 dense_11 (Dense)            (None, 1)                 11        
                                                                 
Total params: 389 (1.52 KB)
Trainable params: 389 (1.52 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [23]:
# Compile the model (THIRD ATTEMPT)
nn_3.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [24]:
# Train the model (THIRD ATTEMPT)
fit_model = nn_3.fit(X_train_scaled,y_train,epochs=200)
     

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [25]:
# Evaluate the model using the test data (THIRD ATTEMPT)
model_loss, model_accuracy = nn_3.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

3/3 - 0s - loss: 0.5381 - accuracy: 0.7794 - 109ms/epoch - 36ms/step
Loss: 0.538118839263916, Accuracy: 0.779411792755127


In [26]:
#  Import and read the Heart Disease - FOR ML.csv. to attempt dropping another column
dataset2_df = pd.read_csv('Heart Disease - FOR ML.csv')
dataset2_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence,33229


In [27]:
# Map string values to integers
mapping = {'Absence': 0, 'Presence': 1}

# Apply mapping to string column
dataset2_df['Heart Disease'] = dataset2_df['Heart Disease'].map(mapping)

In [28]:
dataset2_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0,33229


In [29]:
dataset2_df = dataset2_df.drop(columns =['Patient ID','Sex'])
dataset2_df.head()

Unnamed: 0,Age,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,4,130,322,0,2,109,0,2.4,2,3,3,1
1,67,3,115,564,0,2,160,0,1.6,2,0,7,0
2,57,2,124,261,0,0,141,0,0.3,1,0,7,1
3,64,4,128,263,0,0,105,1,0.2,2,1,7,0
4,74,2,120,269,0,2,121,1,0.2,1,1,3,0


In [30]:
# Determine the number of unique values in each column.
dataset2_df.nunique()

Age                         41
Chest pain type              4
BP                          47
Cholesterol                144
FBS over 120                 2
EKG results                  3
Max HR                      90
Exercise angina              2
ST depression               39
Slope of ST                  3
Number of vessels fluro      4
Thallium                     3
Heart Disease                2
dtype: int64

In [31]:
dummies = pd.get_dummies(dataset2_df)
dummies.head()

Unnamed: 0,Age,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,4,130,322,0,2,109,0,2.4,2,3,3,1
1,67,3,115,564,0,2,160,0,1.6,2,0,7,0
2,57,2,124,261,0,0,141,0,0.3,1,0,7,1
3,64,4,128,263,0,0,105,1,0.2,2,1,7,0
4,74,2,120,269,0,2,121,1,0.2,1,1,3,0


In [32]:
# Split our preprocessed data into our features and target arrays
y_2 = dummies['Heart Disease'].values
X_2 = dummies[['Age', 'Chest pain type', 'BP', 'Cholesterol', 'FBS over 120', 'EKG results', 'Exercise angina', 'ST depression', 'Slope of ST', 'Number of vessels fluro', 'Thallium']].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X_2, y_2)

In [33]:
# Create a StandardScaler instances
scaler_2 = StandardScaler()

# Fit the StandardScaler
X_scaler_2 = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler_2.transform(X_train)
X_test_scaled = X_scaler_2.transform(X_test)

In [34]:
# (FIRST ATTEMPT - NO 'SEX' OR 'PATIENT ID')
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 8
hidden_nodes_layer2 = 5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 8)                 96        
                                                                 
 dense_13 (Dense)            (None, 5)                 45        
                                                                 
 dense_14 (Dense)            (None, 1)                 6         
                                                                 
Total params: 147 (588.00 Byte)
Trainable params: 147 (588.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [35]:
# Compile the model 
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [36]:
# Train the model 
fit_model = nn.fit(X_train_scaled,y_train,epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [37]:
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

3/3 - 0s - loss: 1.2640 - accuracy: 0.7941 - 121ms/epoch - 40ms/step
Loss: 1.2640037536621094, Accuracy: 0.7941176295280457


In [38]:
# (SECOND ATTEMPT - NO 'SEX' OR 'PATIENT ID')
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 16
hidden_nodes_layer2 = 10

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 16)                192       
                                                                 
 dense_16 (Dense)            (None, 10)                170       
                                                                 
 dense_17 (Dense)            (None, 1)                 11        
                                                                 
Total params: 373 (1.46 KB)
Trainable params: 373 (1.46 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [39]:
# Compile the model (SECOND ATTEMPT)
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [40]:
# Train the model (SECOND ATTEMPT)
fit_model = nn.fit(X_train_scaled,y_train,epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [41]:
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

3/3 - 0s - loss: 2.0886 - accuracy: 0.7794 - 123ms/epoch - 41ms/step
Loss: 2.088621139526367, Accuracy: 0.779411792755127


In [42]:
#  Import and read the Heart Disease - FOR ML.csv. to attempt dropping another column
dataset3_df = pd.read_csv('Heart Disease - FOR ML.csv')
dataset3_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence,33229


In [43]:
# Map string values to integers
mapping = {'Absence': 0, 'Presence': 1}

# Apply mapping to string column
dataset3_df['Heart Disease'] = dataset3_df['Heart Disease'].map(mapping)

In [44]:
dataset3_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0,33229


In [45]:
dataset3_df = dataset3_df.drop(columns =['Patient ID','Sex','Age'])
dataset3_df.head()

Unnamed: 0,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,4,130,322,0,2,109,0,2.4,2,3,3,1
1,3,115,564,0,2,160,0,1.6,2,0,7,0
2,2,124,261,0,0,141,0,0.3,1,0,7,1
3,4,128,263,0,0,105,1,0.2,2,1,7,0
4,2,120,269,0,2,121,1,0.2,1,1,3,0


In [46]:
# Determine the number of unique values in each column.
dataset3_df.nunique()

Chest pain type              4
BP                          47
Cholesterol                144
FBS over 120                 2
EKG results                  3
Max HR                      90
Exercise angina              2
ST depression               39
Slope of ST                  3
Number of vessels fluro      4
Thallium                     3
Heart Disease                2
dtype: int64

In [47]:
# Split our preprocessed data into our features and target arrays
y_3 = dummies['Heart Disease'].values
X_3 = dummies[['Chest pain type', 'BP', 'Cholesterol', 'FBS over 120', 'EKG results', 'Exercise angina', 'ST depression', 'Slope of ST', 'Number of vessels fluro', 'Thallium']].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X_3, y_3)

In [48]:
# Create a StandardScaler instances
scaler_3 = StandardScaler()

# Fit the StandardScaler
X_scaler_3 = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler_3.transform(X_train)
X_test_scaled = X_scaler_3.transform(X_test)

In [49]:
# (FIRST ATTEMPT - NO 'SEX' OR 'PATIENT ID' OR 'AGE')
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 8
hidden_nodes_layer2 = 5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_18 (Dense)            (None, 8)                 88        
                                                                 
 dense_19 (Dense)            (None, 5)                 45        
                                                                 
 dense_20 (Dense)            (None, 1)                 6         
                                                                 
Total params: 139 (556.00 Byte)
Trainable params: 139 (556.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [50]:
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [51]:
fit_model = nn.fit(X_train_scaled,y_train,epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [52]:
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

3/3 - 0s - loss: 1.3701 - accuracy: 0.8235 - 127ms/epoch - 42ms/step
Loss: 1.3700777292251587, Accuracy: 0.8235294222831726


In [53]:
# (SECOND ATTEMPT - NO 'SEX' OR 'PATIENT ID' OR 'AGE')
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 16
hidden_nodes_layer2 = 10

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_21 (Dense)            (None, 16)                176       
                                                                 
 dense_22 (Dense)            (None, 10)                170       
                                                                 
 dense_23 (Dense)            (None, 1)                 11        
                                                                 
Total params: 357 (1.39 KB)
Trainable params: 357 (1.39 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [54]:
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [55]:
fit_model = nn.fit(X_train_scaled,y_train,epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [56]:
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

3/3 - 0s - loss: 3.9137 - accuracy: 0.8235 - 145ms/epoch - 48ms/step
Loss: 3.913651466369629, Accuracy: 0.8235294222831726


In [57]:
dataset4_df = pd.read_csv('Heart Disease - FOR ML.csv')
dataset4_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence,33229


In [58]:
# Map string values to integers
mapping = {'Absence': 0, 'Presence': 1}

# Apply mapping to string column
dataset4_df['Heart Disease'] = dataset4_df['Heart Disease'].map(mapping)

In [59]:
dataset4_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0,33229


In [60]:
dataset4_df = dataset4_df.drop(columns =['Patient ID'])
dataset4_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0


In [61]:
# Determine the number of unique values in each column.
dataset4_df.nunique()

Age                         41
Sex                          2
Chest pain type              4
BP                          47
Cholesterol                144
FBS over 120                 2
EKG results                  3
Max HR                      90
Exercise angina              2
ST depression               39
Slope of ST                  3
Number of vessels fluro      4
Thallium                     3
Heart Disease                2
dtype: int64

In [62]:
dummies = pd.get_dummies(dataset4_df)
dummies.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0


In [63]:
# Split our preprocessed data into our features and target arrays
y_4 = dummies['Heart Disease'].values
X_4 = dummies[['Age', 'Sex', 'EKG results', 'FBS over 120', 'Chest pain type', 'BP', 'Cholesterol', 'Exercise angina', 'ST depression', 'Slope of ST', 'Number of vessels fluro', 'Thallium', 'Max HR']].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X_4, y_4)

In [64]:
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 16
hidden_nodes_layer2 = 10

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_24 (Dense)            (None, 16)                224       
                                                                 
 dense_25 (Dense)            (None, 10)                170       
                                                                 
 dense_26 (Dense)            (None, 1)                 11        
                                                                 
Total params: 405 (1.58 KB)
Trainable params: 405 (1.58 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [65]:
len(X_train[0])

13

In [66]:
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [67]:
fit_model = nn.fit(X_train,y_train,epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [68]:
model_loss, model_accuracy = nn.evaluate(X_train,y_train,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

7/7 - 0s - loss: 0.3827 - accuracy: 0.8317 - 143ms/epoch - 20ms/step
Loss: 0.38271892070770264, Accuracy: 0.8316831588745117


In [69]:
#USING RANDOM FOREST results 
dataset5_df = pd.read_csv('Heart Disease - FOR ML.csv')
dataset5_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence,33229


In [70]:
# Map string values to integers
mapping = {'Absence': 0, 'Presence': 1}

# Apply mapping to string column
dataset5_df['Heart Disease'] = dataset5_df['Heart Disease'].map(mapping)

In [71]:
dataset5_df = dataset5_df.drop(columns =['FBS over 120', 'EKG results', 'Sex', 'Patient ID'])
dataset5_df.head()

Unnamed: 0,Age,Chest pain type,BP,Cholesterol,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,4,130,322,109,0,2.4,2,3,3,1
1,67,3,115,564,160,0,1.6,2,0,7,0
2,57,2,124,261,141,0,0.3,1,0,7,1
3,64,4,128,263,105,1,0.2,2,1,7,0
4,74,2,120,269,121,1,0.2,1,1,3,0


In [72]:
# Determine the number of unique values in each column.
dataset5_df.nunique()

Age                         41
Chest pain type              4
BP                          47
Cholesterol                144
Max HR                      90
Exercise angina              2
ST depression               39
Slope of ST                  3
Number of vessels fluro      4
Thallium                     3
Heart Disease                2
dtype: int64

In [73]:
dummies = pd.get_dummies(dataset5_df)
dummies.head()

Unnamed: 0,Age,Chest pain type,BP,Cholesterol,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,4,130,322,109,0,2.4,2,3,3,1
1,67,3,115,564,160,0,1.6,2,0,7,0
2,57,2,124,261,141,0,0.3,1,0,7,1
3,64,4,128,263,105,1,0.2,2,1,7,0
4,74,2,120,269,121,1,0.2,1,1,3,0


In [74]:
# Split our preprocessed data into our features and target arrays
y_5 = dummies['Heart Disease'].values
X_5 = dummies[['Age', 'Chest pain type', 'BP', 'Cholesterol', 'Exercise angina', 'ST depression', 'Slope of ST', 'Number of vessels fluro', 'Thallium', 'Max HR']].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X_5, y_5)

In [75]:
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 16
hidden_nodes_layer2 = 10

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_27 (Dense)            (None, 16)                176       
                                                                 
 dense_28 (Dense)            (None, 10)                170       
                                                                 
 dense_29 (Dense)            (None, 1)                 11        
                                                                 
Total params: 357 (1.39 KB)
Trainable params: 357 (1.39 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [76]:
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [77]:
fit_model = nn.fit(X_train,y_train,epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [78]:
model_loss, model_accuracy = nn.evaluate(X_train,y_train,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

7/7 - 0s - loss: 0.3145 - accuracy: 0.8861 - 113ms/epoch - 16ms/step
Loss: 0.3145270347595215, Accuracy: 0.8861386179924011


In [79]:
#USING TABLEAU results 
dataset6_df = pd.read_csv('Heart Disease - FOR ML.csv')
dataset6_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence,33229


In [80]:
# Map string values to integers
mapping = {'Absence': 0, 'Presence': 1}

# Apply mapping to string column
dataset6_df['Heart Disease'] = dataset6_df['Heart Disease'].map(mapping)

In [81]:
dataset6_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0,33229


In [82]:
dataset6_df = dataset6_df.drop(columns =['Age', 'Sex', 'Chest pain type', 'BP', 'Cholesterol', 'FBS over 120', 'EKG results', 'Max HR', 'Slope of ST', 'Number of vessels fluro', 'Patient ID'])
dataset6_df.head()

Unnamed: 0,Exercise angina,ST depression,Thallium,Heart Disease
0,0,2.4,3,1
1,0,1.6,7,0
2,0,0.3,7,1
3,1,0.2,7,0
4,1,0.2,3,0


In [83]:
# Determine the number of unique values in each column.
dataset6_df.nunique()

Exercise angina     2
ST depression      39
Thallium            3
Heart Disease       2
dtype: int64

In [84]:
# Split our preprocessed data into our features and target arrays
y_6 = dummies['Heart Disease'].values
X_6 = dummies[['Exercise angina', 'ST depression', 'Thallium']].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X_6, y_6)

In [85]:
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 16
hidden_nodes_layer2 = 10

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_30 (Dense)            (None, 16)                64        
                                                                 
 dense_31 (Dense)            (None, 10)                170       
                                                                 
 dense_32 (Dense)            (None, 1)                 11        
                                                                 
Total params: 245 (980.00 Byte)
Trainable params: 245 (980.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [86]:
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [87]:
fit_model = nn.fit(X_train,y_train,epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [88]:
model_loss, model_accuracy = nn.evaluate(X_train,y_train,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

7/7 - 0s - loss: 0.4272 - accuracy: 0.8069 - 118ms/epoch - 17ms/step
Loss: 0.42720600962638855, Accuracy: 0.8069307208061218


In [89]:
#USING OPPOSITE OF TABLEAU results 
dataset7_df = pd.read_csv('Heart Disease - FOR ML.csv')
dataset7_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence,33229


In [90]:
# Map string values to integers
mapping = {'Absence': 0, 'Presence': 1}

# Apply mapping to string column
dataset7_df['Heart Disease'] = dataset7_df['Heart Disease'].map(mapping)

In [92]:
dataset7_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0,33229


In [93]:
dataset7_df = dataset7_df.drop(columns =['Exercise angina', 'Thallium', 'ST depression'])
dataset7_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Slope of ST,Number of vessels fluro,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,2,3,1,31065
1,67,0,3,115,564,0,2,160,2,0,0,31606
2,57,1,2,124,261,0,0,141,1,0,1,32147
3,64,1,4,128,263,0,0,105,2,1,0,32688
4,74,0,2,120,269,0,2,121,1,1,0,33229


In [95]:
# Determine the number of unique values in each column.
dataset7_df.nunique()

Age                         41
Sex                          2
Chest pain type              4
BP                          47
Cholesterol                144
FBS over 120                 2
EKG results                  3
Max HR                      90
Slope of ST                  3
Number of vessels fluro      4
Heart Disease                2
Patient ID                 270
dtype: int64

In [97]:
# Split our preprocessed data into our features and target arrays
y_7 = dummies['Heart Disease'].values
X_7 = dummies[['Age', 'Chest pain type', 'BP', 'Cholesterol', 'Slope of ST', 'Number of vessels fluro', 'Max HR']].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X_7, y_7)

In [98]:
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 16
hidden_nodes_layer2 = 10

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_33 (Dense)            (None, 16)                128       
                                                                 
 dense_34 (Dense)            (None, 10)                170       
                                                                 
 dense_35 (Dense)            (None, 1)                 11        
                                                                 
Total params: 309 (1.21 KB)
Trainable params: 309 (1.21 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [99]:
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [100]:
fit_model = nn.fit(X_train,y_train,epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [101]:
model_loss, model_accuracy = nn.evaluate(X_train,y_train,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

7/7 - 0s - loss: 0.4241 - accuracy: 0.8366 - 117ms/epoch - 17ms/step
Loss: 0.42407044768333435, Accuracy: 0.8366336822509766


In [34]:
dataset8_df = pd.read_csv('Heart Disease - FOR ML.csv')
dataset8_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease,Patient ID
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence,31065
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence,31606
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence,32147
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence,32688
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence,33229


In [35]:
# Map string values to integers
mapping = {'Absence': 0, 'Presence': 1}

# Apply mapping to string column
dataset8_df['Heart Disease'] = dataset8_df['Heart Disease'].map(mapping)

In [36]:
dataset8_df = dataset8_df.drop(columns =['Age', 'FBS over 120', 'Sex', 'BP', 'Cholesterol', 'Max HR', 'Slope of ST', 'Number of vessels fluro', 'Patient ID', 'EKG results'])
dataset8_df.head()

Unnamed: 0,Chest pain type,Exercise angina,ST depression,Thallium,Heart Disease
0,4,0,2.4,3,1
1,3,0,1.6,7,0
2,2,0,0.3,7,1
3,4,1,0.2,7,0
4,2,1,0.2,3,0


In [37]:
# Determine the number of unique values in each column.
dataset8_df.nunique()

Chest pain type     4
Exercise angina     2
ST depression      39
Thallium            3
Heart Disease       2
dtype: int64

In [39]:
# Split our preprocessed data into our features and target arrays
y_8 = dummies['Heart Disease'].values
X_8 = dummies[['Age', 'FBS over 120', 'Sex', 'BP', 'Cholesterol', 'Max HR', 'Slope of ST', 'Number of vessels fluro', 'EKG results']].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X_8, y_8)

In [40]:
input_features_total = len(X_train[0])
hidden_nodes_layer1 = 16
hidden_nodes_layer2 = 10

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim = input_features_total, activation = "relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation = "relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 16)                160       
                                                                 
 dense_7 (Dense)             (None, 10)                170       
                                                                 
 dense_8 (Dense)             (None, 1)                 11        
                                                                 
Total params: 341 (1.33 KB)
Trainable params: 341 (1.33 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [41]:
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [42]:
fit_model = nn.fit(X_train,y_train,epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [43]:
model_loss, model_accuracy = nn.evaluate(X_train,y_train,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

7/7 - 0s - loss: 0.4322 - accuracy: 0.8317 - 295ms/epoch - 42ms/step
Loss: 0.43218329548835754, Accuracy: 0.8316831588745117
