In [1]:
#import dependencies

import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
sns.set(style="whitegrid", color_codes=True, font_scale=1.3)

In [None]:
#loading data via pandas, in case of no Postgres Connection
# heart =pd.read_csv('Resources/heart.csv')

In [None]:
import psycopg2
import sqlalchemy
from sqlalchemy import create_engine
from site_key import pw

In [None]:
#create engine and connection to postgres
engine = create_engine(f'postgresql://postgres:{pw}@localhost:5432/heart_failure')
connection = engine.connect()

In [2]:
#read tables from heart_failure DB
heart = pd.read_sql('select * from heart_failure', connection)
heart.head(5)

Mounted at /content/drive


Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [3]:
# Find if null data exists
heart.isnull().sum()

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64

In [4]:
#renaming columns
heart.rename(columns={'Age': 'PatientAge', 'Sex': 'Gender', 'ChestPainType': 'ChestPainType', 'RestingBP': 'BloodPressure', 
                      'Cholesterol': 'Cholesterol', 'FastingBS':'BloodSugar', 'RestingECG':'Electrocardiogram',
                      'MaxHR': 'MaxHeartRate', 'ExerciseAngina': 'ExerciseAngina', 'OldPeak': 'OldPeak', 
                      'ST_Slope': "ST_Slope", 'HeartDisease': 'HeartDisease' }, inplace=True)

In [5]:
#check to see how the data is distributed
heart.describe()

Unnamed: 0,PatientAge,BloodPressure,Cholesterol,BloodSugar,MaxHeartRate,Oldpeak,HeartDisease
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,53.510893,132.396514,198.799564,0.233115,136.809368,0.887364,0.553377
std,9.432617,18.514154,109.384145,0.423046,25.460334,1.06657,0.497414
min,28.0,0.0,0.0,0.0,60.0,-2.6,0.0
25%,47.0,120.0,173.25,0.0,120.0,0.0,0.0
50%,54.0,130.0,223.0,0.0,138.0,0.6,1.0
75%,60.0,140.0,267.0,0.0,156.0,1.5,1.0
max,77.0,200.0,603.0,1.0,202.0,6.2,1.0


In [6]:
#Scatterplot matrix to spot outliers

fig = px.scatter_matrix(heart, dimensions=
["PatientAge","BloodPressure","Cholesterol","BloodSugar","MaxHeartRate"],
labels={col:col.replace('_', ' ') for col in heart.columns},           
height=900, color="HeartDisease", color_continuous_scale=px.colors.diverging.Tropic)
fig.show()

In [7]:
#Percentage of NON-heart disease cases
data_0 = heart[heart.HeartDisease == 0].HeartDisease.count() / heart.HeartDisease.count()
data_0

0.4466230936819172

In [8]:
#Percentage of heart disease cases
data_1= heart[heart.HeartDisease == 1].HeartDisease.count() / heart.HeartDisease.count()
data_1

0.5533769063180828

In [9]:
heart_converted = pd.get_dummies(heart)

# Seperating Dependent Features from other independent features ---
y = heart_converted['HeartDisease']
X = heart_converted.drop(columns=['HeartDisease'], axis=1)

In [10]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=80)
X_train.head()


Unnamed: 0,PatientAge,BloodPressure,Cholesterol,BloodSugar,MaxHeartRate,Oldpeak,Gender_F,Gender_M,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,Electrocardiogram_LVH,Electrocardiogram_Normal,Electrocardiogram_ST,ExerciseAngina_N,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
892,39,138,220,0,152,0.0,1,0,0,0,1,0,0,1,0,1,0,0,1,0
286,59,140,169,0,140,0.0,0,1,1,0,0,0,0,1,0,1,0,0,0,1
523,59,124,160,0,117,1.0,0,1,1,0,0,0,0,1,0,0,1,0,1,0
502,69,140,208,0,140,2.0,0,1,1,0,0,0,0,0,1,0,1,0,1,0
429,63,133,0,0,120,1.0,0,1,0,0,1,0,1,0,0,0,1,0,1,0


In [11]:
 # Standardize the data

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

## Model Implementation

In [12]:
import tensorflow as tf

In [13]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 100
hidden_nodes_layer2 = 80
hidden_nodes_layer3 = 10

nn = tf.keras.models.Sequential()

# First hidden layer

nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, activation="relu", input_dim=number_input_features))



# Optimization: added more hidden layers increased the units
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="sigmoid"))

nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="sigmoid"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               2100      
                                                                 
 dense_1 (Dense)             (None, 80)                8080      
                                                                 
 dense_2 (Dense)             (None, 10)                810       
                                                                 
 dense_3 (Dense)             (None, 1)                 11        
                                                                 
Total params: 11,001
Trainable params: 11,001
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Compile the model

nn.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

In [15]:
# Train the model

fit_model = nn.fit(X_train_scaled, y_train, epochs=300)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [16]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

6/6 - 0s - loss: 0.5112 - accuracy: 0.8424 - 166ms/epoch - 28ms/step
Loss: 0.5112456679344177, Accuracy: 0.842391312122345
