In [1]:
# Dependencies
import numpy as np
import pandas as pd
import datetime as dt

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from sqlalchemy import create_engine, inspect, MetaData, Table
from sklearn.datasets import make_blobs
import sklearn as skl
import tensorflow as tf

%matplotlib inline
import matplotlib.pyplot as plt
import os
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from matplotlib import rcParams
from sklearn.metrics import accuracy_score

In [2]:
# Read the connection string from the environment so credentials are never
# stored in the notebook. Set it before starting Jupyter, for example:
#   export HEART_DB_URL="postgresql://<user>:<password>@<host>/<database>"
# NOTE(review): the password that was previously hardcoded here should be
# treated as leaked and rotated.
db_url = os.environ.get("HEART_DB_URL")
if not db_url:
    raise RuntimeError(
        "Set the HEART_DB_URL environment variable to the PostgreSQL "
        "connection string before running this notebook."
    )
engine = create_engine(db_url)

# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(engine, reflect=True)

In [3]:
# Open an ORM session bound to the database engine
session = Session(bind=engine)

In [4]:
# Display the engine's repr as a quick check that it was created
engine

Engine(postgresql://xaelnmld:***@mahmud.db.elephantsql.com/xaelnmld)

In [5]:
# Build an inspector on the engine; it is used to list the columns of the
# heart_data table.
inspector = inspect(engine)

In [6]:
# Print the name and SQL type of every column in the heart_data table
columns = inspector.get_columns('heart_data')
for column in columns:
    print(*(column[field] for field in ("name", "type")))

Age BIGINT
Sex BIGINT
cp BIGINT
trestbps DOUBLE_PRECISION
chol DOUBLE_PRECISION
fbs DOUBLE_PRECISION
restecg DOUBLE_PRECISION
thalach DOUBLE_PRECISION
exang DOUBLE_PRECISION
oldpeak DOUBLE_PRECISION
num BIGINT
Location BIGINT


In [7]:
# Make a connection to the SQL database
# NOTE(review): no matching conn.close() appears in the visible cells —
# confirm the connection is released once the data has been loaded.
conn = engine.connect()

In [8]:
# Load every record of the heart_data table into a DataFrame and preview it
heart_query = "SELECT * FROM heart_data"
heart_data_df = pd.read_sql(sql=heart_query, con=conn)
heart_data_df.head()

Unnamed: 0,Age,Sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,num,Location
0,63,1,1,145.0,233.0,1.0,2.0,150.0,0.0,2.3,0,0
1,67,1,4,160.0,286.0,0.0,2.0,108.0,1.0,1.5,1,0
2,67,1,4,120.0,229.0,0.0,2.0,129.0,1.0,2.6,0,0
3,37,1,3,130.0,250.0,0.0,0.0,187.0,0.0,3.5,0,0
4,41,0,2,130.0,204.0,0.0,2.0,172.0,0.0,1.4,0,0


In [None]:
# Show the (rows, columns) dimensions of the loaded table
heart_data_df.shape

In [None]:
# Count how many records carry each Location value
heart_data_df['Location'].value_counts()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column
heart_data_df.describe()

In [None]:
# Draw the pairwise correlation matrix with one labelled tick per column
rcParams['figure.figsize'] = (15, 14)
tick_positions = np.arange(heart_data_df.shape[1])
column_labels = heart_data_df.columns
plt.matshow(heart_data_df.corr())
plt.yticks(tick_positions, column_labels)
plt.xticks(tick_positions, column_labels)
plt.colorbar()

In [None]:
# Plot a histogram for each numeric column to inspect the distributions
heart_data_df.hist()

In [None]:
# Bar chart of how many records fall into each target class.
rcParams['figure.figsize'] = 8,6
# Take bar positions and heights from the same Series, sorted by class label.
# unique() returns labels in order of appearance while value_counts() orders
# by frequency, so pairing the two can attach a count to the wrong class.
class_counts = heart_data_df['num'].value_counts().sort_index()
plt.bar(class_counts.index, class_counts.values, color = ['green', 'yellow'])
plt.xticks(class_counts.index)
plt.xlabel('Target Classes')
plt.ylabel('Count')
plt.title('Count of each Target Class')

In [None]:
# What is the distribution of those with or without heart disease?
# Counts the records per value of the 'num' target column.
# Alternative formulation: heart_data_df.groupby('num').size()
heart_data_df['num'].value_counts()

In [None]:
# Target: the 'num' column.
# Features: every remaining column of the table.
y = heart_data_df["num"]
X = heart_data_df.drop(columns=["num"])
X.head()

In [None]:
# Split the data into training and test sets with train_test_split();
# random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Fit a logistic regression classifier and report accuracy on both splits
clf = LogisticRegression(max_iter=500).fit(X_train, y_train)
train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)
print(f"train acc: {train_accuracy}")
print(f"test acc: {test_accuracy}")

In [None]:
# Predict on the held-out features, then tabulate actual vs. predicted labels
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

In [None]:
# Wrap the confusion matrix in a labelled DataFrame:
# rows are the actual classes, columns are the predicted classes.
row_labels = ["Actual 0", "Actual 1"]
col_labels = ["Pred 0", "Pred 1"]
cm_df = pd.DataFrame(cm, index=row_labels, columns=col_labels)
cm_df

In [None]:
# Calculate the sensitivity (recall / true-positive rate) of the model from
# the confusion matrix: TP / (TP + FN).
# The counts are read from cm rather than typed in by hand, so the value stays
# correct when the data or model changes. For a binary problem,
# confusion_matrix(...).ravel() yields tn, fp, fn, tp in that order.
tn, fp, fn, tp = cm.ravel()
sens = tp / (tp + fn)
sens

In [None]:
# Separate the 'num' target from the feature columns, both as NumPy arrays
X = heart_data_df.drop("num", axis=1).values
y = heart_data_df["num"].values

# Split into training and test sets with a fixed seed
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42)

In [None]:
# Preprocess numerical data for neural network

# Create a StandardScaler instance. StandardScaler is already imported in the
# dependencies cell at the top of the notebook, so it is not re-imported here.
scaler = StandardScaler()

# Fit the StandardScaler on the training split only, so that no statistics
# from the test split are used when scaling.
X_scaler = scaler.fit(X_train)

# Scale both splits with the statistics learned from the training split
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Seed TensorFlow's global random generator so that re-running this cell
# gives more repeatable training results.
tf.random.set_seed(42)

# Define the deep learning model
# The input width is taken from the scaled training data instead of being
# hard-coded, so the model keeps working if feature columns are added/removed.
n_features = X_train_scaled.shape[1]
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu", input_dim=n_features))
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu"))
# Single sigmoid unit: outputs a probability for the binary target
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50)

# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")