In [1]:
# Import our dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sqlalchemy import create_engine, MetaData, Table

from config import password, passwordAWS

In [2]:
# Start the database engine and open a connection for the rest of the notebook.
# Set environmentSetting to 'dev' for the local database; any other value
# selects the AWS RDS instance.
environmentSetting = 'production'

# Use the "postgresql://" scheme: SQLAlchemy 1.4 removed the legacy
# "postgres://" alias, which now fails with NoSuchModuleError.
databaseString = f"postgresql://postgres:{password}@localhost:5432/CardioDatabase"
databaseStringAWS = f"postgresql://postgres:{passwordAWS}@group2cardio.cl2wtm3wzyhx.us-east-2.rds.amazonaws.com"


if environmentSetting == 'dev':
    databaseEngine = create_engine(databaseString)
else:
    databaseEngine = create_engine(databaseStringAWS)

# Connection reused by later cells when reading tables.
databaseConnection = databaseEngine.connect()

In [4]:
# Read the 'cardio_combined' table over the open database connection,
# using its 'id' column as the DataFrame index, then preview the first rows.
cardioDf = pd.read_sql(
    'cardio_combined',
    con=databaseConnection,
    index_col='id',
)
cardioDf.head(20)

In [None]:
# Cast every column to 64-bit integers and show the resulting dtypes to confirm.
cardioDf = cardioDf.astype(dtype='int64')
cardioDf.dtypes

In [None]:
# Discard the 'smoke', 'alco' and 'active' columns before building the
# feature matrix, then preview the remaining columns.
cardioDf = cardioDf.drop(columns=['smoke', 'alco', 'active'])
cardioDf.head(10)

In [None]:
# Separate the 'cardio' target column from the predictors, then standardize
# the predictors.
# NOTE(review): the scaler is fit on the full dataset here, i.e. before the
# train/test split that happens in a later cell.
cardioLabels = cardioDf['cardio']
cardioAttributes = cardioDf.drop(columns='cardio')
scaler = StandardScaler()
cardioAttributesScaled = scaler.fit_transform(cardioAttributes)
print(cardioAttributesScaled)

In [None]:
# Hold out a test set from the scaled attributes and their labels.
# random_state is pinned so the same split is produced on every run;
# no test_size is passed, so train_test_split's default proportions apply.
(
    trainingCardioAttributes,
    testingCardioAttributes,
    trainingCardioLabels,
    testingCardioLabels,
) = train_test_split(
    cardioAttributesScaled,
    cardioLabels,
    random_state=78,
)

In [None]:
# Configure (but do not yet train) a random forest of 200 trees with a fixed seed.
rfModel = RandomForestClassifier(
    n_estimators=200,
    random_state=2,
)

In [None]:
# Train the forest on the training split; the result of fit() is bound back
# to rfModel.
rfModel = rfModel.fit(
    trainingCardioAttributes,
    trainingCardioLabels,
)

In [None]:
# Evaluate the model: predict a label for every row of the held-out test attributes.
cardioLabelPredictions = rfModel.predict(
    testingCardioAttributes
)

In [None]:
# Tabulate true vs. predicted labels for the test set and print the matrix.
matrix = confusion_matrix(
    testingCardioLabels,
    cardioLabelPredictions,
)
print(matrix)

In [None]:
# Build and print the text classification report for the test-set predictions.
report = classification_report(
    testingCardioLabels,
    cardioLabelPredictions,
)
print(report)

In [None]:
# Print the accuracy of the test-set predictions.
# (A stray backslash before the opening quote previously made this a SyntaxError.)
print(f"Accuracy Score:{accuracy_score(testingCardioLabels, cardioLabelPredictions)}")

In [None]:
# Plot the test-set confusion matrix twice: normalized over the true labels,
# and as raw counts. Each entry is (plot title, `normalize` argument).
titles_options = [
    ("Normalized confusion matrix", 'true'),
    ("Confusion matrix, without normalization", None),
]

# Predict once; both matrices are built from the same test-set predictions.
testSetPredictions = rfModel.predict(testingCardioAttributes)

for title, normalize in titles_options:
    # Build the matrix explicitly and wrap it in a ConfusionMatrixDisplay.
    # plot_confusion_matrix was never imported here and has been removed from
    # recent scikit-learn releases.
    confusionValues = confusion_matrix(
        testingCardioLabels,
        testSetPredictions,
        labels=rfModel.classes_,
        normalize=normalize,
    )
    # Tick labels must be the class labels, not the per-sample label Series.
    disp = ConfusionMatrixDisplay(
        confusion_matrix=confusionValues,
        display_labels=rfModel.classes_,
    )
    disp.plot(cmap=plt.cm.Blues)
    disp.ax_.set_title(title)

    # The raw-count figure keeps the original output path; the normalized
    # figure gets its own file so one no longer overwrites the other.
    if normalize is None:
        outputPath = '../Resources/confusion_matrix.png'
    else:
        outputPath = '../Resources/confusion_matrix_normalized.png'
    disp.figure_.savefig(outputPath)

    print(title)
    print(disp.confusion_matrix)
plt.show()