In [1]:
# Import our dependencies
from urllib.parse import quote

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sqlalchemy import create_engine, MetaData, Table

from config import postgreSQLKey


In [2]:
# Set up the connection to the local PostgreSQL server.
# SQLAlchemy 1.4+ only accepts the "postgresql" dialect name; the legacy
# "postgres://" scheme is rejected when the engine is created.
# The password is percent-encoded (safe="" also encodes "/") so characters such
# as "@", ":", "/", "+" or spaces cannot be misread as URL delimiters.
databasePassword = quote(str(postgreSQLKey), safe="")
databaseString = f"postgresql://postgres:{databasePassword}@127.0.0.1:5432/cardioResearch"
databaseEngine = create_engine(databaseString)
databaseConnection = databaseEngine.connect()

In [3]:
# Read the "cardio_combined" table through the open connection, using its "id" column as the index
cardioDf = pd.read_sql(
    "cardio_combined",
    con=databaseConnection,
    index_col="id",
)

In [4]:
# Display the DataFrame read from the database above (no CSV file is loaded here)

cardioDf

Unnamed: 0_level_0,age,gender,height,weight,ap_hi,ap_lo,cholesterol,glc,bmi,smoke,alco,active,cardio
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,50,2,168,62,110,80,1,1,22.0,0,0,1,0
1,55,1,156,85,140,90,3,1,35.0,0,0,1,1
8,60,1,151,67,120,80,2,2,29.0,0,0,0,0
9,61,1,157,93,130,80,3,1,38.0,0,0,1,0
13,48,1,158,71,110,70,1,1,28.0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99975,49,2,168,80,120,80,1,1,28.0,0,0,1,1
99977,50,1,156,102,130,80,1,1,42.0,0,1,0,1
99990,51,1,161,56,170,90,1,1,22.0,0,0,1,1
99993,53,2,168,76,120,80,1,1,27.0,1,0,1,0


In [6]:
# Separate the "cardio" target from the predictor columns, then standardize the predictors.
# NOTE(review): this scaler is fit on every row of the frame; features used to score
# a held-out split should come from a scaler fit on the training rows only.
cardioLabels = cardioDf["cardio"]
cardioAttributes = cardioDf.drop(columns="cardio")

scaler = StandardScaler()
scaler.fit(cardioAttributes)
cardioAttributesScaled = scaler.transform(cardioAttributes)

In [7]:
# Inspect the array returned by scaler.fit_transform
cardioAttributesScaled

array([[-0.48366063,  1.355754  , -0.9931558 , ..., -0.31212727,
        -0.23860882,  0.49467834],
       [ 0.25602505, -0.73759694,  0.80373916, ..., -0.31212727,
        -0.23860882,  0.49467834],
       [ 0.99571073, -0.73759694, -0.39419081, ..., -0.31212727,
        -0.23860882, -2.02151563],
       ...,
       [-0.3357235 , -0.73759694,  2.60063413, ..., -0.31212727,
        -0.23860882,  0.49467834],
       [-0.03984923,  1.355754  , -0.39419081, ...,  3.20382132,
        -0.23860882,  0.49467834],
       [ 0.40396218, -0.73759694, -0.39419081, ..., -0.31212727,
        -0.23860882,  0.49467834]])

In [8]:
# Split training/test datasets.
# The split is done on the unscaled attributes and a scaler is then fit on the
# training rows only, so no test-set statistics leak into the features the model
# is trained on. random_state is unchanged, so the same rows land in each
# partition as when the pre-scaled array was split.
trainingCardioAttributesRaw, testingCardioAttributesRaw, trainingCardioLabels, testingCardioLabels = train_test_split(
    cardioAttributes, cardioLabels, random_state=78
)

# Learn mean/variance from the training partition and apply them to both partitions
splitScaler = StandardScaler().fit(trainingCardioAttributesRaw)
trainingCardioAttributes = splitScaler.transform(trainingCardioAttributesRaw)
testingCardioAttributes = splitScaler.transform(testingCardioAttributesRaw)


In [9]:
# Create a random forest classifier.
# n_jobs=-1 builds the 5000 trees on all available CPU cores instead of one;
# the fixed random_state keeps the fitted forest reproducible.
rfModel = RandomForestClassifier(n_estimators=5000, random_state=78, n_jobs=-1)

# Fit the model on the training partition. fit() returns the estimator itself,
# so the result does not need to be reassigned.
rfModel.fit(trainingCardioAttributes, trainingCardioLabels)

# Predict labels for the held-out test partition (scored in the next cell)
cardioLabelPredictions = rfModel.predict(testingCardioAttributes)

In [10]:
# Score the predictions against the true test labels and report the result
cardioAccuracy = accuracy_score(testingCardioLabels, cardioLabelPredictions)
print(f"Accuracy Score: {cardioAccuracy}")

Accuracy Score: 0.7033252613549111
