## Neural Network Model

In [2]:
# Importing dependencies
import pandas as pd
from pathlib import Path
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
from sqlalchemy import create_engine

In [3]:
# Connecting to PostgreSQL
engine = create_engine('postgresql://postgres:postgres@localhost:5432/proj4_db')

query = "SELECT * FROM cleaned_data"
df = pd.read_sql(query, engine)

   sex  age  education  smokingStatus  cigsPerDay  BPMeds  prevalentStroke  \
0  1.0   39          4            0.0           0   False            False   
1  0.0   46          2            0.0           0   False            False   
2  1.0   48          1            1.0          20   False            False   
3  0.0   61          3            1.0          30   False            False   
4  0.0   46          3            1.0          23   False            False   

   prevalentHyp  diabetes  totCHOL  sysBP  diaBP    BMI  heartRate  glucose  \
0         False     False      195  106.0   70.0  26.97         80       77   
1         False     False      250  121.0   81.0  28.73         95       76   
2         False     False      245  127.5   80.0  25.34         75       70   
3          True     False      225  150.0   95.0  28.58         65      103   
4         False     False      285  130.0   84.0  23.10         85       85   

   CHDRisk  
0    False  
1    False  
2    False  
3   

In [4]:
df.head()

Unnamed: 0,sex,age,education,smokingStatus,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totCHOL,sysBP,diaBP,BMI,heartRate,glucose,CHDRisk
0,1.0,39,4,0.0,0,False,False,False,False,195,106.0,70.0,26.97,80,77,False
1,0.0,46,2,0.0,0,False,False,False,False,250,121.0,81.0,28.73,95,76,False
2,1.0,48,1,1.0,20,False,False,False,False,245,127.5,80.0,25.34,75,70,False
3,0.0,61,3,1.0,30,False,False,True,False,225,150.0,95.0,28.58,65,103,True
4,0.0,46,3,1.0,23,False,False,False,False,285,130.0,84.0,23.1,85,85,False


In [5]:
# Separating target variables and features
y = df['CHDRisk']
X = df.drop(columns='CHDRisk')

In [6]:
# Splitting the data into testing and training data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [7]:
y_train.value_counts()

CHDRisk
False    2312
True      415
Name: count, dtype: int64

In [8]:
# Using synthetic minority over-sampling technique to balance the target variable conditions in the training data
smote = SMOTE(random_state=1)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [9]:
y_train_resampled.value_counts()

CHDRisk
False    2312
True     2312
Name: count, dtype: int64

In [10]:
# Scaling the feature variables
scaler = StandardScaler()
X_scaler = scaler.fit(X_train_resampled)

X_train_scaled = X_scaler.transform(X_train_resampled)
X_test_scaled = X_scaler.transform(X_test)

## Original NN Model

In [11]:
# Defining original model
number_input_features = X_train.shape[1]

nn = tf.keras.models.Sequential()

# Input layer
nn.add(
    tf.keras.layers.Dense(units=64, input_dim=number_input_features, activation='relu')
)

# Hidden layer 1
nn.add(tf.keras.layers.Dense(units=32, activation='relu'))

# Hidden layer 2
nn.add(tf.keras.layers.Dense(units=16, activation='relu'))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Checking the structure of the model
nn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
from tensorflow.keras import metrics

# Compiling the model
nn.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', metrics.Recall()]
)

In [14]:
# Training model
fit_model = nn.fit(X_train_scaled, y_train_resampled, epochs=100)

Epoch 1/100
[1m145/145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6425 - loss: 0.6171 - recall: 0.5531
Epoch 2/100
[1m145/145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 906us/step - accuracy: 0.7271 - loss: 0.5441 - recall: 0.7558
Epoch 3/100
[1m145/145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7312 - loss: 0.5420 - recall: 0.7568
Epoch 4/100
[1m145/145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 877us/step - accuracy: 0.7416 - loss: 0.5217 - recall: 0.7651
Epoch 5/100
[1m145/145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 868us/step - accuracy: 0.7417 - loss: 0.5076 - recall: 0.7618
Epoch 6/100
[1m145/145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 874us/step - accuracy: 0.7672 - loss: 0.4896 - recall: 0.7794
Epoch 7/100
[1m145/145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 847us/step - accuracy: 0.7774 - loss: 0.4773 - recall: 0.7987
Epoch 8/100
[1m145/145

In [15]:
# Evaluate the model using the test data
model_loss, model_accuracy, model_recall = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}, Recall: {model_recall}")


29/29 - 0s - 10ms/step - accuracy: 0.7055 - loss: 1.2111 - recall: 0.3551
Loss: 1.2111209630966187, Accuracy: 0.7054945230484009, Recall: 0.3550724685192108


This neural network model was able to learn quickly on the training data, however when evaluated with the testing data, only produced an accuracy score of 0.71. Recall performance was better than the Random Forest models at 0.36, but is still low.