# Library and data import

In [37]:
import tensorflow as tf
from tensorflow import keras

import os
import tempfile

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [38]:
df = pd.read_csv("heart_indicators_clean_data.csv") 

# ANN Model Training

In [39]:
train_df, test_df = train_test_split(df, test_size=0.2)
train_df, val_df = train_test_split(train_df, test_size=0.2)

# Form np arrays of labels and features.
train_labels = np.array(train_df.pop('HeartDiseaseorAttack'))
bool_train_labels = train_labels != 0
val_labels = np.array(val_df.pop('HeartDiseaseorAttack'))
test_labels = np.array(test_df.pop('HeartDiseaseorAttack'))

train_features = np.array(train_df)
val_features = np.array(val_df)
test_features = np.array(test_df)

PCA: 1 - Scaling Data

In [40]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# Fit on training set only.
scaler.fit(train_features)

# Apply transform to both the training set and the test set.
train_features = scaler.transform(train_features)
val_features = scaler.transform(val_features)
test_features = scaler.transform(test_features)

PCA: 2 - PCA

In [41]:
from sklearn.decomposition import PCA

In [42]:
pca = PCA(n_components = 0.95)

pca.fit(train_features)

PCA(n_components=0.95)

In [43]:
train_features = pca.transform(train_features)
val_features = pca.transform(val_features)
test_features = pca.transform(test_features)

In [44]:
METRICS = [
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'), 
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

def make_model(number_of_hidden_layers=1, activation_function='relu', final_activation_function='sigmoid', learning_rate=1e-3):
  layers = [keras.layers.Dense(
          16, activation='relu',
          input_shape=(train_features.shape[-1],))]
  for layer in range(number_of_hidden_layers):
    layers.append(keras.layers.Dense(16, input_dim=16, activation=activation_function))

  layers.append(keras.layers.Dropout(0.5))
  layers.append(keras.layers.Dense(1, activation=final_activation_function))

  model = keras.Sequential(layers)

  model.compile(
      optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
      loss=keras.losses.BinaryCrossentropy(),
      metrics=METRICS)

  return model


In [45]:
EPOCHS = 10 #was 100
BATCH_SIZE = 2 #was 2048

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_prc', 
    verbose=1,
    patience=10,
    mode='max',
    restore_best_weights=True)

In [46]:
model = make_model()
model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_33 (Dense)            (None, 16)                320       
                                                                 
 dense_34 (Dense)            (None, 16)                272       
                                                                 
 dropout_11 (Dropout)        (None, 16)                0         
                                                                 
 dense_35 (Dense)            (None, 1)                 17        
                                                                 
Total params: 609
Trainable params: 609
Non-trainable params: 0
_________________________________________________________________


In [47]:
pos_features = train_features[bool_train_labels]
neg_features = train_features[~bool_train_labels]

pos_labels = train_labels[bool_train_labels]
neg_labels = train_labels[~bool_train_labels]

In [48]:
ids = np.arange(len(pos_features))
choices = np.random.choice(ids, len(neg_features))

res_pos_features = pos_features[choices]
res_pos_labels = pos_labels[choices]

res_pos_features.shape

(147165, 19)

In [49]:
resampled_features = np.concatenate([res_pos_features, neg_features], axis=0)
resampled_labels = np.concatenate([res_pos_labels, neg_labels], axis=0)

order = np.arange(len(resampled_labels))
np.random.shuffle(order)
resampled_features = resampled_features[order]
resampled_labels = resampled_labels[order]

resampled_features.shape

(294330, 19)

In [50]:
BUFFER_SIZE = 100000

def make_ds(features, labels):
  ds = tf.data.Dataset.from_tensor_slices((features, labels))#.cache()
  ds = ds.shuffle(BUFFER_SIZE).repeat()
  return ds

pos_ds = make_ds(pos_features, pos_labels)
neg_ds = make_ds(neg_features, neg_labels)

In [51]:
resampled_ds = tf.data.Dataset.sample_from_datasets([pos_ds, neg_ds], weights=[0.5, 0.5])
resampled_ds = resampled_ds.batch(BATCH_SIZE).prefetch(2)

In [52]:
pos = len(df[df["HeartDiseaseorAttack"] == 1])
neg = len(df[df["HeartDiseaseorAttack"] == 0])

resampled_steps_per_epoch = np.ceil(2.0*neg/BATCH_SIZE)
resampled_steps_per_epoch

229787.0

In [None]:
model = make_model(number_of_hidden_layers=1)

val_ds = tf.data.Dataset.from_tensor_slices((val_features, val_labels)).cache()
val_ds = val_ds.batch(BATCH_SIZE).prefetch(2) 

resampled_history = model.fit(
    resampled_ds,
    epochs=EPOCHS,
    steps_per_epoch=resampled_steps_per_epoch,
    callbacks=[early_stopping],
    validation_data=val_ds)

initial_weights = os.path.join(tempfile.mkdtemp(), 'initial_weights')
model.save_weights('initial_weights')

Epoch 1/10
  7445/229787 [..............................] - ETA: 12:48 - loss: 0.5508 - tp: 5647.0000 - fp: 2315.0000 - tn: 5083.0000 - fn: 1845.0000 - accuracy: 0.7206 - precision: 0.7092 - recall: 0.7537 - auc: 0.7947 - prc: 0.7677

In [35]:
activations = ['relu', 'sigmoid', 'softmax']
rates = [1e-2,1e-3,1e-4]

results = {}

for activation_function in activations:
  results[activation_function] = []
  for learning_rate in rates:
    model = make_model(number_of_hidden_layers=1, activation_function=activation_function, learning_rate=learning_rate)
    model.load_weights('initial_weights')

    val_ds = tf.data.Dataset.from_tensor_slices((val_features, val_labels)).cache()
    val_ds = val_ds.batch(BATCH_SIZE).prefetch(2) 

    resampled_history = model.fit(
        resampled_ds,
        epochs=EPOCHS,
        steps_per_epoch=resampled_steps_per_epoch,
        callbacks=[early_stopping],
        validation_data=val_ds)
    
    res = {'rec':0,'acc':0}
    res['rec'] = max(resampled_history.history["val_recall"])
    res['acc'] = max(resampled_history.history["val_accuracy"])
    
    results[activation_function].append(res)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: ignored

In [None]:
print(results)

In [None]:
def make_model():
  layers = [keras.layers.Dense(16, activation='relu',input_shape=(train_features.shape[-1],)),
          keras.layers.Dense(16, input_dim=16, activation='relu'),
          keras.layers.Dropout(0.5),
          keras.layers.Dense(1, activation='sigmoid')]

  model = keras.Sequential(layers)

  model.compile(
      optimizer=keras.optimizers.Adam(learning_rate=1e-3),
      loss=keras.losses.BinaryCrossentropy(),
      metrics=METRICS)

  return model

model = make_model()
model.summary()

val_ds = tf.data.Dataset.from_tensor_slices((val_features, val_labels)).cache()
val_ds = val_ds.batch(BATCH_SIZE).prefetch(2) 

resampled_history = model.fit(
    resampled_ds,
    epochs=EPOCHS,
    steps_per_epoch=resampled_steps_per_epoch,
    callbacks=[early_stopping],
    validation_data=val_ds)