<a href="https://colab.research.google.com/github/bentonluu/CPSC501-Assignment4-TensorFlow/blob/master/heartPredict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Ask Colab for the TensorFlow 2.x runtime. The magic is Colab-specific, so
# the attempt is wrapped in try/except and any failure is ignored on purpose.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Best-effort: continue with whatever TensorFlow is already installed.
  pass

In [64]:
# Colab-only step: upload the raw data file into the runtime's working
# directory. The recorded output shows it being saved as heart.csv, which is
# the name the data-loading cell reads.
from google.colab import files
uploaded = files.upload()

Saving heart.csv to heart.csv


In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals
from keras import regularizers
from numpy.random import RandomState
import functools
import sys
import tensorflow as tf
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

In [0]:
LABEL_COLUMN = 'chd'
TRAIN_CSV_PATH = 'heart_train.csv'
TEST_CSV_PATH = 'heart_test.csv'

# Fixed seed so the same rows land in the train and test files on every
# Restart & Run All (an unseeded RandomState gives a different split each run).
SPLIT_SEED = 42
TRAIN_FRACTION = 0.80

heart_df = pd.read_csv("heart.csv")

# Sample the training rows, then use every remaining row as the test set.
train_data = heart_df.sample(frac=TRAIN_FRACTION, random_state=RandomState(SPLIT_SEED))
test_data = heart_df.drop(train_data.index)

# The two splits must partition the original frame exactly.
assert len(train_data) + len(test_data) == len(heart_df)

# Persist both splits; later cells read them back by path.
train_data.to_csv(TRAIN_CSV_PATH, index=False)
test_data.to_csv(TEST_CSV_PATH, index=False)

In [0]:
def get_dataset(file_path, **kwargs):
  """Build a batched CSV dataset whose label is the LABEL_COLUMN column.

  Args:
    file_path: Path of the CSV file to read.
    **kwargs: Extra keyword arguments forwarded to
      ``tf.data.experimental.make_csv_dataset``. They may also override the
      defaults used here (``batch_size=5``, ``label_name=LABEL_COLUMN``).

  Returns:
    The dataset returned by ``tf.data.experimental.make_csv_dataset``.
  """
  # Start from this notebook's defaults and let the caller override them;
  # previously **kwargs was accepted but silently dropped.
  options = {'batch_size': 5, 'label_name': LABEL_COLUMN}
  options.update(kwargs)
  return tf.data.experimental.make_csv_dataset(file_path, **options)

def normalize_numeric_data(data, mean, std):
  """Standardize ``data``: subtract ``mean``, then divide by ``std``.

  The arithmetic is applied with the ``-`` and ``/`` operators as-is, so any
  operands supporting them may be passed. No guard is applied for a zero
  ``std``.
  """
  shifted = data - mean
  return shifted / std

class PackNumericFeatures(object):
  """Callable that folds the named features into a single 'numeric' entry.

  Intended to be passed to ``Dataset.map``: each named entry is removed from
  the features mapping, cast to float32, and the results are stacked along
  the last axis under the key 'numeric'.
  """

  def __init__(self, names):
    # Keys of the features to pull out and pack, in stacking order.
    self.names = names

  def __call__(self, features, labels):
    """Return ``(features, labels)`` with the named features packed."""
    columns = []
    for column_name in self.names:
      # pop() removes the entry so only the packed copy remains.
      columns.append(tf.cast(features.pop(column_name), tf.float32))

    features['numeric'] = tf.stack(columns, axis=-1)
    return features, labels

In [0]:
# Numeric columns that are packed into the single 'numeric' feature.
CSV_FEATURES = ['sbp', 'tobacco', 'ldl', 'adiposity', 'typea','obesity','alcohol','age']

raw_train_data = get_dataset(TRAIN_CSV_PATH)
raw_test_data = get_dataset(TEST_CSV_PATH)

# One packer serves both splits; it only holds the list of column names.
pack_numeric = PackNumericFeatures(CSV_FEATURES)
packed_train_data = raw_train_data.map(pack_numeric)
packed_test_data = raw_test_data.map(pack_numeric)

# Summary statistics of the numeric columns, computed from the training file
# only. (The original referenced an undefined name, TRAIN_PATH, which raised
# NameError on a fresh kernel.)
desc = pd.read_csv(TRAIN_CSV_PATH)[CSV_FEATURES].describe()

In [0]:
# Per-feature mean and standard deviation, read from the rows of the
# describe() table and kept in CSV_FEATURES column order.
MEAN = np.array(desc.loc['mean'])
STD = np.array(desc.loc['std'])

# Bind the statistics so the feature column can call normalizer(data).
normalizer = functools.partial(normalize_numeric_data, mean=MEAN, std=STD)

# A single numeric column covering the whole packed 'numeric' feature.
numeric_column = tf.feature_column.numeric_column(
    'numeric',
    shape=[len(CSV_FEATURES)],
    normalizer_fn=normalizer,
)
numeric_columns = [numeric_column]

# Categorical feature name -> vocabulary of its values.
CATEGORIES = {'famhist': ['Present', 'Absent']}

# Wrap each vocabulary column in an indicator column.
categorical_columns = [
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            key=name, vocabulary_list=values))
    for name, values in CATEGORIES.items()
]

# Categorical columns first, then numeric, as the model's input layer.
preprocessing_layer = tf.keras.layers.DenseFeatures(categorical_columns + numeric_columns)

In [0]:
# L2 penalty shared by both hidden layers. The regularizer is taken from
# tf.keras so it comes from the same Keras implementation as the layers,
# rather than from the standalone `keras` package.
L2_FACTOR = 0.0001

# Binary classifier: preprocessing, two regularized hidden layers with
# dropout, and a sigmoid output.
model = tf.keras.Sequential([
  preprocessing_layer,
  tf.keras.layers.Dense(128, kernel_regularizer=tf.keras.regularizers.l2(L2_FACTOR), activation='relu'),
  tf.keras.layers.Dropout(0.5),
  tf.keras.layers.Dense(128, kernel_regularizer=tf.keras.regularizers.l2(L2_FACTOR), activation='relu'),
  tf.keras.layers.Dropout(0.5),
  tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# steps_per_epoch / steps cap how many batches are drawn from each dataset
# per pass. NOTE(review): get_dataset does not set num_epochs, so the
# datasets presumably repeat indefinitely; confirm that 64 evaluation steps
# on the smaller test split is intended.
print("--Fit model--")
model.fit(packed_train_data, epochs=10, verbose=2, steps_per_epoch=64)

print("--Evaluate model--")
model_loss, model_acc = model.evaluate(packed_test_data, verbose=2, steps=64)
print(f"Model Loss:    {model_loss:.2f}")
# Fixed the "Accuray" typo in the printed label.
print(f"Model Accuracy: {model_acc*100:.1f}%")
