In [1]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

TensorFlow 2.x selected.


Load imports

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals
import functools

import numpy as np
import tensorflow as tf

Load heart_train.csv and heart_test.csv

In [2]:
from google.colab import files
uploaded = files.upload()

Saving heart_train.csv to heart_train (1).csv


In [0]:
LABEL_COLUMN = 'chd'
LABELS = [0, 1]

Check file contents

In [0]:
def get_dataset(file_path, **kwargs):
  dataset = tf.data.experimental.make_csv_dataset(
      file_path,
      batch_size=5, # Artificially small to make examples easier to show.
      label_name=LABEL_COLUMN,
      na_value="?",
      num_epochs=1,
      ignore_errors=True, 
      **kwargs)
  return dataset

In [0]:
def show_batch(dataset):
  for batch, label in dataset.take(1):
    for key, value in batch.items():
      print("{:20s}: {}".format(key,value.numpy()))

In [6]:
SELECT_COLUMNS = ['chd', 'sbp', 'tobacco', 'ldl', 'adiposity', 'famhist', 'typea', 'obesity', 'alcohol', 'age']

train_dataset = get_dataset("heart_train.csv", select_columns=SELECT_COLUMNS)
test_dataset = get_dataset("heart_test.csv", select_columns=SELECT_COLUMNS)

show_batch(train_dataset)

Instructions for updating:
Use `tf.data.Dataset.interleave(map_func, cycle_length, block_length, num_parallel_calls=tf.data.experimental.AUTOTUNE)` instead. If sloppy execution is desired, use `tf.data.Options.experimental_determinstic`.
sbp                 : [124 166 126 108 140]
tobacco             : [ 4.   6.  10.5 15.   4.2]
ldl                 : [6.65 8.8  4.49 4.91 2.91]
adiposity           : [30.84 37.89 17.33 34.65 28.83]
famhist             : [b'Present' b'Absent' b'Absent' b'Absent' b'Present']
typea               : [54 39 67 41 43]
obesity             : [28.4  28.7  19.37 27.96 24.7 ]
alcohol             : [33.51 43.2   0.   14.4  47.52]
age                 : [60 52 49 56 48]


In [0]:
def pack(features, label):
  return tf.stack(list(features.values()), axis=-1), label

In [0]:
class PackNumericFeatures(object):
  def __init__(self, names):
    self.names = names

  def __call__(self, features, labels):
    numeric_features = [features.pop(name) for name in self.names]
    numeric_features = [tf.cast(feat, tf.float32) for feat in numeric_features]
    numeric_features = tf.stack(numeric_features, axis=-1)
    features['numeric'] = numeric_features

    return features, labels

In [9]:
NUMERIC_FEATURES = ['sbp', 'tobacco', 'ldl', 'adiposity', 'typea', 'obesity', 'alcohol', 'age']

packed_train_data = train_dataset.map(
    PackNumericFeatures(NUMERIC_FEATURES))

packed_test_data = test_dataset.map(
    PackNumericFeatures(NUMERIC_FEATURES))

example_batch, labels_batch = next(iter(packed_train_data)) 

show_batch(packed_train_data)

famhist             : [b'Present' b'Absent' b'Present' b'Absent' b'Absent']
numeric             : [[138.     0.     1.96  11.82  54.    22.01   8.13  21.  ]
 [118.     0.     2.39  12.13  49.    18.46   0.26  17.  ]
 [154.     2.4    5.63  42.17  59.    35.07  12.86  50.  ]
 [132.     0.     4.17  36.57  57.    30.61  18.    49.  ]
 [142.     0.     4.19  18.04  56.    23.65  20.78  42.  ]]


In [10]:
import pandas as pd
desc = pd.read_csv("heart_train.csv")[NUMERIC_FEATURES].describe()
desc

Unnamed: 0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age
count,396.0,396.0,396.0,396.0,396.0,396.0,396.0,396.0
mean,139.088384,3.53447,4.721641,25.417854,52.611111,25.980404,17.155328,42.727273
std,20.883153,4.622259,1.969109,7.862184,9.9082,4.185559,24.696052,14.758524
min,101.0,0.0,0.98,6.74,13.0,14.7,0.0,15.0
25%,125.5,0.05,3.3,19.825,46.0,22.7575,0.51,31.0
50%,134.0,1.8,4.325,26.115,53.0,25.78,7.78,44.5
75%,148.5,5.1525,5.6325,31.1025,59.0,28.4025,23.7375,55.25
max,218.0,31.2,14.16,42.49,77.0,45.72,147.19,64.0


In [0]:
MEAN = np.array(desc.T['mean'])
STD = np.array(desc.T['std'])
def normalize_numeric_data(data, mean, std):
  # Center the data
  return (data-mean)/std

In [12]:
# See what you just created.
normalizer = functools.partial(normalize_numeric_data, mean=MEAN, std=STD)

numeric_column = tf.feature_column.numeric_column('numeric', normalizer_fn=normalizer, shape=[len(NUMERIC_FEATURES)])
numeric_columns = [numeric_column]
example_batch['numeric']
numeric_layer = tf.keras.layers.DenseFeatures(numeric_columns)
numeric_layer(example_batch).numpy()

CATEGORIES = {
    'famhist': ['present', 'absent'],
}

categorical_columns = []
for feature, vocab in CATEGORIES.items():
  cat_col = tf.feature_column.categorical_column_with_vocabulary_list(
        key=feature, vocabulary_list=vocab)
  categorical_columns.append(tf.feature_column.indicator_column(cat_col))

categorical_layer = tf.keras.layers.DenseFeatures(categorical_columns)
print(categorical_layer(example_batch).numpy()[0])

Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
[0. 0.]


In [13]:
preprocessing_layer = tf.keras.layers.DenseFeatures(categorical_columns+numeric_columns)
print(preprocessing_layer(example_batch).numpy()[0])

[ 0.          0.          0.71405023  0.20888714 -0.02114749  1.8509039
  0.84666127  1.7177143  -0.6323006   1.4413859 ]


In [24]:
model = tf.keras.Sequential([
  preprocessing_layer,
  tf.keras.layers.Dense(1024, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1024, activation='relu'),
  tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

train_data = packed_train_data.shuffle(500)
test_data = packed_test_data

model.fit(train_data, epochs=50, verbose = 2)

print("--Evaluate model--")
model_loss, model_acc = model.evaluate(test_data, verbose=2)
print(f"Model Loss:    {model_loss:.2f}")
print(f"Model Accuray: {model_acc*100:.1f}%")

Epoch 1/100
80/80 - 2s - loss: 0.5965 - accuracy: 0.6465
Epoch 2/100
80/80 - 1s - loss: 0.5527 - accuracy: 0.7222
Epoch 3/100
80/80 - 2s - loss: 0.5259 - accuracy: 0.7096
Epoch 4/100
80/80 - 2s - loss: 0.5087 - accuracy: 0.7323
Epoch 5/100
80/80 - 1s - loss: 0.5138 - accuracy: 0.7222
Epoch 6/100
80/80 - 1s - loss: 0.5053 - accuracy: 0.7348
Epoch 7/100
80/80 - 1s - loss: 0.4905 - accuracy: 0.7449
Epoch 8/100
80/80 - 1s - loss: 0.4879 - accuracy: 0.7399
Epoch 9/100
80/80 - 1s - loss: 0.4586 - accuracy: 0.7753
Epoch 10/100
80/80 - 1s - loss: 0.4588 - accuracy: 0.7727
Epoch 11/100
80/80 - 1s - loss: 0.4532 - accuracy: 0.7753
Epoch 12/100
80/80 - 1s - loss: 0.4558 - accuracy: 0.7727
Epoch 13/100
80/80 - 1s - loss: 0.4314 - accuracy: 0.7854
Epoch 14/100
80/80 - 1s - loss: 0.4224 - accuracy: 0.7753
Epoch 15/100
80/80 - 1s - loss: 0.3925 - accuracy: 0.8232
Epoch 16/100
80/80 - 2s - loss: 0.3677 - accuracy: 0.8182
Epoch 17/100
80/80 - 1s - loss: 0.3556 - accuracy: 0.8106
Epoch 18/100
80/80 - 1s