In [1]:
import os
import tensorflow as tf
import pandas as pd
import numpy as np

In [2]:
# Remote CSV holding the heart dataset; reading it requires network access.
URL = 'https://storage.googleapis.com/applied-dl/heart.csv'
dataframe = pd.read_csv(URL)
# Preview the first rows to check that the load worked and to see the columns.
dataframe.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,reversible,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,normal,0


In [3]:
# Cast the listed columns to float32. 'thal' is not listed, so it keeps its
# original dtype.
dataframe = dataframe.astype(dict.fromkeys(
    ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
     'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'target'],
    np.float32,
))

In [4]:
# Print every column's dtype and non-null count to confirm the cast above.
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
age         303 non-null float32
sex         303 non-null float32
cp          303 non-null float32
trestbps    303 non-null float32
chol        303 non-null float32
fbs         303 non-null float32
restecg     303 non-null float32
thalach     303 non-null float32
exang       303 non-null float32
oldpeak     303 non-null float32
slope       303 non-null float32
ca          303 non-null float32
thal        303 non-null object
target      303 non-null float32
dtypes: float32(13), object(1)
memory usage: 17.9+ KB


In [5]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32, label_col='target'):
    """Convert a pandas DataFrame into a batched ``tf.data.Dataset``.

    Each element of the returned dataset is a ``(features, labels)`` pair:
    ``features`` is a dict keyed by column name and ``labels`` holds the
    values of ``label_col``.

    Args:
        dataframe: Source frame. It is copied before the label column is
            removed, so the caller's frame is left unchanged.
        shuffle: If true, shuffle with a buffer as large as the frame.
        batch_size: Number of rows per batch.
        label_col: Name of the column used as the label. Defaults to
            ``'target'``.

    Returns:
        A batched ``tf.data.Dataset`` yielding ``(features_dict, labels)``.

    Raises:
        KeyError: If ``label_col`` is not a column of ``dataframe``.
    """
    features = dataframe.copy()
    labels = features.pop(label_col)
    ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    num_rows = len(features)
    # shuffle() needs a positive buffer size, so skip it for an empty frame.
    if shuffle and num_rows > 0:
        ds = ds.shuffle(buffer_size=num_rows)
    return ds.batch(batch_size)

In [6]:
# Batches of two rows; presumably chosen to keep the printed examples short.
dataset = df_to_dataset(dataframe, batch_size=2)
# Bare expression so the notebook displays the dataset's shapes and dtypes.
dataset

<BatchDataset shapes: ({trestbps: (None,), thal: (None,), fbs: (None,), age: (None,), cp: (None,), chol: (None,), slope: (None,), ca: (None,), thalach: (None,), sex: (None,), restecg: (None,), exang: (None,), oldpeak: (None,)}, (None,)), types: ({trestbps: tf.float32, thal: tf.string, fbs: tf.float32, age: tf.float32, cp: tf.float32, chol: tf.float32, slope: tf.float32, ca: tf.float32, thalach: tf.float32, sex: tf.float32, restecg: tf.float32, exang: tf.float32, oldpeak: tf.float32}, tf.float32)>

In [7]:
# Pull a single batch and print its feature names, feature tensors and labels.
for features, labels in dataset.take(1):
    spacer = '\n'*3
    print('features keys:\n{}'.format(list(features.keys())))
    print(spacer)
    print('features:\n{}'.format(features))
    print(spacer)
    print('labels:\n{}'.format(labels))

features keys:
['trestbps', 'thal', 'fbs', 'age', 'cp', 'chol', 'slope', 'thalach', 'ca', 'sex', 'oldpeak', 'exang', 'restecg']




features:
{'trestbps': <tf.Tensor: id=44, shape=(2,), dtype=float32, numpy=array([130., 140.], dtype=float32)>, 'thal': <tf.Tensor: id=42, shape=(2,), dtype=string, numpy=array([b'reversible', b'normal'], dtype=object)>, 'fbs': <tf.Tensor: id=37, shape=(2,), dtype=float32, numpy=array([1., 0.], dtype=float32)>, 'age': <tf.Tensor: id=32, shape=(2,), dtype=float32, numpy=array([63., 42.], dtype=float32)>, 'cp': <tf.Tensor: id=35, shape=(2,), dtype=float32, numpy=array([4., 4.], dtype=float32)>, 'chol': <tf.Tensor: id=34, shape=(2,), dtype=float32, numpy=array([330., 226.], dtype=float32)>, 'slope': <tf.Tensor: id=41, shape=(2,), dtype=float32, numpy=array([1., 1.], dtype=float32)>, 'thalach': <tf.Tensor: id=43, shape=(2,), dtype=float32, numpy=array([132., 178.], dtype=float32)>, 'ca': <tf.Tensor: id=33, shape=(2,), dtype=float32, numpy=array([3., 0.], dtype

In [8]:
# Imports for the feature-column demos below: `feature_column` builds the
# columns and `layers.DenseFeatures` applies them to a batch of data.

from tensorflow import feature_column
from tensorflow.keras import layers

In [9]:
# Take the first batch and keep only its features dict (index 0); the labels
# are dropped. The dataset was built with shuffling on, so this batch will
# generally differ from the one printed earlier.
example_batch = next(iter(dataset))[0]

In [10]:
# Display the batch so the feature-column outputs can be checked against it.
example_batch

{'age': <tf.Tensor: id=51, shape=(2,), dtype=float32, numpy=array([49., 43.], dtype=float32)>,
 'ca': <tf.Tensor: id=52, shape=(2,), dtype=float32, numpy=array([3., 0.], dtype=float32)>,
 'chol': <tf.Tensor: id=53, shape=(2,), dtype=float32, numpy=array([188., 213.], dtype=float32)>,
 'cp': <tf.Tensor: id=54, shape=(2,), dtype=float32, numpy=array([3., 3.], dtype=float32)>,
 'exang': <tf.Tensor: id=55, shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>,
 'fbs': <tf.Tensor: id=56, shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>,
 'oldpeak': <tf.Tensor: id=57, shape=(2,), dtype=float32, numpy=array([2. , 0.2], dtype=float32)>,
 'restecg': <tf.Tensor: id=58, shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>,
 'sex': <tf.Tensor: id=59, shape=(2,), dtype=float32, numpy=array([1., 0.], dtype=float32)>,
 'slope': <tf.Tensor: id=60, shape=(2,), dtype=float32, numpy=array([2., 2.], dtype=float32)>,
 'thal': <tf.Tensor: id=61, shape=(2,), dtype=strin

___
## [1. numeric_column](https://www.tensorflow.org/api_docs/python/tf/feature_column/numeric_column)

In [14]:
def _rescale_age(x):
    """Normalizer for the raw 'age' values: (x - 25) / 50."""
    return (x - 25) / 50


# Numeric column whose values pass through the normalizer before being emitted.
age = feature_column.numeric_column("age", normalizer_fn=_rescale_age)
feature_layer = layers.DenseFeatures([age])
feature_layer(example_batch).numpy()

array([[0.48],
       [0.36]], dtype=float32)

## [2. bucketized_column](https://www.tensorflow.org/api_docs/python/tf/feature_column/bucketized_column)

<img src="../images/3_feature_columns/bucketized_column.jpg" width="400">

In [15]:
# Plain (un-normalized) age column, then split into ranges by the boundaries.
# Ten boundaries give eleven one-hot slots, matching the width of the output.
age = feature_column.numeric_column("age")
age_buckets = feature_column.bucketized_column(
    source_column=age,
    boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65],
)
feature_layer = layers.DenseFeatures([age_buckets])
feature_layer(example_batch).numpy()

array([[0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)

## 3. categorical columns

### [3.1 categorical_column_with_vocabulary_list](https://www.tensorflow.org/api_docs/python/tf/feature_column/categorical_column_with_vocabulary_list)

<img src="../images/3_feature_columns/categorical_column_with_vocabulary.jpg" width="400">


In [19]:
# Map each 'thal' string to its position in the vocabulary, then one-hot it.
# The one-hot width equals the vocabulary length (six entries here).
thal = feature_column.categorical_column_with_vocabulary_list(
    key='thal',
    vocabulary_list=['fixed', 'normal', 'reversible', 'another_1', 'another_2', 'another_3'],
)
thal_one_hot = feature_column.indicator_column(thal)
feature_layer = layers.DenseFeatures([thal_one_hot])
feature_layer(example_batch).numpy()

array([[0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.]], dtype=float32)

### [3.2 hashed column](https://www.tensorflow.org/api_docs/python/tf/feature_column/categorical_column_with_hash_bucket)

<img src="../images/3_feature_columns/hashed_column.jpg" width="600">

In [22]:
# Hash each 'thal' string into one of 15 buckets, then one-hot the bucket id.
# Different strings may share a bucket, since the hash space is fixed at 15.
thal_hashed = feature_column.categorical_column_with_hash_bucket('thal', hash_bucket_size=15)
thal_hashed_one_hot = feature_column.indicator_column(thal_hashed)
feature_layer = layers.DenseFeatures([thal_hashed_one_hot])
feature_layer(example_batch).numpy()

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]],
      dtype=float32)

## [4. embedding column](https://www.tensorflow.org/api_docs/python/tf/feature_column/embedding_column)

<img src="../images/3_feature_columns/embedding_vs_indicator.jpg" width="600">

In [23]:
# Represent each 'thal' category as a dense 3-dimensional vector rather than a
# one-hot row. No initializer or seed is passed here, so the values presumably
# come from a random default initialisation and may differ between runs.
thal_embedding = feature_column.embedding_column(categorical_column=thal, dimension=3)
feature_layer = layers.DenseFeatures([thal_embedding])
feature_layer(example_batch).numpy()

array([[ 0.29143268, -0.79603183,  0.02700424],
       [ 0.3871378 , -0.4228113 , -0.00858483]], dtype=float32)

## [5. Crossed column](https://www.tensorflow.org/api_docs/python/tf/feature_column/crossed_column)

<img src="../images/3_feature_columns/crossed_column.jpg" width="600">



In [24]:
# Cross the age buckets with the 'thal' category and hash each combination
# into 1000 buckets; the indicator column turns the bucket id into a one-hot row.
crossed_feature = feature_column.crossed_column(
    keys=[age_buckets, thal],
    hash_bucket_size=1000,
)
crossed_one_hot = feature_column.indicator_column(crossed_feature)
feature_layer = layers.DenseFeatures([crossed_one_hot])
# Left as a tensor (no .numpy()), so the notebook shows the tf.Tensor repr.
feature_layer(example_batch)

Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.


<tf.Tensor: id=530, shape=(2, 1000), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>