In [None]:
import os
import tensorflow as tf
import pandas as pd
import numpy as np

In [None]:
# Location of the heart.csv file on storage.googleapis.com; pandas reads it
# directly over HTTPS, so this cell needs network access.
URL = 'https://storage.googleapis.com/applied-dl/heart.csv'
dataframe = pd.read_csv(URL)
# Preview the first rows as the cell output.
dataframe.head()

In [None]:
# Columns cast to float32 (the label 'target' included). Any column not listed
# here keeps the dtype pandas inferred when reading the CSV.
float32_columns = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                   'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'target']
dataframe = dataframe.astype({column: np.float32 for column in float32_columns})

In [None]:
# Print the column names, non-null counts and dtypes of the frame.
dataframe.info()

In [None]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32, label_column='target'):
    """Convert a pandas DataFrame into a batched ``tf.data.Dataset``.

    Args:
        dataframe: Frame holding the feature columns and the label column.
            It is copied first, so the caller's frame is left untouched.
        shuffle: When true, shuffle the rows with a buffer as large as the
            frame before batching.
        batch_size: Number of rows per batch.
        label_column: Name of the column to split off as the labels.
            Defaults to ``'target'``.

    Returns:
        A dataset yielding ``(features, labels)`` batches, where ``features``
        is a dict keyed by the remaining column names.

    Raises:
        KeyError: If ``label_column`` is not a column of ``dataframe``.
    """
    features = dataframe.copy()
    # pop() removes the label column from the copy and returns it.
    labels = features.pop(label_column)
    ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    if shuffle:
        # Buffer covers every row, so the whole frame takes part in the shuffle.
        ds = ds.shuffle(buffer_size=len(features))
    return ds.batch(batch_size)

In [None]:
# A batch size of 2 keeps the printed examples below short. Rows are shuffled
# (spelled out here; it is also the function's default).
dataset = df_to_dataset(dataframe, shuffle=True, batch_size=2)
dataset

In [None]:
# Peek at a single batch: the feature names, the feature values, then the labels.
for features, labels in dataset.take(1):
    spacer = '\n'*3
    print('features keys:\n{}'.format(list(features.keys())))
    print(spacer)
    print('features:\n{}'.format(features))
    print(spacer)
    print('labels:\n{}'.format(labels))

In [None]:
# Feature-column API and Keras layers, used below to build each feature column
# and to transform a batch of data with it

from tensorflow import feature_column
from tensorflow.keras import layers

In [None]:
# Take the first batch and keep only its feature dict; the labels are not
# needed for the feature-column demos.
example_batch, _example_labels = next(iter(dataset))

In [None]:
# Show the feature dict that every feature-column demo below is applied to.
example_batch

___
## [1. numeric_column](https://www.tensorflow.org/api_docs/python/tf/feature_column/numeric_column)

In [None]:
# Numeric column whose values are rescaled as (age - 25) / 50 before use.
# It has its own name so the global `age` only ever refers to the raw,
# un-normalized column that the bucketized and crossed demos rely on;
# re-running this cell out of order can no longer swap that column.
age_normalized = feature_column.numeric_column(
    "age", normalizer_fn=lambda x: (x - 25) / 50)
feature_layer = layers.DenseFeatures(age_normalized)
feature_layer(example_batch).numpy()

## [2. bucketized_column](https://www.tensorflow.org/api_docs/python/tf/feature_column/bucketized_column)

<img src="../images/3_feature_columns/bucketized_column.jpg" width="400">

In [None]:
# Raw age column, one-hot encoded by the range it falls into. The 10
# boundaries below split the number line into 11 buckets.
age = feature_column.numeric_column("age")
age_boundaries = [18, 25, 30, 35, 40, 45, 50, 55, 60, 65]
age_buckets = feature_column.bucketized_column(age, boundaries=age_boundaries)
feature_layer = layers.DenseFeatures(age_buckets)
feature_layer(example_batch).numpy()

## 3. categorical columns

### [3.1 categorical_column_with_vocabulary_list](https://www.tensorflow.org/api_docs/python/tf/feature_column/categorical_column_with_vocabulary_list)

<img src="../images/3_feature_columns/categorical_column_with_vocabulary.jpg" width="400">


In [None]:
# Map each string in the vocabulary to its own slot, then one-hot encode it
# with an indicator column so DenseFeatures can consume it.
thal_vocabulary = ['fixed', 'normal', 'reversible', 'another_1', 'another_2', 'another_3']
thal = feature_column.categorical_column_with_vocabulary_list('thal', thal_vocabulary)
thal_one_hot = feature_column.indicator_column(thal)
feature_layer = layers.DenseFeatures(thal_one_hot)
feature_layer(example_batch).numpy()

### [3.2 hashed column](https://www.tensorflow.org/api_docs/python/tf/feature_column/categorical_column_with_hash_bucket)

<img src="../images/3_feature_columns/hashed_column.jpg" width="600">

In [None]:
# Hash each 'thal' value into one of 15 buckets instead of using an explicit
# vocabulary, then one-hot encode the bucket id with an indicator column.
thal_hashed = feature_column.categorical_column_with_hash_bucket('thal', hash_bucket_size=15)
thal_hashed_one_hot = feature_column.indicator_column(thal_hashed)
feature_layer = layers.DenseFeatures(thal_hashed_one_hot)
feature_layer(example_batch).numpy()

## [4. embedding column](https://www.tensorflow.org/api_docs/python/tf/feature_column/embedding_column)

<img src="../images/3_feature_columns/embedding_vs_indicator.jpg" width="600">

In [None]:
# Represent each 'thal' category as a dense vector of `embedding_dim` values
# instead of a one-hot row; reuses the vocabulary column `thal` built earlier.
embedding_dim = 3
thal_embedding = feature_column.embedding_column(thal, dimension=embedding_dim)
feature_layer = layers.DenseFeatures(thal_embedding)
feature_layer(example_batch).numpy()

## [5. Crossed column](https://www.tensorflow.org/api_docs/python/tf/feature_column/crossed_column)

<img src="../images/3_feature_columns/crossed_column.jpg" width="600">



In [None]:
# Cross the age buckets with 'thal' so each (age range, thal) combination is
# hashed into one of 1000 buckets, then one-hot encode the result.
crossed_feature = feature_column.crossed_column([age_buckets, thal], hash_bucket_size=1000)
crossed_one_hot = feature_column.indicator_column(crossed_feature)
feature_layer = layers.DenseFeatures(crossed_one_hot)
# .numpy() shows a plain array, matching the other feature-column demos.
feature_layer(example_batch).numpy()