### Full example
#### Build the preprocessing head

In [68]:
import tensorflow as tf
import numpy as np
import pandas as pd

from tensorflow.keras import Input, Sequential
from tensorflow.keras.layers import Dense, Normalization

In [46]:
print(tf.__version__)

2.8.0


In [47]:
SHUFFLE_BUFFER = 500
BATCH_SIZE = 2

In [48]:
csv_file = tf.keras.utils.get_file('heart.csv', 'https://storage.googleapis.com/download.tensorflow.org/data/heart.csv')

In [49]:
df = pd.read_csv(csv_file)

In [50]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,reversible,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,normal,0


In [53]:
df.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal         object
target        int64
dtype: object

In [54]:
target = df.pop('target')

In [125]:
binary_feature_names = []
categorical_feature_names = []
numeric_feature_names= []

for name, values in df.items():
    nunique = df[name].nunique()
    print(f"{name:10s}: {nunique}", end='\t')
    if nunique == 2:
        print(f"recommand to Binary")
        binary_feature_names.append(name)
    elif nunique <= 10:
        print(f"recommand to Categorical")
        categorical_feature_names.append(name)
    else:
        print(f"recommand to numerical")
        numeric_feature_names.append(name)

age       : 41	recommand to numerical
sex       : 2	recommand to Binary
cp        : 5	recommand to Categorical
trestbps  : 50	recommand to numerical
chol      : 152	recommand to numerical
fbs       : 2	recommand to Binary
restecg   : 3	recommand to Categorical
thalach   : 91	recommand to numerical
exang     : 2	recommand to Binary
oldpeak   : 40	recommand to numerical
slope     : 3	recommand to Categorical
ca        : 4	recommand to Categorical
thal      : 5	recommand to Categorical


In [126]:
binary_feature_names

['sex', 'fbs', 'exang']

In [127]:
categorical_feature_names

['cp', 'restecg', 'slope', 'ca', 'thal']

In [128]:
numeric_feature_names

['age', 'trestbps', 'chol', 'thalach', 'oldpeak']

In [130]:
numeric_features = df[numeric_feature_names]

In [131]:
numeric_features

Unnamed: 0,age,trestbps,chol,thalach,oldpeak
0,63,145,233,150,2.3
1,67,160,286,108,1.5
2,67,120,229,129,2.6
3,37,130,250,187,3.5
4,41,130,204,172,1.4
...,...,...,...,...,...
298,52,118,186,190,0.0
299,43,132,341,136,3.0
300,65,135,254,127,2.8
301,48,130,256,150,0.0


In [132]:
inputs = {}
for name, column in df.items():
    if type(column[0]) == str:
        dtype = tf.string
    elif (name in categorical_feature_names or 
          name in binary_feature_names):
        dtype = tf.int64
    else:
        dtype = tf.float32
        
    inputs[name] = Input(shape=(), name=name, dtype=dtype)

In [133]:
inputs

{'age': <KerasTensor: shape=(None,) dtype=float32 (created by layer 'age')>,
 'sex': <KerasTensor: shape=(None,) dtype=int64 (created by layer 'sex')>,
 'cp': <KerasTensor: shape=(None,) dtype=int64 (created by layer 'cp')>,
 'trestbps': <KerasTensor: shape=(None,) dtype=float32 (created by layer 'trestbps')>,
 'chol': <KerasTensor: shape=(None,) dtype=float32 (created by layer 'chol')>,
 'fbs': <KerasTensor: shape=(None,) dtype=int64 (created by layer 'fbs')>,
 'restecg': <KerasTensor: shape=(None,) dtype=int64 (created by layer 'restecg')>,
 'thalach': <KerasTensor: shape=(None,) dtype=float32 (created by layer 'thalach')>,
 'exang': <KerasTensor: shape=(None,) dtype=int64 (created by layer 'exang')>,
 'oldpeak': <KerasTensor: shape=(None,) dtype=float32 (created by layer 'oldpeak')>,
 'slope': <KerasTensor: shape=(None,) dtype=int64 (created by layer 'slope')>,
 'ca': <KerasTensor: shape=(None,) dtype=int64 (created by layer 'ca')>,
 'thal': <KerasTensor: shape=(None,) dtype=string 

##### Binary inputs

In [134]:
preprocessed = []

for name in binary_feature_names:
    inp = inputs[name]
    inp = inp[:, tf.newaxis]
    float_value = tf.cast(inp, tf.float32)
    preprocessed.append(float_value)

preprocessed

[<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'tf.cast_3')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'tf.cast_4')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'tf.cast_5')>]

##### Numeric inputs

In [135]:
def stack_dict(inputs, fun=tf.stack):
    values = []
    for key in sorted(inputs.keys()):
        values.append(tf.cast(inputs[key], dtype=tf.float32))
    
    return fun(values, axis=-1)

In [136]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(stack_dict(dict(numeric_features)))

2022-08-06 15:41:00.112854: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-08-06 15:41:00.127217: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [139]:
numeric_inputs = {}
for name in numeric_feature_names:
    numeric_inputs[name] = inputs[name]

numeric_inputs = stack_dict(numeric_inputs)
numeric_normalized = normalizer(numeric_inputs)

preprocessed.append(nemeric_normalized)

preprocessed

NameError: name 'nemeric_normalized' is not defined

In [138]:
numeric_feature_names

['age', 'trestbps', 'chol', 'thalach', 'oldpeak']