In [1]:
import tensorflow as tf
from tensorflow.keras import layers

In [2]:
# --- Column names -----------------------------------------------------------
# Keys of the ten float feature columns stored in each record.
FEATURES = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
# Key of the integer label column.
LABELS = 'labels'
# Key under which the packed feature vector is stored after preprocessing.
NUMERIC = 'numeric'

# --- Input pipeline ---------------------------------------------------------
# Glob matching the input record files.
DATA_PATTERN = 'data/tfrecords/*'
BATCH_SIZE = 1000
# Passed to the dataset builder as its parser thread count.
NUM_PARALLEL_READS = 4

# --- Training ---------------------------------------------------------------
EPOCHS = 5

In [3]:
# Parsing schema: every feature column is a scalar float32, and the label
# column is a scalar int64.
feature_description = {
    name: tf.io.FixedLenFeature([], tf.float32) for name in FEATURES
}
feature_description[LABELS] = tf.io.FixedLenFeature([], tf.int64)

In [4]:
%%time

# Build the batched input dataset from the files matching DATA_PATTERN.
# label_key splits the label column out of the parsed features, so each
# element is a (features, label) pair.
# NOTE(review): NUM_PARALLEL_READS is used for the *parser* thread count here,
# not for the reader threads -- confirm that this is the intended knob.
dataset = tf.data.experimental.make_batched_features_dataset(
    file_pattern=DATA_PATTERN,
    batch_size=BATCH_SIZE,
    features=feature_description,
    label_key=LABELS,
    num_epochs=1,
    parser_num_threads=NUM_PARALLEL_READS,
)

CPU times: user 428 ms, sys: 233 ms, total: 661 ms
Wall time: 673 ms


In [5]:
def show_batch(dataset):
    """Print the contents of the first batch of ``dataset``.

    Each feature is printed on its own line as ``<key>: <values>``, with the
    key left-aligned in a 20-character column, followed by one line for the
    labels printed under the ``LABELS`` key.

    Args:
        dataset: object whose ``take(1)`` yields ``(features, label)`` pairs,
            where ``features`` maps keys to values exposing ``.numpy()`` and
            ``label`` exposes ``.numpy()`` as well.
    """
    render = "{:20s}: {}".format
    for feature_tensors, label_tensor in dataset.take(1):
        for column, tensor in feature_tensors.items():
            print(render(column, tensor.numpy()))
        print(render(LABELS, label_tensor.numpy()))

In [6]:
# Optional sanity check: uncomment to print the first parsed batch.
# show_batch(dataset)

In [7]:
class PackNumericFeatures(object):
    """Callable that packs several feature columns into a single tensor.

    Shaped for use with ``Dataset.map`` on ``(features, labels)`` elements:
    the named columns are taken out of the feature mapping, cast to float32,
    stacked along a new last axis and stored under the ``NUMERIC`` key.
    """

    def __init__(self, names):
        """Store the feature keys to pack.

        Args:
            names: iterable of feature keys; their order determines the order
                of the columns along the stacked axis.
        """
        self.names = names

    def __call__(self, features, labels):
        """Return ``(packed_features, labels)`` without modifying the inputs.

        Args:
            features: mapping from feature key to tensor; must contain every
                key in ``self.names``.
            labels: passed through unchanged.

        Returns:
            A tuple ``(packed_features, labels)`` where ``packed_features`` is
            a new dict holding the features not listed in ``self.names`` plus
            the stacked float32 tensor under ``NUMERIC``.

        Raises:
            KeyError: if a name in ``self.names`` is missing from ``features``.
        """
        # Work on a shallow copy: popping straight from the argument would
        # strip the columns out of the caller's own mapping, so the same batch
        # could not be inspected or packed again afterwards.
        remaining = dict(features)
        columns = [tf.cast(remaining.pop(name), tf.float32) for name in self.names]
        remaining[NUMERIC] = tf.stack(columns, axis=-1)

        return remaining, labels

In [8]:
# Replace the individual feature columns of every batch with one stacked
# tensor stored under the NUMERIC key.
packed_data = dataset.map(
    map_func=PackNumericFeatures(FEATURES),
)

In [9]:
# A single numeric feature column describing the packed vector: one entry per
# name in FEATURES.
numeric_column = tf.feature_column.numeric_column(
    key=NUMERIC,
    shape=(len(FEATURES),),
)
numeric_columns = [numeric_column]

In [10]:
# Input layer that turns the feature dict into a dense tensor using the
# feature columns declared above.
numeric_layer = layers.DenseFeatures(feature_columns=numeric_columns)

In [11]:
# Feed-forward classifier: feature-column input layer, two 64-unit ReLU
# hidden layers, and a single-unit output layer without an activation.
model = tf.keras.Sequential([
    numeric_layer,
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])

In [12]:
# The final Dense(1) layer has no activation, so the model emits raw logits.
# The loss accounts for that via from_logits=True, but the 'accuracy' string
# shortcut resolves to binary accuracy with its default 0.5 cut-off applied
# directly to the model output. On logits the decision boundary
# (probability 0.5) sits at 0.0, so the metric is configured explicitly.
# name='accuracy' keeps the key shown in the logs and in the fit history.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
)

In [13]:
%%time
# Train on the packed dataset for EPOCHS epochs and keep the object returned
# by fit() in `hist`.
hist = model.fit(x=packed_data, epochs=EPOCHS)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 54.8 s, sys: 6.43 s, total: 1min 1s
Wall time: 25.2 s


In [14]:
# Print the layer-by-layer overview of the model (output shapes and parameter
# counts per layer, plus totals).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_features (DenseFeature multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  704       
_________________________________________________________________
dense_1 (Dense)              multiple                  4160      
_________________________________________________________________
dense_2 (Dense)              multiple                  65        
Total params: 4,929
Trainable params: 4,929
Non-trainable params: 0
_________________________________________________________________
