In [1]:
import tensorflow as tf
from tensorflow.keras import layers

In [2]:
# Column layout of the CSV input.
FEATURES = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
LABELS = 'labels'
NUMERIC = 'numeric'  # key under which the packed feature tensor is stored

# Input pipeline settings.
DATA_PATTERN = 'data/sharded/*'
NUM_PARALLEL_READS = 4
BATCH_SIZE = 1000
PREFETCH_BUFFER_SIZE = 1000  # feature batches

# Training settings.
EPOCHS = 5

In [3]:
%%time
# Batched (features, label) dataset built from every file matching DATA_PATTERN.
dataset = tf.data.experimental.make_csv_dataset(
    file_pattern=DATA_PATTERN,
    label_name=LABELS,
    batch_size=BATCH_SIZE,
    num_epochs=1,  # one pass over the files; the epoch count is set when training
    prefetch_buffer_size=PREFETCH_BUFFER_SIZE,
    num_parallel_reads=NUM_PARALLEL_READS,
)

CPU times: user 120 ms, sys: 94.9 ms, total: 215 ms
Wall time: 238 ms


In [4]:
def show_batch(dataset):
    """Print the first element of ``dataset`` for a quick visual check.

    The element is unpacked as ``(features, label)``, where ``features`` is a
    mapping of column name to tensor. Each feature is printed on its own
    line, followed by the label printed under the ``LABELS`` name.
    """
    row_template = "{:20s}: {}"
    for features, targets in dataset.take(1):
        for column_name in features:
            print(row_template.format(column_name, features[column_name].numpy()))
        print(row_template.format(LABELS, targets.numpy()))

In [5]:
# show_batch(dataset)

In [6]:
class PackNumericFeatures(object):
    """Callable that packs selected feature columns into one float32 tensor.

    Instances are meant to be passed to ``Dataset.map``: every column listed
    in ``names`` is removed from the features mapping, cast to ``tf.float32``,
    and the results are stacked along a new last axis and stored under the
    ``NUMERIC`` key.
    """

    def __init__(self, names):
        # Names of the feature columns to pack, in stacking order.
        self.names = names

    def __call__(self, features, labels):
        """Return ``(features, labels)`` with the named columns packed.

        Note that ``features`` is modified in place: the named columns are
        popped from it and the ``NUMERIC`` entry is added.
        """
        as_float = []
        for column in self.names:
            as_float.append(tf.cast(features.pop(column), tf.float32))
        features[NUMERIC] = tf.stack(as_float, axis=-1)
        return features, labels

In [7]:
# Apply the packing callable to every element of the dataset.
packed_data = dataset.map(
    map_func=PackNumericFeatures(names=FEATURES),
)

In [8]:
# One numeric feature column that reads the packed tensor stored under NUMERIC;
# its width is the number of packed feature names.
numeric_column = tf.feature_column.numeric_column(
    key=NUMERIC,
    shape=[len(FEATURES)],
)
numeric_columns = [numeric_column]

In [9]:
# Input layer that turns the features mapping into a dense tensor.
numeric_layer = layers.DenseFeatures(feature_columns=numeric_columns)

In [10]:
# Feed-forward stack: the feature layer, two 64-unit ReLU hidden layers, and a
# single-unit output with no activation.
model = tf.keras.Sequential([
    numeric_layer,
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])

In [11]:
# The output layer has no activation and the loss is configured with
# from_logits=True, so predictions are raw logits. The 'accuracy' string
# shortcut resolves to binary accuracy with its default 0.5 threshold, which is
# the wrong cut-off for logits; the decision boundary (probability 0.5) sits at
# logit 0.0. The metric keeps the name 'accuracy' so history keys are unchanged.
# NOTE(review): assumes the label column holds binary 0/1 values — confirm.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
)

In [12]:
%%time
# Train on the packed dataset for EPOCHS epochs and keep the returned history.
hist = model.fit(x=packed_data, epochs=EPOCHS)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 1min 53s, sys: 13.8 s, total: 2min 7s
Wall time: 1min 2s


In [13]:
# Print the per-layer summary of the model, including parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_features (DenseFeature multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  704       
_________________________________________________________________
dense_1 (Dense)              multiple                  4160      
_________________________________________________________________
dense_2 (Dense)              multiple                  65        
Total params: 4,929
Trainable params: 4,929
Non-trainable params: 0
_________________________________________________________________
