In [25]:
import tensorflow as tf


def make_tensor_dataset():
    """Build the small toy dataset used by the examples in this notebook.

    The nested list has shape (2, 2, 3). `from_tensor_slices` slices it along
    the first axis, so the dataset yields 2 elements of shape (2, 3), read
    here as 2 batches of 2 samples with 3 features each.
    """
    first_batch = [[1, 2, 3], [4, 5, 6]]
    second_batch = [[7, 8, 9], [10, 11, 12]]
    return tf.data.Dataset.from_tensor_slices([first_batch, second_batch])


def add_labels(dataset):
    """Return a dataset that pairs each element of `dataset` with labels.

    Every element is mapped to an `(element, labels)` tuple, where `labels`
    is always the constant tensor `[0, 1]`.
    """
    def attach_labels(batch):
        # The same two labels are attached to every element.
        return batch, tf.constant([0, 1])

    return dataset.map(attach_labels)


def make_model():
    """Create a new, uncompiled Sequential model made of one Dense(1) layer."""
    output_layer = tf.keras.layers.Dense(1)
    return tf.keras.models.Sequential([output_layer])

# model.predict()


## Labels Ignored

It's ok to pass in a labeled dataset to model.predict, and the labels will just be ignored. The output will be the same as if you didn't pass in the labels.


In [17]:
# Build both variants of the data up front under stage-specific names instead
# of rebinding `dataset` halfway through the cell.
unlabeled_dataset = make_tensor_dataset()
labeled_dataset = add_labels(unlabeled_dataset)
model = make_model()

# Run the same model on both variants; the printed report is identical to
# writing the two print sequences out by hand.
predictions_by_title = {}
for title, dataset in (('Tensor', unlabeled_dataset),
                       ('Labeled', labeled_dataset)):
    predicted = model.predict(dataset)
    predictions_by_title[title] = predicted
    print()
    print(f'{title} Dataset Output Type: {type(predicted)}')
    print(f'{title} Dataset Output Shape: {predicted.shape}')
    print(predicted)
    print()

# Check the claim made in the prose above: attaching labels must not change
# what predict() returns.
max_difference = abs(predictions_by_title['Tensor']
                     - predictions_by_title['Labeled']).max()
assert max_difference < 1e-6, 'labels changed the predictions'


Tensor Dataset Output Type: <class 'numpy.ndarray'>
Tensor Dataset Output Shape: (4, 1)
[[ -1.5339223]
 [ -5.8800726]
 [-10.226223 ]
 [-14.572372 ]]


Labeled Dataset Output Type: <class 'numpy.ndarray'>
Labeled Dataset Output Shape: (4, 1)
[[ -1.5339223]
 [ -5.8800726]
 [-10.226223 ]
 [-14.572372 ]]



2023-05-19 09:05:13.190930: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32 and shape [2,2,3]
	 [[{{node Placeholder/_0}}]]
2023-05-19 09:05:13.257785: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32 and shape [2,2,3]
	 [[{{node Placeholder/_0}}]]


## Returns numpy array

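`model.predict()` returns a numpy array instead of a tensorflow tensor! Also, it returns the whole output in memory as a single array with **no batch** dimension: each element of the input dataset is treated as one batch, and the per-batch outputs are concatenated (here, 2 batches of 2 samples become one `(4, 1)` array).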


In [20]:
# Run the whole dataset through a newly built model in a single call.
dataset = make_tensor_dataset()
model = make_model()
predicted = model.predict(dataset)

# Report the container type and the overall shape, one per line.
print(type(predicted), predicted.shape, sep='\n')

<class 'numpy.ndarray'>
(4, 1)


2023-05-19 09:10:51.763299: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32 and shape [2,2,3]
	 [[{{node Placeholder/_0}}]]


## Batching Output

If you need to batch the output, for instance to write a large dataset's outputs to disk when it's too large to fit in memory, you can do it this way.


In [26]:
dataset = make_tensor_dataset()
model = make_model()

# Feed the model one dataset element at a time, so only the outputs of the
# current batch need to be held before they are handled.
for batch in dataset:
    predicted = model.predict_on_batch(batch)
    # Type, shape, then an empty separator line.
    print(type(predicted), predicted.shape, '', sep='\n')

<class 'numpy.ndarray'>
(2, 1)

<class 'numpy.ndarray'>
(2, 1)



2023-05-19 09:18:11.073533: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32 and shape [2,2,3]
	 [[{{node Placeholder/_0}}]]
2023-05-19 09:18:11.083057: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32 and shape [2,3]
	 [[{{node Placeholder/_0}}]]
2023-05-19 09:18:11.127256: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32 and shape [2,3]
	 

## Shuffling

When a dataset is shuffled, if reshuffle_each_iteration is true, then every time you iterate over the dataset, you get a new random **order of batches**.

Note that when you load via **make_csv_dataset**, there is no flag for this, but if you tell it to shuffle, then that's what it does.


In [36]:
dataset = make_tensor_dataset()
dataset = dataset.shuffle(seed=43,
                          reshuffle_each_iteration=True,
                          buffer_size=1000)

# Materialise the dataset three times; each list(...) is a separate pass over
# the shuffled dataset. A blank line separates consecutive listings.
for pass_index in range(3):
    if pass_index > 0:
        print()
    print(list(dataset))

[<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[ 7,  8,  9],
       [10, 11, 12]], dtype=int32)>, <tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[1, 2, 3],
       [4, 5, 6]], dtype=int32)>]

[<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[1, 2, 3],
       [4, 5, 6]], dtype=int32)>, <tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[ 7,  8,  9],
       [10, 11, 12]], dtype=int32)>]

[<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[ 7,  8,  9],
       [10, 11, 12]], dtype=int32)>, <tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[1, 2, 3],
       [4, 5, 6]], dtype=int32)>]


2023-05-19 09:27:54.757098: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32 and shape [2,2,3]
	 [[{{node Placeholder/_0}}]]


## Shuffling and Outputs

If you let `model.predict` implicitly iterate over a shuffled dataset and then iterate over the dataset again to correlate the inputs with the model's outputs, the **outputs won't match the inputs**, because each iteration reshuffles. Notice below how the unshuffled outputs and shuffled outputs don't match, meaning the model saw the inputs in a different order, but listing out the inputs afterwards shows them in the original order (by random chance).

Solutions:

- **don't shuffle** if you're doing predictions and the model has no order dependence
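- **shuffle in batches** like above, i.e. iterate over the shuffled dataset yourself and call `predict_on_batch` on each batch, so that you can correlate each input tensor (instead of the whole dataset) with its output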


In [42]:
dataset = make_tensor_dataset()
model = make_model()

# Baseline: predictions for the dataset before any shuffling is applied.
predictions = model.predict(dataset)
print('Unshuffled Predictions', predictions, sep='\n')

dataset = dataset.shuffle(seed=42,
                          reshuffle_each_iteration=True,
                          buffer_size=1000)

# First pass over the shuffled dataset: consumed implicitly by predict().
print('', 'Shuffled Predictions', sep='\n')
print(model.predict(dataset))

# Second, separate pass over the shuffled dataset: its order need not be the
# one the model saw during the predict() call above.
print('', 'Shuffled Dataset', sep='\n')
print(list(dataset))

Unshuffled Predictions
[[-2.5772736]
 [-4.757553 ]
 [-6.9378324]
 [-9.118112 ]]

Shuffled Predictions
[[-6.9378324]
 [-9.118112 ]
 [-2.5772736]
 [-4.757553 ]]

Shuffled Dataset
[<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[1, 2, 3],
       [4, 5, 6]], dtype=int32)>, <tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[ 7,  8,  9],
       [10, 11, 12]], dtype=int32)>]


2023-05-19 09:34:08.603861: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32 and shape [2,2,3]
	 [[{{node Placeholder/_0}}]]
2023-05-19 09:34:08.666442: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32 and shape [2,2,3]
	 [[{{node Placeholder/_0}}]]


# Workflow Example

```Python
# Minimal binary classifier: a single sigmoid unit.
model = tf.keras.models.Sequential([tf.keras.layers.Dense(1, activation='sigmoid')])
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
              loss='binary_crossentropy',
              metrics=['accuracy'])
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=epochs
)  # use history.history['accuracy'] for plotting
model.predict(dataset)  # whole dataset in one call
model.predict_on_batch(batch)  # one batch at a time
```