In [23]:
from pyimagesearch.helpers import benchmark
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.datasets import cifar100
from tensorflow.data import AUTOTUNE
import tensorflow as tf

In [24]:
# benchmark configuration: samples per batch, and the number of batches
# that will be drawn from each input pipeline while it is being timed
BS = 64
NUM_STEPS = 5_000

### load the CIFAR-100 dataset from `tensorflow.keras.datasets`

In [25]:
print("[INFO] loading the cifar100 dataset...")
# load_data() hands back a (train, test) pair, each of which is an
# (images, labels) tuple; unpack the two levels one step at a time
trainSplit, testSplit = cifar100.load_data()
trainX, trainY = trainSplit
testX, testY = testSplit

[INFO] loading the cifar100 dataset...


In [26]:
# display the shapes of the training and test image arrays as a sanity check
trainX.shape, testX.shape

((50000, 32, 32, 3), (10000, 32, 32, 3))

### create a standard image generator object

In [27]:
print("[INFO] creating a ImageDataGenerator object...")
# no options are passed, so the generator is constructed with its defaults
imageGen = ImageDataGenerator()
# iterate over the in-memory training arrays in shuffled batches of BS samples
dataGen = imageGen.flow(
    x=trainX,
    y=trainY,
    batch_size=BS,
    shuffle=True,
)

[INFO] creating a ImageDataGenerator object...


### build a TensorFlow dataset from the training data

In [28]:
# slice the training arrays along their first axis so that every dataset
# element is a single (image, label) pair
dataset = tf.data.Dataset.from_tensor_slices(tensors=(trainX, trainY))

In [29]:
# display the dataset repr to inspect its per-element spec (before batching)
dataset

<TensorSliceDataset element_spec=(TensorSpec(shape=(32, 32, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(1,), dtype=tf.int64, name=None))>

### build the data input pipeline

In [30]:
print("[INFO] creating a tf.data input pipeline..")
# NOTE: this cell rebinds `dataset`; re-create it with the from_tensor_slices
# cell before re-running this one, otherwise the stages below get stacked on
# top of an already-built pipeline.
dataset = (
    dataset
    # cache the raw elements first: cache() replays exactly the elements it
    # recorded, so shuffle() has to come after it to get a fresh order on
    # every pass instead of replaying the first pass's order forever
    .cache()
    .shuffle(1024)
    # repeat indefinitely so NUM_STEPS batches can be drawn even though that
    # is more samples than a single pass over the training set provides
    .repeat()
    .batch(BS)
    # let tf.data prepare upcoming batches while the current one is consumed
    .prefetch(AUTOTUNE)
)

[INFO] creating a tf.data input pipeline..


In [31]:
# display the pipeline repr to confirm the element spec now carries a
# leading batch dimension
dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(None, 1), dtype=tf.int64, name=None))>

### benchmark the image data generator and display the number of data points generated, along with the time taken to perform the operation

In [32]:
# run the project's `benchmark` helper on the Keras generator for NUM_STEPS
# steps; the result is formatted below as a duration in seconds
# (presumably it times NUM_STEPS batch draws -- confirm in pyimagesearch.helpers)
totalTime = benchmark(dataGen, NUM_STEPS)
# NOTE(review): BS * NUM_STEPS assumes every batch holds exactly BS samples;
# verify this for the generator, since the last batch of a pass over the data
# may be smaller when the sample count is not a multiple of BS
msg = f"[INFO] ImageDataGenerator generated {BS * NUM_STEPS} images in {totalTime:.2f} seconds..."
print(msg)

[INFO] ImageDataGenerator generated 320000 images in 4.36 seconds...


### create a dataset iterator, benchmark the tf.data pipeline, and display the number of data points generated along with the time taken

In [37]:
# wrap the tf.data pipeline in a Python iterator; this iterator is what gets
# handed to `benchmark` in the next cell
datasetGen = iter(dataset)

# display the iterator object
datasetGen

<tensorflow.python.data.ops.iterator_ops.OwnedIterator at 0x7f8cb58f93d0>

In [38]:
# run the same `benchmark` helper, with the same NUM_STEPS, on the tf.data
# iterator so the two input pipelines can be compared
# (presumably it times NUM_STEPS batch draws -- confirm in pyimagesearch.helpers)
totalTime = benchmark(datasetGen, NUM_STEPS)
# report the nominal image count (BS * NUM_STEPS) and the measured duration
msg = f"[INFO] tf.data generated {BS * NUM_STEPS} images in {totalTime:.2f} seconds..."
print(msg)

[INFO] tf.data generated 320000 images in 0.75 seconds...
