# Getting started with TensorFlow's Dataset API (continuation)

In this notebook we will construct `Dataset` objects from user-defined Python generators.

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Switch TensorFlow 1.x to eager mode so the cells below can iterate over the
# dataset with a plain Python loop and call .numpy() on the yielded tensors.
tf.enable_eager_execution()

In [3]:
# Show the TensorFlow version this notebook was executed with.
tf.VERSION

'1.13.1'

In [4]:
def dataset_generator():
    """Yield ten ``(features, label)`` tuples.

    ``features`` is a length-4 array produced by ``np.random.random`` and
    ``label`` is the position of the sample in the stream (0 through 9).
    """
    for label in range(10):
        features = np.random.random([4])
        yield features, label

In [5]:
# Run the plain-Python generator once to inspect the raw (features, label) tuples.
for sample in dataset_generator():
    print(sample)

(array([0.27058105, 0.69830066, 0.38385611, 0.60737082]), 0)
(array([0.67234124, 0.99337454, 0.51058177, 0.51350917]), 1)
(array([0.29373408, 0.79397718, 0.02331358, 0.05309646]), 2)
(array([0.95825575, 0.95948214, 0.97225435, 0.33481789]), 3)
(array([0.75388447, 0.12415967, 0.76274676, 0.99042121]), 4)
(array([0.79960608, 0.33980786, 0.94691795, 0.72759385]), 5)
(array([0.44192287, 0.36554297, 0.20602622, 0.68406369]), 6)
(array([0.65394808, 0.69426522, 0.10755394, 0.16472271]), 7)
(array([0.48857658, 0.79834619, 0.81578905, 0.63458126]), 8)
(array([0.8068656 , 0.72511846, 0.04414036, 0.77260463]), 9)


In [6]:
# Wrap the Python generator in a tf.data.Dataset.
# output_types declares the dtype of each tuple component; output_shapes declares
# a length-4 feature vector and a scalar label. Without output_shapes the element
# shapes are statically unknown and the yielded values are not shape-checked.
dataset = tf.data.Dataset.from_generator(
    dataset_generator,
    output_types=(tf.float32, tf.int32),
    output_shapes=(tf.TensorShape([4]), tf.TensorShape([])),
)

Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, use
    tf.py_function, which takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    


In [7]:
# Each dataset element unpacks into a (features, label) pair of eager tensors;
# convert both to NumPy values before formatting them.
for features, label in dataset:
    features_np, label_np = features.numpy(), label.numpy()
    print('features: %s  |  label: %s' % (features_np, label_np))

Instructions for updating:
Colocations handled automatically by placer.
features: [0.9159084  0.53286314 0.01217008 0.84584475]  |  label: 0
features: [0.9384068  0.23629169 0.07314818 0.79173243]  |  label: 1
features: [0.44883713 0.77921253 0.49431425 0.80673945]  |  label: 2
features: [0.2647792  0.3752539  0.7520048  0.95972496]  |  label: 3
features: [0.22100736 0.03837681 0.91871613 0.5115462 ]  |  label: 4
features: [0.15960833 0.04371153 0.25905412 0.33913657]  |  label: 5
features: [0.269924   0.93548834 0.35879588 0.62367374]  |  label: 6
features: [0.00329182 0.65148    0.894186   0.11565393]  |  label: 7
features: [0.09413405 0.8996212  0.59721625 0.70867187]  |  label: 8
features: [0.00294298 0.32043758 0.2465225  0.13407223]  |  label: 9


### The dataset generator can also yield a dictionary

In [8]:
def dataset_generator():
    """Yield ten dict-shaped samples.

    This redefines the tuple-yielding ``dataset_generator`` from earlier in the
    notebook. Each sample maps ``'features'`` to a length-4 array produced by
    ``np.random.random`` and ``'label'`` to the sample's index (0 through 9).
    """
    for label in range(10):
        sample = {'features': np.random.random([4]), 'label': label}
        yield sample

In [9]:
# output_types and output_shapes use the same dict structure the generator yields:
# a length-4 float feature vector under 'features' and a scalar int under 'label'.
# Declaring the shapes gives every element a fully defined static shape and makes
# TensorFlow check the yielded values against it.
dataset = tf.data.Dataset.from_generator(
    dataset_generator,
    output_types={'features': tf.float32, 'label': tf.int32},
    output_shapes={'features': tf.TensorShape([4]), 'label': tf.TensorShape([])},
)
# Go through the generator's samples twice (labels run 0..9, then 0..9 again).
dataset = dataset.repeat(2)

In [10]:
# Elements are now dicts of eager tensors; look each component up by key,
# then convert to NumPy values for printing.
for entry in dataset:
    feature_values = entry['features'].numpy()
    label_value = entry['label'].numpy()
    print('features: %s  |  label: %s' % (feature_values, label_value))

features: [0.74699664 0.34946454 0.8386742  0.81003034]  |  label: 0
features: [0.04162252 0.5974229  0.35254866 0.42807534]  |  label: 1
features: [0.13503495 0.25979614 0.88276994 0.63166964]  |  label: 2
features: [0.62046987 0.2534979  0.3337435  0.00116533]  |  label: 3
features: [0.92309475 0.12687115 0.7393577  0.7611156 ]  |  label: 4
features: [0.90383595 0.9055808  0.7195641  0.45513982]  |  label: 5
features: [0.5025343  0.3155986  0.90408266 0.9794604 ]  |  label: 6
features: [0.6105794  0.41902468 0.00362045 0.36003393]  |  label: 7
features: [0.60594237 0.33063552 0.4233498  0.13545297]  |  label: 8
features: [0.33002225 0.6350912  0.7357312  0.22735518]  |  label: 9
features: [0.04004329 0.14204617 0.13749245 0.32856888]  |  label: 0
features: [0.09084818 0.30651048 0.31099996 0.12706304]  |  label: 1
features: [0.3482382  0.50807816 0.03591583 0.12454177]  |  label: 2
features: [0.14438093 0.03887073 0.5179454  0.7885791 ]  |  label: 3
features: [0.6891576  0.8785742  0