# Source -> Transformation -> Iterator

# Dataset Structure

In [1]:
import tensorflow as tf

In [None]:
# Build a dataset from a random [4, 10] tensor and report the dtype and
# shape of the elements it yields.
random_rows = tf.random_uniform([4, 10])
dataset1 = tf.data.Dataset.from_tensor_slices(random_rows)

for spec in (dataset1.output_types, dataset1.output_shapes):
    print(spec)

In [None]:
# A dataset built from a tuple of tensors yields tuple-structured elements:
# the first component comes from the [4] tensor, the second from the
# [4, 100] tensor.
scalars = tf.random_uniform([4])
vectors = tf.random_uniform([4, 100], maxval=100, dtype=tf.float32)
dataset2 = tf.data.Dataset.from_tensor_slices((scalars, vectors))

for spec in (dataset2.output_types, dataset2.output_shapes):
    print(spec)

In [None]:
# Zip the two earlier datasets together; the printed types and shapes show
# the nested structure of the combined elements.
zipped_sources = (dataset1, dataset2)
dataset3 = tf.data.Dataset.zip(zipped_sources)

for spec in (dataset3.output_types, dataset3.output_shapes):
    print(spec)

In [None]:
# Components can also be named: passing a dict produces elements that are
# dicts with the same keys ("a" and "b").
named_components = {
    "a": tf.random_uniform([4]),
    "b": tf.random_uniform([4, 100], maxval=100),
}
dataset = tf.data.Dataset.from_tensor_slices(named_components)

for spec in (dataset.output_types, dataset.output_shapes):
    print(spec)

In [None]:
def _combine_components(x, y):
    """Merge the two components of a dataset2 element into one tensor."""
    return x + 2 + y * 2


# map() receives the tuple components as separate arguments; after the
# transformation each element is a single tensor instead of a pair.
dataset2 = dataset2.map(_combine_components)

print(dataset2.output_shapes)

# Create an iterator

In [None]:
# one-shot
# A one-shot iterator needs no explicit initialization: create it, grab the
# get_next() op once, and run that op repeatedly.
dataset = tf.data.Dataset.range(100)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    # Dataset.range(100) should yield 0..99 in order.
    for expected in range(100):
        value = sess.run(next_element)
        assert expected == value

In [11]:
# initializable iterator
# The dataset size is a placeholder, so the iterator must be initialized
# explicitly, feeding a concrete max_value each time. Re-running the
# initializer restarts iteration with the newly fed size.
max_value = tf.placeholder(tf.int64, shape=[])
dataset = tf.data.Dataset.range(max_value)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    # First pass: a dataset of 10 elements (0..9).
    sess.run(iterator.initializer, feed_dict={max_value: 10})
    for _ in range(10):
        value = sess.run(next_element)
        print(value)

    # Second pass: re-initialize with 30 elements (0..29). Read exactly 30;
    # requesting more would raise tf.errors.OutOfRangeError once the dataset
    # is exhausted.
    sess.run(iterator.initializer, feed_dict={max_value: 30})
    for _ in range(30):
        print(sess.run(next_element))

In [17]:
# reinitializable iterator
# One iterator, defined only by element structure, is pointed at either the
# training or the validation dataset by running the matching initializer op.
def _add_noise(x):
    """Perturb a training element with a random int64 offset."""
    return x + tf.random_uniform([], -10, 10, dtype=tf.int64)


training_dataset = tf.data.Dataset.range(100).map(_add_noise)
validation_dataset = tf.data.Dataset.range(50)

iterator = tf.data.Iterator.from_structure(
    training_dataset.output_types, training_dataset.output_shapes)
next_element = iterator.get_next()

training_init_op = iterator.make_initializer(training_dataset)
validation_init_op = iterator.make_initializer(validation_dataset)

epochs = 20
with tf.Session() as sess:
    # Each epoch: a full pass over training (100 elements), then a full pass
    # over validation (50 elements), re-initializing before each pass.
    phases = ((training_init_op, 100), (validation_init_op, 50))
    for _epoch in range(epochs):
        for init_op, num_elements in phases:
            sess.run(init_op)
            for _step in range(num_elements):
                sess.run(next_element)

In [2]:
# feedable iterator
# A string-handle placeholder selects, per sess.run call, which concrete
# iterator next_element pulls from.
def _jitter(x):
    """Perturb a training element with a random int64 offset."""
    return x + tf.random_uniform([], -10, 10, dtype=tf.int64)


training_dataset = tf.data.Dataset.range(100).map(_jitter).repeat()
validation_dataset = tf.data.Dataset.range(50)

handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    handle, training_dataset.output_types, training_dataset.output_shapes)
next_element = iterator.get_next()

training_iterator = training_dataset.make_one_shot_iterator()
validation_iterator = validation_dataset.make_initializable_iterator()

with tf.Session() as sess:
    # Evaluate each iterator's string handle once; the results are what gets
    # fed into the `handle` placeholder.
    training_handle = sess.run(training_iterator.string_handle())
    validation_handle = sess.run(validation_iterator.string_handle())

    training_feed = {handle: training_handle}
    validation_feed = {handle: validation_handle}

    for _round in range(50):
        # The training dataset repeats, so its iterator is never
        # re-initialized between rounds.
        for _step in range(200):
            sess.run(next_element, feed_dict=training_feed)

        # Validation is finite (50 elements): restart it every round.
        sess.run(validation_iterator.initializer)
        for _step in range(50):
            sess.run(next_element, feed_dict=validation_feed)

In [6]:
# Consuming values from an iterator
dataset = tf.data.Dataset.range(5)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

# Both operands are the same get_next() tensor, so one sess.run(result)
# consumes a single element and adds it to itself.
result = tf.add(next_element, next_element)

with tf.Session() as sess:
    sess.run(iterator.initializer)
    # Keep pulling until the iterator signals exhaustion.
    while True:
        try:
            doubled = sess.run(result)
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
        print(doubled)
            
        

0
2
4
6
8
End of dataset


In [7]:
import numpy as np

In [11]:
# Batching dataset
# Pair an increasing range (0..99) with a decreasing one (0..-99), then
# group consecutive pairs into batches of 4.
inc_dataset = tf.data.Dataset.range(100)
dec_dataset = tf.data.Dataset.range(0, -100, -1)
paired_sources = (inc_dataset, dec_dataset)
dataset = tf.data.Dataset.zip(paired_sources)
dataset_batch = dataset.batch(batch_size=4)

In [12]:
# Pull the first three batches from the batched dataset and print them.
iterator = dataset_batch.make_one_shot_iterator()
next_ = iterator.get_next()

with tf.Session() as sess:
    for _ in range(3):
        print(sess.run(next_))

(array([0, 1, 2, 3]), array([ 0, -1, -2, -3]))
(array([4, 5, 6, 7]), array([-4, -5, -6, -7]))
(array([ 8,  9, 10, 11]), array([ -8,  -9, -10, -11]))


In [13]:
# Batching with padding
def _repeat_own_value(x):
    """Turn scalar x into a 1-D tensor of length x filled with x."""
    return tf.fill([tf.cast(x, tf.int32)], x)


# Elements now have different lengths, so plain batch() cannot stack them;
# padded_batch with an unknown ([None]) dimension is used instead.
dataset = tf.data.Dataset.range(10).map(_repeat_own_value)
dataset = dataset.padded_batch(4, padded_shapes=[None])

iterator = dataset.make_one_shot_iterator()
next_ = iterator.get_next()

with tf.Session() as sess:
    # Print the first two padded batches.
    for _ in range(2):
        print(sess.run(next_))