In [1]:
import tensorflow as tf
import tensorflow.keras.backend as K
import numpy as np

In [16]:
# 1. Embed input in the graph as constant (bad)

# Keep the first (x, y) pair returned by the CIFAR-10 loader; the second is discarded.
(x, y), _ = tf.keras.datasets.cifar10.load_data()

# Slicing the NumPy arrays directly is what embeds them in the graph.
ds = (
    tf.data.Dataset.from_tensor_slices((x, y))
    .shuffle(10)
    .repeat()
    .batch(32)
)
it = ds.make_one_shot_iterator()

# Fetch a single batch of 32 elements to exercise the pipeline.
with tf.Session() as sess:
    sess.run(it.get_next())

In [18]:
# 2. Feed input into the graph with a placeholder (ok)
# NB: from the docs:
# "While feeding data using a feed_dict offers a high level of flexibility,
#  in general, feed_dict does not provide a scalable solution. Avoid using
#  feed_dict for all but trivial examples."

# The dataset is built on placeholders shaped like the arrays; the arrays
# themselves are only supplied when the iterator is initialized.
(x, y), _ = tf.keras.datasets.cifar10.load_data()
xs_ph = tf.placeholder(dtype=np.float32, shape=x.shape)
ys_ph = tf.placeholder(dtype=np.float32, shape=y.shape)

ds = (
    tf.data.Dataset.from_tensor_slices((xs_ph, ys_ph))
    .shuffle(10)
    .repeat()
    .batch(32)
)
it = ds.make_initializable_iterator()

with tf.Session() as sess:
    # Initialization is the step that receives the actual data.
    sess.run(it.initializer, feed_dict={xs_ph: x, ys_ph: y})
    # Fetch a single batch of 32 elements.
    sess.run(it.get_next())

In [40]:
# 3. Stream from TFRecord serialized data on disk (good)
# https://www.tensorflow.org/guide/datasets
# https://www.tensorflow.org/guide/performance/datasets

def parse_fn(example):
    """Decode one serialized Example into an (image, label) tensor pair.

    Args:
        example: value handed to `tf.parse_single_example`; expected to carry
            an "image" bytes feature and a "label" int64 feature.

    Returns:
        A tuple `(image, label)`: `image` is a float32 tensor laid out as
        [32, 32, 3] and `label` is an int32 scalar tensor.
    """
    feature_spec = {
        "image": tf.FixedLenFeature((), tf.string, ""),
        "label": tf.FixedLenFeature((), tf.int64, -1),
    }
    features = tf.parse_single_example(example, feature_spec)

    # The image bytes decode to a flat uint8 vector of 3 * 32 * 32 values.
    flat_pixels = tf.decode_raw(features["image"], tf.uint8)
    flat_pixels.set_shape([3 * 32 * 32])

    # [depth * height * width] -> [depth, height, width] -> [height, width, depth].
    depth_first = tf.reshape(flat_pixels, [3, 32, 32])
    depth_last = tf.transpose(depth_first, [1, 2, 0])
    image = tf.cast(depth_last, tf.float32)

    label = tf.cast(features["label"], tf.int32)
    return image, label

filenames = ["./data/train.tfrecords", "./data/test.tfrecords"]

# Records are shuffled and repeated while still serialized, then parsed one
# by one with parse_fn and grouped into batches of 32.
ds = (
    tf.data.TFRecordDataset(filenames)
    .shuffle(10)
    .repeat()
    .map(map_func=parse_fn)
    .batch(32)
)
it = ds.make_one_shot_iterator()

# Fetch a single batch to exercise the pipeline.
with tf.Session() as sess:
    sess.run(it.get_next())

In [2]:
# DATASETS

# from numpy: first a single array, then a tuple of two arrays sliced together
tf.data.Dataset.from_tensor_slices(np.random.sample([10, 2]))
tf.data.Dataset.from_tensor_slices(
    (np.random.sample([10, 2]), np.random.sample([10, 1]))
)

# from tensors: the source is a tensor produced inside the graph
tf.data.Dataset.from_tensor_slices(tf.random_uniform([10, 2]))

# from placeholder: the number of rows is left open (None) until data is fed
tf.data.Dataset.from_tensor_slices(
    tf.placeholder(tf.float32, shape=[None, 2])
)

# from generator
# Every yielded element has to be compatible with output_shapes ([None, 1]):
# a nested list with any number of rows and exactly one column. The first
# element is therefore [[1]] (one row), not the rank-1 list [1].
seq = [[[1]], [[2], [3]], [[4], [5], [6]]]


def gen():
    """Yield the elements of the module-level `seq`, one per step."""
    yield from seq


_ = tf.data.Dataset.from_generator(gen,
                                   output_types=tf.int64,
                                   output_shapes=(tf.TensorShape([None, 1])))

In [3]:
# ITERATORS


# one shot
# No initializer is run here: the get_next op is evaluated directly, and
# each evaluation yields the following element of the dataset.
ds = tf.data.Dataset.from_tensor_slices(
    (np.random.sample([5, 2]), np.random.sample([5, 1]))
)
it = ds.make_one_shot_iterator()
el = it.get_next()

with tf.Session() as sess:
    # Two consecutive fetches -> first and second element.
    val_1 = sess.run(el)
    val_2 = sess.run(el)
    
# initializable
# The dataset is defined on placeholders, so the same iterator can be
# re-initialized with different arrays through feed_dict.
x = tf.placeholder(tf.float32, shape=[None, 2])
y = tf.placeholder(tf.float32, shape=[None, 1])
ds = tf.data.Dataset.from_tensor_slices((x, y))
it = ds.make_initializable_iterator()

train = np.random.sample([10, 2]), np.random.sample([10, 1])
test = np.random.sample([5, 2]), np.random.sample([5, 1])

xs, ys = it.get_next()

with tf.Session() as sess:
    # Same procedure for both splits: feed the arrays into the initializer,
    # then read three elements.
    for split in (train, test):
        sess.run(it.initializer, feed_dict={x: split[0], y: split[1]})
        for _ in range(3):
            sess.run((xs, ys))
    
# reinitializable
# One iterator is defined purely by element types/shapes; a separate
# initializer op per dataset switches what it reads from.
train = np.random.sample([10, 2]), np.random.sample([10, 1])
test = np.random.sample([5, 2]), np.random.sample([5, 1])
train_ds = tf.data.Dataset.from_tensor_slices(train)
test_ds = tf.data.Dataset.from_tensor_slices(test)

it = tf.data.Iterator.from_structure(
    train_ds.output_types,
    train_ds.output_shapes,
)
train_init_op = it.make_initializer(train_ds)
test_init_op = it.make_initializer(test_ds)

xs, ys = it.get_next()

with tf.Session() as sess:
    # Point the iterator at each dataset in turn and read three elements.
    for init_op in (train_init_op, test_init_op):
        sess.run(init_op)
        for _ in range(3):
            sess.run((xs, ys))
        
# feedable
# A single get_next op is built from a string-handle placeholder; the handle
# fed at run time selects which initialized iterator supplies the element.

x, y = tf.placeholder(tf.float32, shape=[None, 2]), tf.placeholder(tf.float32, shape=[None, 1])
train_ds = tf.data.Dataset.from_tensor_slices((x, y))
test_ds = tf.data.Dataset.from_tensor_slices((x, y))

train_it = train_ds.make_initializable_iterator()
test_it = test_ds.make_initializable_iterator()

handle = tf.placeholder(tf.string, shape=[])
it = tf.data.Iterator.from_string_handle(handle, train_ds.output_types, train_ds.output_shapes)

xs, ys = it.get_next()

train = np.random.sample([10, 2]), np.random.sample([10, 1])
test = np.random.sample([5, 2]), np.random.sample([5, 1])

with tf.Session() as sess:
    train_handle = sess.run(train_it.string_handle())
    test_handle = sess.run(test_it.string_handle())

    sess.run(train_it.initializer, feed_dict={x: train[0], y: train[1]})
    sess.run(test_it.initializer, feed_dict={x: test[0], y: test[1]})

    # Fetched values go into x_val / y_val so the `x` and `y` placeholders
    # stay bound to the placeholders and remain usable as feed_dict keys.
    for _ in range(3):
        x_val, y_val = sess.run((xs, ys), feed_dict={handle: train_handle})

    for _ in range(3):
        x_val, y_val = sess.run((xs, ys), feed_dict={handle: test_handle})
        

In [58]:
# DATASET -> ITERATOR -> TF MODEL
# LINEAR REGRESSION

x = tf.placeholder(tf.float32, shape=[None, 2])
y = tf.placeholder(tf.float32, shape=[None, 1])
n = 1000
bs = 32

# Input pipeline: shuffled, endlessly repeated, batches of `bs` elements.
ds = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(100).repeat().batch(bs)
it = ds.make_initializable_iterator()
xs, ys = it.get_next()

# Model: one dense unit on the two input features, trained with MSE and SGD.
out = tf.layers.dense(xs, 1)
loss = tf.losses.mean_squared_error(ys, out)
train = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

# Each target is the sum of the two input features.
train_x = np.random.sample([n, 2]).astype(np.float32)
train_y = (train_x @ np.array([[1], [1]])).astype(np.float32)

# Steps per pass over the data. The dataset repeats indefinitely, so the
# pass length is fixed here from the sample count and the batch size `bs`.
n_batches = n // bs

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(it.initializer, feed_dict={x: train_x, y: train_y})
    for e in range(10):
        total_loss = 0
        for b in range(n_batches):
            _, loss_val = sess.run([train, loss])
            total_loss += loss_val
        print(f'Iter: {e + 1}  | loss: {total_loss/n_batches}')

Iter: 1  | loss: 0.17061021323165587
Iter: 2  | loss: 0.042011099597138744
Iter: 3  | loss: 0.018752338605061654
Iter: 4  | loss: 0.00871591495289918
Iter: 5  | loss: 0.004065411135313972
Iter: 6  | loss: 0.0019269170044290444
Iter: 7  | loss: 0.0009110278239653956
Iter: 8  | loss: 0.00043823936345776724
Iter: 9  | loss: 0.00020794451237663686
Iter: 10  | loss: 9.888636169865006e-05


In [99]:
# DATASET -> KERAS
# LINEAR REGRESSION

# Each target is the sum of the two input features.
x = np.random.sample([1000, 2]).astype(np.float32)
y = np.matmul(x, np.array([[1], [1]])).astype(np.float32)

# The dataset is consumed through the tensors returned by a one-shot iterator.
ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(32).repeat()
it = ds.make_one_shot_iterator()
xs, ys = it.get_next()

model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(2,)),
    tf.keras.layers.Dense(1),
])
opt = tf.keras.optimizers.SGD(lr=0.5)
model.compile(optimizer=opt, loss='mse')

# The inputs are tensors, so the length of an epoch is given in steps.
model.fit(xs, ys, steps_per_epoch=10, epochs=10)
model.get_weights()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[array([[0.99937516],
        [0.9993751 ]], dtype=float32), array([0.00066875], dtype=float32)]

In [60]:
# NUMPY -> KERAS
# LINEAR REGRESSION

# Each target is the sum of the two input features.
x = np.random.sample([1000, 2]).astype(np.float32)
y = np.matmul(x, np.array([[1], [1]])).astype(np.float32)

# A single dense unit, fitted directly on the NumPy arrays.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1),
])
opt = tf.keras.optimizers.SGD(lr=0.5)
model.compile(optimizer=opt, loss='mse')
model.fit(x=x, y=y, epochs=10)
model.get_weights()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[array([[0.9999999],
        [0.9999999]], dtype=float32), array([1.0378426e-07], dtype=float32)]

In [101]:
# NOTE: DATASET -> KERAS MODEL usage, as specified in the docs, is broken: https://github.com/tensorflow/tensorflow/issues/22207

# Small classifier: two hidden ReLU layers of 64 units and a 10-way softmax output.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(
    optimizer=tf.train.AdamOptimizer(0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Random inputs (1000 x 32) and random targets (1000 x 10).
data = np.random.random((1000, 32)).astype(np.float32)
labels = np.random.random((1000, 10)).astype(np.float32)
dataset = (
    tf.data.Dataset.from_tensor_slices((data, labels))
    .batch(32)
    .repeat()
)

# Passing the Dataset object itself to fit() is the call being demonstrated.
model.fit(dataset, steps_per_epoch=1, epochs=5, verbose=2) # crashes