In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

In [3]:
import pathlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import tensorflow as tf
np.set_printoptions(precision=4)

In [4]:
dataset = tf.data.Dataset.from_tensor_slices([8, 3, 0, 8, 2, 1])
dataset

<TensorSliceDataset shapes: (), types: tf.int32>

In [6]:
for elem in dataset:
    print(elem.numpy())

8
3
0
8
2
1


In [7]:
it = iter(dataset)
print(next(it).numpy())

8


In [8]:
print(dataset.reduce(0, lambda state, value: state + value).numpy())

22


In [9]:
# Dataset Structure
dataset1 = tf.data.Dataset.from_tensor_slices(tf.random.uniform([4, 10]))
dataset1.element_spec

TensorSpec(shape=(10,), dtype=tf.float32, name=None)

In [15]:
dataset2 = tf.data.Dataset.from_tensor_slices(
    (tf.random.uniform([4]),
    tf.random.uniform([4, 100], maxval=100, dtype=tf.int32))
)
dataset2.element_spec

(TensorSpec(shape=(), dtype=tf.float32, name=None),
 TensorSpec(shape=(100,), dtype=tf.int32, name=None))

In [16]:
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))
dataset3.element_spec

(TensorSpec(shape=(10,), dtype=tf.float32, name=None),
 (TensorSpec(shape=(), dtype=tf.float32, name=None),
  TensorSpec(shape=(100,), dtype=tf.int32, name=None)))

In [17]:
dataset4 = tf.data.Dataset.from_tensors(
    tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
)
dataset4.element_spec

SparseTensorSpec(TensorShape([3, 4]), tf.int32)

In [18]:
dataset4.element_spec.value_type

tensorflow.python.framework.sparse_tensor.SparseTensor

In [19]:
dataset1 = tf.data.Dataset.from_tensor_slices(
    tf.random.uniform([4, 10], minval=1, maxval=10, dtype=tf.int32)
)
dataset1

<TensorSliceDataset shapes: (10,), types: tf.int32>

In [20]:
for z in dataset1:
    print(z.numpy())

[5 8 1 7 2 9 7 7 3 4]
[1 2 2 9 2 6 7 7 5 5]
[5 1 5 5 2 4 5 6 9 6]
[2 7 1 6 7 5 1 3 4 3]


In [21]:
dataset2 = tf.data.Dataset.from_tensor_slices(
    (tf.random.uniform([4]), tf.random.uniform([4, 100], maxval=100, dtype=tf.int32))
)

In [22]:
dataset2

<TensorSliceDataset shapes: ((), (100,)), types: (tf.float32, tf.int32)>

In [23]:
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))
dataset3

<ZipDataset shapes: ((10,), ((), (100,))), types: (tf.int32, (tf.float32, tf.int32))>

In [24]:
for a, (b, c) in dataset3:
    print(f'Shapes: {a.shape}, {b.shape}, {c.shape}')

Shapes: (10,), (), (100,)
Shapes: (10,), (), (100,)
Shapes: (10,), (), (100,)
Shapes: (10,), (), (100,)


In [25]:
# Reading Input Data
train, test = tf.keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [26]:
images, labels = train
images = images/255
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
dataset

<TensorSliceDataset shapes: ((28, 28), ()), types: (tf.float64, tf.uint8)>

In [27]:
# Consuming Python Generators
def count(stop):
    i = 0
    while i < stop:
        yield i
        i += 1

In [28]:
for n in count(5):
    print(n)

0
1
2
3
4


In [29]:
ds_counter = tf.data.Dataset.from_generator(
    count, args=[25], output_types=tf.int32, output_shapes=(),
)

In [30]:
for count_batch in ds_counter.repeat().batch(10).take(10):
    print(count_batch.numpy())

[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24  0  1  2  3  4]
[ 5  6  7  8  9 10 11 12 13 14]
[15 16 17 18 19 20 21 22 23 24]
[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24  0  1  2  3  4]
[ 5  6  7  8  9 10 11 12 13 14]
[15 16 17 18 19 20 21 22 23 24]


In [31]:
def gen_series():
    i = 0
    while True:
        size = np.random.randint(0, 10)
        yield i, np.random.normal(size=(size,))
        i += 1

In [32]:
for i, series in gen_series():
    print(i, ":", str(series))
    if(i > 5):
        break

0 : [-1.4569 -0.0409  0.6639  0.9623  0.1095 -0.1368  1.4254 -0.0551 -1.2091]
1 : [ 1.6655 -0.5723 -1.6929  1.7009 -0.0332  0.0287  0.4377]
2 : [ 1.7643 -0.6325  0.935   0.6433 -0.3792  0.3642 -0.648  -0.8988]
3 : [ 0.296   0.8404 -0.2564  1.3818  1.2996  1.5471 -0.0028  1.5006]
4 : [ 0.1767 -2.2221 -1.6871  1.5355  0.9395]
5 : [ 0.7851  0.5636 -0.1917 -0.8702]
6 : []


In [36]:
ds_series = tf.data.Dataset.from_generator(
    gen_series,
    output_types=(tf.int32, tf.float32),
    output_shapes=((), (None,))
)
ds_series

<FlatMapDataset shapes: ((), (None,)), types: (tf.int32, tf.float32)>

In [37]:
ds_series_batch = ds_series.shuffle(20).padded_batch(10)

ids, sequence_batch = next(iter(ds_series_batch))
print(ids.numpy())
print()
print(sequence_batch.numpy())

TypeError: padded_batch() missing 1 required positional argument: 'padded_shapes'