In [1]:
import tensorflow as tf

In [2]:
import pathlib
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Notebook-wide display setting: print NumPy floats with 4 digits of precision.
np.set_printoptions(precision=4)

In [3]:
# Slice an in-memory Python list into a dataset with one scalar per element.
dataset = tf.data.Dataset.from_tensor_slices(
    tensors=[8, 3, 0, 8, 2, 1],
)
dataset

<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [4]:
# A dataset is a Python iterable; walk it and print each element as a
# NumPy value rather than as a tf.Tensor.
for value in dataset.as_numpy_iterator():
    print(value)

8
3
0
8
2
1


In [5]:
# Create an explicit Python iterator over the dataset.
it = iter(dataset)

# Consume a single element from the iterator and print it as a NumPy value.
print(next(it).numpy())

8


In [6]:
# Fold the whole dataset into one scalar: start from 0 and add every element.
total = dataset.reduce(initial_state=0, reduce_func=lambda acc, x: acc + x)
print(total.numpy())

22


In [7]:
# Slice a [4, 10] random tensor along its first axis; element_spec reports
# the per-element shape and dtype.
dataset1 = tf.data.Dataset.from_tensor_slices(
    tf.random.uniform(shape=[4, 10])
)

dataset1.element_spec

TensorSpec(shape=(10,), dtype=tf.float32, name=None)

In [8]:
# Slice a tuple of two tensors together: each element pairs one float
# scalar with one row of 100 int32 values.
dataset2 = tf.data.Dataset.from_tensor_slices(
    (
        tf.random.uniform(shape=[4]),
        tf.random.uniform(shape=[4, 100], maxval=100, dtype=tf.int32),
    )
)

dataset2.element_spec

(TensorSpec(shape=(), dtype=tf.float32, name=None),
 TensorSpec(shape=(100,), dtype=tf.int32, name=None))

In [9]:
# Combine dataset1 and dataset2 element-wise; the resulting element_spec
# nests dataset1's spec next to dataset2's (tuple) spec.
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))

dataset3.element_spec

(TensorSpec(shape=(10,), dtype=tf.float32, name=None),
 (TensorSpec(shape=(), dtype=tf.float32, name=None),
  TensorSpec(shape=(100,), dtype=tf.int32, name=None)))

In [10]:
# Sample a rank-1 tensor holding 2 uniformly distributed random values.
tf.random.uniform(shape=[2])

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0.5281, 0.3847], dtype=float32)>

In [11]:
# Sample a single scalar (shape=[]) uniformly between minval=-1 and maxval=0.
tf.random.uniform(shape=[], minval=-1., maxval=0.)

<tf.Tensor: shape=(), dtype=float32, numpy=-0.6370691>

In [12]:
# Rebuild `dataset` from a short list and print every element as a raw
# tf.Tensor (value, shape and dtype are all shown).
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
for element in dataset:
    print(element)

tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)


In [13]:
# Double every element with map(), then materialize the results as a list
# of NumPy values.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3]).map(
    lambda value: value * 2
)
[*dataset.as_numpy_iterator()]

[2, 4, 6]

In [14]:
# Rebuild `dataset` from a list of ints and inspect the type specification
# of a single element.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
dataset.element_spec

TensorSpec(shape=(), dtype=tf.int32, name=None)

In [15]:
# Slice a fresh [4, 10] random tensor along its first axis and inspect the
# per-element spec.
dataset1 = tf.data.Dataset.from_tensor_slices(
    tf.random.uniform(shape=[4, 10])
)

dataset1.element_spec

TensorSpec(shape=(10,), dtype=tf.float32, name=None)

In [16]:
# Print each element of dataset1 as a tf.Tensor.
for ele in dataset1:
    print(ele)

tf.Tensor([0.9766 0.7579 0.7854 0.6316 0.9005 0.3292 0.3619 0.783  0.3718 0.4392], shape=(10,), dtype=float32)
tf.Tensor([0.4514 0.0595 0.9553 0.9261 0.6981 0.9049 0.189  0.9202 0.7505 0.8985], shape=(10,), dtype=float32)
tf.Tensor([0.122  0.0525 0.6763 0.3958 0.5565 0.2342 0.4786 0.1929 0.7341 0.4178], shape=(10,), dtype=float32)
tf.Tensor([0.3539 0.9571 0.7417 0.2575 0.9509 0.4461 0.8497 0.0139 0.5344 0.2181], shape=(10,), dtype=float32)


In [17]:
# Slice a (float vector, int32 matrix) tuple along the shared first axis and
# inspect the resulting tuple-structured element spec.
dataset2 = tf.data.Dataset.from_tensor_slices(
    (
        tf.random.uniform(shape=[4]),
        tf.random.uniform(shape=[4, 100], maxval=100, dtype=tf.int32),
    )
)

dataset2.element_spec

(TensorSpec(shape=(), dtype=tf.float32, name=None),
 TensorSpec(shape=(100,), dtype=tf.int32, name=None))

In [18]:
# Print each element of dataset2; every element is a tuple of two tensors.
for ele in dataset2:
    print(ele)

(<tf.Tensor: shape=(), dtype=float32, numpy=0.1791029>, <tf.Tensor: shape=(100,), dtype=int32, numpy=
array([81, 71, 73, 46, 37, 82, 88, 48, 71, 80, 26, 49, 89, 79,  4, 28, 72,
       20, 53, 55, 31, 63, 89, 67, 72, 29, 76, 75, 27, 47, 87, 27, 73, 77,
       40, 86, 96, 69, 52, 26, 50, 48, 93,  4, 54, 23, 34, 56, 91, 49, 17,
       91, 51, 91, 80, 52, 92, 81, 23,  2, 98, 54,  4, 13, 51, 95, 77,  7,
       70, 25, 27, 27, 81, 35, 90, 86, 76, 96, 74, 72,  1, 82, 45, 47, 31,
        3, 17, 50, 31, 63, 91, 25, 63,  7, 51, 90, 53, 78, 60, 49])>)
(<tf.Tensor: shape=(), dtype=float32, numpy=0.633023>, <tf.Tensor: shape=(100,), dtype=int32, numpy=
array([65, 56, 14,  1, 97, 57, 39, 75, 57, 53, 31, 76, 54, 64, 86, 10, 84,
       85, 43, 38, 71, 63, 87,  2, 23, 28, 33, 64,  0, 26, 62, 11, 54, 69,
       80, 34, 27, 97, 37, 61, 22,  5, 40, 12, 39, 47, 36,  1, 69, 74, 20,
       12, 53,  1, 64,  3, 57,  2, 30, 50, 72, 44, 84, 28, 64, 78,  0, 18,
        0, 25, 49, 42, 44,  2, 61, 89, 31, 65, 89, 9

In [19]:
# Same slicing as before, but over an int32 tensor sampled with minval=1 and
# maxval=10; displaying the dataset shows its element_spec.
dataset1 = tf.data.Dataset.from_tensor_slices(
    tf.random.uniform(shape=[4, 10], minval=1, maxval=10, dtype=tf.int32)
)

dataset1

<TensorSliceDataset element_spec=TensorSpec(shape=(10,), dtype=tf.int32, name=None)>

In [20]:
# Print every row of dataset1 as a NumPy array.
for row in dataset1.as_numpy_iterator():
    print(row)

[2 1 8 7 2 5 3 8 3 5]
[2 3 7 8 3 4 8 5 7 9]
[9 5 1 7 6 3 3 5 5 5]
[3 1 3 6 9 5 8 1 3 8]


In [21]:
# Tuple-structured dataset: one float scalar plus one row of 100 int32
# values per element. Displaying the dataset shows its element_spec.
dataset2 = tf.data.Dataset.from_tensor_slices(
    (
        tf.random.uniform(shape=[4]),
        tf.random.uniform(shape=[4, 100], maxval=100, dtype=tf.int32),
    )
)

dataset2

<TensorSliceDataset element_spec=(TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None))>

In [22]:
# Zip the int32 dataset1 with the tuple-structured dataset2; displaying the
# result shows the nested element_spec.
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))

dataset3

<ZipDataset element_spec=(TensorSpec(shape=(10,), dtype=tf.int32, name=None), (TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None)))>

In [23]:
# Unpack the nested (a, (b, c)) structure produced by zip and report the
# shape of each component.
for a, (b, c) in dataset3:
    print(f'shapes: {a.shape}, {b.shape}, {c.shape}')

shapes: (10,), (), (100,)
shapes: (10,), (), (100,)
shapes: (10,), (), (100,)
shapes: (10,), (), (100,)


In [3]:
# Load the Fashion-MNIST data shipped with Keras; the call returns a
# (train, test) pair.
train, test = tf.keras.datasets.fashion_mnist.load_data()

In [4]:
# Split the training pair into its two arrays and divide the images by 255
# (presumably 8-bit pixel intensities -- confirm against the loader docs).
images = train[0] / 255
labels = train[1]

# Slice both arrays along the first axis so each element is (image, label).
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
dataset

<TensorSliceDataset element_spec=(TensorSpec(shape=(28, 28), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.uint8, name=None))>

In [5]:
def count(stop):
    """Yield 0, 1, 2, ... for as long as the value is strictly below `stop`."""
    current = 0
    while True:
        # Stop as soon as the next value would no longer be below the bound.
        if not current < stop:
            return
        yield current
        current += 1


In [6]:
# Sanity-check the plain Python generator before wrapping it in a dataset.
for n in count(5):
    print(n)


0
1
2
3
4


In [7]:
# Wrap the `count` generator as a dataset of scalar int32 elements.
# `args` is forwarded to the generator, so it is called as count(25).
# `output_signature` replaces the deprecated `output_types` / `output_shapes`
# pair and declares dtype and shape together in a single TensorSpec.
ds_counter = tf.data.Dataset.from_generator(
    count,
    args=[25],
    output_signature=tf.TensorSpec(shape=(), dtype=tf.int32),
)


In [8]:
# Repeat the counter endlessly, group it into batches of 10, and print the
# first 10 batches as NumPy arrays.
for count_batch in ds_counter.repeat().batch(10).take(10).as_numpy_iterator():
    print(count_batch)


[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24  0  1  2  3  4]
[ 5  6  7  8  9 10 11 12 13 14]
[15 16 17 18 19 20 21 22 23 24]
[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24  0  1  2  3  4]
[ 5  6  7  8  9 10 11 12 13 14]
[15 16 17 18 19 20 21 22 23 24]


In [9]:
# Repeat the three-element dataset three times and list the result.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3]).repeat(3)
[*dataset.as_numpy_iterator()]


[1, 2, 3, 1, 2, 3, 1, 2, 3]

In [12]:
# Batch 8 consecutive integers in groups of 3 and print each batch's length;
# the final batch is shorter because 8 is not a multiple of 3.
dataset = tf.data.Dataset.range(8).batch(3)
for batch in dataset:
    print(len(batch))


3
3
2


In [13]:
def gen_series():
    """Endlessly yield (index, values) pairs.

    `index` counts up from 0. `values` is a 1-D array of normally distributed
    samples whose length is drawn with np.random.randint(0, 10), so it may be
    empty.
    """
    index = 0
    while True:
        length = np.random.randint(0, 10)
        values = np.random.normal(size=(length,))
        yield index, values
        index += 1


In [16]:
# The generator never terminates on its own, so stop once the index has
# passed 5.
for index, values in gen_series():
    print(index, ":", values)
    if index > 5:
        break


0 : [ 0.8223 -0.6759]
1 : [ 2.3253 -1.2528 -0.5926 -1.5631 -1.8309]
2 : []
3 : [0.175  1.1214]
4 : [-0.4031 -0.7058  1.2841  0.6471 -1.3033 -0.2215  0.5515  0.3227  0.6459]
5 : [ 0.2952  2.4168  0.7396 -2.0503]
6 : [-0.8275 -0.2598  0.2118  0.4374  0.328   1.1694 -0.1542  1.3179  0.4902]


In [17]:
# Wrap `gen_series` as a dataset of (int32 scalar, float32 vector) pairs.
# `output_signature` replaces the deprecated `output_types` / `output_shapes`
# arguments; shape=(None,) marks the vector length as variable, matching the
# random lengths the generator produces.
ds_series = tf.data.Dataset.from_generator(
    gen_series,
    output_signature=(
        tf.TensorSpec(shape=(), dtype=tf.int32),
        tf.TensorSpec(shape=(None,), dtype=tf.float32),
    ),
)

ds_series


<FlatMapDataset element_spec=(TensorSpec(shape=(), dtype=tf.int32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>

In [18]:
# Shuffle with a 20-element buffer, then batch 10 variable-length series
# together, padding the shorter ones.
ds_series_batch = ds_series.shuffle(20).padded_batch(10)

# Take the first batch only and show the ids and the padded sequences,
# separated by a blank line.
ids, sequence_batch = next(iter(ds_series_batch))
print(ids.numpy(), sequence_batch.numpy(), sep="\n\n")


[15 10  7 22 20  0 13 25  8 28]

[[ 1.0022  1.0062  0.      0.      0.      0.      0.      0.      0.    ]
 [ 0.      0.      0.      0.      0.      0.      0.      0.      0.    ]
 [ 0.5929 -0.328  -0.4827 -0.5278 -1.3783  0.      0.      0.      0.    ]
 [-0.4807  0.2437  0.      0.      0.      0.      0.      0.      0.    ]
 [ 1.5879  0.5689  0.5637  0.      0.      0.      0.      0.      0.    ]
 [-0.0503 -1.8119 -0.0366  0.0242  1.4388  0.      0.      0.      0.    ]
 [ 0.0029  0.0527  0.6838 -0.74   -2.0448 -0.2151  0.687   0.      0.    ]
 [ 0.705   2.3204  0.561  -1.2484 -0.5143  0.9329  1.8451 -0.5465 -0.8862]
 [-0.3402  0.7142  0.9689  0.0651  0.1284 -0.7291 -0.2206  0.0974 -0.1446]
 [ 0.1133  0.      0.      0.      0.      0.      0.      0.      0.    ]]


In [19]:
# Build elements of increasing length: x copies of x for x in 1..4.
# The method chain is wrapped in parentheses so it can span several lines;
# without them the indented `.map(...)` line is a syntax error.
A = (
    tf.data.Dataset.range(1, 5, output_type=tf.int32)
    .map(lambda x: tf.fill([x], x))
)
# Pad to the smallest per-batch size that fits all elements.
B = A.padded_batch(2)
for element in B.as_numpy_iterator():
    print(element)


[[1 0]
 [2 2]]
[[3 3 3 0]
 [4 4 4 4]]
