In [None]:
from google.colab import drive
# Mount Google Drive inside the Colab runtime at /content/drive.
# NOTE(review): none of the cells visible here read from Drive — confirm
# whether this mount is still required.
mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


## 3.3.1 from_tensor_slices

In [1]:
import tensorflow as tf

# Slice the list along its first axis: every list entry becomes one
# scalar element of the dataset.
values = [1, 2, 3, 4, 5]
dataset = tf.data.Dataset.from_tensor_slices(values)

# Bare last expression so the notebook displays the dataset's repr.
dataset

<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [2]:
# Iterating the dataset yields tensors; .numpy() unwraps each one into a
# plain NumPy value before it is printed.
for element in dataset:
  print(element.numpy())

1
2
3
4
5


## 3.3.2 as_numpy_iterator

In [3]:
# Printing the elements directly (no .numpy()) shows that each one is a
# tf.Tensor object rather than a plain number.
values = [1, 2, 3, 4, 5]
dataset = tf.data.Dataset.from_tensor_slices(values)

for tensor in dataset:
  print(tensor)

tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)


In [4]:
# as_numpy_iterator() converts every element to NumPy while iterating, so
# the loop body receives plain values instead of tf.Tensor objects.
for value in dataset.as_numpy_iterator():
  print(value)

1
2
3
4
5


## 3.3.3 range

In [5]:
# Single argument: range(stop) counts 0, 1, ..., stop - 1.
stop = 10
dataset = tf.data.Dataset.range(stop)

list(dataset.as_numpy_iterator())

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [6]:
# Two arguments: range(start, stop); the stop value itself is excluded.
start, stop = 0, 10
dataset = tf.data.Dataset.range(start, stop)

list(dataset.as_numpy_iterator())

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [7]:
# Three arguments: range(start, stop, step); here every second value.
start, stop, step = 0, 10, 2
dataset = tf.data.Dataset.range(start, stop, step)

list(dataset.as_numpy_iterator())

[0, 2, 4, 6, 8]

In [8]:
# A negative step counts downwards: from 10 towards 0, with the stop
# value (0) still excluded.
start, stop, step = 10, 0, -1
dataset = tf.data.Dataset.range(start, stop, step)

list(dataset.as_numpy_iterator())

[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]

In [9]:
# Same countdown, but output_type makes the elements float32 values
# instead of integers.
start, stop, step = 10, 0, -1
dataset = tf.data.Dataset.range(start, stop, step, output_type=tf.float32)

list(dataset.as_numpy_iterator())

[10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]

## 3.3.4 random

In [None]:
# Three pseudo-random integer datasets, each cut down to its first
# SAMPLE_COUNT values with take().
SAMPLE_COUNT = 10
dataset1 = tf.data.Dataset.random(seed=0).take(SAMPLE_COUNT)  # fixed seed 0
dataset2 = tf.data.Dataset.random(seed=1).take(SAMPLE_COUNT)  # fixed seed 1
dataset3 = tf.data.Dataset.random().take(SAMPLE_COUNT)        # no seed given

# The first two lists are followed by a blank line, the last one is not.
for seeded in (dataset1, dataset2):
  print(list(seeded.as_numpy_iterator()), '\n')
print(list(dataset3.as_numpy_iterator()))

[361556241, 1459367533, 3832325198, 1250307605, 2508600833, 2749596746, 2322339719, 3744817846, 3225503313, 2547244461] 

[2870909127, 3455438759, 4052121393, 1279227022, 15397477, 3296944994, 2768979786, 2402972633, 3873597779, 777939678] 

[3335648395, 2027697215, 1869671144, 2345491860, 3633128765, 1119329926, 2359366176, 3130902701, 2505344664, 2193098651]


## 3.3.5 shuffle

In [None]:
# Show the original order first.
ordered = tf.data.Dataset.range(10)
print(list(ordered.as_numpy_iterator()), '\n')

# The shuffle buffer is as large as the dataset (10 elements). Without
# reshuffle_each_iteration=False, the two passes below come out in
# different orders even though a seed is given.
dataset = ordered.shuffle(buffer_size=10, seed=0)
for epoch in range(2):
  print(list(dataset.as_numpy_iterator()))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 

[2, 0, 1, 4, 5, 6, 9, 7, 8, 3]
[9, 3, 6, 5, 7, 2, 1, 8, 0, 4]


In [None]:
# Show the original order first.
ordered = tf.data.Dataset.range(10)
print(list(ordered.as_numpy_iterator()), '\n')

# reshuffle_each_iteration=False freezes the shuffled order: every pass
# over the dataset replays the same permutation.
dataset = ordered.shuffle(
    buffer_size=10,
    seed=0,
    reshuffle_each_iteration=False,
)

for epoch in range(2):
  print(list(dataset.as_numpy_iterator()))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 

[1, 2, 8, 4, 9, 6, 3, 0, 5, 7]
[1, 2, 8, 4, 9, 6, 3, 0, 5, 7]


## 3.3.6 concatenate

In [None]:
# Two adjacent integer ranges: 0..4 and 5..9.
a = tf.data.Dataset.range(0, 5)
b = tf.data.Dataset.range(5, 10)

first_half = list(a.as_numpy_iterator())
second_half = list(b.as_numpy_iterator())
print(first_half)
print(second_half, '\n')

# concatenate() appends b's elements after a's.
dataset = a.concatenate(b)
combined = list(dataset.as_numpy_iterator())
print(combined)

[0, 1, 2, 3, 4]
[5, 6, 7, 8, 9] 

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [None]:
# concatenate() requires both datasets to have compatible element types.
# `a` was built with Dataset.range (integers) while `c` holds a string, so
# TensorFlow rejects the call with a TypeError.
# The error is the point of this cell, so it is caught and printed rather
# than left uncaught: an uncaught exception would stop "Run All" here and
# none of the following cells would execute.
c = tf.data.Dataset.from_tensor_slices(["abc"])
try:
  a.concatenate(c)
except TypeError as err:
  print(f"TypeError: {err}")

TypeError: ignored

## 3.3.7 take, take_while

In [None]:
dataset = tf.data.Dataset.range(10)
print(list(dataset.as_numpy_iterator()), '\n')

# take(3) keeps only the first three elements; `dataset` itself is not
# reassigned and still holds all ten.
first_three = dataset.take(3)
list(first_three.as_numpy_iterator())

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 



[0, 1, 2]

In [None]:
# take_while() passes elements through for as long as the predicate holds.
# Build from a fresh range(10) instead of reassigning `dataset` in terms of
# itself, so the result does not depend on which cell last assigned
# `dataset` and the cell can be re-run safely.
dataset = tf.data.Dataset.range(10).take_while(lambda x: x < 5)

list(dataset.as_numpy_iterator())

[0, 1, 2, 3, 4]

## 3.3.8 filter

In [None]:
def is_below_five(x):
  """Predicate for filter(): true for elements smaller than 5."""
  return x < 5

numbers = tf.data.Dataset.range(10)

# filter() keeps only the elements for which the predicate is true.
dataset = numbers.filter(is_below_five)
list(dataset.as_numpy_iterator())

[0, 1, 2, 3, 4]

In [None]:
def filter_fn(x):
  """Predicate for filter(): true only when x equals 5."""
  is_five = tf.math.equal(x, 5)
  return is_five

dataset = tf.data.Dataset.range(10)

# A named function can be passed to filter() just like a lambda.
only_five = dataset.filter(filter_fn)
list(only_five.as_numpy_iterator())

[5]

## 3.3.9 apply

In [None]:
def transformation_fn(ds):
  """Dataset-to-dataset transformation: keep the elements smaller than 5."""
  below_five = ds.filter(lambda x: x < 5)
  return below_five

dataset = tf.data.Dataset.range(10)

# apply() hands the whole dataset to transformation_fn and continues with
# whatever dataset that function returns.
transformed = dataset.apply(transformation_fn)
list(transformed.as_numpy_iterator())

[0, 1, 2, 3, 4]

## 3.3.10 batch

In [None]:
dataset = tf.data.Dataset.range(10)

# Group consecutive elements into batches of 3. Ten elements do not divide
# evenly by 3, so the final batch holds the single leftover element.
batched = dataset.batch(3)
list(batched.as_numpy_iterator())

[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8]), array([9])]

In [None]:
dataset = tf.data.Dataset.range(10)

# drop_remainder=True discards the incomplete final batch, so every batch
# has exactly 3 elements.
full_batches = dataset.batch(3, drop_remainder=True)
list(full_batches.as_numpy_iterator())

[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8])]

## 3.3.11 map

In [None]:
dataset = tf.data.Dataset.range(5, 10)
print(list(dataset.as_numpy_iterator()), '\n')

# map() applies the function to each element; here every value is doubled.
doubled = dataset.map(lambda x: x * 2)
print(list(doubled.as_numpy_iterator()))

[5, 6, 7, 8, 9] 

[10, 12, 14, 16, 18]


## 3.3.12 flat_map

In [None]:
def unbatch_fn(batch):
  """Turn one batch into a dataset of its individual elements."""
  return tf.data.Dataset.from_tensor_slices(batch)

dataset = tf.data.Dataset.range(10).batch(3)
print(list(dataset.as_numpy_iterator()), '\n')

# flat_map() maps every batch to its own dataset and chains the results
# into one flat sequence, undoing the batching.
flattened = dataset.flat_map(unbatch_fn)
list(flattened.as_numpy_iterator())

[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8]), array([9])] 



[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

## 3.3.13 window

In [None]:
# window() produces a dataset of datasets: every element is itself a small
# dataset holding up to `size` consecutive values.
dataset = tf.data.Dataset.range(10).window(size=5)

for sub_dataset in dataset:
  values = list(sub_dataset.as_numpy_iterator())
  print(values)

[0, 1, 2, 3, 4]
[5, 6, 7, 8, 9]


In [None]:
# size=4: ten elements do not divide evenly by 4, so the last window only
# contains the two leftover values.
dataset = tf.data.Dataset.range(10).window(size=4)

for sub_dataset in dataset:
  values = list(sub_dataset.as_numpy_iterator())
  print(values)

[0, 1, 2, 3]
[4, 5, 6, 7]
[8, 9]


In [None]:
# drop_remainder=True: windows with fewer than `size` elements are
# discarded, so the short trailing window disappears.
dataset = tf.data.Dataset.range(10).window(size=4, drop_remainder=True)

for sub_dataset in dataset:
  values = list(sub_dataset.as_numpy_iterator())
  print(values)

[0, 1, 2, 3]
[4, 5, 6, 7]


In [None]:
# shift=1: each window starts one element after the previous one, so
# consecutive windows overlap in three of their four values.
dataset = tf.data.Dataset.range(10).window(
    size=4,
    drop_remainder=True,
    shift=1,
)

for sub_dataset in dataset:
  values = list(sub_dataset.as_numpy_iterator())
  print(values)

[0, 1, 2, 3]
[1, 2, 3, 4]
[2, 3, 4, 5]
[3, 4, 5, 6]
[4, 5, 6, 7]
[5, 6, 7, 8]
[6, 7, 8, 9]


In [None]:
# shift=2: each window starts two elements after the previous one.
dataset = tf.data.Dataset.range(10).window(
    size=4,
    drop_remainder=True,
    shift=2,
)

for sub_dataset in dataset:
  values = list(sub_dataset.as_numpy_iterator())
  print(values)

[0, 1, 2, 3]
[2, 3, 4, 5]
[4, 5, 6, 7]
[6, 7, 8, 9]


In [None]:
# stride=1: elements inside a window are consecutive. The recorded output
# is the same as for the shift=2 example that passes no stride at all.
dataset = tf.data.Dataset.range(10).window(
    size=4,
    drop_remainder=True,
    shift=2,
    stride=1,
)

for sub_dataset in dataset:
  values = list(sub_dataset.as_numpy_iterator())
  print(values)

[0, 1, 2, 3]
[2, 3, 4, 5]
[4, 5, 6, 7]
[6, 7, 8, 9]


In [None]:
# stride=2: each window takes every second element, so four values span
# seven positions of the input (e.g. 0, 2, 4, 6).
dataset = tf.data.Dataset.range(10).window(
    size=4,
    drop_remainder=True,
    shift=2,
    stride=2,
)

for sub_dataset in dataset:
  values = list(sub_dataset.as_numpy_iterator())
  print(values)

[0, 2, 4, 6]
[2, 4, 6, 8]
