In [3]:
import tensorflow as tf

In [14]:
# Build a toy dataset holding the integers 0 through 9.
dataset = tf.data.Dataset.range(10)
# Slice it into sliding windows of 5 consecutive values: every window starts
# one element after the previous one (shift=1), and drop_remainder=True
# discards the trailing windows that would hold fewer than 5 values.
dataset = dataset.window(size=5, shift=1, drop_remainder=True)
# Each window is itself a small dataset, so it has to be iterated to read its
# values; print one window per line, every value followed by a space.
for win in dataset:
    print("".join(f"{element.numpy()} " for element in win))

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 


## .map() vs .flat_map()

In [29]:
# flat_map: the function turns every window (a nested dataset) into a dataset
# with one batched tensor, and flat_map concatenates those datasets into a
# single flat one -- so iterating it yields plain tensors.
dataset_flat = dataset.flat_map(lambda win: win.batch(5))
for window_tensor in dataset_flat:
    print(window_tensor)

tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int64)
tf.Tensor([1 2 3 4 5], shape=(5,), dtype=int64)
tf.Tensor([2 3 4 5 6], shape=(5,), dtype=int64)
tf.Tensor([3 4 5 6 7], shape=(5,), dtype=int64)
tf.Tensor([4 5 6 7 8], shape=(5,), dtype=int64)
tf.Tensor([5 6 7 8 9], shape=(5,), dtype=int64)


In [21]:
# map (in contrast to flat_map) does NOT flatten: the function still returns a
# dataset per window, and map keeps it as-is, so every element of the result
# is a nested dataset that needs an inner loop to reach its batched tensor.
dataset_nonflat = dataset.map(lambda win: win.batch(5))
for nested_ds in dataset_nonflat:
    # Prints the nested dataset object itself, not its contents.
    print(nested_ds)
    for window_tensor in nested_ds:
        print(window_tensor)
    print()

<_VariantDataset shapes: (None,), types: tf.int64>
tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int64)

<_VariantDataset shapes: (None,), types: tf.int64>
tf.Tensor([1 2 3 4 5], shape=(5,), dtype=int64)

<_VariantDataset shapes: (None,), types: tf.int64>
tf.Tensor([2 3 4 5 6], shape=(5,), dtype=int64)

<_VariantDataset shapes: (None,), types: tf.int64>
tf.Tensor([3 4 5 6 7], shape=(5,), dtype=int64)

<_VariantDataset shapes: (None,), types: tf.int64>
tf.Tensor([4 5 6 7 8], shape=(5,), dtype=int64)

<_VariantDataset shapes: (None,), types: tf.int64>
tf.Tensor([5 6 7 8 9], shape=(5,), dtype=int64)



## Create inputs (x) and labels (y)

In [35]:
# Turn every window into an (x, y) training pair:
#   1. flatten each window into a single tensor of 5 values,
#   2. split that tensor into x = all values but the last and
#      y = the last value (sliced with [-1:], so y keeps a length-1 axis).
dataset_flat = (
    dataset
    .flat_map(lambda win: win.batch(5))
    .map(lambda values: (values[:-1], values[-1:]))
)
for features, label in dataset_flat:
    print(features.numpy(), label.numpy())

[0 1 2 3] [4]
[1 2 3 4] [5]
[2 3 4 5] [6]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]


## .shuffle()

In [37]:

# Shuffle the (x, y) pairs. The buffer of 10 is larger than the 6 pairs in the
# dataset, so all of them are mixed together. No seed is passed, so the order
# is expected to change from run to run.
shuffled_pairs = dataset_flat.shuffle(buffer_size=10)
for features, label in shuffled_pairs:
    print(features.numpy(), label.numpy())

[2 3 4 5] [6]
[1 2 3 4] [5]
[4 5 6 7] [8]
[0 1 2 3] [4]
[3 4 5 6] [7]
[5 6 7 8] [9]


## .batch()

In [56]:
# Shuffle the (x, y) pairs, then group them two at a time: each step yields a
# stacked tensor of 2 inputs and a stacked tensor of the 2 matching labels.
shuffled_batches = dataset_flat.shuffle(buffer_size=10).batch(2)
for features_batch, labels_batch in shuffled_batches:
    print("x = ", features_batch.numpy())
    print("y = ", labels_batch.numpy())
    print()

x =  [[2 3 4 5]
 [5 6 7 8]]
y =  [[6]
 [9]]

x =  [[0 1 2 3]
 [3 4 5 6]]
y =  [[4]
 [7]]

x =  [[1 2 3 4]
 [4 5 6 7]]
y =  [[5]
 [8]]

