In [45]:
import tensorflow as tf
import numpy as np

In [4]:
import sys

In [6]:
# Display the version string of the Python interpreter running this notebook.
sys.version

'3.7.6 (default, Jan  8 2020, 20:23:39) [MSC v.1916 64 bit (AMD64)]'

In [8]:
# Display the version of the imported TensorFlow package.
tf.__version__

'2.2.0-dev20200429'

In [2]:
# Source dataset shared by the cells below: the integers 0 .. 99999.
range_ds = tf.data.Dataset.range(100_000)

In [3]:
# Group the stream into batches of 10 elements. drop_remainder=True discards
# a final batch that holds fewer than 10 elements; by default such a smaller
# batch would be kept.
batches = range_ds.batch(batch_size=10, drop_remainder=True)

# Take the first five batches and print each one.
first_five_batches = batches.take(count=5)
for batch in first_five_batches:
    print(batch.numpy())

[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24 25 26 27 28 29]
[30 31 32 33 34 35 36 37 38 39]
[40 41 42 43 44 45 46 47 48 49]


In [15]:
def dense_1_step(batch):
    """Split one batch of a univariate series into (features, labels).

    Args:
        batch: A sliceable 1-D sequence (a batch produced by ``batches``).

    Returns:
        A tuple ``(features, labels)``: everything except the last two
        elements, and the last two elements.
    """
    label_steps = 2
    # Both slices are taken relative to the end so that they always partition
    # the batch. The previous absolute ``batch[8:]`` only lined up with
    # ``batch[:-2]`` when the batch held exactly 10 elements.
    return batch[:-label_steps], batch[-label_steps:]

# map applies the feature/label split to every batch in the dataset.
predict_dense_1_step = batches.map(dense_1_step)

# Print three of the resulting (features, label) samples.
dense_preview = predict_dense_1_step.take(count=3)
for features, label in dense_preview:
    print(features.numpy(), " => ", label.numpy())

[0 1 2 3 4 5 6 7]  =>  [8 9]
[10 11 12 13 14 15 16 17]  =>  [18 19]
[20 21 22 23 24 25 26 27]  =>  [28 29]


In [16]:
# Re-batch the series into groups of 15 (10 inputs + 5 labels per batch);
# a trailing batch with fewer than 15 elements is dropped.
batches = range_ds.batch(batch_size=15, drop_remainder=True)

def label_next_5_steps(batch):
    """Split one batch into (inputs, labels) with a five-step label.

    Args:
        batch: A sliceable 1-D sequence.

    Returns:
        A tuple of everything except the last five elements (the inputs)
        and the last five elements (the labels). For the 15-element batches
        used in this notebook that is ten inputs and five labels.
    """
    horizon = 5
    inputs = batch[:-horizon]
    targets = batch[-horizon:]
    return inputs, targets

# Apply the input/label split to every 15-element batch.
predict_5_steps = batches.map(label_next_5_steps)

# Print the first three (features, label) pairs.
five_step_preview = predict_5_steps.take(count=3)
for features, label in five_step_preview:
    print(features.numpy(), " => ", label.numpy())

[0 1 2 3 4 5 6 7 8 9]  =>  [10 11 12 13 14]
[15 16 17 18 19 20 21 22 23 24]  =>  [25 26 27 28 29]
[30 31 32 33 34 35 36 37 38 39]  =>  [40 41 42 43 44]


In [20]:
feature_length = 10  # window width: number of input steps per sample
label_length = 5     # number of steps in each predicted label


def take_label_steps(batch):
    """Keep the first ``label_length`` elements of a batch as the label."""
    # Slicing by label_length (instead of a hard-coded [:-5]) keeps the label
    # size correct when feature_length is changed.
    return batch[:label_length]


features = range_ds.batch(feature_length, drop_remainder=True)
# skip(2) drops the first two batches of the label stream, so feature batch i
# is paired with batch i + 2 and one whole batch lies between the inputs and
# their labels.
# NOTE(review): if the labels are meant to follow the inputs immediately,
# this should be skip(1) - confirm the intended forecast offset.
# drop_remainder=True mirrors the feature stream so a short final batch can
# never produce a label with fewer than label_length elements.
labels = (
    range_ds.batch(feature_length, drop_remainder=True)
    .skip(2)
    .map(take_label_steps)
)

# zip pairs every feature batch with the label batch at the same position.
predict_5_steps = tf.data.Dataset.zip((features, labels))

# Distinct loop names so the `features` dataset above is not overwritten.
for inputs, targets in predict_5_steps.take(3):
    print(inputs.numpy(), " => ", targets.numpy())

[0 1 2 3 4 5 6 7 8 9]  =>  [20 21 22 23 24]
[10 11 12 13 14 15 16 17 18 19]  =>  [30 31 32 33 34]
[20 21 22 23 24 25 26 27 28 29]  =>  [40 41 42 43 44]


In [25]:
# Windows of 3 consecutive elements. With shift=None consecutive windows do
# not overlap, and drop_remainder=True discards the incomplete last window.
dataset = tf.data.Dataset.range(7).window(
    size=3, shift=None, stride=1, drop_remainder=True)
for window in dataset:
    window_values = list(window.as_numpy_iterator())
    print(window_values)

[0, 1, 2]
[3, 4, 5]


In [26]:
# shift=1: each window starts one element after the previous one, so
# neighbouring windows share two elements.
dataset = tf.data.Dataset.range(7).window(
    size=3, shift=1, stride=1, drop_remainder=True)
for window in dataset:
    window_values = list(window.as_numpy_iterator())
    print(window_values)

[0, 1, 2]
[1, 2, 3]
[2, 3, 4]
[3, 4, 5]
[4, 5, 6]


In [27]:
# shift=2: each window starts two elements after the previous one, so
# neighbouring windows share one element.
dataset = tf.data.Dataset.range(7).window(
    size=3, shift=2, stride=1, drop_remainder=True)
for window in dataset:
    window_values = list(window.as_numpy_iterator())
    print(window_values)

[0, 1, 2]
[2, 3, 4]
[4, 5, 6]


In [28]:
# shift=3 equals the window size, so the windows tile the input without
# overlapping; the leftover element is dropped with the incomplete window.
dataset = tf.data.Dataset.range(7).window(
    size=3, shift=3, stride=1, drop_remainder=True)
for window in dataset:
    window_values = list(window.as_numpy_iterator())
    print(window_values)

[0, 1, 2]
[3, 4, 5]


In [29]:
# Baseline for the stride comparison below: stride=1 takes consecutive
# elements, and shift is left at its default (None).
dataset = tf.data.Dataset.range(7).window(
    size=3, shift=None, stride=1, drop_remainder=True)
for window in dataset:
    window_values = list(window.as_numpy_iterator())
    print(window_values)

[0, 1, 2]
[3, 4, 5]


In [30]:
# stride=2: every window takes every second input element. Only one complete
# window fits into the seven input elements.
dataset = tf.data.Dataset.range(7).window(
    size=3, shift=None, stride=2, drop_remainder=True)
for window in dataset:
    window_values = list(window.as_numpy_iterator())
    print(window_values)

[0, 2, 4]


In [34]:
# stride=3: every window takes every third input element.
dataset = tf.data.Dataset.range(7).window(
    size=3, shift=None, stride=3, drop_remainder=True)
for window in dataset:
    window_values = list(window.as_numpy_iterator())
    print(window_values)

[0, 3, 6]


In [31]:
# Sliding window baseline for the shift=1 stride comparison: consecutive
# elements (stride=1), window start advancing by one element each time.
dataset = tf.data.Dataset.range(7).window(
    size=3, shift=1, stride=1, drop_remainder=True)
for window in dataset:
    window_values = list(window.as_numpy_iterator())
    print(window_values)

[0, 1, 2]
[1, 2, 3]
[2, 3, 4]
[3, 4, 5]
[4, 5, 6]


In [32]:
# shift=1 with stride=2: the window start advances by one element while each
# window takes every second element.
dataset = tf.data.Dataset.range(7).window(
    size=3, shift=1, stride=2, drop_remainder=True)
for window in dataset:
    window_values = list(window.as_numpy_iterator())
    print(window_values)

[0, 2, 4]
[1, 3, 5]
[2, 4, 6]


In [33]:
# shift=1 with stride=3: each window takes every third element, so only the
# window starting at the first element is complete.
dataset = tf.data.Dataset.range(7).window(
    size=3, shift=1, stride=3, drop_remainder=True)
for window in dataset:
    window_values = list(window.as_numpy_iterator())
    print(window_values)

[0, 3, 6]


In [86]:
def to_numpy(ds):
    """Collect the elements yielded by ``ds.as_numpy_iterator()`` into a list."""
    return list(ds.as_numpy_iterator())


# Windowing a dataset built from a dict: each window is indexed by the same
# keys as the input, and each value is itself a dataset of that key's
# elements.
nested = {'a': [1, 2, 3, 4], 'b': [6, 7, 8, 9]}
dataset = tf.data.Dataset.from_tensor_slices(nested).window(
    size=2, shift=2, stride=1, drop_remainder=True)

# The helper is defined once above instead of being re-created on every
# loop iteration.
for window in dataset:
    print(to_numpy(window['a']))

[1, 2]
[3, 4]


In [70]:
window_size = 5
# Sliding windows of window_size elements, each starting one element after
# the previous one. Every window is itself a dataset.
windows = range_ds.window(window_size, shift=1)


def flatten_window(window):
    """Return the window unchanged so that flat_map concatenates all windows."""
    return window


# A named function is used instead of an inline lambda: with the lambda
# written inside the `for` header, AutoGraph reports "could not parse the
# source code" and recommends creating it in a standalone statement.
for x in windows.flat_map(flatten_window).take(30):
    print(x.numpy(), end=' ')

Cause: could not parse the source code:

for x in windows.flat_map(lambda x: x).take(30):

This error may be avoided by creating the lambda in a standalone statement.

Cause: could not parse the source code:

for x in windows.flat_map(lambda x: x).take(30):

This error may be avoided by creating the lambda in a standalone statement.

0 1 2 3 4 1 2 3 4 5 2 3 4 5 6 3 4 5 6 7 4 5 6 7 8 5 6 7 8 9 

In [71]:
def sub_to_batch(sub):
    """Turn one window dataset into batches of ``window_size`` elements.

    Reads the notebook-level ``window_size``. Because of
    ``drop_remainder=True`` a window holding fewer than ``window_size``
    elements produces no batch.
    """
    dense_window = sub.batch(batch_size=window_size, drop_remainder=True)
    return dense_window

# Convert every window into a dense batch and print the first five.
dense_windows = windows.flat_map(sub_to_batch)
for example in dense_windows.take(count=5):
    print(example.numpy())

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]


In [72]:
def make_window_dataset(ds, window_size=5, shift=1, stride=1):
    """Build a dataset of dense sliding windows over ``ds``.

    Args:
        ds: Source dataset providing a ``window`` method.
        window_size: Number of elements in every emitted window.
        shift: Forwarded to ``ds.window`` as ``shift``.
        stride: Forwarded to ``ds.window`` as ``stride``.

    Returns:
        The windows flat-mapped into batches of exactly ``window_size``
        elements; windows with fewer elements are dropped.
    """
    def _to_dense(window):
        # drop_remainder=True discards windows shorter than window_size.
        return window.batch(batch_size=window_size, drop_remainder=True)

    nested_windows = ds.window(size=window_size, shift=shift, stride=stride)
    return nested_windows.flat_map(_to_dense)

In [76]:
# Windows of 10 elements that take every second value of the series, with
# consecutive windows starting one element apart.
ds = make_window_dataset(range_ds, window_size=10, shift=1, stride=2)

# Print the first ten windows.
strided_preview = ds.take(count=10)
for example in strided_preview:
    print(example.numpy())

[ 0  2  4  6  8 10 12 14 16 18]
[ 1  3  5  7  9 11 13 15 17 19]
[ 2  4  6  8 10 12 14 16 18 20]
[ 3  5  7  9 11 13 15 17 19 21]
[ 4  6  8 10 12 14 16 18 20 22]
[ 5  7  9 11 13 15 17 19 21 23]
[ 6  8 10 12 14 16 18 20 22 24]
[ 7  9 11 13 15 17 19 21 23 25]
[ 8 10 12 14 16 18 20 22 24 26]
[ 9 11 13 15 17 19 21 23 25 27]
