# Chapter 13: RNN Sample Generation
## Chapter 13.1: Generating Training and Testing Samples for an RNN Model

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Final result: how the training samples for an RNN are generated.
dataset = (
    tf.data.Dataset.range(10)  # the integers 0..9
    # Sliding windows of 5 elements advancing by 1; incomplete trailing windows are dropped.
    .window(5, shift=1, drop_remainder=True)
    # Each window is a nested dataset; batching it by 5 collapses it into a single tensor.
    .flat_map(lambda win: win.batch(5))
    # Everything but the last element is the input; the last element is the target.
    .map(lambda seq: (seq[:-1], seq[-1]))
    # NOTE(review): a buffer of 2 is smaller than the 6 windows produced above, so this
    # presumably only shuffles neighbouring samples - confirm that is intended.
    .shuffle(2)
    # Group the samples two per batch and prefetch one batch.
    .batch(2)
    .prefetch(1)
)

for x, y in dataset:
    print("x = ", "\n", x.numpy())
    print("y = ", "\n", y.numpy())
    print()

x =  
 [[1 2 3 4]
 [0 1 2 3]]
y =  
 [5 4]

x =  
 [[2 3 4 5]
 [3 4 5 6]]
y =  
 [6 7]

x =  
 [[4 5 6 7]
 [5 6 7 8]]
y =  
 [8 9]



In [3]:
# Build a dataset holding the integers 0 through 9 and print them one per line.
dataset = tf.data.Dataset.range(10)
for val in dataset:
    number = val.numpy()
    print(number)

0
1
2
3
4
5
6
7
8
9


In [4]:
# Slide a window of 5 elements over 0..9, advancing one element at a time.
# drop_remainder is not set here, so the trailing windows come out shorter than 5.
dataset = tf.data.Dataset.range(10)
dataset = dataset.window(5, shift=1)
for window_dataset in dataset:
    # Every window is itself a dataset; print its elements back to back on one line.
    print("".join(str(val.numpy()) for val in window_dataset))

01234
12345
23456
34567
45678
56789
6789
789
89
9


In [5]:
# Same sliding window, but windows with fewer than 5 elements are discarded.
dataset = tf.data.Dataset.range(10)
dataset = dataset.window(5, shift=1, drop_remainder=True)
for window_dataset in dataset:
    # Print the elements of each full window back to back on one line.
    print("".join(str(val.numpy()) for val in window_dataset))

01234
12345
23456
34567
45678
56789


In [6]:
# Flatten the nested window datasets: batching each window by its full
# length (5) turns it into a single tensor.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda win: win.batch(5))
)
for window_dataset in dataset:
    # Each element is now a tensor rather than a nested dataset;
    # iterating over it yields its scalar entries.
    print("".join(str(val.numpy()) for val in window_dataset))

01234
12345
23456
34567
45678
56789


In [7]:
# Split every window into an input sequence and the value to predict.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda win: win.batch(5))
    # All but the last element form the input; the last element is the target.
    .map(lambda seq: (seq[:-1], seq[-1]))
)
for x, y in dataset:
    print(x.numpy(), y.numpy())

[0 1 2 3] 4
[1 2 3 4] 5
[2 3 4 5] 6
[3 4 5 6] 7
[4 5 6 7] 8
[5 6 7 8] 9


In [8]:
# Shuffle the (input, target) samples.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda win: win.batch(5))
    .map(lambda seq: (seq[:-1], seq[-1]))
    # The buffer (10) is larger than the 6 samples, so all of them fit in it.
    .shuffle(buffer_size=10)
)
for x, y in dataset:
    print(x.numpy(), y.numpy())

[1 2 3 4] 5
[5 6 7 8] 9
[4 5 6 7] 8
[2 3 4 5] 6
[0 1 2 3] 4
[3 4 5 6] 7


In [9]:
# Batch the samples: every two samples form one batch.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda win: win.batch(5))
    .map(lambda seq: (seq[:-1], seq[-1]))
    # NOTE(review): a buffer of 2 is smaller than the 6 samples, so this presumably
    # only shuffles neighbouring samples - confirm that is intended.
    .shuffle(2)
    .batch(2)
    .prefetch(1)
)

for x, y in dataset:
    print("x = ", "\n", x.numpy())
    print("y = ", "\n", y.numpy())
    print()

x =  
 [[0 1 2 3]
 [1 2 3 4]]
y =  
 [4 5]

x =  
 [[3 4 5 6]
 [4 5 6 7]]
y =  
 [7 8]

x =  
 [[5 6 7 8]
 [2 3 4 5]]
y =  
 [9 6]

