In [5]:
import numpy as np
import pandas as pd

import tensorflow as tf
import keras

## Rolling Window

สรุปการแบ่ง time series data ให้เป็น window โดยใช้ TensorFlow (`tf.data`)

- แปลงข้อมูลให้เป็น tensor dataset
- จัดข้อมูลให้เป็น window
    - `flat_map`
- แยกข้อมูล input/output

In [13]:
# Demo parameters shared by the walkthrough cells below.
num_range = 10  # the toy dataset holds 0 .. num_range - 1
window_size, batch_size = 5, 2  # elements per window / windows per batch

In [14]:
# Toy dataset of the scalars 0 .. num_range - 1 (explicit start, same range).
ds = tf.data.Dataset.range(0, num_range)
ds

<RangeDataset shapes: (), types: tf.int64>

In [15]:
# Eager iteration yields scalar tensors; .numpy() unwraps each value.
for element in ds:
    print(element.numpy())

0
1
2
3
4
5
6
7
8
9


### numpy iterator

In [11]:
# as_numpy_iterator() yields NumPy values directly, so no .numpy() call is needed.
for value in ds.as_numpy_iterator():
    print(value)

0
1
2
3
4
5
6
7
8
9


#### จัดข้อมูลให้เป็น windows ด้วย `.window()` 

```python
ds = [...]
ds = ds.window(window_size, shift=1, drop_remainder=True)
```

> `ds` บรรทัดล่างคือ dataset ของ window (แต่ละ window เป็น dataset ย่อยอีกชั้นหนึ่ง)

In [16]:
# Slide a window of `window_size` elements forward one step at a time and
# drop the trailing windows that would come up short.
ds = tf.data.Dataset.range(num_range).window(
    window_size, shift=1, drop_remainder=True
)
ds

<WindowDataset shapes: DatasetSpec(TensorSpec(shape=(), dtype=tf.int64, name=None), TensorShape([])), types: DatasetSpec(TensorSpec(shape=(), dtype=tf.int64, name=None), TensorShape([]))>

#### ตัวอย่างเมื่อนำข้อมูลในแต่ละ window มาแสดงผล
```
0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 
```

In [17]:
# Every window is itself a dataset, so its values need their own inner pass.
# Each value is followed by a single space, then the line is terminated.
for window in ds:
    print("".join(f"{val.numpy()} " for val in window))

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 


In [19]:
# At this point `ds` is still the nested window dataset: its elements are
# datasets, which the outer as_numpy_iterator() cannot convert (it raises
# TypeError on a fresh top-to-bottom run). Take the NumPy iterator of each
# window instead so the cell does not depend on a later cell's flat_map.
for window in ds:
    print(np.array(list(window.as_numpy_iterator())))

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


#### นำข้อมูลออกจาก window ด้วย `flat_map`

จากข้างบน จะเห็นว่าการ loop ใน `ds` จะได้ window (ซึ่งเป็น dataset ย่อย) ไม่ใช่ข้อมูลโดยตรง จึงต้อง loop อีกชั้นหนึ่ง

ใช้

```
ds.flat_map(lambda window: window.batch(window_size))
```

In [18]:
# Batching each window at its full size turns the nested dataset into a single
# tensor, so flat_map yields one tensor per window.
ds = (
    tf.data.Dataset.range(num_range)
    .window(window_size, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(window_size))
)
for window in ds:
    print(window.numpy())

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


#### แยกข้อมูล x, y

แยกข้อมูลที่เป็น input x, และ output y

- x คือข้อมูลอดีต ... วัน สำหรับทำนายอนาคต y ... วัน

```python
ds = ds.map(lambda window: [window[:-1], window[-1:]])
```
> แบ่งข้อมูลตัวแรกจนถึง `n-1` เป็น x และตัวที่ `n` เป็น y

เช่น ใช้ข้อมูล 4 วันก่อน ทำนาย 1 วันในอนาคต
```
[0 1 2 3] -> [4]
[1 2 3 4] -> [5]
[2 3 4 5] -> [6]
[3 4 5 6] -> [7]
[4 5 6 7] -> [8]
[5 6 7 8] -> [9]
```

In [20]:
# Split every window into its leading values (inputs) and its final value
# (target, kept as a length-1 slice). The pair is returned as an explicit
# tuple, which is what tf.data coerces a returned list into anyway.
ds = (
    tf.data.Dataset.range(num_range)
    .window(window_size, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(window_size))
    .map(lambda w: (w[:-1], w[-1:]))
)
for features, target in ds:
    print(features.numpy(), "->", target.numpy())

[0 1 2 3] -> [4]
[1 2 3 4] -> [5]
[2 3 4 5] -> [6]
[3 4 5 6] -> [7]
[4 5 6 7] -> [8]
[5 6 7 8] -> [9]


#### จัดกลุ่มเป็น batch size

```python
ds = ds.batch(batch_size).prefetch(1)
```

In [21]:
# Same pipeline as before, then group the (inputs, target) pairs into batches
# of `batch_size` and prefetch one batch ahead.
ds = (
    tf.data.Dataset.range(num_range)
    .window(window_size, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(window_size))
    .map(lambda w: (w[:-1], w[-1:]))
    .batch(batch_size)
    .prefetch(1)
)
for x_batch, y_batch in ds:
    print("x: ", x_batch.numpy().tolist())
    print("y: ", y_batch.numpy().tolist())
    print()

x:  [[0, 1, 2, 3], [1, 2, 3, 4]]
y:  [[4], [5]]

x:  [[2, 3, 4, 5], [3, 4, 5, 6]]
y:  [[6], [7]]

x:  [[4, 5, 6, 7], [5, 6, 7, 8]]
y:  [[8], [9]]



#### เพิ่มการ Shuffle
```python
ds = ds.map(lambda window: (window[:-1], window[-1:]))
ds = ds.shuffle(buffer_size=shuffle_buffer)
ds = ds.batch(batch_size).prefetch(1)
```

`shuffle_buffer` = ขนาด buffer ที่ใช้สุ่ม ควรตั้งให้มากกว่าหรือเท่ากับจำนวน sample เพื่อให้สุ่มได้ทั่วทั้งชุดข้อมูล

## Rolling windows from a DataFrame column (Series)

In [24]:
# The input may be a DataFrame series or a NumPy array.
num_range = 100
series = np.arange(num_range)

ds = (
    tf.data.Dataset.from_tensor_slices(series)
    .window(window_size, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(window_size))
    .map(lambda w: (w[:-1], w[-1:]))
)

# Preview only the first 11 (x, y) pairs; the counter starts at 1 so the loop
# stops right after printing the 11th pair.
for i, window in enumerate(ds, start=1):
    x, y = window
    print(x.numpy().round(), "->", y.numpy().round())
    if i > 10:
        break

[0 1 2 3] -> [4]
[1 2 3 4] -> [5]
[2 3 4 5] -> [6]
[3 4 5 6] -> [7]
[4 5 6 7] -> [8]
[5 6 7 8] -> [9]
[6 7 8 9] -> [10]
[ 7  8  9 10] -> [11]
[ 8  9 10 11] -> [12]
[ 9 10 11 12] -> [13]
[10 11 12 13] -> [14]


## Windowed dataset function

In [25]:
def windowed_dataset(series, window_size, batch_size, shuffle_buffer, seed=None):
    """Build a shuffled, batched dataset of (inputs, target) windows.

    Every run of ``window_size + 1`` consecutive values of ``series`` becomes
    one sample: the first ``window_size`` values are the inputs and the final
    value is the target (kept as a length-1 slice).

    Args:
        series: Values accepted by ``tf.data.Dataset.from_tensor_slices``;
            they are sliced along the first axis.
        window_size: Number of input steps per sample.
        batch_size: Number of samples per emitted batch.
        shuffle_buffer: Buffer size passed to ``Dataset.shuffle``.
        seed: Optional seed forwarded to ``Dataset.shuffle`` so the shuffling
            can be made deterministic across runs. The default ``None``
            keeps the previous unseeded behaviour.

    Returns:
        A ``tf.data.Dataset`` yielding ``(inputs, target)`` batches, with one
        batch prefetched ahead.
    """
    ds = tf.data.Dataset.from_tensor_slices(series)
    # One extra element per window carries the target value.
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    # Collapse each nested window dataset into a single tensor.
    ds = ds.flat_map(lambda window: window.batch(window_size + 1))
    ds = ds.shuffle(shuffle_buffer, seed=seed)
    ds = ds.map(lambda window: (window[:-1], window[-1:]))
    return ds.batch(batch_size).prefetch(1)

In [27]:
# Settings for the full pipeline: 24 input steps per sample, batches of 32,
# and a shuffle buffer of 10 windows.
window_size, batch_size, shuffle_buffer = 24, 32, 10

ds = windowed_dataset(
    series=series,
    window_size=window_size,
    batch_size=batch_size,
    shuffle_buffer=shuffle_buffer,
)

#### จากนั้นสามารถนำไปเทรนโมเดลได้เลย

```python
model.fit(ds)
```


In [28]:
def windowed_dataset_X_Y(series, window_size, batch_size, shuffle_buffer, seed=None):
    """Materialise shuffled (inputs, target) windows of a series as NumPy arrays.

    Every run of ``window_size + 1`` consecutive values of ``series`` becomes
    one sample: the first ``window_size`` values are the inputs and the final
    value is the target (kept as a length-1 slice).

    Args:
        series: Values accepted by ``tf.data.Dataset.from_tensor_slices``;
            they are sliced along the first axis.
        window_size: Number of input steps per sample.
        batch_size: Unused; accepted only so the signature matches
            ``windowed_dataset``.
        shuffle_buffer: Buffer size passed to ``Dataset.shuffle``.
        seed: Optional seed forwarded to ``Dataset.shuffle`` so the shuffling
            can be made deterministic across runs. The default ``None``
            keeps the previous unseeded behaviour.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays. ``X`` stacks the input windows
        and ``Y`` the matching targets; for a 1-D series their shapes are
        ``(n_samples, window_size)`` and ``(n_samples, 1)``. When the series
        is too short to form a single window, empty arrays with those same
        trailing axes are returned.
    """
    ds = tf.data.Dataset.from_tensor_slices(series)
    # One extra element per window carries the target value.
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda window: window.batch(window_size + 1))
    ds = ds.shuffle(shuffle_buffer, seed=seed)
    ds = ds.map(lambda window: (window[:-1], window[-1:]))

    pairs = list(ds.as_numpy_iterator())
    if not pairs:
        # np.asarray([]) would collapse to shape (0,); keep the window axes so
        # callers can still rely on X.shape[1] and Y.shape[1].
        trailing = tuple(np.shape(series)[1:])
        return (
            np.empty((0, window_size) + trailing),
            np.empty((0, 1) + trailing),
        )

    X = np.asarray([inputs for inputs, _ in pairs])
    Y = np.asarray([target for _, target in pairs])
    return X, Y

In [30]:
# Materialise the windows as NumPy arrays rather than a tf.data pipeline.
X, Y = windowed_dataset_X_Y(
    series=series,
    window_size=window_size,
    batch_size=batch_size,
    shuffle_buffer=shuffle_buffer,
)

In [31]:
# (number of windows, window_size)
np.shape(X)

(76, 24)

In [32]:
# (number of windows, 1): one target value per window
np.shape(Y)

(76, 1)