In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf
import keras

In [2]:
# Load the sunspot CSV from the local data folder. The preview below shows two
# leftover index columns ("Unnamed: 0.1", "Unnamed: 0") next to the Date and
# the monthly mean sunspot number; only the latter is used in this notebook.
df = pd.read_csv("./data/sunspots.csv")

In [3]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0.1,Unnamed: 0,Date,Monthly Mean Total Sunspot Number
0,0,1749-01-31,96.7
1,1,1749-02-28,104.3
2,2,1749-03-31,116.7
3,3,1749-04-30,92.8
4,4,1749-05-31,141.7


In [4]:
# Keep only the monthly mean sunspot column; this Series is the time series
# that gets windowed in the sections below.
sunspot = df['Monthly Mean Total Sunspot Number']

## Rolling Window

สรุปการแบ่ง time series data ให้เป็น window โดยใช้ TensorFlow (`tf.data`)

In [5]:
# Small settings for the toy examples; both are reassigned further down before
# the real sunspot series is windowed.
window_size = 5  # number of consecutive values in each window
batch_size = 2  # number of (x, y) pairs grouped into one batch

In [6]:
# Toy dataset holding the integers 0..9, used to show each windowing step.
ds = tf.data.Dataset.range(10)
ds

<RangeDataset shapes: (), types: tf.int64>

In [7]:
# Each element is a scalar tensor; .numpy() unwraps it for printing.
for i in ds:
    print(i.numpy())

0
1
2
3
4
5
6
7
8
9


#### จัดข้อมูลให้เป็น windows ด้วย `.window()` 

```python
ds = [...]
ds = ds.window(window_size, shift=1, drop_remainder=True)
```

> `ds` ในบรรทัดล่างคือ dataset ของ window โดยแต่ละ window เป็น dataset ย่อยอีกชั้นหนึ่ง (ยังไม่ใช่ tensor)

In [8]:
# Rebuild the source dataset here instead of re-windowing whatever `ds` an
# earlier cell left behind: this keeps the cell idempotent, so running it
# twice does not try to window a dataset of windows.
ds = tf.data.Dataset.range(10)
# shift=1 slides the window one element at a time; drop_remainder=True discards
# the trailing windows that would hold fewer than `window_size` elements.
ds = ds.window(window_size, shift=1, drop_remainder=True)
ds

<WindowDataset shapes: DatasetSpec(TensorSpec(shape=(), dtype=tf.int64, name=None), TensorShape([])), types: DatasetSpec(TensorSpec(shape=(), dtype=tf.int64, name=None), TensorShape([]))>

#### ตัวอย่างเมื่อนำข้อมูลในแต่ละ window มาแสดงผล
```
0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 
```

In [9]:
# Every element of the windowed dataset is itself a small dataset, so its
# values have to be pulled out one level deeper before they can be printed.
for window in ds:
    print(*(val.numpy() for val in window), sep=" ", end=" \n")

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 


#### นำข้อมูลออกจาก window ด้วย `flat_map`

จากข้างบน จะเห็นว่า loop ใน windows จะไม่ได้ข้อมูล ต้อง loop อีกชั้นนึง

ใช้

```
ds.flat_map(lambda window: window.batch(window_size))
```

In [10]:
# Window the toy range, then flatten each window sub-dataset into one tensor
# by batching all `window_size` elements of the window together.
ds = (
    tf.data.Dataset.range(10)
    .window(window_size, shift=1, drop_remainder=True)
    .flat_map(lambda sub_ds: sub_ds.batch(window_size))
)
for window in ds:
    print(window.numpy())

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


#### แยกข้อมูล x, y

แยกข้อมูลที่เป็น input x, และ output y

- x คือข้อมูลอดีต ... วัน สำหรับทำนายอนาคต y ... วัน

```python
ds = ds.map(lambda window: [window[:-1], window[-1:]])
```
> แบ่งข้อมูลตัวแรกจนถึง `n-1` เป็น x และตัวที่ `n` เป็น y

เช่น ใช้ข้อมูล 4 วันก่อน ทำนาย 1 วันในอนาคต
```
[0 1 2 3] -> [4]
[1 2 3 4] -> [5]
[2 3 4 5] -> [6]
[3 4 5 6] -> [7]
[4 5 6 7] -> [8]
[5 6 7 8] -> [9]
```

In [11]:
# Same windowing as before, followed by a split of every window into
# inputs (all values but the last) and target (the last value, kept 1-D).
ds = (
    tf.data.Dataset.range(10)
    .window(window_size, shift=1, drop_remainder=True)
    .flat_map(lambda sub_ds: sub_ds.batch(window_size))
    .map(lambda values: (values[:-1], values[-1:]))
)
for x, y in ds:
    print(x.numpy(), "->", y.numpy())

[0 1 2 3] -> [4]
[1 2 3 4] -> [5]
[2 3 4 5] -> [6]
[3 4 5 6] -> [7]
[4 5 6 7] -> [8]
[5 6 7 8] -> [9]


#### จัดกลุ่มเป็น batch size

```python
ds = ds.batch(batch_size).prefetch(1)
```

In [12]:
# Full toy pipeline: window -> flatten -> (x, y) split -> group into batches.
# prefetch(1) lets the pipeline prepare one batch ahead of the consumer.
ds = (
    tf.data.Dataset.range(10)
    .window(window_size, shift=1, drop_remainder=True)
    .flat_map(lambda sub_ds: sub_ds.batch(window_size))
    .map(lambda values: (values[:-1], values[-1:]))
    .batch(batch_size)
    .prefetch(1)
)
for x, y in ds:
    print("x: ", x.numpy().tolist())
    print("y: ", y.numpy().tolist())
    print()

x:  [[0, 1, 2, 3], [1, 2, 3, 4]]
y:  [[4], [5]]

x:  [[2, 3, 4, 5], [3, 4, 5, 6]]
y:  [[6], [7]]

x:  [[4, 5, 6, 7], [5, 6, 7, 8]]
y:  [[8], [9]]



#### เพิ่มการ Shuffle
```python
ds = ds.map(lambda window: (window[:-1], window[-1:]))
ds = ds.shuffle(buffer_size=shuffle_buffer)
ds = ds.batch(batch_size).prefetch(1)
```

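`shuffle_buffer` = ขนาดของ buffer ที่ใช้สุ่มลำดับข้อมูล ควรตั้งให้มากกว่าหรือเท่ากับจำนวน sample ทั้งหมด เพื่อให้สลับข้อมูลได้ทั่วถึง (ถ้าน้อยกว่า ข้อมูลจะถูกสลับเฉพาะภายใน buffer เท่านั้น)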

## Rolling windows from a DataFrame (Series)

In [13]:
# Apply the same window -> flatten -> (x, y) split pipeline to the real series.
ds = tf.data.Dataset.from_tensor_slices(sunspot)
ds = ds.window(window_size, shift=1, drop_remainder=True)
ds = ds.flat_map(lambda window: window.batch(window_size))
ds = ds.map(lambda window: (window[:-1], window[-1:]))

# Preview only the first 11 pairs; take() bounds the iteration without a
# hand-maintained counter and break.
for x, y in ds.take(11):
    print(x.numpy().round(), "->", y.numpy().round())

[ 97. 104. 117.  93.] -> [142.]
[104. 117.  93. 142.] -> [139.]
[117.  93. 142. 139.] -> [158.]
[ 93. 142. 139. 158.] -> [110.]
[142. 139. 158. 110.] -> [126.]
[139. 158. 110. 126.] -> [126.]
[158. 110. 126. 126.] -> [264.]
[110. 126. 126. 264.] -> [142.]
[126. 126. 264. 142.] -> [122.]
[126. 264. 142. 122.] -> [126.]
[264. 142. 122. 126.] -> [149.]


## Windowed dataset function

In [14]:
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Turn a 1-D series into a shuffled, batched dataset of (inputs, target) pairs.

    Args:
        series: Values passed to ``tf.data.Dataset.from_tensor_slices``.
        window_size: Number of consecutive values used as inputs per sample.
        batch_size: Number of samples grouped into each batch.
        shuffle_buffer: Buffer size handed to ``Dataset.shuffle``.

    Returns:
        A ``tf.data.Dataset`` whose elements are ``(x, y)`` batches, where each
        ``x`` row holds ``window_size`` values and each ``y`` row holds the
        single value that follows them.
    """
    # One extra element per window: the value right after the inputs is the target.
    span = window_size + 1

    def flatten(sub_ds):
        # Collapse a window sub-dataset into a single tensor of `span` values.
        return sub_ds.batch(span)

    def split_inputs_target(values):
        # Slicing with [-1:] keeps the target as a length-1 tensor.
        return values[:-1], values[-1:]

    return (
        tf.data.Dataset.from_tensor_slices(series)
        .window(span, shift=1, drop_remainder=True)
        .flat_map(flatten)
        .shuffle(shuffle_buffer)
        .map(split_inputs_target)
        .batch(batch_size)
        .prefetch(1)
    )

In [15]:
window_size = 24
batch_size = 32
# Size the shuffle buffer to cover every window, as recommended in the Shuffle
# section above. A tiny buffer (the previous value was 10) only reorders
# neighbouring windows and leaves the samples almost in chronological order.
shuffle_buffer = len(sunspot)

ds = windowed_dataset(sunspot, window_size, batch_size, shuffle_buffer)

In [16]:
def windowed_dataset_X_Y(series, window_size, batch_size, shuffle_buffer):
    """Build shuffled sliding windows over ``series`` and return them as NumPy arrays.

    Args:
        series: Values passed to ``tf.data.Dataset.from_tensor_slices``.
        window_size: Number of consecutive values used as inputs per sample.
        batch_size: Unused here (the result is not batched); kept so the
            signature stays parallel to ``windowed_dataset``.
        shuffle_buffer: Buffer size handed to ``Dataset.shuffle``.

    Returns:
        A tuple ``(X, Y)`` where ``X`` has shape ``(num_windows, window_size)``
        and ``Y`` has shape ``(num_windows, 1)``. Row ``i`` of ``Y`` is the value
        that follows row ``i`` of ``X`` in the series. If the series is too short
        to form a single window, both arrays are empty but keep their 2-D shapes.
    """
    span = window_size + 1  # inputs plus the one target value that follows them

    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(span, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda window: window.batch(span))
    ds = ds.shuffle(shuffle_buffer)

    # Collect whole windows once and split them with NumPy slicing. The explicit
    # reshape keeps a 2-D result even when no window was produced (an empty
    # list would otherwise become a 1-D array of shape (0,)).
    windows = np.asarray([window.numpy() for window in ds]).reshape(-1, span)

    X = windows[:, :-1]
    Y = windows[:, -1:]

    return X, Y

In [17]:
# Materialise the windows as NumPy arrays, reusing the settings assigned in the
# cell that built `ds` (window_size=24 there).
X, Y = windowed_dataset_X_Y(sunspot, window_size, batch_size, shuffle_buffer)

In [18]:
# One row per window, with window_size input values in each row.
X.shape

(3228, 24)

In [19]:
# One target value per window.
Y.shape

(3228, 1)