# 此筆記介紹 many to one這個種類的 RNN。

所謂many to one即：於多個(many)連續的時間點讀取資訊(例如：$\vec{x}_{t=0},\vec{x}_{t=1},\vec{x}_{t=2}$)，然後來預測接下來單一(one)個時間點的資訊 ($\vec{x}_{t=3}$)。

---

In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import tensorflow as tf

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
import numpy as np

import pandas as pd

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, Dense

---

## Case 1: 使用SimpleRNN學習 $[1,0,0,1,0,0,1,0,0,1,0,0,1,....]$

我們接下來會利用 $x_{t=0},x_{t=1},x_{t=2}$ 來預測 $x_{t=3}$。

### 1.1. 先產生用於訓練和測試的資料。

In [None]:
# Build a toy dataset from the repeating pattern [1, 0, 0, 1, 0, 0, ...].
num_samples = 200  # total number of sequence samples
num_train = 150    # the first 150 samples train the model, the other 50 test it

# The three distinct 4-step windows of the period-3 pattern. In every row the
# first three values are what the network reads and the last one is the
# value it has to predict.
fake_data = np.array([[0, 1, 0, 0], [1, 0, 0, 1], [0, 0, 1, 0]])

# Pick one row of fake_data per sample, uniformly at random, in a single
# vectorised draw instead of a Python loop.
rand_idx = np.random.choice(len(fake_data), size=num_samples)
data = fake_data[rand_idx].astype(np.float32)

# Split columns into inputs (all but the last) and target (the last), and
# rows into train / test partitions.
train_x = data[:num_train, :-1]
test_x = data[num_train:num_samples, :-1]
train_y = data[:num_train, -1].astype(np.int8)
test_y = data[num_train:num_samples, -1].astype(np.int8)
print(train_y.shape, train_x.shape, test_y.shape, test_x.shape)

# Recurrent layers expect 3-D input (samples, time steps, features), so add a
# trailing feature axis of size 1.
train_x = train_x.reshape((*train_x.shape, 1))
test_x = test_x.reshape((*test_x.shape, 1))
print(train_y.shape, train_x.shape, test_y.shape, test_x.shape)

### 1.2. 建立並訓練模型，將模型的訓練情形畫出。

In [None]:
hidden_neurons = 30  # number of units in the SimpleRNN layer

time_dim = 3  # time steps read per sample
seq_dim = 1   # features per time step

# Build the model: a SimpleRNN layer followed by a Dense output layer.
model = Sequential()
model.add(SimpleRNN(input_shape=(time_dim, seq_dim),
                    units=hidden_neurons,
                    return_sequences=False))  # many-to-one: keep only the last output
model.add(Dense(1, activation="sigmoid"))     # one probability for the binary target
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Train the model, holding out 30% of the training data for validation.
history = model.fit(train_x, train_y,
                    epochs=100, batch_size=32, validation_split=0.3)

# Plot the training curves. The accuracy metric is logged under 'acc' by
# older tf.keras releases and under 'accuracy' by newer ones, so use
# whichever key this run actually produced instead of hard-coding one.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
plt.plot(history.history[acc_key], ms=5, marker='o', label='accuracy')
plt.plot(history.history['val_' + acc_key], ms=5, marker='o', label='val accuracy')
plt.legend()
plt.show()

### 1.3. 丟3個樣本進去做預測。

In [None]:
# Three hand-written input windows, shaped (samples, time steps, features).
x_pred = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]]).reshape((3, 3, 1))

# The model emits one sigmoid output per sample; threshold it at 0.5 to get
# a hard 0/1 prediction.
probabilities = model.predict(x_pred)[:, 0]
predicted_labels = (probabilities > 0.5).astype(np.int64)
print("predictions=", predicted_labels)

這三個樣本事實上機器都看過，故應該會預測得很完美。

---

## Case 2. 輸入時間序列： $x_{t=1},x_{t=2},...,x_{t=n}$，預測 $\vec{x}_{t=n+1}$。

### 2.1. 建立訓練用序列資料：$\sin(x)$，其中 $x\in[0,\pi]$

In [None]:
# Sample sin(x) at 100 evenly spaced points covering [0, pi].
x = np.linspace(0.0, np.pi, num=100)
y = np.sin(x)

從 $\sin(x)$ 序列中抽取 $x_{t=1},x_{t=2},\ldots,x_{t=25}$。其中，$x_{t=1},x_{t=2},\ldots,x_{t=20}$ 將於不同的時間點丟入模型，最後，模型會預測出單一向量 $\vec{y} = (x_{t=21},x_{t=22},x_{t=23},\ldots,x_{t=25})$。

In [None]:
n = 20             # consecutive time steps fed to the model, one value per step
m = 5              # dimension of the vector the model has to predict
num_samples = 300  # number of training samples

# Each sample is a window of n + m consecutive points of y, so the valid start
# offsets are 0 .. len(y) - (n + m) inclusive; np.random.choice(k) draws from
# 0 .. k - 1, hence the + 1.
num_starts = len(y) - (n + m) + 1
starts = np.random.choice(num_starts, size=num_samples)

# Row j of `windows` is y[starts[j] : starts[j] + n + m]. The first n columns
# are the model input and the remaining m columns are the target.
windows = y[starts[:, np.newaxis] + np.arange(n + m)].astype(np.float32)
train_x = windows[:, :n].copy()
train_y = windows[:, n:].copy()

# Show one training sample: inputs followed by the values to be predicted.
sample_idx = 8
plt.scatter(x=np.arange(n), y=train_x[sample_idx, :], label="train_x")
plt.scatter(x=np.arange(n, n + m), y=train_y[sample_idx, :], label="train_y")
plt.legend()
plt.show()

輸入給RNN的資料需要是3D，故我們在這裡轉換 `train_x` 的shape。

In [None]:
# Append a trailing feature axis of size 1: (samples, time steps) becomes
# (samples, time steps, 1).
train_x = train_x[..., np.newaxis]

### 2.2. 建立並訓練模型，且畫出模型訓練情形

In [None]:
hidden_neurons = 15  # number of units in the SimpleRNN layer

time_dim = n  # time steps read per sample (matches the training windows)
seq_dim = 1   # features per time step

# SimpleRNN reads the whole input window and hands its final output to a
# Dense layer that emits all m target values at once.
model = Sequential()
model.add(SimpleRNN(input_shape=(time_dim, seq_dim),
                    units=hidden_neurons,
                    return_sequences=False))  # many-to-one: keep only the last output
model.add(Dense(m, activation='tanh'))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])
model.summary()

# Train the model, holding out 30% of the training data for validation.
history = model.fit(train_x, train_y,
                    epochs=200, batch_size=32, validation_split=0.3)

# Plot the training curves on a log scale. The 'mse' metric is logged under
# 'mean_squared_error' by older tf.keras releases and under 'mse' by newer
# ones, so use whichever key this run actually produced.
mse_key = 'mse' if 'mse' in history.history else 'mean_squared_error'
plt.yscale('log')
plt.plot(history.history[mse_key], ms=5, marker='o', label='mse')
plt.plot(history.history['val_' + mse_key], ms=5, marker='o', label='val mse')
plt.legend()
plt.show()

### 2.3. 丟一個隨機樣本進去做預測。

In [None]:
# Draw one random window of n inputs followed by m targets from y. Valid
# start offsets are 0 .. len(y) - (n + m) inclusive.
rand_num = np.random.choice(len(y) - (n + m) + 1)
test_x = y[rand_num:rand_num + n]
test_y = y[rand_num + n:rand_num + n + m]

# The model expects (samples, time steps, features); this is a single sample.
y_pred = model.predict(test_x.reshape(1, n, 1))

# Compare the true continuation with the predicted one. y_pred has one row
# per sample, so row 0 holds the m predicted values.
input_steps = np.arange(n)
target_steps = np.arange(n, n + m)
plt.scatter(x=input_steps, y=test_x, label="x")
plt.scatter(x=target_steps, y=test_y, label="y")
plt.scatter(x=target_steps, y=y_pred[0], label="pred_y")
plt.legend()
plt.show()

一般來說，我們不會拿時序資料來預測一個含有時序資訊的向量($\vec{x}_{t=n+1}$)。因為，該時序向量內的資訊可能也有時間上的關聯性。該向量的末端元素可能和該向量內的前端元素比較無關聯，卻和前一個元素有極大關聯性。在這個模型下，時序向量內元素間的關聯性將被忽略。

---

## Case 3. 輸入時間序列： $x_{t=1},x_{t=2},...,x_{t=n}$，預測 $x_{t=n+1}$。

記得剛才Case1的序列，其週期為3。以下我們來嘗試週期為15的序列：

### 3.1. 建立訓練用序列資料：$\cos(2\pi b j +\phi)$, where $b=\frac{1}{15}$, $j=0,1,2,...,299$ and $\phi=0$.

In [None]:
# Series cos(2*pi*b*j + phi) with b = 1/15, i.e. a period of 15 samples.
x = np.arange(300)
phi = 0
b = 1. / 15.
y = np.cos(2. * np.pi * b * x + phi)

n = 20             # time steps fed to the model per sample
m = 1              # number of values to predict after the input window
num_samples = 300  # number of training samples

# NOTE(review): start offsets are drawn from the first 100 points only,
# although the series has 300 points. Kept unchanged; confirm whether this
# restriction is intentional.
starts = np.random.choice(100 - (n + m), size=num_samples)

# Row j of `windows` is y[starts[j] : starts[j] + n + m]. The first n columns
# are the model input and the remaining m columns are the target.
windows = y[starts[:, np.newaxis] + np.arange(n + m)].astype(np.float32)
train_x = windows[:, :n].copy()
train_y = windows[:, n:].copy()

# Show one training sample: inputs followed by the value to be predicted.
rand_choice = 8
plt.scatter(x=np.arange(n), y=train_x[rand_choice, :], label="train_x")
plt.scatter(x=np.arange(n, n + m), y=train_y[rand_choice, :], label="train_y")
plt.legend()
plt.show()

# Recurrent layers expect 3-D input: (samples, time steps, features).
train_x = train_x.reshape(*train_x.shape, 1)

### 3.2. 建立並訓練模型，且畫出模型訓練情形

In [None]:
hidden_neurons = 15  # number of units in the SimpleRNN layer

time_dim = n  # time steps read per sample (matches the training windows)
seq_dim = 1   # features per time step

# SimpleRNN reads the whole input window and hands its final output to a
# Dense layer that emits the m predicted values.
model = Sequential()
model.add(SimpleRNN(input_shape=(time_dim, seq_dim),
                    units=hidden_neurons,
                    return_sequences=False))  # many-to-one: keep only the last output
model.add(Dense(m, activation='tanh'))
model.compile(loss='mean_squared_error',
              optimizer='rmsprop',
              metrics=['mse'])
model.summary()

# Train the model, holding out 30% of the training data for validation.
history = model.fit(train_x, train_y,
                    epochs=200, batch_size=32, validation_split=0.3)

# Plot the training curves on a log scale. The 'mse' metric is logged under
# 'mean_squared_error' by older tf.keras releases and under 'mse' by newer
# ones, so use whichever key this run actually produced.
mse_key = 'mse' if 'mse' in history.history else 'mean_squared_error'
plt.yscale('log')
plt.plot(history.history[mse_key], ms=5, marker='o', label='mse')
plt.plot(history.history['val_' + mse_key], ms=5, marker='o', label='val mse')
plt.legend()
plt.show()

### 3.3. 丟入64個隨機樣本進去做預測。抽看其中一個樣本的預測結果。

In [None]:
# Rebuild the series with phase pi/3 (training used phi = 0), so the test
# windows are shifted relative to the training series.
x = np.arange(300)
phi = np.pi / 3.
b = 1. / 15.
y = np.cos(2. * np.pi * b * x + phi)

test_samples = 64

# Draw the start offset of every test window. Valid offsets are
# 0 .. len(y) - (n + m) inclusive.
starts = np.random.choice(len(y) - (n + m) + 1, size=test_samples)
rand_nums = starts.astype(np.float64)  # start offsets, stored as floats

# Row j of `windows` is y[starts[j] : starts[j] + n + m]. The first n columns
# are the model input and the remaining m columns are the true continuation.
windows = y[starts[:, np.newaxis] + np.arange(n + m)]
tests_x = windows[:, :n].reshape(test_samples, n, 1)
tests_y = windows[:, n:].copy()

y_pred = model.predict(tests_x)

# Inspect one test sample at its real position on the time axis.
choice_idx = 5
start = rand_nums[choice_idx]
input_steps = np.arange(start, start + n)
target_steps = np.arange(start + n, start + n + m)

plt.scatter(x=input_steps, y=tests_x[choice_idx], label="x")
plt.scatter(x=target_steps, y=tests_y[choice_idx], label="y")
plt.scatter(x=target_steps, y=y_pred[choice_idx], label="pred_y")
plt.legend()
plt.show()

### 3.4. 看數個預測結果是否貼合真實資料。

In [None]:
# Collect every predicted point as an (x, y) pair: row 0 holds the time index
# of each prediction, row 1 the predicted value.
pred_y_xy = np.zeros((2, m * test_samples))

# Sample j predicts the m steps that start right after its n-step input
# window, i.e. at rand_nums[j] + n.
pred_y_xy[0, :] = (rand_nums[:, np.newaxis] + n + np.arange(m)).ravel()
pred_y_xy[1, :] = np.asarray(y_pred).ravel()

# Overlay the predictions on the true series and zoom in on the first 100
# time steps.
plt.scatter(x, y)

plt.scatter(x=pred_y_xy[0, :], y=pred_y_xy[1, :])
plt.xlim((0, 100))

---

## Case 4. 輸入時間序列： $x_{t=1},x_{t=2},...,x_{t=n}$，預測 $x_{t=n+1}$。

以下我們來嘗試一個不具週期性（因為 $b$ 是無理數，序列永遠不會完全重複，可視為週期無限大），卻彷彿有週期的序列：

### 4.1. 建立訓練用序列資料：$\cos(2\pi b j +\phi)$, where $b=\frac{1+\sqrt{5}}{2}$, $j=0,1,2,...,999$ and $\phi=0$.

In [None]:
# Build the series cos(2*pi*b*j + phi) with b equal to the golden ratio.
x = np.arange(1000)
phi = 0.
b = (1. + np.sqrt(5.)) / 2.
y = np.cos(2. * np.pi * b * x + phi)

# Sampling configuration.
n = 20             # each training input covers 20 time steps
m = 1              # predict the 21st time step, a vector of dimension 1
num_samples = 500  # number of training samples

# Draw one start offset per sample, then slice the input window and its
# target out of y.
# NOTE(review): offsets are limited to the first 100 points although the
# series has 1000 points. Kept unchanged; confirm whether this is intentional.
starts = [np.random.choice(100 - (n + m)) for _ in range(num_samples)]
train_x = np.array([y[s:s + n] for s in starts], dtype=np.float32)
train_y = np.array([y[s + n:s + n + m] for s in starts], dtype=np.float32)

# Plot one training sample to see what it looks like.
rand_choice = 8
plt.plot(np.arange(n), train_x[rand_choice, :],
         marker='o', ms=7, label="train_x")
plt.scatter(x=np.arange(n, n + 1), y=train_y[rand_choice, :],
            color='green', label="train_y")
# plt.legend()
plt.show()

# SimpleRNN needs 3-D input, so add a trailing feature axis of size 1.
train_x = train_x[..., np.newaxis]

將局部的$\cos(2\pi b j +\phi)$序列畫出來看一下：

In [None]:
# Draw the series as a connected line with markers and zoom in on the first
# 50 time steps.
plt.plot(x, y, marker='o', ms=7)
plt.xlim(0, 50)

### 4.2. 建立並訓練模型，且畫出模型訓練情形

In [None]:
hidden_neurons = 50  # number of units in the SimpleRNN layer

time_dim = n  # time steps read per sample (matches the training windows)
seq_dim = 1   # features per time step

# SimpleRNN reads the whole input window and hands its final output to a
# Dense layer that emits the m predicted values.
model = Sequential()
model.add(SimpleRNN(input_shape=(time_dim, seq_dim),
                    units=hidden_neurons,
                    return_sequences=False))  # many-to-one: keep only the last output
model.add(Dense(m, activation='tanh'))
model.compile(loss='mean_squared_error',
              optimizer='adam',
              metrics=['mse'])
model.summary()

# Train the model, holding out 20% of the training data for validation.
history = model.fit(train_x, train_y,
                    epochs=200, batch_size=64, validation_split=0.2)

# Plot the training curves on a log scale. The 'mse' metric is logged under
# 'mean_squared_error' by older tf.keras releases and under 'mse' by newer
# ones, so use whichever key this run actually produced.
mse_key = 'mse' if 'mse' in history.history else 'mean_squared_error'
plt.yscale('log')
plt.plot(history.history[mse_key], ms=5, marker='o', label='mse')
plt.plot(history.history['val_' + mse_key], ms=5, marker='o', label='val mse')
plt.legend()
plt.show()

### 4.3. 丟入64個隨機樣本進去做預測。抽看其中一個樣本的預測結果。

In [None]:
# Rebuild the series with phase 0.123*pi (training used phi = 0), so the test
# windows are shifted relative to the training series.
x = np.arange(1000)
phi = 0.123 * np.pi
b = (1. + np.sqrt(5.)) / 2.
y = np.cos(2. * np.pi * b * x + phi)

test_samples = 64

# NOTE(review): start offsets are limited to the first 300 points although
# the series has 1000 points. Kept unchanged; confirm whether this is
# intentional.
starts = np.random.choice(300 - (n + m), size=test_samples)
rand_nums = starts.astype(np.float64)  # start offsets, stored as floats

# Row j of `windows` is y[starts[j] : starts[j] + n + m]. The first n columns
# are the model input and the remaining m columns are the true continuation.
windows = y[starts[:, np.newaxis] + np.arange(n + m)]
tests_x = windows[:, :n].reshape(test_samples, n, 1)
tests_y = windows[:, n:].copy()

y_pred = model.predict(tests_x)

# Inspect one test sample at its real position on the time axis.
choice_idx = 5
start = rand_nums[choice_idx]
input_steps = np.arange(start, start + n)
target_steps = np.arange(start + n, start + n + m)

plt.scatter(x=input_steps, y=tests_x[choice_idx], label="x")
plt.scatter(x=target_steps, y=tests_y[choice_idx], label="y")
plt.scatter(x=target_steps, y=y_pred[choice_idx], label="pred_y")
plt.legend(loc='lower left')
plt.show()

雖然資料可能不具有週期性，但，只要資料彷彿有某種規律，機器就可以學習那個規律。