## RNN(Recurrent Neural Network) : 순환 신경망
#### 순서가 있는 시퀀스 데이터, time series data(시계열 데이터)를 입력하여 예측

In [1]:
import tensorflow as tf
import numpy as np

![image](https://cloud.githubusercontent.com/assets/901975/23383681/9943a9fc-fd82-11e6-8121-bd187994e249.png)

In [2]:
# One-hot vectors for the four distinct characters of 'hello':
# the single 1 sits at index 0 for 'h', 1 for 'e', 2 for 'l' and 3 for 'o'.
h, e, l, o = ([int(pos == hot) for pos in range(4)] for hot in range(4))

In [3]:
# Single SimpleRNN layer mapping input_dim 4 -> hidden size 2, applied to a
# batch of 3 sequences of length 5: 'hello', 'eolll', 'lleel'.
x_data = np.asarray(
    [
        [h, e, l, l, o],
        [e, o, l, l, l],
        [l, l, e, e, l],
    ],
    dtype=np.float32,
)
print(x_data.shape)  # (3, 5, 4) = (N, T, D)

hidden_size = 2  # H

# Request both the per-time-step outputs and the final hidden state.
rnn = tf.keras.layers.SimpleRNN(
    units=hidden_size,
    return_sequences=True,
    return_state=True,
)
outputs, states = rnn(x_data)
print('outputs:', outputs.numpy(), outputs.shape)  # (3, 5, 2) = (N, T, H)
print('states:', states.numpy(), states.shape)     # (3, 2) = (N, H)

(3, 5, 4)
outputs: [[[-0.7028867   0.17087269]
  [-0.6667389  -0.46756342]
  [ 0.77675843 -0.12783077]
  [ 0.6360688   0.8684261 ]
  [-0.78504604  0.36948606]]

 [[-0.5426139   0.18684833]
  [-0.39801806 -0.6726023 ]
  [ 0.8488643   0.13157704]
  [ 0.45818263  0.88715005]
  [-0.2679221   0.78031754]]

 [[ 0.53333986  0.5044049 ]
  [ 0.1102109   0.80299836]
  [-0.8866796   0.31778717]
  [-0.7436174  -0.5948426 ]
  [ 0.821517   -0.20707259]]] (3, 5, 2)
states: [[-0.78504604  0.36948606]
 [-0.2679221   0.78031754]
 [ 0.821517   -0.20707259]] (3, 2)


In [4]:
# With return_state=False the layer returns only the per-time-step outputs;
# no separate state tensor is returned.
rnn = tf.keras.layers.SimpleRNN(
    units=hidden_size,
    return_sequences=True,
    return_state=False,
)
outputs = rnn(x_data)
print('outputs:', outputs.numpy(), outputs.shape)  # (3, 5, 2) = (N, T, H)

outputs: [[[-0.4023105   0.23949122]
  [-0.20422964  0.5384243 ]
  [ 0.40765044 -0.130547  ]
  [ 0.86718524  0.04729978]
  [ 0.05588026 -0.8226699 ]]

 [[ 0.2533225   0.5062543 ]
  [-0.6109802  -0.8252262 ]
  [ 0.72097373  0.83651155]
  [ 0.76305944 -0.7250244 ]
  [ 0.96104896  0.30704167]]

 [[ 0.7237755   0.18066321]
  [ 0.8838294  -0.37178272]
  [ 0.8311353   0.3238    ]
  [ 0.6282779  -0.19197844]
  [ 0.9114376  -0.03334073]]] (3, 5, 2)


In [5]:
# With return_sequences=False the time axis is dropped and the result is 2-D.
rnn = tf.keras.layers.SimpleRNN(
    units=hidden_size,
    return_sequences=False,
)
outputs = rnn(x_data)
print('outputs:', outputs.numpy(), outputs.shape)  # (3, 2) = (N, H)

outputs: [[-0.28493956 -0.358719  ]
 [ 0.40505397 -0.87045205]
 [ 0.6506944   0.7326639 ]] (3, 2)


In [6]:
# Last-step output together with the final hidden state; both are (N, H).
rnn = tf.keras.layers.SimpleRNN(
    units=hidden_size,
    return_sequences=False,
    return_state=True,
)
outputs, states = rnn(x_data)
print('outputs:', outputs.numpy(), outputs.shape)  # (3, 2) = (N, H)
print('states:', states.numpy(), states.shape)     # (3, 2) = (N, H)

outputs: [[-0.08522145 -0.87587667]
 [ 0.06463614 -0.7621176 ]
 [-0.06561977 -0.6306634 ]] (3, 2)
states: [[-0.08522145 -0.87587667]
 [ 0.06463614 -0.7621176 ]
 [-0.06561977 -0.6306634 ]] (3, 2)


##  RNN 주요 레이어 종류
#### (1) SimpleRNN :가장 간단한 형태의 RNN레이어, 활성화 함수로 tanh가 사용됨(tanh: -1 ~ 1 사이의 값을 반환)
#### (2) LSTM(Long Short-Term Memory) : 입력 데이터와 출력 사이의 거리가 멀어질수록 연관 관계가 적어진다(Long Term Dependency, 장기 의존성 문제). LSTM은 장기 의존성 문제를 해결하기 위해 출력값 외에 셀 상태(cell state)값을 출력함. 활성화 함수로 tanh 외에 sigmoid가 사용됨
#### (3) GRU(Gated Recurrent Unit) : 뉴욕대 조경현 교수 등이 제안, LSTM보다 구조가 간단하면서 비슷한 수준의 성능을 보임

In [7]:
# Sequence data: six sliding windows of four consecutive integers
# (0..3, 1..4, ..., 5..8); each target is the integer following its window.
X = np.array([np.arange(start, start + 4) for start in range(6)],
             dtype=np.float32)

x_data = tf.reshape(X, shape=(-1, 4, 1))  # (6, 4, 1) = (N, T, D)

y_data = np.arange(4, 10, dtype=np.float32)

print(x_data.shape, y_data.shape)
x_data

(6, 4, 1) (6,)


<tf.Tensor: shape=(6, 4, 1), dtype=float32, numpy=
array([[[0.],
        [1.],
        [2.],
        [3.]],

       [[1.],
        [2.],
        [3.],
        [4.]],

       [[2.],
        [3.],
        [4.],
        [5.]],

       [[3.],
        [4.],
        [5.],
        [6.]],

       [[4.],
        [5.],
        [6.],
        [7.]],

       [[5.],
        [6.],
        [7.],
        [8.]]], dtype=float32)>

### [1] SimpleRNN
#### 가장 간단한 형태의 RNN 

In [17]:
# Two stacked SimpleRNN layers followed by a single-unit Dense output layer.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(4, 1)))

# First layer: (N, T, D) = (6, 4, 1) -> (N, T, H) = (6, 4, 300),
# where T is the sequence length and H the hidden size.
# Parameters: Wx (D, H) + Wh (H, H) + b (H,)
#             1*300 + 300*300 + 300 = 90,600
model.add(tf.keras.layers.SimpleRNN(units=300, return_sequences=True))

# Second layer: its input has 300 features per time step.
# Parameters: Wx (300, 300) + Wh (300, 300) + b (300,)
#             300*300 + 300*300 + 300 = 180,300
model.add(tf.keras.layers.SimpleRNN(units=300))
model.add(tf.keras.layers.Dense(units=1))

model.compile(optimizer='adam', loss='mse')
model.summary()

In [18]:
# Train for 100 epochs, then predict on the training inputs.
model.fit(x=x_data, y=y_data, epochs=100)
print(model.predict(x=x_data))

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 51.4065
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - loss: 2.3508
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - loss: 4.0082
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - loss: 6.7785
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 3.8824
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - loss: 1.3509
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 2.3738
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 3.5713
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - loss: 2.7365
Epoch 10/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - loss: 1.3180
Epoch 11/1

In [19]:
# Display the ground-truth targets for comparison with the model's predictions.
y_data

array([4., 5., 6., 7., 8., 9.], dtype=float32)

In [20]:
# Predictions for windows that were not part of the training data;
# each input is a single sequence of shape (1, 4, 1).
print(model.predict(np.array([6., 7., 8., 9.]).reshape(1, 4, 1)))
print(model.predict(np.array([-1., 0., 1., 2.]).reshape(1, 4, 1)))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 496ms/step
[[9.532253]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[[1.4108332]]


In [21]:
# Evaluate the MSE loss on the training data.
model.evaluate(x=x_data, y=y_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 469ms/step - loss: 0.0034


0.00342468055896461

### [2] LSTM(Long Short-Term Memory)
#### 입력 데이터와 출력 사이의 거리가 멀어질수록 연관 관계가 적어진다(Long Term Dependency, 장기 의존성 문제)
#### LSTM은 장기 의존성 문제를 해결하기 위해 출력값 외에 셀 상태(cell state)값을 출력함

In [22]:
# Two stacked LSTM layers followed by a single-unit Dense output layer.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(4, 1)))

# First layer: (N, T, D) = (6, 4, 1) -> (N, T, H) = (6, 4, 300),
# where T is the sequence length and H the hidden size.
# Parameters: Wx (D, 4H) + Wh (H, 4H) + b (4H,)
#             1*4*300 + 300*4*300 + 4*300 = 4 * 90,600 = 362,400
model.add(tf.keras.layers.LSTM(units=300, return_sequences=True))

# Second layer: its input has 300 features per time step.
# Parameters: Wx (300, 4*300) + Wh (300, 4*300) + b (4*300,)
#             300*4*300 + 300*4*300 + 4*300 = 4 * 180,300 = 721,200
model.add(tf.keras.layers.LSTM(units=300))
model.add(tf.keras.layers.Dense(units=1))

model.compile(optimizer='adam', loss='mse')
model.summary()

In [27]:
# Train silently for 100 epochs, then predict on the training inputs.
model.fit(x=x_data, y=y_data, epochs=100, verbose=0)
print(model.predict(x=x_data))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 468ms/step
[[3.3768644]
 [5.0869184]
 [6.367822 ]
 [7.3078785]
 [8.005344 ]
 [8.5321865]]


In [33]:
# Bidirectional LSTM: the output feature size doubles (forward + backward).
model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(4, 1)))

# First layer: (N, T, D) = (6, 4, 1) -> (N, T, 2H) = (6, 4, 2*300),
# where T is the sequence length and H the hidden size.
# Parameters per direction: Wx (D, 4H) + Wh (H, 4H) + b (4H,)
#             1*4*300 + 300*4*300 + 4*300 = 4 * 90,600 = 362,400
# Both directions: 362,400 * 2
model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(units=300, return_sequences=True)
    )
)

# Second layer: its input has 2*300 features per time step.
# Parameters per direction: Wx (2*300, 4*300) + Wh (300, 4*300) + b (4*300,)
#             2*300*4*300 + 300*4*300 + 4*300 = 1,081,200
# Both directions: 1,081,200 * 2
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=300)))
model.add(tf.keras.layers.Dense(units=1))

model.compile(optimizer='adam', loss='mse')
model.summary()

In [38]:
# Parameter total for the second Bidirectional LSTM layer:
# 1,081,200 per direction, doubled for the forward and backward LSTMs.
1081200*2

2162400

### [3] GRU(Gated Recurrent Unit)
#### 뉴욕대 조경현 교수 등이 제안, LSTM보다 구조가 간단하면서 비슷한 수준의 성능을 보임

In [29]:
# Two stacked GRU layers followed by a single-unit Dense output layer.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(4, 1)))

# First layer: (N, T, D) = (6, 4, 1) -> (N, T, H) = (6, 4, 300),
# where T is the sequence length and H the hidden size.
model.add(tf.keras.layers.GRU(units=300, return_sequences=True))
model.add(tf.keras.layers.GRU(units=300))
model.add(tf.keras.layers.Dense(units=1))

model.compile(optimizer='adam', loss='mse')
model.summary()

In [30]:
# Train silently for 100 epochs, then predict on the training inputs.
model.fit(x=x_data, y=y_data, epochs=100, verbose=0)
print(model.predict(x=x_data))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 698ms/step
[[3.6348834]
 [5.0266037]
 [6.2159834]
 [7.210813 ]
 [8.031553 ]
 [8.703036 ]]
