In [20]:
pwd

'/Users/aiden/Dropbox/Mac/Desktop/keras_nlp_ywj/ch8_RNN'

# 1. 임의의 입력 생성하기

In [21]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import SimpleRNN, LSTM, Bidirectional

In [22]:
# RNN과 LSTM을 테스트하기 위한 임의의 입력을 만듭니다.
train_X = [[0.1, 4.2, 1.5, 1.1, 2.8], [1.0, 3.1, 2.5, 0.7, 1.1], [0.3, 2.1, 1.5, 2.1, 0.1], [2.2, 1.4, 0.5, 0.9, 1.1]]

print(np.shape(train_X))
train_X

(4, 5)


[[0.1, 4.2, 1.5, 1.1, 2.8],
 [1.0, 3.1, 2.5, 0.7, 1.1],
 [0.3, 2.1, 1.5, 2.1, 0.1],
 [2.2, 1.4, 0.5, 0.9, 1.1]]

In [23]:
train_X = [[[0.1, 4.2, 1.5, 1.1, 2.8], [1.0, 3.1, 2.5, 0.7, 1.1], [0.3, 2.1, 1.5, 2.1, 0.1], [2.2, 1.4, 0.5, 0.9, 1.1]]]
train_X = np.array(train_X, dtype=np.float32)
print(train_X.shape)
train_X  # 입력 텐서는 (1, 4, 5) 크기를 가지는 3D 텐서

(1, 4, 5)


array([[[0.1, 4.2, 1.5, 1.1, 2.8],
        [1. , 3.1, 2.5, 0.7, 1.1],
        [0.3, 2.1, 1.5, 2.1, 0.1],
        [2.2, 1.4, 0.5, 0.9, 1.1]]], dtype=float32)

# 2. SimpleRNN 이해하기

In [24]:
# 은닉 상태의 크기를 3으로 지정
rnn = SimpleRNN(3)
# rnn = SimpleRNN(3, return_sequences=False, return_state=False)와 동일.
hidden_state = rnn(train_X)

print('hidden state : {}, shape: {}'.format(hidden_state, hidden_state.shape))

hidden state : [[-0.9969979  -0.47803527 -0.9990314 ]], shape: (1, 3)


In [26]:
rnn = SimpleRNN(3, return_sequences=True)
hidden_states = rnn(train_X)

print('hidden states : {}', hidden_states)
print('shape: ', hidden_states.shape)
# -> 모든 시점에 대해 은닉 상태의 값인 (1, 4, 3)크기의 텐서 출력

hidden states : {} tf.Tensor(
[[[-0.24955149 -0.3409324   0.9839179 ]
  [ 0.7024925  -0.9878981   0.91960406]
  [ 0.13021053 -0.99412835 -0.50716364]
  [ 0.14922251 -0.9886263   0.9900323 ]]], shape=(1, 4, 3), dtype=float32)
shape:  (1, 4, 3)


In [27]:
# return_sequences=True, return_state=True인 경우
rnn = SimpleRNN(3, return_sequences=True, return_state=True)
hidden_states, last_state = rnn(train_X)

print('hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape))
print('last hidden state : {}, shape: {}'.format(last_state, last_state.shape))

hidden states : [[[ 0.999782    0.9857592  -0.99005616]
  [ 0.9996902   0.995894   -0.9981255 ]
  [ 0.99361676  0.9923747  -0.9990918 ]
  [ 0.97182816  0.989509   -0.9983537 ]]], shape: (1, 4, 3)
last hidden state : [[ 0.97182816  0.989509   -0.9983537 ]], shape: (1, 3)


# 3. LSTM 이해하기

In [30]:
lstm = LSTM(3, return_sequences=False, return_state=True)
hidden_state, last_state, last_cell_state = lstm(train_X)

print('hidden state : {}, shape: {}'.format(hidden_state, hidden_state.shape))
print('last hidden state : {}, shape: {}'.format(last_state, last_state.shape))
print('last cell state : {}, shape: {}'.format(last_cell_state, last_cell_state.shape))

hidden state : [[ 0.20844159 -0.00946012 -0.06378686]], shape: (1, 3)
last hidden state : [[ 0.20844159 -0.00946012 -0.06378686]], shape: (1, 3)
last cell state : [[ 1.4330432  -0.03410259 -0.21709146]], shape: (1, 3)


In [31]:
lstm = LSTM(3, return_sequences=True, return_state=True)
hidden_states, last_hidden_state, last_cell_state = lstm(train_X)

print('hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape))
print('last hidden state : {}, shape: {}'.format(last_hidden_state, last_hidden_state.shape))
print('last cell state : {}, shape: {}'.format(last_cell_state, last_cell_state.shape))

hidden states : [[[-0.10702933  0.05555321 -0.3235023 ]
  [-0.22836396  0.12103008 -0.19993871]
  [-0.33012635  0.09225787 -0.25593245]
  [-0.48163924  0.0623228   0.15094148]]], shape: (1, 4, 3)
last hidden state : [[-0.48163924  0.0623228   0.15094148]], shape: (1, 3)
last cell state : [[-0.86355776  0.19970028  0.46225145]], shape: (1, 3)


# 4. Bidirectional LSTM 이해하기

In [32]:
k_init = tf.keras.initializers.Constant(value=0.1)
b_init = tf.keras.initializers.Constant(value=0)
r_init = tf.keras.initializers.Constant(value=0.1)

In [33]:
# 우선 return_sequences가 False이고, return_state가 True인 경우

In [34]:
bilstm = Bidirectional(LSTM(3, return_sequences=False, return_state=True, \
                            kernel_initializer=k_init, bias_initializer=b_init, recurrent_initializer=r_init))

hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

print('hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape))
print('forward state : {}, shape: {}'.format(forward_h, forward_h.shape))
print('backward state : {}, shape: {}'.format(backward_h, backward_h.shape))

hidden states : [[0.6303138 0.6303138 0.6303138 0.7038734 0.7038734 0.7038734]], shape: (1, 6)
forward state : [[0.6303138 0.6303138 0.6303138]], shape: (1, 3)
backward state : [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)


In [36]:
bilstm = Bidirectional(LSTM(3, return_sequences=True, return_state=True, \
                            kernel_initializer=k_init, bias_initializer=b_init, recurrent_initializer=r_init))

hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

print('hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape))
print('forward state : {}, shape: {}'.format(forward_h, forward_h.shape))
print('backward state : {}, shape: {}'.format(backward_h, backward_h.shape))

hidden states : [[[0.35906473 0.35906473 0.35906473 0.7038734  0.7038734  0.7038734 ]
  [0.55111325 0.55111325 0.55111325 0.58863586 0.58863586 0.58863586]
  [0.59115744 0.59115744 0.59115744 0.3951699  0.3951699  0.3951699 ]
  [0.6303138  0.6303138  0.6303138  0.21942244 0.21942244 0.21942244]]], shape: (1, 4, 6)
forward state : [[0.6303138 0.6303138 0.6303138]], shape: (1, 3)
backward state : [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)


In [None]:
역방향 LSTM의 첫번째 시점의 은닉 상태는 더 이상 정방향 LSTM의 마지막 시점의 은닉 상태와 연결되는 것이 아니라 
정방향 LSTM의 첫번째 시점의 은닉 상태와 연결됩니다.

그림으로 표현하면 다음과 같이 연결되어 다음층의 입력으로 사용됩니다.