## 08-04 케라스의 SimpleRNN과 LSTM 이해하기

### 임의의 입력 생성하기

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import SimpleRNN, LSTM, Bidirectional

# Toy "sentence" used as RNN input: 4 words (timesteps), each represented by a
# 5-dimensional word vector, i.e. a 2D structure of shape (timesteps, input_dim).
train_X = [
    [0.1, 4.2, 1.5, 1.1, 2.8],
    [1.0, 3.1, 2.5, 0.7, 1.1],
    [0.3, 2.1, 1.5, 2.1, 0.1],
    [2.2, 1.4, 0.5, 0.9, 1.1],
]
print(np.asarray(train_X).shape)

(4, 5)


In [2]:
# Keras RNN layers consume a 3D tensor rather than a 2D one, so the 2D sample
# from above is wrapped in one extra leading (batch) axis.
# batch_size is the number of samples the RNN processes at once; there is only
# a single sample here, so batch_size is 1.
train_X = np.array(
    [
        [
            [0.1, 4.2, 1.5, 1.1, 2.8],
            [1.0, 3.1, 2.5, 0.7, 1.1],
            [0.3, 2.1, 1.5, 2.1, 0.1],
            [2.2, 1.4, 0.5, 0.9, 1.1],
        ]
    ],
    dtype=np.float32,
)
# The resulting 3D tensor has shape (batch_size, timesteps, input_dim) == (1, 4, 5).
print(train_X.shape)

(1, 4, 5)


### SimpleRNN 이해하기

In [3]:
# SimpleRNN with a hidden state of size 3 and both flags left at False.
# Writing the flags out explicitly is equivalent to the shorthand SimpleRNN(3).
rnn = SimpleRNN(units=3, return_sequences=False, return_state=False)
hidden_state = rnn(train_X)

# With return_sequences=False, SimpleRNN returns only the hidden state of the
# last timestep, hence the (batch_size, units) shape.
print(
    'hidden state : {}, shape: {}'.format(hidden_state, hidden_state.shape)
)

hidden state : [[ 0.9521249  -0.1994099   0.07736178]], shape: (1, 3)


In [4]:
# return_sequences=True makes SimpleRNN return the hidden state of every
# timestep instead of only the last one.
rnn = SimpleRNN(units=3, return_sequences=True)
hidden_states = rnn(train_X)

print(
    'hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape)
)

hidden states : [[[ 0.98315257  0.9229474   0.98839176]
  [ 0.4544887  -0.6444889   0.14096372]
  [ 0.99061406 -0.5985525  -0.96306676]
  [ 0.9934282   0.04620324 -0.50891125]]], shape: (1, 4, 3)


In [5]:
# return_state=True additionally returns the last timestep's hidden state,
# regardless of whether return_sequences is True or False.
# With return_sequences=True and return_state=True together, SimpleRNN returns
# two values: the hidden states of all timesteps and the last hidden state.
rnn = SimpleRNN(units=3, return_sequences=True, return_state=True)
hidden_states, last_state = rnn(train_X)

print(
    'hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape)
)
print(
    'last hidden state : {}, shape: {}'.format(last_state, last_state.shape)
)

hidden states : [[[ 0.99438626 -0.9943884   0.99108875]
  [ 0.9078811  -0.9965469   0.99615955]
  [ 0.98853284 -0.9981373   0.9993842 ]
  [ 0.8652501  -0.9849831   0.5670675 ]]], shape: (1, 4, 3)
last hidden state : [[ 0.8652501 -0.9849831  0.5670675]], shape: (1, 3)


### LSTM 이해하기

In [9]:
# LSTM with return_state=True returns three values: the layer output, the last
# hidden state and the last cell state. Because return_sequences=False, the
# layer output is itself the last hidden state, so the first two printed
# values are identical.
lstm = LSTM(units=3, return_sequences=False, return_state=True)
hidden_state, last_state, last_cell_state = lstm(train_X)

print(
    'hidden state : {}, shape: {}'.format(hidden_state, hidden_state.shape)
)
print(
    'last hidden state : {}, shape: {}'.format(last_state, last_state.shape)
)
print(
    'last cell state : {}, shape: {}'.format(
        last_cell_state, last_cell_state.shape
    )
)

hidden state : [[0.40067336 0.07401402 0.4082229 ]], shape: (1, 3)
last hidden state : [[0.40067336 0.07401402 0.4082229 ]], shape: (1, 3)
last cell state : [[1.5561507  0.13756256 1.6736488 ]], shape: (1, 3)


In [8]:
# With return_sequences=True the first output holds the hidden state of every
# timestep; its final row matches the separately returned last hidden state.
# The cell state is only returned for the last timestep.
lstm = LSTM(units=3, return_sequences=True, return_state=True)
hidden_states, last_hidden_state, last_cell_state = lstm(train_X)

print(
    'hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape)
)
print(
    'last hidden state : {}, shape: {}'.format(
        last_hidden_state, last_hidden_state.shape
    )
)
print(
    'last cell state : {}, shape: {}'.format(
        last_cell_state, last_cell_state.shape
    )
)

hidden states : [[[-0.5417069  -0.11829665 -0.14663506]
  [-0.5264756  -0.26271948 -0.17941055]
  [-0.5513477  -0.18205467 -0.19268481]
  [-0.63333136 -0.53186375 -0.53853714]]], shape: (1, 4, 3)
last hidden state : [[-0.63333136 -0.53186375 -0.53853714]], shape: (1, 3)
last cell state : [[-1.0985057  -0.88698936 -0.89742243]], shape: (1, 3)


### Bidirectional(LSTM) 이해하기

In [10]:
# Constant initializers for the kernel (0.1), bias (0) and recurrent kernel
# (0.1) of the LSTMs below -- presumably so that the Bidirectional examples
# produce the same, easily comparable numbers on every run.
# Each name gets its own initializer instance.
k_init, b_init, r_init = (
    tf.keras.initializers.Constant(value=constant) for constant in (0.1, 0, 0.1)
)

In [11]:
# Case: return_sequences=False.
# The Bidirectional wrapper returns five values: the merged output followed by
# the forward (h, c) and backward (h, c) states. The merged output here is the
# forward and backward last hidden states concatenated, giving 2 * units columns.
bilstm = Bidirectional(
    LSTM(
        3,
        return_sequences=False,
        return_state=True,
        kernel_initializer=k_init,
        bias_initializer=b_init,
        recurrent_initializer=r_init,
    )
)
hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

print(
    'hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape)
)
print(
    'forward state : {}, shape: {}'.format(forward_h, forward_h.shape)
)
print(
    'backward state : {}, shape: {}'.format(backward_h, backward_h.shape)
)

hidden states : [[0.6303138 0.6303138 0.6303138 0.7038734 0.7038734 0.7038734]], shape: (1, 6)
forward state : [[0.6303138 0.6303138 0.6303138]], shape: (1, 3)
backward state : [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)


In [12]:
# Case: return_sequences=True.
# The merged output now covers every timestep, with the forward and backward
# hidden states concatenated per timestep. The forward state equals the forward
# half of the last timestep, while the backward state equals the backward half
# of the first timestep (the backward LSTM finishes reading there).
bilstm = Bidirectional(
    LSTM(
        3,
        return_sequences=True,
        return_state=True,
        kernel_initializer=k_init,
        bias_initializer=b_init,
        recurrent_initializer=r_init,
    )
)
hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

print(
    'hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape)
)
print(
    'forward state : {}, shape: {}'.format(forward_h, forward_h.shape)
)
print(
    'backward state : {}, shape: {}'.format(backward_h, backward_h.shape)
)

hidden states : [[[0.35906473 0.35906473 0.35906473 0.7038734  0.7038734  0.7038734 ]
  [0.55111325 0.55111325 0.55111325 0.58863586 0.58863586 0.58863586]
  [0.59115744 0.59115744 0.59115744 0.3951699  0.3951699  0.3951699 ]
  [0.6303138  0.6303138  0.6303138  0.21942244 0.21942244 0.21942244]]], shape: (1, 4, 6)
forward state : [[0.6303138 0.6303138 0.6303138]], shape: (1, 3)
backward state : [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)
