# 케라스의 SimpleRNN과 LSTM 이해하기

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import SimpleRNN, LSTM, Bidirectional

In [2]:
# A toy "sentence": 4 time steps (words), each a 5-dimensional word vector.
train_X = [
    [0.1, 4.2, 1.5, 1.1, 2.8],
    [1.0, 3.1, 2.5, 0.7, 1.1],
    [0.3, 2.1, 1.5, 2.1, 0.1],
    [2.2, 1.4, 0.5, 0.9, 1.1],
]
# train_X is still a plain nested list here, so let NumPy report its shape:
# (sentence length, word-vector dimension).
print(np.asarray(train_X).shape)

(4, 5)


In [3]:
# Recurrent layers take a 3-D tensor as input, so the sentence is wrapped in
# an extra outer list that acts as a batch dimension of size 1.
train_X = np.asarray(
    [
        [
            [0.1, 4.2, 1.5, 1.1, 2.8],
            [1.0, 3.1, 2.5, 0.7, 1.1],
            [0.3, 2.1, 1.5, 2.1, 0.1],
            [2.2, 1.4, 0.5, 0.9, 1.1],
        ]
    ],
    dtype=np.float32,
)
print(train_X.shape)  # (batch_size, timesteps, input_dim)

(1, 4, 5)


## 1. SimpleRNN

In [5]:
# Both flags are left at their defaults (spelled out here for clarity), so
# calling the layer gives back a single tensor.
rnn = SimpleRNN(units=3, return_sequences=False, return_state=False)
hidden_state = rnn(train_X)

# That single tensor is the hidden state at the last time step.
print('hidden_state: {}, shape: {}'.format(hidden_state, hidden_state.shape))

hidden_state: [[-0.9967098  -0.87812924 -0.3390168 ]], shape: (1, 3)


In [6]:
# return_sequences=True (return_state stays False): still a single return
# value, but it now carries one hidden state per time step.
rnn = SimpleRNN(units=3, return_sequences=True, return_state=False)
hidden_states = rnn(train_X)

# Hidden states for every time step: (batch_size, timesteps, units).
print('hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape))

hidden states : [[[-0.08247936  0.6159755  -0.99908835]
  [-0.812468    0.5637501  -0.9922486 ]
  [-0.8988107  -0.7688197  -0.9942987 ]
  [ 0.7542793   0.9940191   0.18331288]]], shape: (1, 4, 3)


In [9]:
# return_state=True (return_sequences stays False): the call now returns two
# tensors, the layer output and the final state.
rnn = SimpleRNN(units=3, return_sequences=False, return_state=True)
hidden_state, last_state = rnn(train_X)

# Both tensors are the hidden state at the last time step, so the two
# printed values are identical.
print('hidden state : {}, shape: {}'.format(hidden_state, hidden_state.shape))
print('last hidden state : {}, shape: {}'.format(last_state, last_state.shape))

hidden state : [[0.7955736  0.41378158 0.1346554 ]], shape: (1, 3)
last hidden state : [[0.7955736  0.41378158 0.1346554 ]], shape: (1, 3)


In [7]:
# Both flags on: two return values, the hidden states for every time step
# plus the hidden state at the last time step.
rnn = SimpleRNN(units=3, return_sequences=True, return_state=True)
hidden_states, last_state = rnn(train_X)

# The last row of `hidden_states` matches `last_state`.
print('hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape))
print('last hidden state : {}, shape: {}'.format(last_state, last_state.shape))

hidden states : [[[-0.03534397  0.9727898  -0.9988976 ]
  [ 0.9122617   0.09265384 -0.99911284]
  [ 0.9466134  -0.55583745 -0.9983327 ]
  [ 0.7517583  -0.1131636   0.17865638]]], shape: (1, 4, 3)
last hidden state : [[ 0.7517583  -0.1131636   0.17865638]], shape: (1, 3)


## 2. LSTM

In [13]:
# An LSTM with return_state=True returns three tensors: the layer output,
# the last hidden state and the last cell state.
lstm = LSTM(units=3, return_sequences=False, return_state=True)
hidden_state, last_state, last_cell_state = lstm(train_X)

# With return_sequences=False the output is the last hidden state itself, so
# the first two printed values are identical; the cell state is a separate
# tensor.
print('hidden state : {}, shape: {}'.format(hidden_state, hidden_state.shape))
print('last hidden state : {}, shape: {}'.format(last_state, last_state.shape))
print('last cell state : {}, shape: {}'.format(last_cell_state, last_cell_state.shape))

hidden state : [[-0.0773441 -0.6716873  0.3294218]], shape: (1, 3)
last hidden state : [[-0.0773441 -0.6716873  0.3294218]], shape: (1, 3)
last cell state : [[-0.09885489 -1.0926687   0.5672389 ]], shape: (1, 3)


In [14]:
# Both flags on: still three return values, but the first one now holds the
# hidden state of every time step.
lstm = LSTM(units=3, return_sequences=True, return_state=True)
hidden_states, last_hidden_state, last_cell_state = lstm(train_X)

# Hidden states for all time steps, then the hidden state and the cell state
# at the last time step.
print('hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape))
print('last hidden state : {}, shape: {}'.format(last_hidden_state, last_hidden_state.shape))
print('last cell state : {}, shape: {}'.format(last_cell_state, last_cell_state.shape))

hidden states : [[[0.18967618 0.04214519 0.21109577]
  [0.2989427  0.17905194 0.20693749]
  [0.3543825  0.21613444 0.19326994]
  [0.44900817 0.07185657 0.22520518]]], shape: (1, 4, 3)
last hidden state : [[0.44900817 0.07185657 0.22520518]], shape: (1, 3)
last cell state : [[0.5511633  0.43143135 0.7207825 ]], shape: (1, 3)


## 3. Bidirectional(LSTM)

In [16]:
# Constant initializers for the LSTM weights. With every weight fixed, the
# hidden-state values printed by the Bidirectional cells below are the same
# on every run.
k_init = tf.keras.initializers.Constant(0.1)  # input kernel
r_init = tf.keras.initializers.Constant(0.1)  # recurrent kernel
b_init = tf.keras.initializers.Constant(0)    # bias

In [17]:
# Wrap a constant-initialized LSTM in Bidirectional. With return_state=True
# the call returns five tensors: the merged output, then the (hidden, cell)
# states of the forward LSTM and the (hidden, cell) states of the backward
# LSTM.
bilstm = Bidirectional(
    layer=LSTM(
        units=3,
        return_sequences=False,
        return_state=True,
        kernel_initializer=k_init,
        recurrent_initializer=r_init,
        bias_initializer=b_init,
    )
)
hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

# hidden_states : forward_h and backward_h concatenated, shape (1, 6).
# forward_h     : forward LSTM's hidden state at the last time step.
# backward_h    : backward LSTM's hidden state at the first time step (the
#                 last one it processes, since it reads the input reversed).
print('hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape))
print('forward state : {}, shape: {}'.format(forward_h, forward_h.shape))
print('backward state : {}, shape: {}'.format(backward_h, backward_h.shape))

hidden states : [[0.63031393 0.63031393 0.63031393 0.7038734  0.7038734  0.7038734 ]], shape: (1, 6)
forward state : [[0.63031393 0.63031393 0.63031393]], shape: (1, 3)
backward state : [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)


In [18]:
# Same bidirectional LSTM, now with return_sequences=True: the first return
# value holds the concatenated forward/backward hidden states of every time
# step, shape (1, 4, 6).
bilstm = Bidirectional(
    layer=LSTM(
        units=3,
        return_sequences=True,
        return_state=True,
        kernel_initializer=k_init,
        recurrent_initializer=r_init,
        bias_initializer=b_init,
    )
)
hidden_states, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

# forward_h equals the forward half of the last time step's row, while
# backward_h equals the backward half of the first time step's row.
print('hidden states : {}, shape: {}'.format(hidden_states, hidden_states.shape))
print('forward state : {}, shape: {}'.format(forward_h, forward_h.shape))
print('backward state : {}, shape: {}'.format(backward_h, backward_h.shape))

hidden states : [[[0.35906473 0.35906473 0.35906473 0.7038734  0.7038734  0.7038734 ]
  [0.5511133  0.5511133  0.5511133  0.58863586 0.58863586 0.58863586]
  [0.59115744 0.59115744 0.59115744 0.3951699  0.3951699  0.3951699 ]
  [0.63031393 0.63031393 0.63031393 0.21942244 0.21942244 0.21942244]]], shape: (1, 4, 6)
forward state : [[0.63031393 0.63031393 0.63031393]], shape: (1, 3)
backward state : [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)
