# 리뷰완료

이 자료는 위키독스 딥 러닝을 이용한 자연어 처리 입문의 RNN 튜토리얼 자료입니다.  
링크 : https://wikidocs.net/22886

2021년 10월 12일에 마지막으로 테스트되었습니다.

In [1]:
import tensorflow as tf

In [2]:
# Show the installed TensorFlow version (the notebook echoes the value of the cell's last expression).
tf.__version__

'2.13.0'

# 1. 케라스(Keras)로 RNN 구현하기

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN

In [4]:
# A model holding a single SimpleRNN layer with 3 hidden units.
# input_shape=(2, 10): each sample is a sequence of 2 timesteps with
# 10 features per timestep.
# Equivalent to the legacy form SimpleRNN(3, input_length=2, input_dim=10).
model = Sequential([SimpleRNN(units=3, input_shape=(2, 10))])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 3)                 42        
                                                                 
Total params: 42 (168.00 Byte)
Trainable params: 42 (168.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [5]:
# Same layer as before, but with the batch size fixed up front:
# batch_input_shape=(batch_size, timesteps, features) = (8, 2, 10).
# The summary then reports a concrete batch dimension instead of None.
model = Sequential(
    layers=[SimpleRNN(units=3, batch_input_shape=(8, 2, 10))],
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_1 (SimpleRNN)    (8, 3)                    42        
                                                                 
Total params: 42 (168.00 Byte)
Trainable params: 42 (168.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [6]:
# With return_sequences=True the layer emits the hidden state of every
# timestep, so the summary shows a 3D output (batch, timesteps, units).
model = Sequential(
    layers=[
        SimpleRNN(
            units=3,
            batch_input_shape=(8, 2, 10),
            return_sequences=True,
        ),
    ],
)
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_2 (SimpleRNN)    (8, 2, 3)                 42        
                                                                 
Total params: 42 (168.00 Byte)
Trainable params: 42 (168.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# 2. 파이썬으로 RNN 구현하기

In [7]:
import numpy as np

# timesteps   : number of timesteps; in NLP this is usually the sentence length.
# input_dim   : dimensionality of each input; in NLP usually the word-vector size.
# hidden_size : size of the hidden state, i.e. the capacity of the memory cell.
timesteps, input_dim, hidden_size = 10, 4, 8

# 2D input tensor of shape (timesteps, input_dim) filled with random values.
inputs = np.random.random(size=(timesteps, input_dim))

# Initial hidden state: a zero vector with hidden_size entries.
hidden_state_t = np.zeros(shape=hidden_size)

In [8]:
# Hidden state with hidden_size (8) entries. It is still the initial state here, so every entry is 0.
print(hidden_state_t)

[0. 0. 0. 0. 0. 0. 0. 0.]


In [9]:
# Parameters of the RNN cell, each filled with random values.
# Wx: weights applied to the input,        2D tensor of shape (8, 4).
# Wh: weights applied to the hidden state, 2D tensor of shape (8, 8).
# b : bias,                                1D tensor of shape (8,).
Wx = np.random.random_sample(size=(hidden_size, input_dim))
Wh = np.random.random_sample(size=(hidden_size, hidden_size))
b = np.random.random_sample(size=hidden_size)

In [10]:
# Print the shape of the input weights, the recurrent weights and the bias, one per line.
print(Wx.shape, Wh.shape, b.shape, sep="\n")

(8, 4)
(8, 8)
(8,)


In [11]:
# Run the RNN memory cell over the whole input sequence and collect the
# hidden state produced at every timestep.

# Start from the zero initial state. Without this reset, re-running the cell
# would continue from the final state left behind by the previous run and
# produce different numbers.
hidden_state_t = np.zeros((hidden_size,))
total_hidden_states = []

for input_t in inputs:  # one input vector per timestep
    # h_t = tanh(Wx · x_t + Wh · h_{t-1} + b)
    output_t = np.tanh(np.dot(Wx, input_t) + np.dot(Wh, hidden_state_t) + b)
    total_hidden_states.append(output_t)  # accumulate the hidden state of each timestep
    print(np.shape(total_hidden_states))  # (timesteps seen so far, hidden_size)
    hidden_state_t = output_t  # becomes h_{t-1} for the next timestep

# Stack the per-timestep vectors into a single 2D array so it prints cleanly.
total_hidden_states = np.stack(total_hidden_states, axis=0)

# Shape (timesteps, hidden_size); with the settings above this is (10, 8).
print(total_hidden_states)

(1, 8)
(2, 8)
(3, 8)
(4, 8)
(5, 8)
(6, 8)
(7, 8)
(8, 8)
(9, 8)
(10, 8)
[[0.87889807 0.78186141 0.98887684 0.79895445 0.6656949  0.89436287
  0.97307661 0.90852305]
 [0.9999965  0.99988781 0.99998641 0.99993497 0.99995229 0.99996719
  0.99997236 0.99965331]
 [0.99999951 0.99994198 0.99999363 0.99997809 0.99999153 0.99998792
  0.9999839  0.99987764]
 [0.99999933 0.99993169 0.99998509 0.99998178 0.99998688 0.99998223
  0.99994896 0.99975049]
 [0.99999936 0.99991798 0.99999288 0.99997458 0.99998843 0.99998958
  0.9999813  0.99988153]
 [0.99999803 0.9998885  0.9999761  0.99996448 0.99996573 0.99998677
  0.99994397 0.99963002]
 [0.99999895 0.99994009 0.99998706 0.99997489 0.99998266 0.99998645
  0.99997015 0.99973515]
 [0.99999968 0.99995414 0.99999127 0.99998577 0.99999403 0.99998015
  0.9999696  0.99983628]
 [0.99999951 0.99993486 0.99999591 0.99997349 0.99999192 0.99999138
  0.99999109 0.99992154]
 [0.99999909 0.99994902 0.99998771 0.99997967 0.99998419 0.99998582
  0.99996822 0.99973164]]

# 3. 깊은 순환 신경망(Deep Recurrent Neural Network)


In [12]:
# Deep (stacked) RNN: two SimpleRNN layers on top of each other.
model = Sequential()
# input_shape=(timesteps, features)=(10, 5) is the equivalent of the legacy
# input_length=10, input_dim=5 keywords and matches the other cells.
# return_sequences=True makes the layer emit the hidden state of every
# timestep, which the next recurrent layer consumes as its input sequence.
model.add(SimpleRNN(hidden_size, input_shape=(10, 5), return_sequences=True))
model.add(SimpleRNN(hidden_size, return_sequences=True))
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_3 (SimpleRNN)    (None, 10, 8)             112       
                                                                 
 simple_rnn_4 (SimpleRNN)    (None, 10, 8)             136       
                                                                 
Total params: 248 (992.00 Byte)
Trainable params: 248 (992.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# 4. 양방향 순환 신경망(Bidirectional Recurrent Neural Network)

In [13]:
from tensorflow.keras.layers import Bidirectional

In [14]:
timesteps, input_dim = 10, 5

# One bidirectional SimpleRNN layer that returns the hidden state of every
# timestep. The summary reports 16 output features, i.e. twice hidden_size
# (one set per direction).
model = Sequential(
    [
        Bidirectional(
            SimpleRNN(hidden_size, return_sequences=True),
            input_shape=(timesteps, input_dim),
        ),
    ]
)
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirection  (None, 10, 16)            224       
 al)                                                             
                                                                 
Total params: 224 (896.00 Byte)
Trainable params: 224 (896.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
# Deep bidirectional RNN: four stacked bidirectional SimpleRNN layers.
# Only the first layer declares the input shape; the three layers after it
# are identical, so they are built in a comprehension (which leaks no loop
# variable into the notebook namespace).
model = Sequential(
    [
        Bidirectional(
            SimpleRNN(hidden_size, return_sequences=True),
            input_shape=(timesteps, input_dim),
        ),
    ]
    + [
        Bidirectional(SimpleRNN(hidden_size, return_sequences=True))
        for _ in range(3)
    ]
)
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_1 (Bidirecti  (None, 10, 16)            224       
 onal)                                                           
                                                                 
 bidirectional_2 (Bidirecti  (None, 10, 16)            400       
 onal)                                                           
                                                                 
 bidirectional_3 (Bidirecti  (None, 10, 16)            400       
 onal)                                                           
                                                                 
 bidirectional_4 (Bidirecti  (None, 10, 16)            400       
 onal)                                                           
                                                                 
Total params: 1424 (5.56 KB)
Trainable params: 1424 (5