# 양방향 RNN (Bidirectional RNN)

In [None]:
import numpy as np
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import imdb

In [None]:
# Hyperparameters: vocabulary size, padded sequence length, mini-batch size.
n_unique_words = 10000
maxlen = 200
batch_size = 128

# num_words selects how far down the word-frequency ranking the dataset
# vocabulary goes; here only words ranked 1 ~ 10000 are used.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=n_unique_words)

# The string below is the original author's note (kept verbatim). In short:
# samples have different lengths, a model needs equal-length inputs, so each
# sample is padded (usually with 0) to a common length; `maxlen` is that
# target length and the first argument is the data to pad.
'''
전체 훈련 셋에서 각 샘플의 길이는 서로 다를수 있음
또한 각 문장은 단어 수가 제 각각
모델의 입력으로 사용하려면 모든 샘플 길이를 동일하게 맞추어야함
이를 자연어 처리에서는 패딩작업이라고 함, 보통 숫자 0을 넣어서 길이를 맞춤
케라스에서는 pad_sequence()를 사용
- 첫 번쨰 인자 : 패딩을 진행할 데이터
- maxlen : 모든 데이터에 대해 정규화 할 길이
'''
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

y_train = np.array(y_train)
y_test = np.array(y_test)

# Embedding -> bidirectional LSTM -> dropout -> single sigmoid unit.
model = Sequential()
model.add(Embedding(n_unique_words, 128, input_length=maxlen))

# Wrapping the LSTM in Bidirectional is the only change needed to make the
# recurrent layer bidirectional.
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# validation_data is passed as a tuple (x_val, y_val), the form documented by
# Keras; a list is not unpacked the same way by every Keras/TF release.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=4,
          validation_data=(x_test, y_test))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f1c4cbde5d0>

In [None]:
# Print the model's layers with their output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 200, 128)          1280000   
_________________________________________________________________
bidirectional (Bidirectional (None, 128)               98816     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 1,378,945
Trainable params: 1,378,945
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Evaluate on the training set; evaluate() returns the values in the order
# given by model.metrics_names, i.e. loss first, then accuracy.
loss, acc = model.evaluate(x_train, y_train, batch_size=384, verbose=1)
print('Training accuracy', acc)
print('Training loss', loss)

# Same evaluation on the held-out test set.
loss, acc = model.evaluate(x_test, y_test, batch_size=384, verbose=1)
print('Testing accuracy', acc)
print('Testing loss', loss)

Training accuracy ['loss', 'accuracy'] 0.9729599952697754
Training accuracy ['loss', 'accuracy'] 0.08874548971652985
Testing accuracy ['loss', 'accuracy'] 0.8640400171279907
Testing accuracy ['loss', 'accuracy'] 0.3694588541984558
