# BiLSTM with Attention mechanism

### 1. IMDB 리뷰 데이터 전처리하기

In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
vocab_size = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
print(f'리뷰의 최대 길이 : {max(len(l) for l in X_train):,d}')
print(f'리뷰의 평균 길이 : {sum(map(len, X_train))/len(X_train):.2f}')

리뷰의 최대 길이 : 2,494
리뷰의 평균 길이 : 238.71


In [4]:
max_len = 500
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)

### 2. 바다나우 어텐션(Bahdanau Attention)

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense

In [6]:
class BahdanauAttention(Model):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, values, query): # 단, key와 value는 같음
        # query shape == (batch_size, hidden size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden size)
        # score 계산을 위해 뒤에서 할 덧셈을 위해서 차원을 변경해줍니다.
        hidden_with_time_axis = tf.expand_dims(query, 1)

        # score shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        # the shape of the tensor before applying self.V is (batch_size, max_length, units)
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))

        # attention_weights shape == (batch_size, max_length, 1)
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

### 3. 양방향 LSTM + 어텐션 메커니즘(BiLSTM with Attention Mechanism)

In [7]:
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Concatenate, Dropout

- 모델 설계

In [8]:
sequence_input = Input(shape=(max_len,), dtype='int32')
embedded_sequences = Embedding(vocab_size, 128, input_length=max_len, mask_zero=True)(sequence_input)

In [9]:
# 양방향 LSTM의 첫번째 층
lstm = Bidirectional(LSTM(64, dropout=0.5, return_sequences=True))(embedded_sequences)

In [10]:
# 양방향 LSTM의 두번째 층
# 순방향 LSTM의 은닉 상태와 셀 상태를 forward_h, forward_c에 저장
# 역방향 LSTM의 은닉 상태와 셀 상태를 backward_h, backward_c에 저장
lstm, forward_h, forward_c, backward_h, backward_c = \
    Bidirectional(LSTM(64, dropout=0.5, return_sequences=True, return_state=True))(lstm)

In [11]:
print(lstm.shape, forward_h.shape, forward_c.shape, backward_h.shape, backward_c.shape)

(None, 500, 128) (None, 64) (None, 64) (None, 64) (None, 64)


In [12]:
# 양방향 LSTM의 은닉 상태와 셀 상태를 사용하기 위해 두 방향의 LSTM의 상태들을 연결(concatenate)
state_h = Concatenate()([forward_h, backward_h])    # 은닉 상태
state_c = Concatenate()([forward_c, backward_c])    # 셀 상태

In [13]:
attention = BahdanauAttention(64)   # 가중치 크기 정의
context_vector, attention_weights = attention(lstm, state_h)

In [14]:
dense1 = Dense(20, activation="relu")(context_vector)
dropout = Dropout(0.5)(dense1)
output = Dense(1, activation="sigmoid")(dropout)
model = Model(inputs=sequence_input, outputs=output)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 500)]        0           []                               
                                                                                                  
 embedding (Embedding)          (None, 500, 128)     1280000     ['input_1[0][0]']                
                                                                                                  
 bidirectional (Bidirectional)  (None, 500, 128)     98816       ['embedding[0][0]']              
                                                                                                  
 bidirectional_1 (Bidirectional  [(None, 500, 128),  98816       ['bidirectional[0][0]']          
 )                               (None, 64),                                                  

In [15]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [16]:
history = model.fit(X_train, y_train, epochs=3, batch_size=256, 
                    validation_data=(X_test, y_test), verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [17]:
# 정확도
model.evaluate(X_test, y_test)[1]



0.8787999749183655