In [0]:
## 양방향 LSTM과 어텐션 메커니즘(BiLSTM with Attention mechanism)

import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [0]:
# Size of the vocabulary requested from the IMDB loader (passed as num_words).
vocab_size = 100
train_split, test_split = imdb.load_data(num_words=vocab_size)
X_train, y_train = train_split
X_test, y_test = test_split

In [0]:
# Training takes too long on the full dataset, so only a small subset is kept.
# (The goal here is to focus on how attention works.)
subset_size = 1000
X_train, y_train = X_train[:subset_size], y_train[:subset_size]
X_test, y_test = X_test[:subset_size], y_test[:subset_size]

In [0]:
# Display the first training sample.
# NOTE(review): the saved output for this cell is already zero-padded, which
# suggests it was last executed after the pad_sequences cell further down --
# confirm the execution order with Restart & Run All.
X_train[0]

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0

In [0]:
# Print the length of the first review only (nothing is printed if X_train is empty).
leading_reviews = X_train[:1]
if len(leading_reviews) > 0:
  print(len(leading_reviews[0]))

500


In [0]:
# Report the maximum and the mean review length over the training reviews.
review_lengths = [len(review) for review in X_train]
print('리뷰의 최대 길이 : {}'.format(max(review_lengths)))
print('리뷰의 평균 길이 : {}'.format(sum(review_lengths) / len(X_train)))

리뷰의 최대 길이 : 500
리뷰의 평균 길이 : 500.0


In [0]:
# Bring every review to the same length (max_len) so the splits can be batched.
max_len = 500
X_train, X_test = [pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)]

In [0]:
# One-hot encode the sentiment labels for the 2-unit softmax classifier.
# num_classes is pinned to 2 so the encoded width does not depend on which
# labels happen to be present in the subsampled split.
# NOTE(review): this cell overwrites y_train / y_test, so it should be run
# exactly once per kernel session.
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

In [0]:
# Display the third training label to check the result of the encoding step above.
y_train[2]

array([1., 0.], dtype=float32)

In [0]:
class BahdanauAttention(tf.keras.Model):
  """Additive (Bahdanau-style) attention over a sequence of vectors.

  Every timestep of `values` is scored against a single `query` vector with
  V(tanh(W1(values) + W2(query))). The scores are normalised with a softmax
  over the time axis and used to build a weighted sum of `values`.

  Args:
    units: Output width of the two projection layers W1 and W2.
  """

  def __init__(self, units):
    super().__init__()
    dense = tf.keras.layers.Dense
    self.W1 = dense(units)  # projects the values
    self.W2 = dense(units)  # projects the query
    self.V = dense(1)       # collapses each projected timestep to one score

  def call(self, values, query):  # note: keys and values are the same tensor here
    """Compute the context vector and the attention weights.

    Args:
      values: Tensor of shape (batch_size, max_length, hidden_size).
      query: Tensor of shape (batch_size, hidden_size).

    Returns:
      A tuple (context_vector, attention_weights) with shapes
      (batch_size, hidden_size) and (batch_size, max_length, 1).
    """
    # Add a time axis, (batch_size, 1, hidden_size), so that the projected
    # query broadcasts against every timestep when the two are added.
    query_with_time_axis = tf.expand_dims(query, 1)

    print('values -- ', values)
    print('query -- ', query)

    # Before self.V the tensor is (batch_size, max_length, units);
    # self.V reduces the last axis to 1, giving one score per timestep.
    projected_values = self.W1(values)
    projected_query = self.W2(query_with_time_axis)
    score = self.V(tf.nn.tanh(projected_values + projected_query))

    # Softmax over axis 1 (time) keeps the shape (batch_size, max_length, 1).
    attention_weights = tf.nn.softmax(score, axis=1)
    print('attention_weights -- ', attention_weights)

    # Weight each timestep, then sum over time: (batch_size, hidden_size).
    context_vector = tf.reduce_sum(attention_weights * values, axis=1)
    print('context_vector -- ', context_vector)

    return context_vector, attention_weights

In [0]:
from tensorflow.keras.layers import Dense, Embedding, Bidirectional, LSTM, Concatenate, BatchNormalization
from tensorflow.keras import Input, Model
from tensorflow.keras import optimizers
import os

In [0]:
# Integer token ids of length max_len go in; each id is mapped to a 128-d vector.
embedding_dim = 128
sequence_input = Input(shape=(max_len,), dtype='int32')
embedding_layer = Embedding(vocab_size, embedding_dim, input_length=max_len)
embedded_sequences = embedding_layer(sequence_input)

In [0]:
# Bidirectional LSTM that returns the per-timestep outputs (return_sequences)
# together with the final hidden/cell states of both directions (return_state).
# NOTE(review): recurrent_activation='relu' overrides the activation used in
# the recurrent step -- confirm this choice is intentional.
bilstm_layer = Bidirectional(
    LSTM(
        128,
        dropout=0.3,
        return_sequences=True,
        return_state=True,
        recurrent_activation='relu',
        recurrent_initializer='glorot_uniform',
    )
)
lstm, forward_h, forward_c, backward_h, backward_c = bilstm_layer(embedded_sequences)



In [0]:
# Show the shapes of the sequence output and of the four final states.
bilstm_outputs = (lstm, forward_h, forward_c, backward_h, backward_c)
print(*(tensor.shape for tensor in bilstm_outputs))

(None, 500, 256) (None, 128) (None, 128) (None, 128) (None, 128)


In [0]:
# Join the forward and backward final states along the last axis.
hidden_states = [forward_h, backward_h]  # hidden states
cell_states = [forward_c, backward_c]    # cell states
state_h = Concatenate()(hidden_states)
state_c = Concatenate()(cell_states)

In [0]:
# Attend over all BiLSTM outputs, using the concatenated final hidden state as the query.
attention = BahdanauAttention(units=128)  # width of the attention projections
attention_outputs = attention(lstm, state_h)
context_vector, attention_weights = attention_outputs

values --  Tensor("bidirectional_4/Identity:0", shape=(None, 500, 256), dtype=float32)
query --  Tensor("concatenate_8/Identity:0", shape=(None, 256), dtype=float32)
attention_weights --  Tensor("bahdanau_attention_4/transpose_1:0", shape=(None, 500, 1), dtype=float32)
context_vector --  Tensor("bahdanau_attention_4/Sum:0", shape=(None, 256), dtype=float32)


In [0]:
# Normalise the attention context vector before it reaches the classifier.
batch_norm = BatchNormalization()
hidden = batch_norm(context_vector)

In [0]:
# Two-way softmax head on top of the normalised context vector, then wire up
# the end-to-end model from the token-id input to the class probabilities.
classifier = Dense(2, activation='softmax')
output = classifier(hidden)
model = Model(inputs=sequence_input, outputs=output)

In [0]:
# Adam optimizer configured with clipnorm=1.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# triggers a warning and is no longer accepted by newer Keras releases.
# The variable keeps its original name because the compile cell refers to it.
Adam = optimizers.Adam(learning_rate=0.001, clipnorm=1.)

In [0]:
# Categorical cross-entropy pairs with the one-hot labels and the softmax output.
compile_options = {
    'optimizer': Adam,
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

In [0]:
# Train for one epoch with mini-batches of 4; the test subset is passed as
# validation data, so it is evaluated at the end of the epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=1,
    batch_size=4,
    validation_data=(X_test, y_test),
    verbose=1,
)

values --  Tensor("model_4/bidirectional_4/concat:0", shape=(4, 500, 256), dtype=float32)
query --  Tensor("model_4/concatenate_8/concat:0", shape=(4, 256), dtype=float32)
attention_weights --  Tensor("model_4/bahdanau_attention_4/transpose_1:0", shape=(4, 500, 1), dtype=float32)
context_vector --  Tensor("model_4/bahdanau_attention_4/Sum:0", shape=(4, 256), dtype=float32)
values --  Tensor("model_4/bidirectional_4/concat:0", shape=(4, 500, 256), dtype=float32)
query --  Tensor("model_4/concatenate_8/concat:0", shape=(4, 256), dtype=float32)
attention_weights --  Tensor("model_4/bahdanau_attention_4/transpose_1:0", shape=(4, 500, 1), dtype=float32)
context_vector --  Tensor("model_4/bahdanau_attention_4/Sum:0", shape=(4, 256), dtype=float32)
query --  Tensor("model_4/concatenate_8/concat:0", shape=(4, 256), dtype=float32)
attention_weights --  Tensor("model_4/bahdanau_attention_4/transpose_1:0", shape=(4, 500, 1), dtype=float32)
context_vector --  Tensor("model_4/bahdanau_attention_4/S