<a href="https://colab.research.google.com/github/UiinKim/UiinKim/blob/main/BiLSTM_with_AttentionMechanism.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Vocabulary size: forwarded to the IMDB loader as `num_words` and reused
# later in the notebook as the embedding's input dimension.
vocab_size=10000
# Load the IMDB train/test splits, each as an (inputs, targets) pair.
(X_train, y_train), (X_test, y_test)=imdb.load_data(num_words=vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [4]:
# Report the longest and the mean review length (number of entries per review)
# over the training split.
print("리뷰 최대 길이 : ", max(map(len, X_train)))
print("리뷰 평균 길이 : ", sum(len(review) for review in X_train) / len(X_train))

리뷰 최대 길이 :  2494
리뷰 평균 길이 :  238.71364


In [5]:
# Common sequence length used for both splits (and later for the model input).
max_len = 500
# Run pad_sequences with maxlen=max_len on each split; the padding/truncation
# side is left at the library default.
X_train, X_test = (
  pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)

In [6]:
import tensorflow as tf

In [7]:
# Bahdanau (additive) attention
class BahdanauAttention(tf.keras.Model):
  """Additive attention that pools a sequence of vectors into one context vector.

  `values` and `query` are each projected to `units` dimensions, added,
  passed through tanh and reduced to a single score per time step. A softmax
  over the time axis turns the scores into weights, which are used to form a
  weighted sum of `values`.

  No padding mask is applied inside this class: every time step of `values`
  takes part in the softmax.
  """

  def __init__(self, units):
    """Create the projection layers.

    Args:
      units: Width of the intermediate space in which the projected `values`
        and `query` are added together.
    """
    super().__init__()
    # Referenced through `tf` so the class only needs `import tensorflow as tf`
    # and does not depend on a bare `Dense` name being imported separately.
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, values, query):
    """Compute the context vector and the attention weights.

    The keys and the values are the same tensor (`values`).

    Args:
      values: Sequence to attend over; expected shape
        (batch_size, max_length, hidden_size).
      query: Vector the sequence is scored against; expected shape
        (batch_size, hidden_size).

    Returns:
      A tuple `(context_vector, attention_weights)` with expected shapes
      (batch_size, hidden_size) and (batch_size, max_length, 1).
    """
    # query: (batch_size, hidden_size) -> (batch_size, 1, hidden_size).
    # The inserted time axis lets the projected query broadcast against the
    # projected values in the addition below.
    hidden_with_time_axis = tf.expand_dims(query, 1)

    # Before self.V the tensor is (batch_size, max_length, units); self.V has a
    # single output unit, so score is (batch_size, max_length, 1).
    score = self.V(tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))

    # Normalise over the time axis: attention_weights is
    # (batch_size, max_length, 1) and sums to 1 along axis 1.
    attention_weights = tf.nn.softmax(score, axis=1)

    # Weight every time step, then sum the time axis away:
    # context_vector is (batch_size, hidden_size).
    context_vector = attention_weights * values
    context_vector = tf.reduce_sum(context_vector, axis=1)

    return context_vector, attention_weights

In [9]:
#양방향 LSTM
from tensorflow.keras.layers import Dense, Embedding, Bidirectional, LSTM, Concatenate, Dropout
from tensorflow.keras import Input, Model
from tensorflow.keras import optimizers
import os

In [10]:
# Integer input of fixed length max_len.
sequence_input = Input(shape=(max_len,), dtype='int32')
# 128-dimensional embeddings over a vocabulary of vocab_size entries, with
# masking of index 0 enabled (mask_zero=True).
# `input_length` is intentionally not passed: the sequence length is already
# fixed by the Input shape above, and the argument is deprecated in Keras 3.
embeded_sequences = Embedding(vocab_size, 128, mask_zero=True)(sequence_input)

In [11]:
# First LSTM layer: bidirectional, returning an output for every time step.
lstm = Bidirectional(
  layer=LSTM(units=64, dropout=0.5, return_sequences=True)
)(embeded_sequences)

In [12]:
# Second LSTM layer: besides the per-step outputs it also returns its states.
# Note: `lstm` is both the input and the first output of this statement, so
# re-running this cell on its own feeds the layer the previous run's output.
(lstm,
 forward_h, forward_c,
 backward_h, backward_c) = Bidirectional(
  layer=LSTM(units=64, dropout=0.5, return_sequences=True, return_state=True)
)(lstm)

In [13]:
# Show the shapes of the sequence output and of the four returned states.
# Expected: a 128-wide vector at every time step for `lstm`, and a 64-wide
# vector for each of the hidden/cell states of the two directions.
print(*(tensor.shape for tensor in (lstm, forward_h, forward_c, backward_h, backward_c)))

(None, 500, 128) (None, 64) (None, 64) (None, 64) (None, 64)


In [14]:
# Join the states of the two LSTM directions along the feature axis.
state_h = Concatenate(axis=-1)([forward_h, backward_h])  # hidden state
state_c = Concatenate(axis=-1)([forward_c, backward_c])  # cell state

In [16]:
# `units` sets the width of the attention module's internal projections.
attention = BahdanauAttention(units=64)
# The attention mechanism is queried with the concatenated hidden state (h),
# attending over the per-step outputs of the second BiLSTM.
context_vector, attention_weights = attention(lstm, state_h)

In [17]:
# Classification head on top of the attention context vector:
# 20-unit ReLU layer -> dropout -> single sigmoid output.
dense1 = Dense(units=20, activation='relu')(context_vector)
dropout = Dropout(rate=0.5)(dense1)
output = Dense(units=1, activation='sigmoid')(dropout)
# Wrap the graph from the integer sequence input to the sigmoid output.
model = Model(inputs=sequence_input, outputs=output)

In [18]:
# Binary classification setup: cross-entropy loss, Adam, accuracy as metric.
model.compile(
  optimizer='adam',
  loss='binary_crossentropy',
  metrics=['accuracy'],
)

In [None]:
# Train for 3 epochs with batches of 256.
# NOTE(review): the test split is also passed as validation data here; if the
# validation metrics are ever used for tuning or early stopping, hold out a
# separate validation set instead.
history = model.fit(
  x=X_train,
  y=y_train,
  batch_size=256,
  epochs=3,
  validation_data=(X_test, y_test),
  verbose=1,
)

Epoch 1/3

In [None]:
# Evaluate on the test split and print entry 1 of the result — presumably the
# 'accuracy' metric configured in compile(), following the loss at entry 0.
print("\n 테스트 정확도 : %.4f" % model.evaluate(x=X_test, y=y_test)[1])