<a href="https://colab.research.google.com/github/baeseungyou/study/blob/main/10%EC%A3%BC%EC%B0%A8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Embedding, Bidirectional, LSTM, Concatenate, Dropout
from tensorflow.keras import Input, Model
from tensorflow.keras import optimizers
import os
import pandas as pd

In [5]:
# Vocabulary cap; passed to the Keras Tokenizer as `num_words` further down.
vocab_size = 10_000

In [7]:
# Tokenizer capped by `vocab_size`; it is fitted on the training reviews only.
tokenizer = Tokenizer(num_words=vocab_size)

# Fixed sequence length used when the reviews are padded later on.
max_len = 500

# Training split: the 'text' column holds the reviews, 'label' the targets.
train_df = pd.read_csv('imdb_train.csv')
y_train = train_df['label']

# Learn the word index from the raw training text, then encode that same text
# as lists of integer token ids.
tokenizer.fit_on_texts(train_df['text'])
X_train = tokenizer.texts_to_sequences(train_df['text'])

In [8]:
# Test split, read with the same 'text' / 'label' column layout as training.
test_df = pd.read_csv('imdb_test.csv')
y_test = test_df['label']

# Encode with the tokenizer that was fitted on the training reviews; it is
# not re-fitted here.
X_test = tokenizer.texts_to_sequences(test_df['text'])

In [9]:
# Token counts of the encoded training reviews (computed before padding):
# print the maximum and the mean review length.
train_lengths = [len(seq) for seq in X_train]
print('리뷰의 최대 길이 : {}'.format(max(train_lengths)))
print('리뷰의 평균 길이 : {}'.format(sum(train_lengths)/len(train_lengths)))

리뷰의 최대 길이 : 2194
리뷰의 평균 길이 : 224.05292


In [10]:
# Token counts of the encoded test reviews (computed before padding):
# print the maximum and the mean review length.
test_lengths = [len(seq) for seq in X_test]
print('리뷰의 최대 길이 : {}'.format(max(test_lengths)))
print('리뷰의 평균 길이 : {}'.format(sum(test_lengths)/len(test_lengths)))

리뷰의 최대 길이 : 2198
리뷰의 평균 길이 : 217.93796


In [11]:
# Bring every encoded review to exactly `max_len` entries. padding='post'
# appends the fill value after short reviews; no `truncating` argument is
# passed, so longer reviews are shortened according to the Keras default.
X_train = pad_sequences(X_train, padding='post', maxlen=max_len)
X_test = pad_sequences(X_test, padding='post', maxlen=max_len)

In [17]:
import tensorflow as tf
from tensorflow.keras.layers import Dense

class BahanauAttention(tf.keras.Model):
    """Additive attention that pools a sequence into a single context vector.

    A score is computed for every position along axis 1 of ``values`` as
    ``V(tanh(W1(values) + W2(query)))``, the scores are softmax-normalised
    along that axis, and ``values`` is summed with those weights.

    Args:
        units: Output width of the two projection layers ``W1`` and ``W2``.

    In this notebook the layer is called with the BiLSTM sequence output as
    ``values`` and the concatenated final hidden states as ``query``.

    NOTE(review): ``call`` takes no mask, so every position along axis 1
    (including any padded ones) takes part in the softmax.
    """

    def __init__(self, units):
        super().__init__()
        self.W1 = Dense(units)  # projects each entry of `values`
        self.W2 = Dense(units)  # projects the query
        self.V = Dense(1)       # reduces each combined projection to one score

    def call(self, values, query):
        """Return ``(context_vector, attention_weights)`` for the given inputs.

        ``attention_weights`` keeps the trailing size-1 axis produced by ``V``;
        ``context_vector`` is ``values`` reduced over axis 1.
        """
        # Add a length-1 axis at position 1 so the projected query broadcasts
        # against the projected values along axis 1.
        query_expanded = tf.expand_dims(query, 1)
        combined = self.W1(values) + self.W2(query_expanded)
        score = self.V(tf.nn.tanh(combined))

        # Normalise the scores over axis 1, then take the weighted sum of values.
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        return context_vector, attention_weights

In [19]:
from tensorflow.keras.layers import Embedding, Input, LSTM, Bidirectional

# Embedding table size: the tokenizer's word cap plus one extra row for the
# padding token. It is derived from `tokenizer.num_words` instead of
# incrementing `vocab_size` in place, so re-running this cell no longer grows
# the value by one each time (a fresh run still yields the same number).
vocab_size = tokenizer.num_words + 1

# Integer token ids, one row of `max_len` entries per review.
sequence_input = Input(shape=(max_len,), dtype='int32')

# mask_zero=True makes the embedding treat id 0 (the padding value) as masked.
embedded_sequences = Embedding(vocab_size, 128, mask_zero=True)(sequence_input)

# First bidirectional LSTM: returns the full output sequence.
lstm = Bidirectional(LSTM(64, dropout=0.5, return_sequences=True))(embedded_sequences)

# Second bidirectional LSTM: returns the output sequence plus the final
# hidden (h) and cell (c) states of the forward and backward directions.
lstm, forward_h, forward_c, backward_h, backward_c = Bidirectional(
    LSTM(64, dropout=0.5, return_sequences=True, return_state=True)
)(lstm)

In [21]:
# Sanity check: symbolic shapes of the second BiLSTM's sequence output and of
# its four final-state tensors.
print(*(tensor.shape for tensor in (lstm, forward_h, forward_c, backward_h, backward_c)))

(None, 500, 128) (None, 64) (None, 64) (None, 64) (None, 64)


In [22]:
# Join the forward and backward final states along the last axis, giving one
# hidden-state tensor and one cell-state tensor.
state_h = Concatenate(axis=-1)([forward_h, backward_h])
state_c = Concatenate(axis=-1)([forward_c, backward_c])

In [23]:
# Attend over the BiLSTM output sequence (`values`) using the concatenated
# final hidden state as the query.
attention = BahanauAttention(units=64)
context_vector, attention_weights = attention(lstm, state_h)



In [24]:
# Classification head on top of the attention context vector:
# 20-unit ReLU layer -> dropout (rate 0.5) -> one sigmoid output unit.
dense1 = Dense(units=20, activation='relu')(context_vector)
dropout = Dropout(rate=0.5)(dense1)
output = Dense(units=1, activation='sigmoid')(dropout)

# Functional model mapping the token-id input to the sigmoid output.
model = Model(inputs=sequence_input, outputs=output)

In [26]:
# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked as
# the only metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [27]:
# Train for 5 epochs with batches of 256 reviews.
# NOTE(review): the test split is passed as validation data, so the val_*
# numbers reported per epoch are computed on the same data that the final
# evaluation cell uses.
history = model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=5,
    verbose=1,
    validation_data=(X_test, y_test),
)

Epoch 1/5




[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 190ms/step - accuracy: 0.6433 - loss: 0.6044 - val_accuracy: 0.8355 - val_loss: 0.3738
Epoch 2/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 190ms/step - accuracy: 0.8963 - loss: 0.2809 - val_accuracy: 0.8806 - val_loss: 0.2909
Epoch 3/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 183ms/step - accuracy: 0.9301 - loss: 0.1993 - val_accuracy: 0.8665 - val_loss: 0.3249
Epoch 4/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 181ms/step - accuracy: 0.9431 - loss: 0.1692 - val_accuracy: 0.8767 - val_loss: 0.3188
Epoch 5/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 180ms/step - accuracy: 0.9562 - loss: 0.1275 - val_accuracy: 0.8758 - val_loss: 0.3649


In [28]:
# Evaluate on the test split and print the entry at index 1 of the result,
# which is presumably the accuracy metric set in compile(), after the loss.
evaluation = model.evaluate(X_test, y_test)
print("\n 테스트 정확도: %.4f" % (evaluation[1]))

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 27ms/step - accuracy: 0.8825 - loss: 0.3533

 테스트 정확도: 0.8745
