영화 리뷰의 마지막 단어를 예측

In [2]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import set_random_seed

In [3]:
# Settings shared by the cells below.
maxlen = 100            # every review is padded/truncated to this many tokens
max_features = 10_000   # vocabulary size: passed as num_words to the loader and
                        # reused as the model's input and output dimension

In [4]:
# Fetch the IMDb reviews as integer-encoded sequences, with the vocabulary
# capped at `max_features` via num_words.
train_split, test_split = imdb.load_data(num_words=max_features)
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [5]:
# Bring every review to exactly `maxlen` tokens.
# 'pre' is the Keras default (spelled out here): padding and truncation both
# happen at the front, so for any non-empty review the last column still holds
# its real final token.
x_train, x_test = [
    pad_sequences(reviews, maxlen=maxlen, padding='pre', truncating='pre')
    for reviews in (x_train, x_test)
]

In [6]:
# Split each padded sequence into context (all tokens but the last)
# and target (the last token, i.e. the word to predict).
X, y = x_train[:, :-1], x_train[:, -1]
# NOTE: this rebinds y_test, replacing the label array that imdb.load_data
# returned with the final-token targets of the test sequences.
X_test, y_test = x_test[:, :-1], x_test[:, -1]

In [7]:
# Fetch the IMDb vocabulary as a dict keyed by word with an integer index as value.
# These raw indices are not the ids stored in the sequences: the inverse mapping
# built from this dict shifts every index by 3.
word_index = imdb.get_word_index()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [8]:
# Invert the vocabulary: encoded token id -> word.
# Vocabulary indices are shifted by 3 so that ids 0-3 stay free for the
# special markers registered right after.
reverse_word_index = {idx + 3: word for word, idx in word_index.items()}
reverse_word_index.update({
    0: '<PAD>',
    1: '<START>',
    2: '<UNK>',
    3: '<UNUSED>',
})

In [11]:
# Decode the first training context back into text. X[0] excludes the
# held-out final token, so the printed review stops one word early.
# Ids missing from the mapping are shown as '?'.
decoded_review = ' '.join(
    reverse_word_index.get(token_id, '?') for token_id in X[0]
)
print(decoded_review)

cry at a film it must have been good and this definitely was also <UNK> to the two little boy's that played the <UNK> of norman and paul they were just brilliant children are often left out of the <UNK> list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us


In [12]:
# Build a model
# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so that
# weight initialisation (and the shuffling done later by fit) starts from the
# same state on every Restart & Run All. GPU kernels may still add small
# run-to-run differences.
set_random_seed(42)

model = Sequential([
    # token id -> 128-dimensional dense vector
    Embedding(max_features, 128),
    # return_sequences=False: only the output of the last time step is passed on
    LSTM(128, return_sequences=False),
    # one softmax probability per vocabulary id (the predicted final word)
    Dense(max_features, activation='softmax'),
])

In [13]:
# Configure training: the targets are integer word ids (not one-hot vectors),
# hence the sparse variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [14]:
# Fit the model
# Keep the returned History object: its `.history` dict holds the per-epoch
# training/validation metrics, which would otherwise be lost (only the object's
# repr was shown as cell output before).
# validation_split=0.2 reserves 20% of X/y for validation.
history = model.fit(
    X,
    y,
    batch_size=128,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f1f496a08e0>

In [15]:
# Score the trained model on the test contexts against their final-token targets.
# evaluate returns the loss followed by the compiled metrics (here: accuracy).
test_loss, test_acc = model.evaluate(
    x=X_test,
    y=y_test,
    verbose=2,
)
print(f'Test loss: {test_loss:.4f}')
print(f'Test accuracy: {test_acc:.4f}')

782/782 - 48s - loss: 6.2826 - accuracy: 0.1970 - 48s/epoch - 61ms/step
Test loss: 6.2826
Test accuracy: 0.1970
