In [4]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# 首先，我们设定数据集的参数
num_words = 10000  # 仅保留数据集中前10,000个最常见的单词
maxlen = 500  # 每条评论的最大长度

# 加载IMDB数据集
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# 将评论填充（或截断）到相同的长度，以便可以将它们送入LSTM层
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

# 建立序贯模型（sequential model）
model = Sequential()

# 将输入数据embed到128维向量空间
model.add(Embedding(num_words, 128))

# 加入LSTM层，输出的维度为128
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))

# 最后，我们通过一个全连接（dense）层进行分类
model.add(Dense(1, activation='sigmoid'))

# 指定损失函数和优化器，并进行模型编译
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 训练模型
model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test, y_test))

# 在测试集上评估模型表现
score, acc = model.evaluate(x_test, y_test, batch_size=32)
print('Test score:', score)
print('Test accuracy:', acc)


Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 231ms/step - accuracy: 0.7109 - loss: 0.5382 - val_accuracy: 0.8330 - val_loss: 0.3816
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 234ms/step - accuracy: 0.8427 - loss: 0.3676 - val_accuracy: 0.8468 - val_loss: 0.3664
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 244ms/step - accuracy: 0.8751 - loss: 0.3115 - val_accuracy: 0.7036 - val_loss: 0.5496
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 240ms/step - accuracy: 0.8837 - loss: 0.2886 - val_accuracy: 0.8816 - val_loss: 0.2995
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 261ms/step - accuracy: 0.9373 - loss: 0.1648 - val_accuracy: 0.8779 - val_loss: 0.3185
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 231ms/step - accuracy: 0.9612 - loss: 0.1103 - val_accuracy: 0.8748 - val_loss: 0.3588
Epoc