In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Multi-Layer LSTM

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling1D, LSTM, Embedding, Bidirectional
from tensorflow.compat.v1.data import get_output_shapes
from tensorflow.keras.backend import clear_session
from tensorflow.compat.v1 import reset_default_graph
# Print the TensorFlow version in use so the run can be reproduced.
print(tf.__version__)

2.3.0


In [2]:
# Load the IMDB reviews dataset in its 8k-subword configuration.
# as_supervised=True yields (text, label) pairs; with_info=True also returns
# the dataset metadata, which carries the text encoder used further down.
dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)

# Annotated with the public tf.data.Dataset type: the tf.raw_ops.* names are
# op functions rather than classes, so they do not describe these objects.
train_dataset: tf.data.Dataset = dataset["train"]
test_dataset: tf.data.Dataset = dataset["test"]



In [3]:
# Encoder attached to the dataset's 'text' feature; its vocab_size is used
# as the embedding layer's input_dim when the model is built.
tokenizer: tfds.deprecated.text.SubwordTextEncoder = info.features['text'].encoder

In [5]:
BUFFER_SIZE = 10000  # shuffle buffer size (number of examples)
BATCH_SIZE = 64      # examples per padded batch

# Both pipelines are rebuilt from the raw splits in `dataset` rather than from
# their own previous value, so re-running this cell never shuffles or batches
# an already-batched dataset (the cell is idempotent).
# padded_batch pads every example in a batch to the shapes reported by
# get_output_shapes, i.e. up to the longest sequence in that batch.
train_dataset: tf.data.Dataset = (
    dataset["train"]
    .shuffle(BUFFER_SIZE)
    .padded_batch(BATCH_SIZE, get_output_shapes(dataset["train"]))
)
test_dataset: tf.data.Dataset = dataset["test"].padded_batch(
    BATCH_SIZE, get_output_shapes(dataset["test"])
)

In [None]:
# Two stacked bidirectional LSTM layers followed by a small dense head that
# emits a single sigmoid probability per review.
model = Sequential([
    # input_length=None means the sequence length is variable.
    Embedding(input_dim=tokenizer.vocab_size, output_dim=64, name="embedding_1", input_length=None),
    # return_sequences=True: emit the full sequence so the next LSTM can consume it.
    Bidirectional(LSTM(units=64, return_sequences=True), name="LSTM_2"),
    # return_sequences=False: emit only the final output for the dense layers.
    Bidirectional(LSTM(units=32, return_sequences=False), name="LSTM_3"),
    Dense(units=64, activation='relu', name="dense_4"),
    Dense(units=1, activation='sigmoid', name="dense_5")
], name="IMDBsubwords_classifier")
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)
model.summary()

In [None]:
NUM_EPOCHS = 10

# The inputs are tf.data pipelines. Keras documents `workers` and
# `use_multiprocessing` as applying to generator / keras.utils.Sequence input
# only, so they are not passed here.
# `history` keeps the per-epoch metrics that the plotting cells read.
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=NUM_EPOCHS,
)

In [None]:
def plot_graphs(history, string):
    """Plot a training metric together with its validation counterpart.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            sequences of values (the notebook passes the result of
            ``model.fit``).
        string: Key of the training metric; the validation series is read
            from the ``'val_' + string`` key.
    """
    val_string = 'val_' + string
    # Training curve first, validation curve second, matching the legend order.
    for series_name in (string, val_string):
        plt.plot(history.history[series_name])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, val_string])
    plt.show()

In [None]:
# Training vs. validation accuracy recorded by model.fit.
plot_graphs(history=history, string='accuracy')

In [None]:
# Training vs. validation loss recorded by model.fit.
plot_graphs(history=history, string='loss')