In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

### Loading the data from TensorFlow Datasets


In [None]:
# We use 10% of the training data as validation data
train_data, validation_data, test_data = tfds.load(
    name="imdb_reviews",
    split=("train[:90%]", "train[10%:]", "test"),
    as_supervised=True,
)

Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.5DZ6QO_1.0.0/imdb_reviews-train.tfrecor…

Generating test examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.5DZ6QO_1.0.0/imdb_reviews-test.tfrecord…

Generating unsupervised examples...:   0%|          | 0/50000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.5DZ6QO_1.0.0/imdb_reviews-unsupervised.…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [3]:
# Preparing the datasets
BATCH_SIZE = 32

train_data = train_data.shuffle(10000).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
validation_data = validation_data.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_data = test_data.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [4]:
# Print some reviews
for review, label in train_data.take(4):
    print("Review: ", review.numpy()[0])
    print("Label: ", label.numpy()[0])

Review:  b'Mann photographs the Alberta Rocky Mountains in a superb fashion, and Jimmy Stewart and Walter Brennan give enjoyable performances as they always seem to do. <br /><br />But come on Hollywood - a Mountie telling the people of Dawson City, Yukon to elect themselves a marshal (yes a marshal!) and to enforce the law themselves, then gunfighters battling it out on the streets for control of the town? <br /><br />Nothing even remotely resembling that happened on the Canadian side of the border during the Klondike gold rush. Mr. Mann and company appear to have mistaken Dawson City for Deadwood, the Canadian North for the American Wild West.<br /><br />Canadian viewers be prepared for a Reefer Madness type of enjoyable howl with this ludicrous plot, or, to shake your head in disgust.'
Label:  0
Review:  b"This fanciful horror flick has Vincent Price playing a mad magician that realizes his vocational talents have been sold to another. He devise ways of avenging all those that have 

In [5]:
# Generate a vocabulary of the 1,000 most frequent words from the training dataset
vocab_size = 1000
text_vec_layer = tf.keras.layers.TextVectorization(max_tokens=vocab_size)
text_vec_layer.adapt(train_data.map(lambda text, label: text))

In [None]:
# Check how a typical review would be represented
text_vec_layer(
    ["The movie was cool. The animation and the graphics were out of this world."]
)

<tf.Tensor: shape=(1, 14), dtype=int64, numpy=
array([[  2,  18,  14, 641,   2, 750,   3,   2,   1,  67,  46,   5,  11,
        189]])>

### Training a model (with learnable embedding)


In [None]:
# Train a model
embedding_dim = 16
model = tf.keras.models.Sequential(
    [
        text_vec_layer,
        tf.keras.layers.Embedding(vocab_size, embedding_dim),
        tf.keras.layers.GRU(128),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)

In [8]:
# Call the model on a sample input to build it and infer the output shapes.
# This will provide the necessary shape information for the summary.
model(tf.keras.Input(shape=(1,), dtype=tf.string))
model.summary()

In [9]:
# This is a standard binary classification problem, so use cross-entropy loss
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [10]:
# Train the model
history = model.fit(train_data, validation_data=validation_data, epochs=5)

Epoch 1/5
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 44ms/step - accuracy: 0.4942 - loss: 0.6935 - val_accuracy: 0.5015 - val_loss: 0.6931
Epoch 2/5
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 41ms/step - accuracy: 0.5012 - loss: 0.6932 - val_accuracy: 0.5020 - val_loss: 0.6925
Epoch 3/5
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 42ms/step - accuracy: 0.5173 - loss: 0.7188 - val_accuracy: 0.6434 - val_loss: 0.6648
Epoch 4/5
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 41ms/step - accuracy: 0.6037 - loss: 0.6632 - val_accuracy: 0.6570 - val_loss: 0.6281
Epoch 5/5
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 43ms/step - accuracy: 0.6599 - loss: 0.6122 - val_accuracy: 0.6656 - val_loss: 0.6386


In [None]:
# This model has no skill at all (it's just guessing), which is due to the
# many zeros most reviews will have in the end, as TextVectorization pads
# all reviews with as many zeros in the end as the longest sample as total words

# We can see this here:
test_vectorization = tf.keras.layers.TextVectorization(max_tokens=vocab_size)
test_vectorization.adapt(train_data.map(lambda text, label: text))

# We see here the many zeros in the end of the shorter review:
test_vectorization(
    [
        "This is a short review",
        "This here is a much longer review, which causes problems for the vectorization.",
    ]
)

<tf.Tensor: shape=(2, 13), dtype=int64, numpy=
array([[ 11,   7,   4, 348, 740,   0,   0,   0,   0,   0,   0,   0,   0],
       [ 11, 132,   7,   4,  73,   1, 740,  60,   1, 701,  16,   2,   1]])>

In [None]:
# Try again, this time masking out the training zeros in the reviews
# Train a model
embedding_dim = 16
model = tf.keras.models.Sequential(
    [
        text_vec_layer,
        tf.keras.layers.Embedding(vocab_size, embedding_dim, mask_zero=True),
        tf.keras.layers.GRU(128),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)

In [13]:
# This is a standard binary classification problem, so use cross-entropy loss
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the model
history = model.fit(
    train_data,
    validation_data=validation_data,
    epochs=50,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=2)],
)

Epoch 1/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 43ms/step - accuracy: 0.5600 - loss: 0.6825 - val_accuracy: 0.7212 - val_loss: 0.5632
Epoch 2/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 42ms/step - accuracy: 0.7887 - loss: 0.4565 - val_accuracy: 0.8594 - val_loss: 0.3389
Epoch 3/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 43ms/step - accuracy: 0.8578 - loss: 0.3395 - val_accuracy: 0.8737 - val_loss: 0.3094
Epoch 4/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 44ms/step - accuracy: 0.8726 - loss: 0.3063 - val_accuracy: 0.8740 - val_loss: 0.2996
Epoch 5/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 43ms/step - accuracy: 0.8893 - loss: 0.2726 - val_accuracy: 0.8944 - val_loss: 0.2622
Epoch 6/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 42ms/step - accuracy: 0.8899 - loss: 0.2686 - val_accuracy: 0.9047 - val_loss: 0.2418
Epoch 7/50
[1m7

In [15]:
# Test the model
model.evaluate(test_data)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 15ms/step - accuracy: 0.8434 - loss: 0.6515


[0.6388649344444275, 0.8445600271224976]

In [None]:
# See how it works on a new positive review
y_pred = model(tf.constant(["What a great movie. I loved every minute of it!"]))
print(f"Probability of the review being positive: {y_pred[0][0]}")

Probability of the review being positive: 0.9995998740196228


In [None]:
# See how it works on a new negative review
y_pred = model(
    tf.constant(
        ["What a waste of time. I will not go to the movies anytime soon again!"]
    )
)
print(f"Probability of the review being positive: {y_pred[0][0]}")

Probability of the review being positive: 0.014939666725695133


In [None]:
# See how it handles sarcasm:
y_pred = model(
    tf.constant(
        [
            "If you love wasting an afternoon, then there is no better way to do it than watching this movie"
        ]
    )
)
print(f"Probability of the review being positive: {y_pred[0][0]}")

Probability of the review being positive: 0.007791566196829081


### Training a model (with a pre-trained embedding)


In [None]:
# Here we use a pre-trained embedding model obtained from TensorFlow hub:

# This is a pre-trained embedding model
embedding = hub.KerasLayer(
    "https://tfhub.dev/google/universal-sentence-encoder/4",
    trainable=False,
    dtype=tf.string,
    input_shape=[],
)

In [20]:
# Wrap the hub.KerasLayer in a custom layer
# (to make TensorFlow tensors and Keras tensors compatible)
class HubLayer(tf.keras.layers.Layer):
    def __init__(self, handle, trainable=False, **kwargs):
        super(HubLayer, self).__init__(trainable=trainable, **kwargs)
        self.handle = handle
        self.hub_layer = hub.KerasLayer(self.handle)

    def call(self, inputs):
        return self.hub_layer(inputs)

In [None]:
inp = tf.keras.Input(shape=(), dtype=tf.string)

# As always, we do not want to re-train the pre-trained model so freeze weights
x = HubLayer(embedding, trainable=False)(inp)  # Using the custom layer

# # We can now use this in place of the Keras' embedding layer (and also don't need the text vectorization layer).
# Note that the embedding layer produces a 2D output here, so for it to be compatible
# with the following GRU layer, we need to add another data dimension after the
# embedding layer
x = tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=1))(x)
x = tf.keras.layers.GRU(128)(x)

# This is our standard classifier
out = tf.keras.layers.Dense(1, activation="sigmoid")(x)

model = tf.keras.Model(inp, out)
model.summary()

In [25]:
# This is a standard binary classification problem, so use cross-entropy loss
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the model
history = model.fit(
    train_data,
    validation_data=validation_data,
    epochs=50,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=2)],
)

Epoch 1/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 98ms/step - accuracy: 0.8022 - loss: 0.4429 - val_accuracy: 0.8592 - val_loss: 0.3266
Epoch 2/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 74ms/step - accuracy: 0.8602 - loss: 0.3264 - val_accuracy: 0.8659 - val_loss: 0.3157
Epoch 3/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 73ms/step - accuracy: 0.8648 - loss: 0.3193 - val_accuracy: 0.8640 - val_loss: 0.3140
Epoch 4/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 73ms/step - accuracy: 0.8675 - loss: 0.3130 - val_accuracy: 0.8668 - val_loss: 0.3109
Epoch 5/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 75ms/step - accuracy: 0.8674 - loss: 0.3118 - val_accuracy: 0.8672 - val_loss: 0.3091
Epoch 6/50
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 73ms/step - accuracy: 0.8629 - loss: 0.3187 - val_accuracy: 0.8676 - val_loss: 0.3069
Epoch 7/50
[1m7

In [27]:
# Test the model
model.evaluate(test_data)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 37ms/step - accuracy: 0.8310 - loss: 0.5449


[0.5442415475845337, 0.8332399725914001]