In [2]:
#!pip install -q tensorflow_datasets
import numpy as np

import tensorflow_datasets as tfds
import tensorflow as tf

# Turn off tensorflow_datasets progress bars before any dataset is loaded.
tfds.disable_progress_bar()

In [3]:
import matplotlib.pyplot as plt

def plot_graphs(history, metric):
    """Plot a training metric per epoch, plus its validation curve when present.

    Args:
        history: Object exposing a ``history`` mapping from metric name to a
            per-epoch sequence of values (e.g. the value returned by
            ``model.fit``).
        metric: Key of the training metric in ``history.history``, such as
            ``'accuracy'``. The validation curve is looked up under
            ``'val_' + metric``.

    Raises:
        KeyError: If ``metric`` itself is not a key of ``history.history``.
    """
    val_metric = 'val_' + metric
    plt.plot(history.history[metric])
    legend_labels = [metric]
    # The validation series is absent when training ran without validation
    # data; skip that curve instead of failing with a KeyError.
    if val_metric in history.history:
        plt.plot(history.history[val_metric])
        legend_labels.append(val_metric)
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend(legend_labels)

In [4]:
# Load the 'imdb_reviews' dataset as supervised pairs, together with its metadata.
dataset, info = tfds.load(
    'imdb_reviews',
    with_info=True,
    as_supervised=True,
)
train_dataset = dataset['train']
test_dataset = dataset['test']

# Show the structure of a single training element.
train_dataset.element_spec

(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [5]:
# Peek at one raw element of the training split before it is batched.
for example, label in train_dataset.take(1):
    text_value = example.numpy()
    print('text: ', text_value)
    label_value = label.numpy()
    print('label: ', label_value)

text:  b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
label:  0


In [6]:
# Sizes passed to shuffle() and batch() when the input pipelines are built.
BUFFER_SIZE = 10_000
BATCH_SIZE = 64

In [7]:
# Build both pipelines from the untouched splits held in `dataset`, so that
# re-running this cell does not shuffle/batch data that is already batched.
train_dataset = (
    dataset['train']
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
# The test split is only batched and prefetched; it is never shuffled.
test_dataset = dataset['test'].batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [10]:
# Inspect the first three texts and labels of one training batch.
for example, label in train_dataset.take(1):
    first_texts = example.numpy()[:3]
    print('texts: ', first_texts)
    print("#################")
    first_labels = label.numpy()[:3]
    print('labels: ', first_labels)

texts:  [b"This is a dramatic film in the whole sense of the word. It tells a tail that here in Greece we live as a routine in everyday life without realizing how sad it is. Sure it has some extremes.. but every now and then real life sorrow surpasses art.It is deeply critical of the goals we pursue and the whole social structure build around them. The film has a deeper understanding of Greek ways of life, stereotypes, and social structure. Unlike most Greek films that have a certain fast-food-mainstream audience, this one does not target anyone in particular but while you watch it you feel that someone put the best possible words and pictures to describe your feelings. I am not a big fan of traditional music either but I wouldn't like to hear anything else when it was played during the film.<br /><br />If someone told me to say something against this film I'd define the following, sometimes the transition between scenes seemed sudden or somewhat cut. I guess the editing had to cut it 

In [14]:
VOCAB_SIZE = 1000  # upper bound on the size of the encoder's vocabulary

# Current TensorFlow exposes the layer as tf.keras.layers.TextVectorization and
# no longer ships the experimental path; the experimental path is kept only as
# a fallback for older releases that lack the stable name.
_text_vectorization = getattr(tf.keras.layers, 'TextVectorization', None)
if _text_vectorization is None:
    _text_vectorization = (
        tf.keras.layers.experimental.preprocessing.TextVectorization)

encoder = _text_vectorization(max_tokens=VOCAB_SIZE)
# Fit the vocabulary on the review texts only; the labels are dropped.
encoder.adapt(train_dataset.map(lambda text, label: text))

In [15]:
# Keep the learned vocabulary as an array so it can be indexed with token ids.
learned_tokens = encoder.get_vocabulary()
vocab = np.array(learned_tokens)
vocab[:20]

array(['', '[UNK]', 'the', 'and', 'a', 'of', 'to', 'is', 'in', 'it', 'i',
       'this', 'that', 'br', 'was', 'as', 'for', 'with', 'movie', 'but'],
      dtype='<U14')

In [16]:
# Encode the whole batch left in `example` by the preview loop, then keep the
# first three rows.
# NOTE(review): the trailing zeros in the output look like padding up to the
# longest text in the batch - confirm against the TextVectorization docs.
encoded_batch = encoder(example)
encoded_example = encoded_batch[:3].numpy()
encoded_example

array([[ 11,   7,   4, ...,   0,   0,   0],
       [ 10,  26,  41, ...,   0,   0,   0],
       [  4, 192,  11, ...,   0,   0,   0]], dtype=int64)

In [17]:
# Compare each raw text with the text rebuilt from its token ids via `vocab`.
for n in range(3):
    original_text = example[n].numpy()
    print("Original: ", original_text)
    rebuilt_text = " ".join(vocab[encoded_example[n]])
    print("Round-trip: ", rebuilt_text)
    print()

Original:  b"This is a dramatic film in the whole sense of the word. It tells a tail that here in Greece we live as a routine in everyday life without realizing how sad it is. Sure it has some extremes.. but every now and then real life sorrow surpasses art.It is deeply critical of the goals we pursue and the whole social structure build around them. The film has a deeper understanding of Greek ways of life, stereotypes, and social structure. Unlike most Greek films that have a certain fast-food-mainstream audience, this one does not target anyone in particular but while you watch it you feel that someone put the best possible words and pictures to describe your feelings. I am not a big fan of traditional music either but I wouldn't like to hear anything else when it was played during the film.<br /><br />If someone told me to say something against this film I'd define the following, sometimes the transition between scenes seemed sudden or somewhat cut. I guess the editing had to cut i

In [22]:
# Pipeline: text encoder -> embedding -> bidirectional SimpleRNN -> dense head.
embedding_layer = tf.keras.layers.Embedding(
    input_dim=len(encoder.get_vocabulary()),
    output_dim=64,
    # Use masking to handle the variable sequence lengths
    mask_zero=True)
recurrent_layer = tf.keras.layers.Bidirectional(tf.keras.layers.SimpleRNN(64))
hidden_layer = tf.keras.layers.Dense(64, activation='relu')
# The final layer has one unit and no activation.
output_layer = tf.keras.layers.Dense(1)

model = tf.keras.Sequential([
    encoder,
    embedding_layer,
    recurrent_layer,
    hidden_layer,
    output_layer,
])

In [23]:
# The loss is configured with from_logits=True, i.e. the model output is
# treated as a raw logit. The accuracy metric must therefore split classes at
# logit 0.0 (probability 0.5); the default threshold of 0.5 would be applied to
# the logit itself and under-count positive predictions.
# name='accuracy' keeps the 'accuracy' / 'val_accuracy' history keys.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])

In [None]:
# Train for two epochs; validation draws from the test pipeline and is
# limited to three steps.
history = model.fit(
    train_dataset,
    epochs=2,
    validation_data=test_dataset,
    validation_steps=3,
)

Epoch 1/2
 65/391 [===>..........................] - ETA: 9:02 - loss: 0.6950 - accuracy: 0.4940

In [25]:
# Evaluate on the test pipeline; the result unpacks into loss and accuracy.
test_loss, test_acc = model.evaluate(test_dataset)

print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_acc}')



array([[0.2448675 ],
       [0.91274124],
       [0.38132814],
       ...,
       [0.6267383 ],
       [0.16669379],
       [0.61002487]], dtype=float32)

In [26]:
# Run the model on one hand-written review, wrapped in a one-element array.
sample_text = ('The movie was cool. The animation and the graphics '
               'were out of this world. I would recommend this movie.')
sample_batch = np.array([sample_text])
predictions = model.predict(sample_batch)

array([[   0,    0,    0, ...,   14,    6,  717],
       [   0,    0,    0, ...,  125,    4, 3077],
       [  33,    6,   58, ...,    9,   57,  975],
       ...,
       [   0,    0,    0, ...,   21,  846,    2],
       [   0,    0,    0, ..., 2302,    7,  470],
       [   0,    0,    0, ...,   34, 2005, 2643]])