In [None]:
import numpy as np
import tensorflow_datasets as tfds
import tensorflow as tf


# Turn off tensorflow_datasets progress bars so cell output stays compact.
tfds.disable_progress_bar()

In [None]:
import matplotlib.pyplot as plt

def plot_graphs(history, metric):
  """Plot a training metric and its validation counterpart against epochs.

  Args:
    history: Object exposing a `history` dict (as returned by `model.fit`)
      that maps metric names to per-epoch value lists.
    metric: Name of the training metric to plot, e.g. 'accuracy' or 'loss'.
      The matching validation series is read from 'val_' + metric.

  Raises:
    KeyError: If `metric` or 'val_' + metric is missing from
      `history.history`.
  """
  plt.plot(history.history[metric])
  # No format string here: the previous ' ' format selected matplotlib's
  # "draw nothing" line style, which made the validation curve invisible
  # even though it still appeared in the legend.
  plt.plot(history.history['val_' + metric])
  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, 'val_' + metric])

In [None]:
# Load the 'imdb_reviews' dataset as (text, label) pairs together with its
# metadata object.
dataset, info = tfds.load(
    'imdb_reviews',
    with_info=True,
    as_supervised=True,
)

# Pull out the two splits used in the rest of the notebook.
train_dataset = dataset['train']
test_dataset = dataset['test']

# Display the element structure of the training split.
train_dataset.element_spec

(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [None]:
# Peek at one element of the training split. `example` and `label` stay bound
# after the loop and are reused by later cells.
for example, label in train_dataset.take(1):
  print(f'text: {example.numpy()}', f'label: {label.numpy()}', sep='\n')

text: [b'Thank you Hollywood. Yet another movie classic utterly ruined by a cheap, shallow, effect-heavy and redundant remake. The original "Planet of the Apes" was an intelligent and thought-provoking movie with a very clear message. It was a movie that focused almost entirely on dialogue, which sounds very dull but was in fact very interesting. <br /><br />This movie, on the other hand, seems to have done away with pretty much ALL the dialogues. Instead of a great movie we get an incredibly stupid two hour chase movie. Dialogue has been reduced to a mere minimum, character interaction and development are non-existent and most of the time it\'s extremely hard to figure out what\'s going on. Instead, we get a bunch of pointless action scenes, some marginally funny one-liners and some very hollow quasi-intelligent conversations. <br /><br />The only thing worth mentioning about this movie is that it looks absolutely fantastic. The make-up of the apes is magnificent, and the sets and bac

In [None]:
# Tunable settings for the input pipeline and the text encoder.
BUFFER_SIZE = 10_000  # shuffle buffer size passed to Dataset.shuffle
BATCH_SIZE = 64       # number of reviews per batch
VOCAB_SIZE = 1_000    # max_tokens for the TextVectorization layer

In [None]:
# Training pipeline: shuffle, batch, then prefetch.
# NOTE: this rebinds `train_dataset`/`test_dataset` in place, so re-running
# this cell without reloading the data would batch already-batched data.
train_dataset = (
    train_dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Evaluation pipeline: same batching and prefetching, no shuffling.
test_dataset = (
    test_dataset
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Text-to-integer encoder limited to VOCAB_SIZE tokens.
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)

# Fit the vocabulary on the review text only; labels are dropped by the map.
encoder.adapt(train_dataset.map(lambda review, _label: review))

In [None]:
# Keep the learned vocabulary as a NumPy array so it can be indexed with
# arrays of token ids later on.
vocab = np.asarray(encoder.get_vocabulary())

# Show the first 20 vocabulary entries.
vocab[:20]

array(['', '[UNK]', 'the', 'and', 'a', 'of', 'to', 'is', 'in', 'it', 'i',
       'this', 'that', 'br', 'was', 'as', 'for', 'with', 'movie', 'but'],
      dtype='<U14')

In [None]:
# The earlier preview cell binds `example` before the dataset is batched, so on
# a fresh top-to-bottom run it is a single scalar string and the per-review
# indexing (`example[n]`) used further down fails. Draw one batch from the
# now-batched training set so `example` holds a batch of reviews.
example, label = next(iter(train_dataset.take(1)))

# Vectorize the batch and keep the encodings of its first 10 reviews.
encoded_example = encoder(example)[:10].numpy()

encoded_example

array([[  1,  23, 355, ...,   0,   0,   0],
       [142,   1,  12, ...,   0,   0,   0],
       [ 49,  10, 782, ...,   0,   0,   0],
       ...,
       [ 11,  20,  44, ...,   0,   0,   0],
       [412, 650, 298, ...,   0,   0,   0],
       [  1,   1,   1, ...,   0,   0,   0]])

In [None]:
# Display the shape of `example`. The recorded output is TensorShape([64]),
# which presumably reflects a run where `example` came from the batched
# dataset -- TODO confirm on a fresh kernel.
example.shape

TensorShape([64])

In [None]:
# Show the first three reviews next to their vocabulary round-trip
# (token ids mapped back to vocabulary entries).
for n in range(3):
  # .numpy() must be called; the bare attribute printed a bound-method repr.
  print(f"Original: {example[n].numpy()}")
  # Join decoded tokens with a space so individual words stay readable.
  print(f'Apos o vocab: {" ".join(vocab[encoded_example[n]])}')
  print()

Original: <bound method _EagerTensorBase.numpy of <tf.Tensor: shape=(), dtype=string, numpy=b'Thank you Hollywood. Yet another movie classic utterly ruined by a cheap, shallow, effect-heavy and redundant remake. The original "Planet of the Apes" was an intelligent and thought-provoking movie with a very clear message. It was a movie that focused almost entirely on dialogue, which sounds very dull but was in fact very interesting. <br /><br />This movie, on the other hand, seems to have done away with pretty much ALL the dialogues. Instead of a great movie we get an incredibly stupid two hour chase movie. Dialogue has been reduced to a mere minimum, character interaction and development are non-existent and most of the time it\'s extremely hard to figure out what\'s going on. Instead, we get a bunch of pointless action scenes, some marginally funny one-liners and some very hollow quasi-intelligent conversations. <br /><br />The only thing worth mentioning about this movie is that it loo

In [None]:
# Assemble the classifier layer by layer: raw strings in, one logit out.
model = tf.keras.Sequential()

# Raw text -> padded sequences of integer token ids.
model.add(encoder)

# Token ids -> 64-dimensional vectors; mask_zero=True marks id 0 as padding.
model.add(
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        mask_zero=True,
    )
)

# Bidirectional wrapper around a 64-unit LSTM.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))

# Dense head; the final layer has no activation, so it emits a raw logit.
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1))

In [None]:
# from_logits=True because the model's last Dense(1) layer has no activation.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs. validation_steps=30 limits each validation pass to
# 30 batches of test_dataset rather than the whole split.
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset,
    validation_steps=30,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
