In [1]:
import numpy as np

import tensorflow_datasets as tfds
import tensorflow as tf
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
dataset, info = tfds.load('yelp_polarity_reviews', 
                          with_info=True,
                          as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

train_dataset.element_spec

(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [4]:
for example, label in train_dataset.take(1):
    print('text: ', example.numpy())
    print('label: ', label.numpy())

text:  b"The Groovy P. and I ventured to his old stomping grounds for lunch today.  The '5 and Diner' on 16th St and Colter left me with little to ask for.  Before coming here I had a preconceived notion that 5 & Diners were dirty and nasty. Not the case at all.\\n\\nWe walk in and let the waitress know we want to sit outside (since it's so nice and they had misters).  We get two different servers bringing us stuff (talk about service) and I ask the one waitress for recommendations.  I didn't listen to her, of course, and ordered the Southwestern Burger w/ coleslaw and started with a nice stack of rings.\\n\\nThe Onion Rings were perfectly cooked.  They looked like they were prepackaged, but they were very crispy and I could actually bite through the onion without pulling the entire thing out (don't you hate that?!!!)\\n\\nThe Southwestern Burger was order Medium Rare and was cooked accordingly.  Soft, juicy, and pink with a nice crispy browned outer layer that can only be achieved on 

In [5]:
BUFFER_SIZE = 10000
BATCH_SIZE = 64

In [6]:
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [7]:
for example, label in train_dataset.take(1):
    print('texts: ', example.numpy()[:3])
    print()
    print('labels: ', label.numpy()[:3])

texts:  [b'We took the kids here on Friday to celebrate both our graduation. It was a big disappointment and I had to write a review before I forget!! Service was horrible, they were not busy at all, but it took us 45 minutes to get our food. Our waiter was not friendly at all and didn\'t care about us at all. We watch other servers tend to their customers quickly while ours didn\'t. I would see her trying to blame other people for the poor service to her other tables. We were there for over two hours which is ridiculous for this type of restaurant. When we left my husband told the manager their service was horrible...his only response was \\""what really??\\"" No, apologies whatsoever.\\n\\nAlso we love the strawberry freckle lemonade. However, the one here taste like cold water on ice only! We won\'t be going back to this location EVER!'
 b"The one other buffet besides the Wynn that I actually didn't feel completely repulsed afterwards. Being somewhat a germ freak, I'm not the bigges

In [8]:
VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, label: text))

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [9]:
vocab = np.array(encoder.get_vocabulary())
vocab[:20]

array(['', '[UNK]', 'the', 'and', 'i', 'to', 'a', 'was', 'of', 'it',
       'for', 'in', 'is', 'that', 'my', 'we', 'this', 'with', 'but',
       'they'], dtype='<U13')

In [10]:
encoded_example = encoder(example)[:3].numpy()
encoded_example

array([[ 15, 155,   2, ...,   0,   0,   0],
       [  2,  44,  80, ...,   0,   0,   0],
       [  4, 119,   2, ...,   0,   0,   0]], dtype=int64)

In [11]:
for n in range(3):
    print("Original: ", example[n].numpy())
    print("Round-trip: ", " ".join(vocab[encoded_example[n]]))
    print()

Original:  b'We took the kids here on Friday to celebrate both our graduation. It was a big disappointment and I had to write a review before I forget!! Service was horrible, they were not busy at all, but it took us 45 minutes to get our food. Our waiter was not friendly at all and didn\'t care about us at all. We watch other servers tend to their customers quickly while ours didn\'t. I would see her trying to blame other people for the poor service to her other tables. We were there for over two hours which is ridiculous for this type of restaurant. When we left my husband told the manager their service was horrible...his only response was \\""what really??\\"" No, apologies whatsoever.\\n\\nAlso we love the strawberry freckle lemonade. However, the one here taste like cold water on ice only! We won\'t be going back to this location EVER!'
Round-trip:  we took the kids here on friday to [UNK] both our [UNK] it was a big [UNK] and i had to write a review before i [UNK] service was hor

In [12]:
model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        mask_zero=True),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

In [13]:
print([layer.supports_masking for layer in model.layers])

[False, True, True, True, True]


In [14]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

In [15]:
history = model.fit(train_dataset, epochs=1,
                    validation_data=test_dataset,
                    validation_steps=30)



In [16]:
test_loss, test_acc = model.evaluate(test_dataset)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

Test Loss: 0.2280832678079605
Test Accuracy: 0.894789457321167
