In [1]:
import sys

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

# Disabled alternative: request GPU memory growth through an environment
# variable. The notebook calls tf.config.experimental.set_memory_growth instead.
# import os
# os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"


# Turn off the tensorflow_datasets progress bars for this session.
tfds.disable_progress_bar()

In [28]:
# Show the installed TensorFlow version for reproducibility.
tf.__version__

'2.5.0'

In [2]:
# List the GPUs TensorFlow can see. Uses the stable (non-experimental) API,
# matching the call made in the memory-growth cell.
devices = tf.config.list_physical_devices('GPU')
devices

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it up front.
# The GPU list is queried here so this cell does not depend on a variable
# defined in another cell.
physical_devices = tf.config.list_physical_devices('GPU')

if not physical_devices:
    print("No GPU detected; running on CPU")
else:
    try:
        # Apply the setting to every visible GPU, not only the first one.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Success")
    except (RuntimeError, ValueError) as err:
        # RuntimeError: the runtime was already initialized before this call.
        # ValueError: an invalid physical device was specified.
        print("Could not enable GPU memory growth:", err)

Success


**Read more about this dataset here: https://ai.stanford.edu/~amaas/data/sentiment/**

As described on that page:

> This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.

In [4]:
# Load the IMDB reviews dataset, using the current directory as data_dir.
# with_info=True also returns the dataset metadata; as_supervised=True
# yields (text, label) pairs.
dataset, info = tfds.load(
    'imdb_reviews',
    data_dir='.',
    with_info=True,
    as_supervised=True,
)

In [5]:
# Display the dataset metadata returned by tfds.load (with_info=True).
info

tfds.core.DatasetInfo(
    name='imdb_reviews',
    version=1.0.0,
    description='Large Movie Review Dataset.
This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.',
    homepage='http://ai.stanford.edu/~amaas/data/sentiment/',
    features=FeaturesDict({
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
        'text': Text(shape=(), dtype=tf.string),
    }),
    total_num_examples=100000,
    splits={
        'test': 25000,
        'train': 25000,
        'unsupervised': 50000,
    },
    supervised_keys=('text', 'label'),
    citation="""@InProceedings{maas-EtAl:2011:ACL-HLT2011,
      author    = {Maas, Andrew L.  and  Daly, Raymond E.  and  Pham, Peter T.  and  Huang, Dan  and  Ng, Andrew Y.  and  Potts, Christopher},
      title     = {Learning Word

In [6]:
# Display the dict of splits returned by tfds.load.
dataset

{'test': <PrefetchDataset shapes: ((), ()), types: (tf.string, tf.int64)>,
 'train': <PrefetchDataset shapes: ((), ()), types: (tf.string, tf.int64)>,
 'unsupervised': <PrefetchDataset shapes: ((), ()), types: (tf.string, tf.int64)>}

In [7]:
# Keep only the labelled splits; the 'unsupervised' split is not used here.
train_dataset = dataset['train']
test_dataset = dataset['test']

In [8]:
# Inspect the concrete dataset class of the training split.
type(train_dataset)

tensorflow.python.data.ops.dataset_ops.PrefetchDataset

In [9]:
# Number of elements currently in train_dataset (individual examples while the
# dataset is still unbatched).
len(train_dataset)

25000

In [11]:
# Number of elements currently in test_dataset (individual examples while the
# dataset is still unbatched).
len(test_dataset)

25000

In [9]:
# Peek at a single (text, label) element of the training split.
for sample in train_dataset.take(1):
    review_text, review_label = sample
    print(review_text.numpy())
    print(review_label.numpy())

b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
0


In [12]:
# Shuffle buffer size passed to Dataset.shuffle when building the input pipeline.
BUFFER_SIZE = 10000
# Number of examples per batch passed to Dataset.batch.
BATCH_SIZE = 64

In [13]:
# Build the batched input pipelines from the raw splits in `dataset`.
# Deriving from dataset[...] rather than from train_dataset/test_dataset
# themselves keeps this cell idempotent: re-running it no longer batches
# already-batched data.
train_dataset = (
    dataset['train']
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
# The test split is not shuffled.
test_dataset = dataset['test'].batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [14]:
# Pull one batch and show its first three reviews and labels.
# `example` stays bound after the loop and is reused by later cells.
for example, label in train_dataset.take(1):
    first_texts = example[:3].numpy()
    first_labels = label[:3].numpy()
    print('texts: ', first_texts)
    print()
    print('labels: ', first_labels)

texts:  [b"This movie was exactly what I expected, not great, but also not that bad either. In my opinion PG13 movies aren't scary enough so that's why I already knew I was going to be bored throughout the entire film. Sure there were scary things going on in the hotel room, but nothing we all haven't already seen. I guess I didn't like it because I thought there were too many twists and turns happening; it got old and repetitive. I also didn't understand if all the things Cusack was experiencing in the room was real or not. There is no explanation for any of the events that occurred. The movie just drags on and when it finally does come to an end you want it to keep going because you are still waiting around for someone to tell you what the whole movie was about. What I did like was the special effects. Other than that there wasn't much enjoyment from it. Maybe its just me but I thought this was below average."
 b"Sad story of a downed B-17 pilot. Brady is shot down over occupied terr

In [15]:
# Toy demonstration of TextVectorization on three short sentences.
toy_corpus = [
    "I love samosas and jalebi",
    "I love biking and yoga",
    "I love tensorflow",
]
e = tf.keras.layers.experimental.preprocessing.TextVectorization()
e.adapt(toy_corpus)

In [16]:
# List the vocabulary learned by the toy vectorizer.
e.get_vocabulary()

['',
 '[UNK]',
 'love',
 'i',
 'and',
 'yoga',
 'tensorflow',
 'samosas',
 'jalebi',
 'biking']

In [17]:
# Vectorize a sentence containing a word ("pizza") that is not in the toy
# vocabulary; in the recorded output it maps to index 1, the '[UNK]' entry.
e(["I love pizza"]).numpy()

array([[3, 2, 1]], dtype=int64)

In [18]:
# Fit the real text encoder on the training reviews, capped at VOCAB_SIZE tokens.
VOCAB_SIZE = 1000
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(max_tokens=VOCAB_SIZE)
# adapt() only needs the review text, so the labels are dropped first.
text_only_dataset = train_dataset.map(lambda text, label: text)
encoder.adapt(text_only_dataset)

In [19]:
# Keep the vocabulary as a NumPy array so it can be indexed with arrays of
# token ids; show its first 25 entries.
vocab = np.array(encoder.get_vocabulary())
vocab[:25]

array(['', '[UNK]', 'the', 'and', 'a', 'of', 'to', 'is', 'in', 'it', 'i',
       'this', 'that', 'br', 'was', 'as', 'for', 'with', 'movie', 'but',
       'film', 'on', 'not', 'you', 'are'], dtype='<U14')

In [20]:
# First two raw review strings of the batch captured in `example`.
example[:2]

<tf.Tensor: shape=(2,), dtype=string, numpy=
array([b"This movie was exactly what I expected, not great, but also not that bad either. In my opinion PG13 movies aren't scary enough so that's why I already knew I was going to be bored throughout the entire film. Sure there were scary things going on in the hotel room, but nothing we all haven't already seen. I guess I didn't like it because I thought there were too many twists and turns happening; it got old and repetitive. I also didn't understand if all the things Cusack was experiencing in the room was real or not. There is no explanation for any of the events that occurred. The movie just drags on and when it finally does come to an end you want it to keep going because you are still waiting around for someone to tell you what the whole movie was about. What I did like was the special effects. Other than that there wasn't much enjoyment from it. Maybe its just me but I thought this was below average.",
       b"Sad story of a downed

In [21]:
# Vectorize the whole batch in `example`, then keep the first three rows as a
# NumPy array of token ids.
encoded_example = encoder(example)[:3].numpy()
encoded_example

array([[ 11,  18,  14, ...,   0,   0,   0],
       [614,  64,   5, ...,   0,   0,   0],
       [ 48,   7,   4, ...,   0,   0,   0]], dtype=int64)

In [22]:
# Compare each raw review with its tokens mapped back through the vocabulary.
for n, token_ids in enumerate(encoded_example):
    original_text = example[n].numpy()
    round_trip_text = " ".join(vocab[token_ids])
    print("Original: ", original_text)
    print("Round-trip: ", round_trip_text)
    print()

Original:  b"This movie was exactly what I expected, not great, but also not that bad either. In my opinion PG13 movies aren't scary enough so that's why I already knew I was going to be bored throughout the entire film. Sure there were scary things going on in the hotel room, but nothing we all haven't already seen. I guess I didn't like it because I thought there were too many twists and turns happening; it got old and repetitive. I also didn't understand if all the things Cusack was experiencing in the room was real or not. There is no explanation for any of the events that occurred. The movie just drags on and when it finally does come to an end you want it to keep going because you are still waiting around for someone to tell you what the whole movie was about. What I did like was the special effects. Other than that there wasn't much enjoyment from it. Maybe its just me but I thought this was below average."
Round-trip:  this movie was exactly what i expected not great but also n

In [23]:
# Sentiment classifier: text encoder -> embedding -> bidirectional LSTM ->
# dense head that outputs a single value with no activation.
model = tf.keras.Sequential()
model.add(encoder)
model.add(
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        # Use masking to handle the variable sequence lengths
        mask_zero=True,
    )
)
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1))

In [24]:
# Sanity-check the forward pass on one sentence. This cell comes before the
# compile and fit cells, and the final Dense(1) layer has no activation, so
# the printed value is an unbounded score rather than a probability.
sample_text = 'awesome movie, I loved it so much'
predictions = model.predict(np.array([sample_text]))
print(predictions[0])

[-0.00647295]


In [25]:
# The model's last layer has no activation, so the loss is told to expect logits.
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(loss=loss_fn, optimizer=optimizer, metrics=['accuracy'])

In [26]:

# Train for 10 epochs; validation_steps=30 limits each validation pass to
# 30 batches of test_dataset. The History object is kept so the per-epoch
# metrics remain available after training.
# NOTE(review): the recorded run of this cell aborted with a CancelledError on
# GPU; the cause is not visible in this notebook. Confirm the GPU setup.
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset,
    validation_steps=30,
)

Epoch 1/10
 32/391 [=>............................] - ETA: 21s - loss: 0.6929 - accuracy: 0.4927

CancelledError:  [_Derived_]RecvAsync is cancelled.
	 [[{{node Adam/Adam/update/AssignSubVariableOp/_57}}]]
	 [[gradient_tape/sequential/embedding/embedding_lookup/Reshape/_54]] [Op:__inference_train_function_22707]

Function call stack:
train_function


In [27]:
# Record the Python interpreter version used for this run
# (`sys` is imported in the notebook's top import cell).
print(sys.version)

3.8.5 (tags/v3.8.5:580fbb0, Jul 20 2020, 15:57:54) [MSC v.1924 64 bit (AMD64)]
