In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, Dropout, Layer
from tensorflow.keras.layers import Embedding, Input, GlobalAveragePooling1D, Dense
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential, Model
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

In [2]:
class TransformerBlock(Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential(
            [Dense(ff_dim, activation="relu"), 
             Dense(embed_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [3]:
class TokenAndPositionEmbedding(Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [4]:
vocab_size = 20000  # Only consider the top 20k words
maxlen = 200  # Only consider the first 200 words of each movie review

(x_train, y_train), (x_val, y_val) = imdb.load_data(num_words=vocab_size)
print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 Training sequences
25000 Validation sequences


In [5]:
y_val[:5]

array([0, 1, 1, 0, 1])

In [6]:
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = tf.keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)

In [7]:
embed_dim = 32  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer

inputs = Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)
x = GlobalAveragePooling1D()(x)
x = Dropout(0.1)(x)
x = Dense(20, activation="relu")(x)
x = Dropout(0.1)(x)
outputs = Dense(2, activation="softmax")(x)

model = Model(inputs=inputs, outputs=outputs)

In [8]:
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

history = model.fit(x_train, y_train, 
                    batch_size=64, epochs=2, 
                    validation_data=(x_val, y_val)
                   )

Epoch 1/2
Epoch 2/2


In [9]:
model.save_weights("predict_class.h5")

In [10]:
results = model.evaluate(x_val, y_val, verbose=2)

for name, value in zip(model.metrics_names, results):
    print("%s: %.3f" % (name, value))

782/782 - 2s - loss: 0.3186 - accuracy: 0.8717 - 2s/epoch - 3ms/step
loss: 0.319
accuracy: 0.872


In [11]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np
import os

In [12]:
train_data, validation_data, test_data = tfds.load(
    name="imdb_reviews", 
    split=('train[:60%]', 'train[60%:]', 'test'),
    as_supervised=True)
    
train_examples_batch, train_labels_batch = next(iter(train_data.batch(10)))
train_examples_batch[:1]

[1mDownloading and preparing dataset imdb_reviews/plain_text/1.0.0 (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]





0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incomplete010NVR/imdb_reviews-train.tfrecord


  0%|          | 0/25000 [00:00<?, ? examples/s]

0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incomplete010NVR/imdb_reviews-test.tfrecord


  0%|          | 0/25000 [00:00<?, ? examples/s]

0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incomplete010NVR/imdb_reviews-unsupervised.tfrecord


  0%|          | 0/50000 [00:00<?, ? examples/s]



[1mDataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.[0m


<tf.Tensor: shape=(1,), dtype=string, numpy=
array([b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."],
      dtype=object)>

In [13]:
embedding = "https://tfhub.dev/google/nnlm-en-dim50/2"
hub_layer = hub.KerasLayer(embedding, input_shape=[], 
                           dtype=tf.string, trainable=True)
                           
print(hub_layer(train_examples_batch[:3]))

model = Sequential()
model.add(hub_layer)
model.add(Dense(16, activation='relu'))
model.add(Dense(1))

model.summary()

tf.Tensor(
[[ 0.5423194  -0.01190171  0.06337537  0.0686297  -0.16776839 -0.10581177
   0.168653   -0.04998823 -0.31148052  0.07910344  0.15442258  0.01488661
   0.03930155  0.19772716 -0.12215477 -0.04120982 -0.27041087 -0.21922147
   0.26517656 -0.80739075  0.25833526 -0.31004202  0.2868321   0.19433866
  -0.29036498  0.0386285  -0.78444123 -0.04793238  0.41102988 -0.36388886
  -0.58034706  0.30269453  0.36308962 -0.15227163 -0.4439151   0.19462997
   0.19528405  0.05666233  0.2890704  -0.28468323 -0.00531206  0.0571938
  -0.3201319  -0.04418665 -0.08550781 -0.55847436 -0.2333639  -0.20782956
  -0.03543065 -0.17533456]
 [ 0.56338924 -0.12339553 -0.10862677  0.7753425  -0.07667087 -0.15752274
   0.01872334 -0.08169781 -0.3521876   0.46373403 -0.08492758  0.07166861
  -0.00670818  0.12686071 -0.19326551 -0.5262643  -0.32958236  0.14394784
   0.09043556 -0.54175544  0.02468163 -0.15456744  0.68333143  0.09068333
  -0.45327246  0.23180094 -0.8615696   0.3448039   0.12838459 -0.58759046
 

In [14]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])
              
history = model.fit(train_data.shuffle(10000).batch(512),
                    epochs=5,
                    validation_data=validation_data.batch(512),
                    verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [15]:
results = model.evaluate(test_data.batch(512), verbose=2)

for name, value in zip(model.metrics_names, results):
    print("%s: %.3f" % (name, value))

49/49 - 2s - loss: 0.3295 - accuracy: 0.8452 - 2s/epoch - 35ms/step
loss: 0.330
accuracy: 0.845


In [16]:
# Pre-Trained Model

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np
import os

In [17]:
train_data, validation_data, test_data = tfds.load(
    name="imdb_reviews", 
    split=('train[:60%]', 'train[60%:]', 'test'),
    as_supervised=True)
    
train_examples_batch, train_labels_batch = next(iter(train_data.batch(10)))
train_examples_batch[:1]

<tf.Tensor: shape=(1,), dtype=string, numpy=
array([b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."],
      dtype=object)>

In [19]:
embedding = "https://tfhub.dev/google/nnlm-en-dim50/2"
hub_layer = hub.KerasLayer(embedding, input_shape=[], 
                           dtype=tf.string, trainable=True)
                           
print(hub_layer(train_examples_batch[:3]))

model = Sequential()
model.add(hub_layer)
model.add(Dense(16, activation='relu'))
model.add(Dense(1))

model.summary()

tf.Tensor(
[[ 0.5423194  -0.01190171  0.06337537  0.0686297  -0.16776839 -0.10581177
   0.168653   -0.04998823 -0.31148052  0.07910344  0.15442258  0.01488661
   0.03930155  0.19772716 -0.12215477 -0.04120982 -0.27041087 -0.21922147
   0.26517656 -0.80739075  0.25833526 -0.31004202  0.2868321   0.19433866
  -0.29036498  0.0386285  -0.78444123 -0.04793238  0.41102988 -0.36388886
  -0.58034706  0.30269453  0.36308962 -0.15227163 -0.4439151   0.19462997
   0.19528405  0.05666233  0.2890704  -0.28468323 -0.00531206  0.0571938
  -0.3201319  -0.04418665 -0.08550781 -0.55847436 -0.2333639  -0.20782956
  -0.03543065 -0.17533456]
 [ 0.56338924 -0.12339553 -0.10862677  0.7753425  -0.07667087 -0.15752274
   0.01872334 -0.08169781 -0.3521876   0.46373403 -0.08492758  0.07166861
  -0.00670818  0.12686071 -0.19326551 -0.5262643  -0.32958236  0.14394784
   0.09043556 -0.54175544  0.02468163 -0.15456744  0.68333143  0.09068333
  -0.45327246  0.23180094 -0.8615696   0.3448039   0.12838459 -0.58759046
 

In [20]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])
              
history = model.fit(train_data.shuffle(10000).batch(512),
                    epochs=5,
                    validation_data=validation_data.batch(512),
                    verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [21]:
results = model.evaluate(test_data.batch(512), verbose=2)

for name, value in zip(model.metrics_names, results):
    print("%s: %.3f" % (name, value))

49/49 - 2s - loss: 0.3318 - accuracy: 0.8481 - 2s/epoch - 37ms/step
loss: 0.332
accuracy: 0.848
