In [1]:
#!pip install tensorflow==2.8.1

In [2]:
!curl -O  https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 80.2M  100 80.2M    0     0  20.1M      0  0:00:03  0:00:03 --:--:-- 20.1M


In [3]:
!tar -xf aclImdb_v1.tar.gz

In [4]:
!rm -r aclImdb/train/unsup/

In [5]:
import os
import pathlib
import shutil
import random
from tensorflow import keras

In [6]:
# Shared configuration: mini-batch size and the on-disk dataset layout.
batch_size = 32
base_dir = pathlib.Path("aclImdb/")
train_dir = base_dir.joinpath("train")
val_dir = base_dir.joinpath("val")

In [7]:
print(base_dir)

aclImdb


In [8]:
val_dir

PosixPath('aclImdb/val')

In [9]:
# Carve a 20% validation split out of the training data, one class at a time.
for category in ("neg", "pos"):
    val_category_dir = val_dir / category
    # A non-empty target means this class was already split. Skipping keeps the
    # cell idempotent: re-running neither raises FileExistsError nor moves a
    # further 20% of the remaining training files into validation.
    if val_category_dir.exists() and any(val_category_dir.iterdir()):
        continue
    os.makedirs(val_category_dir, exist_ok=True)
    # os.listdir returns entries in arbitrary order, so sort before the seeded
    # shuffle; otherwise the same seed can select different files per machine.
    files = sorted(os.listdir(train_dir / category))
    random.Random(1337).shuffle(files)
    num_val_samples = int(0.2 * len(files))
    # Slice from an explicit start index: with num_val_samples == 0 the
    # expression files[-0:] would select (and move) every file.
    val_files = files[len(files) - num_val_samples:]
    for fname in val_files:
        shutil.move(train_dir / category / fname, val_category_dir / fname)

In [10]:
# Build the three batched text datasets (train, validation, test) in that order,
# each from its own directory and with the shared batch size.
train_ds, val_ds, test_ds = (
    keras.utils.text_dataset_from_directory(directory, batch_size=batch_size)
    for directory in ("aclImdb/train", "aclImdb/val", "aclImdb/test")
)

Found 20000 files belonging to 2 classes.
Found 5000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.


In [11]:
# Drop the labels so the dataset yields only the raw text batches.
text_only_train_ds = train_ds.map(lambda text, _label: text)

In [12]:
train_ds

<_PrefetchDataset element_spec=(TensorSpec(shape=(None,), dtype=tf.string, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

In [13]:
for x,y in train_ds.take(1):
  print(x)
  print(y)

tf.Tensor(
[b'I think a great many viewers missed entirely the fact that this is obviously a parody of western films.<br /><br />This is not a bad movie - it is a clever tongue in cheek take on westerns. I don\'t believe this film was taking itself seriously for a moment.<br /><br />What makes this film even more unique is the fact it is centered around 4 strong, beautiful women, two of which are black, one Asian, and a Mexican/Hispanic character.<br /><br />These aren\'t your usual western women--they\'re tough--they can draw fast and shoot straight.<br /><br />They\'re so tough even the bartender is shaking when he pours their whiskey.<br /><br />The plot which moves this story along is typical of westerns--in the vein of "you shot my brother--so I\'m gonna get you!" Only in this western, a woman\'s sister has been shot and she\'s out for vengeance on the gang who did it.<br /><br />So she goes and rounds up her old cronies from her bank robbing days.<br /><br />One of them, Maria, i

In [14]:
from tensorflow.keras import layers

In [15]:
# Limits handed to the TextVectorization layer: output sequence length and
# maximum number of vocabulary tokens.
max_length, max_tokens = 600, 20_000

In [16]:
# Text -> integer-sequence layer: integer output mode, vocabulary capped at
# max_tokens, output sequence length fixed to max_length.
text_vectorization = layers.TextVectorization(
    max_tokens=max_tokens,
    output_mode="int",
    output_sequence_length=max_length,
)

In [17]:
text_vectorization.adapt(text_only_train_ds)

In [18]:
def _vectorize_batch(texts, labels):
    """Run a batch of texts through ``text_vectorization``; labels pass through unchanged."""
    return text_vectorization(texts), labels


# Apply the same vectorization to every split.
int_train_ds = train_ds.map(_vectorize_batch, num_parallel_calls=4)
int_val_ds = val_ds.map(_vectorize_batch, num_parallel_calls=4)
int_test_ds = test_ds.map(_vectorize_batch, num_parallel_calls=4)

In [19]:
int_train_ds

<_ParallelMapDataset element_spec=(TensorSpec(shape=(None, None), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

In [20]:
for x,y in int_train_ds.take(1):
  print(x)
  print(y)

tf.Tensor(
[[  10   41  202 ...    0    0    0]
 [   2  248 3816 ...    0    0    0]
 [ 292 2288 1876 ...    0    0    0]
 ...
 [ 828  833    1 ... 2359 6700   77]
 [  10   68    3 ...    0    0    0]
 [ 407   43    2 ...    0    0    0]], shape=(32, 600), dtype=int64)
tf.Tensor([1 0 0 1 0 1 1 0 0 0 0 1 0 1 1 1 1 1 1 0 1 1 0 0 0 1 0 0 1 1 1 0], shape=(32,), dtype=int32)


In [21]:
# Create Transformer Encoder

In [22]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [23]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class TransformerEncoder(layers.Layer):
    """Encoder block: multi-head self-attention followed by a dense projection.

    Each of the two sub-blocks is wrapped in a residual addition and followed
    by its own layer normalization. Because of the second residual addition,
    the feature size of the inputs has to match ``embed_dim``.

    Args:
        embed_dim: Units of the final dense projection; also used as
            ``key_dim`` of the attention layer.
        dense_dim: Units of the inner ReLU dense layer.
        num_heads: Number of attention heads.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads

        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=embed_dim,
        )
        feed_forward_layers = [
            layers.Dense(dense_dim, activation="relu"),
            layers.Dense(embed_dim),
        ]
        self.dense_proj = keras.Sequential(feed_forward_layers)
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask=None):
        """Apply self-attention and the dense projection to ``inputs``.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            mask: Optional mask; when given it gains a new axis at position 1
                before being passed on as ``attention_mask``.

        Returns:
            The layer-normalized output of the second residual sub-block.
        """
        attention_mask = None if mask is None else mask[:, tf.newaxis, :]
        attended = self.attention(inputs, inputs, attention_mask=attention_mask)
        # First residual connection + normalization.
        normed_attention = self.layernorm_1(inputs + attended)
        projected = self.dense_proj(normed_attention)
        # Second residual connection + normalization.
        return self.layernorm_2(normed_attention + projected)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        return {
            **super().get_config(),
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        }

In [24]:
# Let's train the model

In [25]:
# Hyperparameters of the classifier.
vocab_size = 20000
embed_dim = 256
num_heads = 2
dense_dim = 32

# Token ids -> embeddings -> Transformer encoder -> max-pool over the sequence
# axis -> dropout -> single sigmoid unit.
inputs = keras.Input(shape=(None,), dtype="int64")
x = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)(inputs)
x = TransformerEncoder(
    embed_dim=embed_dim,
    dense_dim=dense_dim,
    num_heads=num_heads,
)(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dropout(rate=0.5)(x)
outputs = layers.Dense(units=1, activation="sigmoid")(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
model.summary()

In [26]:
# Checkpoint callback writing to "transformer_encoder.keras" with
# save_best_only enabled.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="transformer_encoder.keras",
        save_best_only=True,
    ),
]

In [27]:
# Train for 20 epochs, validating on the held-out split after each epoch.
model.fit(
    x=int_train_ds,
    validation_data=int_val_ds,
    epochs=20,
    callbacks=callbacks,
)

Epoch 1/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 100ms/step - accuracy: 0.5390 - loss: 0.8586 - val_accuracy: 0.8306 - val_loss: 0.3773
Epoch 2/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 97ms/step - accuracy: 0.8148 - loss: 0.4092 - val_accuracy: 0.8482 - val_loss: 0.3462
Epoch 3/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 98ms/step - accuracy: 0.8416 - loss: 0.3518 - val_accuracy: 0.8590 - val_loss: 0.3281
Epoch 4/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 97ms/step - accuracy: 0.8686 - loss: 0.3157 - val_accuracy: 0.8642 - val_loss: 0.3238
Epoch 5/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 96ms/step - accuracy: 0.8757 - loss: 0.2918 - val_accuracy: 0.8640 - val_loss: 0.3261
Epoch 6/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 97ms/step - accuracy: 0.8870 - loss: 0.2719 - val_accuracy: 0.8636 - val_loss: 0.3292
Epoch 7/20
[1m

<keras.src.callbacks.history.History at 0x78a43241dd20>

In [28]:
# Reload the checkpointed model; the custom layer class must be supplied so it
# can be deserialized.
model = keras.models.load_model(
    "transformer_encoder.keras",
    custom_objects={"TransformerEncoder": TransformerEncoder},
)
# evaluate() returns the loss followed by the compiled metrics; index 1 is accuracy.
test_metrics = model.evaluate(int_test_ds)
print(f"Test acc: {test_metrics[1]:.3f}")



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 17ms/step - accuracy: 0.8649 - loss: 0.3281
Test acc: 0.866


In [29]:
# Implementing positional embedding as a subclassed layer