In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras import backend as K
import os, pathlib, shutil, random
import shutil

2023-05-15 11:37:11.030281: E tensorflow/core/lib/monitoring/collection_registry.cc:77] Cannot register 2 metrics with the same name: /tensorflow/core/saved_model/write/count
2023-05-15 11:37:11.030955: E tensorflow/core/lib/monitoring/collection_registry.cc:77] Cannot register 2 metrics with the same name: /tensorflow/core/saved_model/read/count
2023-05-15 11:37:11.030998: E tensorflow/core/lib/monitoring/collection_registry.cc:77] Cannot register 2 metrics with the same name: /tensorflow/core/saved_model/write/api
2023-05-15 11:37:11.031014: E tensorflow/core/lib/monitoring/collection_registry.cc:77] Cannot register 2 metrics with the same name: /tensorflow/core/saved_model/read/api


In [2]:
K.set_floatx('posit160')

In [3]:
class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim 
        self.dense_dim = dense_dim 
        self.num_heads = num_heads 
        self.attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential([layers.Dense(dense_dim, activation="relu"),layers.Dense(embed_dim),])
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask=None): 
        if mask is not None: 
            mask = mask[:, tf.newaxis, :] 
        attention_output = self.attention(inputs, inputs, attention_mask=mask)
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)
    
    def get_config(self): 
        config = super().get_config()
        config.update({
        "embed_dim": self.embed_dim,
        "num_heads": self.num_heads,
        "dense_dim": self.dense_dim,
        })
        return config


class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, input_dim, output_dim, **kwargs): 
        super().__init__(**kwargs)
        self.token_embeddings = layers.Embedding(input_dim=input_dim, output_dim=output_dim)
        self.position_embeddings = layers.Embedding(input_dim=sequence_length, output_dim=output_dim) 
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.output_dim = output_dim

    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions
    
    def compute_mask(self, inputs, mask=None): 
        return tf.math.not_equal(inputs, 0) 
    
    def get_config(self): 
        config = super().get_config()
        config.update({
        "output_dim": self.output_dim,
        "sequence_length": self.sequence_length,
        "input_dim": self.input_dim,
        })
        return config

In [4]:
url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"

dataset = tf.keras.utils.get_file("aclImdb_v1", url,
                                    untar=True, cache_dir='.',
                                    cache_subdir='')

shutil.rmtree('aclImdb/train/unsup')

Downloading data from https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz


In [6]:
base_dir = pathlib.Path("aclImdb")
val_dir = base_dir / "val"
train_dir = base_dir / "train"
for category in ("neg", "pos"):
    os.makedirs(val_dir / category)
    files = os.listdir(train_dir / category)
    random.Random(1337).shuffle(files) 
    num_val_samples = int(0.2 * len(files)) 
    val_files = files[-num_val_samples:] 
    for fname in val_files: 
        shutil.move(train_dir / category / fname, val_dir / category / fname)

In [7]:
batch_size = 32
train_ds = keras.utils.text_dataset_from_directory( "aclImdb/train", batch_size=batch_size)
val_ds = keras.utils.text_dataset_from_directory("aclImdb/val", batch_size=batch_size)
test_ds = keras.utils.text_dataset_from_directory("aclImdb/test", batch_size=batch_size)

text_only_train_ds = train_ds.map(lambda x, y: x) 

max_length = 600
max_tokens = 20000
text_vectorization = TextVectorization(
 max_tokens=max_tokens,
 output_mode="int",
 output_sequence_length=max_length, 
)
text_vectorization.adapt(text_only_train_ds)
int_train_ds = train_ds.map(lambda x, y: (text_vectorization(x), y), num_parallel_calls=4)
int_val_ds = val_ds.map(lambda x, y: (text_vectorization(x), y), num_parallel_calls=4)
int_test_ds = test_ds.map(lambda x, y: (text_vectorization(x), y), num_parallel_calls=4)

Found 20000 files belonging to 2 classes.
Found 5000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.


2023-05-15 11:51:20.154718: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:164] None of the MLIR Optimization Passes are enabled (registered 2)


In [8]:
vocab_size = 20000
sequence_length = 600
embed_dim = 256
num_heads = 2
dense_dim = 32

inputs = keras.Input(shape=(None,), dtype="int64")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(inputs) 
x = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer="adam",
 loss="binary_crossentropy",
 metrics=[tf.keras.metrics.BinaryAccuracy()])

In [5]:
model.load_weights("model/best_weights")

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f8850fd0820>

In [7]:
print(model.weights)

[<tf.Variable 'positional_embedding/embedding/embeddings:0' shape=(20000, 256) dtype=float32, numpy=
array([[ 0.03739503, -0.06100216,  0.05121558, ...,  0.03595575,
         0.0024836 ,  0.00336652],
       [-0.00596904, -0.02471746,  0.01501249, ...,  0.00329701,
        -0.02990483,  0.01573665],
       [-0.04841526, -0.00022331, -0.0136003 , ..., -0.01757703,
         0.0250284 , -0.04963886],
       ...,
       [ 0.02936665,  0.02552247, -0.01700588, ..., -0.01531127,
         0.04560084, -0.00388065],
       [ 0.00868333,  0.03133388,  0.00624219, ...,  0.03978611,
        -0.01659929,  0.0709249 ],
       [ 0.02855214,  0.03318401,  0.02619697, ..., -0.07201208,
        -0.0371513 ,  0.02101573]], dtype=float32)>, <tf.Variable 'positional_embedding/embedding_1/embeddings:0' shape=(600, 256) dtype=float32, numpy=
array([[ 0.02885527, -0.03422101, -0.0183788 , ...,  0.00225723,
         0.00010679,  0.03315101],
       [-0.03442131, -0.06718445, -0.01684681, ...,  0.00540662,
    

In [6]:
print(f"Test acc: {model.evaluate(int_test_ds)[1]:.3f}")

 72/782 [=>............................] - ETA: 2:14 - loss: 1.0331 - accuracy: 0.5516

KeyboardInterrupt: 

In [7]:
callbacks = [
 keras.callbacks.ModelCheckpoint("model/full_transformer_encoder_posit.keras",
 save_best_only=True)
] 
model.fit(int_train_ds, validation_data=int_val_ds, epochs=20, 
callbacks=callbacks)


Epoch 1/20

KeyboardInterrupt: 

In [10]:
model = keras.models.load_model(
    "model.h5",
    custom_objects={"TransformerEncoder": TransformerEncoder,
                    "PositionalEmbedding": PositionalEmbedding})


ValueError: Unknown optimizer: Custom>Adam. Please ensure this object is passed to the `custom_objects` argument. See https://www.tensorflow.org/guide/keras/save_and_serialize#registering_the_custom_object for details.

In [5]:
print(model.weights)

[<tf.Variable 'positional_embedding/embedding/embeddings:0' shape=(20000, 256) dtype=float32, numpy=
array([[-0.03150397,  0.00373997, -0.03244636, ...,  0.03043887,
        -0.05167284,  0.03323204],
       [-0.00318759, -0.00405039,  0.04254161, ..., -0.01668963,
         0.04274717,  0.02822473],
       [ 0.0233761 , -0.04159024,  0.04559259, ...,  0.00379981,
        -0.03726941,  0.03221758],
       ...,
       [-0.01635022,  0.04602778, -0.03338681, ..., -0.02754178,
        -0.04257899, -0.05332807],
       [-0.05452348, -0.04753804,  0.034722  , ...,  0.01256795,
        -0.01627147,  0.0019415 ],
       [ 0.04122843,  0.00530811, -0.01852891, ..., -0.02590084,
        -0.03846217,  0.03773504]], dtype=float32)>, <tf.Variable 'positional_embedding/embedding_1/embeddings:0' shape=(600, 256) dtype=float32, numpy=
array([[ 0.01230673, -0.03902837, -0.05263768, ...,  0.02777611,
        -0.00754173, -0.02190994],
       [ 0.00509173,  0.01386093, -0.02932673, ...,  0.0414314 ,
    

In [6]:
print(f"Test acc: {model.evaluate(int_test_ds)[1]:.3f}")



KeyboardInterrupt: 