In [104]:

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds

"""
## Implement a Transformer block as a layer
"""


class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, name=None):
        super().__init__(name=name)
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim,)
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)


"""
## Implement embedding layer

Two seperate embedding layers, one for tokens, one for token index (positions).
"""


class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim, name=None):
        super().__init__(name=name)
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim, mask_zero=True)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)        
        return x + positions


class TextVectorizer(layers.Layer):
    def __init__(self, vectorizer, name=None):
        super().__init__(name=name)
        self.vectorizer = vectorizer    
        
    def call(self, x):
        vectors = self.vectorizer(x)
        start_token = tf.ones(shape=[tf.shape(x)[0],1], dtype=tf.int64) * 2
        return tf.concat([start_token,vectors], axis=1)[:,:-1]
    
    def get_vocabulary(self):
        return self.vectorizer.get_vocabulary()
    
class extractStartToken(layers.Layer):
    
    def call(self,x):
        return x[:,0,:]       


In [70]:
"""
## Download and prepare dataset
"""


dataset, info = tfds.load('imdb_reviews', with_info=True,
                          as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']
print(f"Length of the training data {len(train_dataset)}")
print(f"Length of the testing data {len(test_dataset)}")

Length of the training data 25000
Length of the testing data 25000


In [71]:
vocab_size = 20000  # Only consider the top 20k words
maxlen = 200  # Only consider the first 200 words of each movie review

vectorizer = tf.keras.layers.TextVectorization(            
            max_tokens = vocab_size,
            output_mode='int',
            output_sequence_length=maxlen        
        )
vectorizer.adapt(train_dataset.map(lambda text,label: text))

In [72]:
## Set the custom vocabulary to include the [START] token
vocab = vectorizer.get_vocabulary()
vocab = vocab[:2] + ['[START]',] + vocab[2:-1]
vectorizer.set_vocabulary(vocab)
print(vectorizer.get_vocabulary()[:10])

['', '[UNK]', '[START]', 'the', 'and', 'a', 'of', 'to', 'is', 'in']


In [109]:

"""
## Create classifier model using transformer layer

Transformer layer outputs one vector for each time step of our input sequence.
Here, we take the mean across all time steps and
use a feed forward network on top of it to classify text.
"""


embed_dim = 32  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer


text_vectorizer = TextVectorizer(vectorizer, name='vectorization_layer')
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim, name='embedding_layer')
transformer_block1 = TransformerBlock(embed_dim, num_heads, ff_dim,  name='tranformer_block1')
transformer_block2 = TransformerBlock(embed_dim, num_heads, ff_dim, name='tranformer_block2')
transformer_block3 = TransformerBlock(embed_dim, num_heads, ff_dim, name='tranformer_block3')

input_text = layers.Input(shape=(1,), dtype='string', name='input_layer')
x = text_vectorizer(input_text)
x = embedding_layer(x)
x = transformer_block1(x)
x = transformer_block2(x)
#x = transformer_block3(x)
#print(x.shape)
#x = layers.GlobalAveragePooling1D()(x)
x = extractStartToken(name='layer_start_token_extractor')(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)

model = keras.Model(inputs=input_text, outputs=outputs)




In [74]:
BUFFER_SIZE = 10000
BATCH_SIZE = 32

train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [110]:
"""
## Train and Evaluate
"""

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4), loss="binary_crossentropy", metrics=["accuracy"]
)

es = tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

history = model.fit(
    train_dataset, batch_size=BATCH_SIZE, epochs=20, validation_data=test_dataset, callbacks=[es]
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


In [111]:
model.summary()

Model: "model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None, 1)]               0         
                                                                 
 vectorization_layer (TextVe  (None, 200)              0         
 ctorizer)                                                       
                                                                 
 embedding_layer (TokenAndPo  (None, 200, 32)          646400    
 sitionEmbedding)                                                
                                                                 
 tranformer_block1 (Transfor  (None, 200, 32)          10656     
 merBlock)                                                       
                                                                 
 tranformer_block2 (Transfor  (None, 200, 32)          10656     
 merBlock)                                                 

In [77]:
input_examples = [
    "An uplifting masterpiece, this film inspires with powerful performances, a touching story, and a message of hope. A must-see.",
    "The film wasn't dull or uninspiring; it was absolutely captivating, brimming with excitement, and left me thoroughly impressed and delighted."
]

In [94]:
examples = [
    'this is such an amazing movie!',  # this is the same sentence tried earlier
    'The movie was great!',
    'The movie was meh.',
    'The movie was okish.',
    'The movie was terrible...',
    'The movie failed to engage or entertain, featuring a weak plot, dull performances, and left me utterly disappointed and disinterested.'
]


In [95]:
model.predict(examples)



array([[0.95053595],
       [0.95358866],
       [0.9351276 ],
       [0.9351276 ],
       [0.05845563],
       [0.03809845]], dtype=float32)

In [112]:
model.save(f"saved_models/transformer/imdb_keras_base")



INFO:tensorflow:Assets written to: saved_models/transformer/imdb_keras_base\assets


INFO:tensorflow:Assets written to: saved_models/transformer/imdb_keras_base\assets


In [102]:
with open(f"saved_models/transformer/imdb_keras_base_vocab.txt", "w") as f:
    # write elements of list
    for word in vectorizer.get_vocabulary():
        f.write('%s\n' %word)

In [99]:
model.get_layer(index=1).get_vocabulary()

['',
 '[UNK]',
 '[START]',
 'the',
 'and',
 'a',
 'of',
 'to',
 'is',
 'in',
 'it',
 'i',
 'this',
 'that',
 'br',
 'was',
 'as',
 'for',
 'with',
 'movie',
 'but',
 'film',
 'on',
 'not',
 'you',
 'are',
 'his',
 'have',
 'he',
 'be',
 'one',
 'its',
 'at',
 'all',
 'by',
 'an',
 'they',
 'from',
 'who',
 'so',
 'like',
 'her',
 'just',
 'or',
 'about',
 'has',
 'if',
 'out',
 'some',
 'there',
 'what',
 'good',
 'when',
 'more',
 'very',
 'even',
 'she',
 'my',
 'no',
 'up',
 'would',
 'which',
 'only',
 'time',
 'really',
 'story',
 'their',
 'were',
 'had',
 'see',
 'can',
 'me',
 'than',
 'we',
 'much',
 'well',
 'been',
 'get',
 'will',
 'into',
 'also',
 'because',
 'other',
 'do',
 'people',
 'bad',
 'great',
 'first',
 'how',
 'most',
 'him',
 'dont',
 'made',
 'then',
 'movies',
 'make',
 'films',
 'could',
 'way',
 'them',
 'any',
 'too',
 'after',
 'characters',
 'think',
 'watch',
 'two',
 'many',
 'being',
 'seen',
 'character',
 'never',
 'little',
 'acting',
 'where',
 