Based on the Keras example "Text classification with a Transformer": https://keras.io/examples/nlp/text_classification_with_transformer/

In [1]:
import tensorflow as tf

In [2]:
import numpy as np

In [3]:
from tensorflow import keras
from tensorflow.keras import layers

# Layers: Transformer block and token + position embedding

In [4]:
class TransformerBlock(
    layers.Layer,
    ):
    """Transformer encoder block: self-attention followed by a feed-forward network.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalization, so the output has the same shape as the input.

    Args:
        embed_dim: Size of the last axis of the input. Also used as the
            attention ``key_dim`` and as the output width of the
            feed-forward network, which the residual connection requires.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden layer of the feed-forward network.
        rate: Dropout rate applied after the attention and after the
            feed-forward sub-layers.
        **kwargs: Forwarded to ``layers.Layer`` (for example ``name``).
    """

    def __init__(
        self,
        embed_dim,
        num_heads,
        ff_dim,
        rate = 0.1,
        **kwargs,
        ):

        super(TransformerBlock, self).__init__(**kwargs)

        # Kept so that get_config() can describe how to rebuild this layer.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = layers.MultiHeadAttention(
            num_heads = num_heads,
            key_dim = embed_dim,
            )

        self.ffn = keras.Sequential(
            [
                layers.Dense(
                    ff_dim,
                    activation="relu",
                ),
                # Project back to embed_dim (not ff_dim): the result is added
                # to the block input in the residual connection, so the last
                # axis must match even when ff_dim != embed_dim.
                layers.Dense(
                    embed_dim,
                ),
            ]
        )

        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(TransformerBlock, self).get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

    def call(
        self,
        inputs,
        training = None,
        ):
        """Apply the block to ``inputs``.

        Args:
            inputs: Tensor whose last axis has size ``embed_dim``.
            training: Forwarded to the two dropout layers. ``None`` (the
                default) leaves the decision to Keras.

        Returns:
            Tensor with the same shape as ``inputs``.
        """

        # Self-attention: the input serves as both query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training = training)

        # First residual connection + normalization.
        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(
            ffn_output,
            training=training
        )

        # Second residual connection + normalization.
        return self.layernorm2(out1+ffn_output)

In [5]:
# Small instance used only to sanity-check the layer on a toy input.
transformer_block = TransformerBlock(embed_dim=10, num_heads=2, ff_dim=10)

In [6]:
# Toy input whose last axis (10) matches the embed_dim of the demo block.
# A seeded generator keeps the values identical on every Restart & Run All.
x = np.random.default_rng(0).random(
    (3, 4, 10)
    )

x.shape

(3, 4, 10)

In [7]:
# Run the demo block with training=False, which is forwarded to its dropout layers.
y = transformer_block(x, training = False)

In [8]:
# The block should preserve the input shape (3, 4, 10).
y.shape

TensorShape([3, 4, 10])

In [9]:
class TokenAndPositionEmbedding(layers.Layer):
    """Embed token ids and add an embedding of each token's position.

    Args:
        maxlen: Number of rows in the position-embedding table.
        vocab_size: Number of rows in the token-embedding table.
        embed_dim: Output width shared by both embedding tables.
    """

    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Return token embeddings of ``x`` plus position embeddings."""
        # Position indices 0 .. n-1, where n is the size of the last axis of x.
        seq_len = tf.shape(x)[-1]
        position_ids = tf.range(0, seq_len, 1)
        position_vectors = self.pos_emb(position_ids)
        token_vectors = self.token_emb(x)
        # The position vectors are added to the token vectors of every row of x.
        return token_vectors + position_vectors

In [10]:
# Small instance used only to sanity-check the embedding layer on toy token ids.
embedding_layer = TokenAndPositionEmbedding(maxlen=3, vocab_size=100, embed_dim=10)

In [11]:
# Toy batch of token ids in [0, 100), 5 sequences of length 3.
# A seeded generator keeps the ids identical on every Restart & Run All.
x = np.random.default_rng(0).integers(
    0,
    100,
    size = (5, 3),
    )

y = embedding_layer(
    x
    )

In [12]:
# Expect the (5, 3) input with a trailing embed_dim axis: (5, 3, 10).
y.shape

TensorShape([5, 3, 10])

# data

In [13]:
# Dataset settings: vocab_size is passed as num_words to the IMDB loader,
# maxlen is the target length used when padding the sequences.
vocab_size, maxlen = 20_000, 200

In [14]:
# Load the IMDB train/validation splits, passing vocab_size as num_words.
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=vocab_size)

In [15]:
# Number of training sequences.
len(x_train)

25000

In [16]:
# Number of validation sequences.
len(x_val)

25000

# padding

In [17]:
# Pad the training sequences, using maxlen as the target length.
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)

In [18]:
# Pad the validation sequences the same way as the training sequences.
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)

# parameters

In [19]:
# Hyper-parameters shared by the embedding layer and the Transformer block.
embed_dim, num_heads, ff_dim = 32, 2, 32

# model

In [20]:
# Symbolic model input: one sequence of maxlen token ids per example.
inputs = layers.Input(shape=(maxlen,))

In [21]:
# Embedding layer for the real model (replaces the toy instance created earlier).
embedding_layer = TokenAndPositionEmbedding(
    maxlen=maxlen,
    vocab_size=vocab_size,
    embed_dim=embed_dim,
)

In [22]:
# Transformer block for the real model (replaces the toy instance created earlier).
transformer_block = TransformerBlock(
    embed_dim=embed_dim,
    num_heads=num_heads,
    ff_dim=ff_dim,
)

In [23]:
# Encoder: token + position embedding followed by one Transformer block.
x = transformer_block(embedding_layer(inputs))

# Classification head: pool over the sequence, then a small dense network
# with dropout before and after the hidden layer.
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)

# Two-way softmax, one output per class.
outputs = layers.Dense(2, activation="softmax")(x)

In [24]:
# Wrap the graph from `inputs` to `outputs` into a trainable model.
model = keras.Model(inputs=inputs, outputs=outputs)

# train

In [25]:
# Sparse categorical cross-entropy takes the integer labels directly,
# matching the two-unit softmax output.
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [26]:
# Train for two epochs, evaluating on the validation split after each one.
history = model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=2,
    batch_size=32,
)

Epoch 1/2
Epoch 2/2


# end