In [1]:
# install necessary packages
!pip install -q tensorflow keras

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m644.9/644.9 MB[0m [31m572.3 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.5/57.5 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.5/24.5 MB[0m [31m46.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m56.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m55.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.6/6.6 MB[0m [31m57.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.5/72.5 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# import required libraries
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np

# toy dataset: hobby reviews with binary sentiment
# Each review is kept next to its label (1 = positive, 0 = negative) so the
# two parallel lists derived below cannot drift out of alignment.
_labelled_reviews = [
    ("Painting is very relaxing", 1),
    ("I can't stand puzzles", 0),
    ("Knitting is joyful", 1),
    ("Boring and repetitive", 0),
    ("Photography is inspiring", 1),
    ("Too hard to enjoy", 0),
    ("I enjoy sculpting", 1),
]
samples = [review for review, _label in _labelled_reviews]
targets = [label for _review, label in _labelled_reviews]

In [3]:
# tokenization and padding
# Fit a word -> integer-id vocabulary on the reviews; out-of-vocabulary words
# are encoded with the "[UNK]" token.
tokenizer = tf.keras.preprocessing.text.Tokenizer(oov_token="[UNK]", num_words=1000)
tokenizer.fit_on_texts(samples)

# Encode each review as a list of ids, then pad/truncate on the left so every
# row has exactly 12 ids.
token_seqs = tokenizer.texts_to_sequences(samples)
padded_inputs = tf.keras.preprocessing.sequence.pad_sequences(
    token_seqs,
    maxlen=12,
    padding="pre",
    truncating="pre",
)

# Labels as a tensor, in the same order as `samples`.
target_tensor = tf.convert_to_tensor(targets)

In [4]:
# define positional embedding
class CustomPositionalEmbedding(layers.Layer):
    """Adds a learned position embedding to a learned token embedding.

    Args:
        max_len: Number of rows in the position-embedding table, i.e. the
            number of distinct positions that can be looked up.
        vocab_size: Number of rows in the token-embedding table.
        embed_size: Width of both embedding tables.
        **kwargs: Standard `layers.Layer` arguments (e.g. `name`, `dtype`),
            forwarded to the base class.
    """

    def __init__(self, max_len, vocab_size, embed_size, **kwargs):
        # Forward base-layer kwargs so the layer can be named and can be
        # rebuilt from its config (which carries `name`, `trainable`, ...).
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_size = embed_size
        self.token_embedding = layers.Embedding(vocab_size, embed_size)
        self.position_embedding = layers.Embedding(max_len, embed_size)

    def call(self, input_tensor):
        """Return token embeddings plus position embeddings.

        Args:
            input_tensor: Token ids; positions are enumerated along its last
                axis. Presumably shaped (batch, seq_len) -- confirm with caller.

        Returns:
            The element-wise sum of the token embedding of `input_tensor` and
            the embedding of positions 0 .. last_axis_size - 1.
        """
        # One position index per element of the last axis: 0, 1, ..., n - 1.
        positions = tf.range(start=0, limit=tf.shape(input_tensor)[-1])
        position_encoded = self.position_embedding(positions)
        token_encoded = self.token_embedding(input_tensor)
        # The position table has no batch axis; the addition broadcasts it.
        return token_encoded + position_encoded

    def get_config(self):
        """Return the constructor arguments merged into the base layer config."""
        config = super().get_config()
        config.update(
            {
                "max_len": self.max_len,
                "vocab_size": self.vocab_size,
                "embed_size": self.embed_size,
            }
        )
        return config

In [5]:
# define transformer block
class MiniTransformerBlock(layers.Layer):
    """Self-attention followed by a two-layer feed-forward network.

    Each sub-layer is wrapped in a residual connection and then layer
    normalization.

    Args:
        embedding_size: Width of the feed-forward output layer; also passed to
            the attention layer as `key_dim`.
        heads: Number of attention heads.
        ff_units: Width of the hidden (ReLU) feed-forward layer.
        **kwargs: Standard `layers.Layer` arguments (e.g. `name`, `dtype`),
            forwarded to the base class.
    """

    def __init__(self, embedding_size, heads, ff_units, **kwargs):
        # Forward base-layer kwargs so the layer can be named and can be
        # rebuilt from its config (which carries `name`, `trainable`, ...).
        super().__init__(**kwargs)
        self.embedding_size = embedding_size
        self.heads = heads
        self.ff_units = ff_units
        # NOTE(review): `key_dim` receives the full `embedding_size`; confirm
        # this is intended rather than `embedding_size // heads`.
        self.attention = layers.MultiHeadAttention(num_heads=heads, key_dim=embedding_size)
        self.ff_layer = tf.keras.Sequential([
            layers.Dense(ff_units, activation="relu"),
            layers.Dense(embedding_size)
        ])
        self.norm1 = layers.LayerNormalization()
        self.norm2 = layers.LayerNormalization()

    def call(self, x):
        """Apply attention and feed-forward sub-layers to `x`.

        Args:
            x: Input tensor; it is used as both query and value of the
                attention layer. The residual additions require the
                sub-layer outputs to be addable to `x`.

        Returns:
            The normalized output of the second residual connection.
        """
        # Self-attention: the same tensor is passed as query and value.
        attn_output = self.attention(x, x)
        x = self.norm1(x + attn_output)
        ff_output = self.ff_layer(x)
        return self.norm2(x + ff_output)

    def get_config(self):
        """Return the constructor arguments merged into the base layer config."""
        config = super().get_config()
        config.update(
            {
                "embedding_size": self.embedding_size,
                "heads": self.heads,
                "ff_units": self.ff_units,
            }
        )
        return config

In [6]:
# model configuration
vocab_cap = 1000
embedding_dim = 32
attention_heads = 2
feedforward_dim = 64
input_length = 12
random_seed = 42

# Seed the Python, NumPy and TensorFlow random generators *before* any layer
# weights are created, so a fresh "Restart & Run All" starts from the same
# initial weights, dropout masks and batch shuffling.
tf.keras.utils.set_random_seed(random_seed)

# build the transformer model
# The inputs are integer token ids, so declare the input dtype explicitly
# instead of relying on the default floating-point input.
input_layer = layers.Input(shape=(input_length,), dtype="int32")
embedding_output = CustomPositionalEmbedding(input_length, vocab_cap, embedding_dim)(input_layer)
transform_output = MiniTransformerBlock(embedding_dim, attention_heads, feedforward_dim)(embedding_output)
# Average over the sequence axis to get one vector per review.
pooled_output = layers.GlobalAveragePooling1D()(transform_output)
dense_output = layers.Dense(24, activation="relu")(pooled_output)
dropout_output = layers.Dropout(0.15)(dense_output)
# Two-way softmax: one probability per sentiment class (0 and 1).
final_output = layers.Dense(2, activation="softmax")(dropout_output)

transformer_model = tf.keras.Model(inputs=input_layer, outputs=final_output)

In [7]:
# compile the model
# The model ends in a 2-unit softmax and the labels are integer class ids
# (0 / 1), which is the input format sparse categorical cross-entropy expects.
transformer_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# train the transformer model
# Keep the returned History object: `training_history.history` holds the
# per-epoch loss/accuracy, and assigning it stops the cell from ending on a
# bare `<...History at 0x...>` repr.
training_history = transformer_model.fit(
    padded_inputs,
    target_tensor,
    batch_size=2,
    epochs=10,
)

Epoch 1/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.6190 - loss: 0.6961
Epoch 2/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4381 - loss: 0.7970
Epoch 3/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.2310 - loss: 0.9349    
Epoch 4/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5619 - loss: 0.7686
Epoch 5/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8595 - loss: 0.5262
Epoch 6/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6690 - loss: 0.7156
Epoch 7/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8024 - loss: 0.5892
Epoch 8/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6690 - loss: 0.4535
Epoch 9/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

<keras.src.callbacks.history.History at 0x7f8a1aba51d0>