In [2]:
pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.16.1-cp311-cp311-win_amd64.whl (2.1 kB)
Collecting tensorflow-intel==2.16.1
  Downloading tensorflow_intel-2.16.1-cp311-cp311-win_amd64.whl (377.0 MB)
     ------------------------------------ 377.0/377.0 MB 297.0 kB/s eta 0:00:00
Collecting absl-py>=1.0.0
  Downloading absl_py-2.1.0-py3-none-any.whl (133 kB)
     -------------------------------------- 133.7/133.7 kB 2.0 MB/s eta 0:00:00
Collecting astunparse>=1.6.0
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting flatbuffers>=23.5.26
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl (26 kB)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1
  Downloading gast-0.5.4-py3-none-any.whl (19 kB)
Collecting google-pasta>=0.1.1
  Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)
     ---------------------------------------- 57.5/57.5 kB 3.0 MB/s eta 0:00:00
Collecting h5py>=3.10.0
  Downloading h5py-3.11.0-cp311-cp311-win_amd64.whl (3.0 MB)
     --------------------


[notice] A new release of pip available: 22.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import numpy as np
import tensorflow as tf

# Dataset sederhana
dataset = [
    ["I", "love", "AI"],
    ["AI", "is", "cool"],
    ["Transformers", "are", "powerful"]
]

# Membuat kamus kata
vocab = set(word for sentence in dataset for word in sentence)
word2idx = {word: idx for idx, word in enumerate(vocab)}
idx2word = {idx: word for word, idx in word2idx.items()}
vocab_size = len(vocab)

# Membuat model Transformer sederhana
class Transformer(tf.keras.Model):
    def __init__(self, vocab_size, d_model, num_heads, dff, num_layers):
        super(Transformer, self).__init__()
        self.encoder = Encoder(vocab_size, d_model, num_heads, dff, num_layers)

    def call(self, inputs):
        return self.encoder(inputs)

class Encoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, d_model, num_heads, dff, num_layers):
        super(Encoder, self).__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)
        self.enc_layers = [EncoderLayer(d_model, num_heads, dff) for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(0.1)

    def call(self, inputs):
        seq_len = tf.shape(inputs)[1]
        x = self.embedding(inputs)
        x += self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x)
        for enc_layer in self.enc_layers:
            x = enc_layer(x)
        return x

class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff):
        super(EncoderLayer, self).__init__()
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(0.1)
        self.dropout2 = tf.keras.layers.Dropout(0.1)

    def call(self, x):
        attn_output, _ = self.mha(x, x, x)
        attn_output = self.dropout1(attn_output)
        out1 = self.layernorm1(x + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        out2 = self.layernorm2(out1 + ffn_output)
        return out2

def positional_encoding(position, d_model):
    angle_rads = get_angles(np.arange(position)[:, np.newaxis], np.arange(d_model)[np.newaxis, :], d_model)
    # Membuat posisi genap menggunakan sin dan posisi ganjil menggunakan cos
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
    pos_encoding = angle_rads[np.newaxis, ...]
    return tf.cast(pos_encoding, dtype=tf.float32)

def get_angles(pos, i, d_model):
    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    return pos * angle_rates

def point_wise_feed_forward_network(d_model, dff):
    return tf.keras.Sequential([
        tf.keras.layers.Dense(dff, activation='relu'),  # (batch_size, seq_len, dff)
        tf.keras.layers.Dense(d_model)  # (batch_size, seq_len, d_model)
    ])

# Hyperparameters
d_model = 4
num_heads = 2
dff = 8
num_layers = 2
maximum_position_encoding = 10000

# Membuat model Transformer
transformer = Transformer(vocab_size, d_model, num_heads, dff, num_layers)

# Membuat input dan output (untuk latihan sederhana, kita gunakan satu input untuk encoder)
inputs = np.array([[word2idx[word] for word in sentence] for sentence in dataset])
outputs = transformer(inputs)
print(outputs.shape)  # Output: (3, 3, 4) - (batch_size, sequence_length, d_model)


NameError: name 'MultiHeadAttention' is not defined