<a href="https://colab.research.google.com/github/elangbijak4/LLM-SLM-Examples/blob/main/Rev2_Hitung_Bobot_WQ_WK_WV_Layer_Enkoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
!pip install --upgrade tensorflow

Collecting tensorflow
  Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m589.8/589.8 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting h5py>=3.10.0 (from tensorflow)
  Downloading h5py-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m34.4 MB/s[0m eta [36m0:00:00[0m
Collecting ml-dtypes~=0.3.1 (from tensorflow)
  Downloading ml_dtypes-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m44.9 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard<2.17,>=2.16 (from tensorflow)
  Downloading tensorboard-2.16.2-py3-none-any.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[?25

In [4]:
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam

# Fungsi untuk membuat model Language Model
def build_language_model(vocab_size, embedding_dim, lstm_units):
    """Assemble an (uncompiled) Embedding -> LSTM -> softmax language model.

    Args:
        vocab_size: Number of rows in the embedding table and number of
            output units of the final softmax layer.
        embedding_dim: Width of each embedding vector.
        lstm_units: Number of units in the LSTM layer.

    Returns:
        A ``tf.keras.Sequential`` model made of three layers, in order:
        an Embedding with ``mask_zero=True``, an LSTM that returns the full
        sequence, and a time-distributed Dense softmax over ``vocab_size``.
    """
    # mask_zero=True makes the embedding emit a mask for token id 0.
    token_embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim, mask_zero=True)
    # return_sequences=True keeps one output per time step instead of only the last.
    recurrent = LSTM(lstm_units, return_sequences=True)
    # The same Dense softmax is applied independently at every time step.
    per_step_softmax = TimeDistributed(Dense(vocab_size, activation='softmax'))

    return tf.keras.Sequential([token_embedding, recurrent, per_step_softmax])

# --- Hyperparameters ---
vocab_size = 10000
embedding_dim = 256
lstm_units = 512
learning_rate = 0.001
batch_size = 2
num_epochs = 10

# --- Toy training data: two sequences, each four tokens long ---
input_data = tf.constant([[1, 2, 3, 4], [5, 6, 7, 8]])
# Each target row holds the next token for every position of the matching input row.
target_data = tf.constant([[2, 3, 4, 5], [6, 7, 8, 9]])

# --- Model, optimizer and loss function ---
model = build_language_model(
    vocab_size,
    embedding_dim,
    lstm_units,
)
optimizer = Adam(learning_rate=learning_rate)
loss_fn = SparseCategoricalCrossentropy()

# --- Input pipeline: slice per example, pair inputs with targets, then batch ---
input_dataset = tf.data.Dataset.from_tensor_slices(input_data)
target_dataset = tf.data.Dataset.from_tensor_slices(target_data)
dataset = tf.data.Dataset.zip((input_dataset, target_dataset))
batched_dataset = dataset.batch(batch_size)

# Training loop: one pass over `batched_dataset` per epoch, updating the model
# with Adam and printing the mean batch loss plus all layer weights.
for epoch in range(num_epochs):
    total_loss = 0.0
    # Count the batches actually iterated instead of deriving the count from
    # len(input_data) // batch_size: that expression is 0 when batch_size
    # exceeds the number of samples and undercounts when the dataset ends with
    # a partial batch, which made the reported average wrong.
    num_batches = 0

    for batch_inputs, batch_targets in batched_dataset:
        with tf.GradientTape() as tape:
            # Forward pass (training=True is the idiomatic flag inside a
            # custom training loop; this model has no dropout/batch-norm,
            # so it does not change the computed values).
            predictions = model(batch_inputs, training=True)
            loss = loss_fn(batch_targets, predictions)

        # Gradients of the batch loss w.r.t. every trainable variable.
        gradients = tape.gradient(loss, model.trainable_variables)

        # Apply the Adam update.
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        total_loss += loss
        num_batches += 1

    # An empty dataset has no meaningful average; report NaN rather than
    # dividing by zero.
    avg_loss = total_loss / num_batches if num_batches else float('nan')
    print(f'Epoch {epoch + 1}, Loss: {avg_loss}')

    # Show the current weights of the Embedding, LSTM and Dense layers.
    print("Embedding weights:")
    print(model.layers[0].get_weights()[0])  # Embedding matrix
    print("LSTM weights:")
    print(model.layers[1].get_weights())     # All LSTM weight arrays
    print("Dense weights:")
    print(model.layers[2].get_weights())     # Weights of the time-distributed Dense layer

print("Training selesai!")

Epoch 1, Loss: 9.210573196411133
Embedding weights:
[[-0.02512415  0.04174279 -0.04002928 ... -0.0102726   0.00034293
   0.01374712]
 [ 0.03294295  0.03596812  0.0426126  ...  0.04474736 -0.00488594
  -0.01463312]
 [-0.01046726  0.03904081 -0.05069197 ... -0.02469442  0.0338889
   0.00599896]
 ...
 [-0.02806597 -0.0109309  -0.01019118 ... -0.03515537  0.03566002
  -0.02802562]
 [-0.00375626 -0.00140073  0.03481344 ...  0.01442082  0.01386089
   0.00569982]
 [ 0.0470213   0.00272004  0.00178163 ...  0.02184321 -0.0297488
  -0.03445563]]
LSTM weights:
[array([[ 0.00336694,  0.0400167 , -0.0029756 , ...,  0.01870552,
        -0.0404182 , -0.01368228],
       [-0.03944996, -0.01463773, -0.0434203 , ..., -0.02625771,
         0.00800121,  0.01783141],
       [-0.04359723, -0.00613987,  0.01508364, ...,  0.03381531,
         0.00322672,  0.02918964],
       ...,
       [-0.01126161, -0.00544745, -0.02711342, ...,  0.01078841,
        -0.01612252, -0.01007359],
       [-0.02091834, -0.0444020