<a href="https://colab.research.google.com/github/elangbijak4/LLM-Research/blob/main/Simple_1Enkoder_1Dekoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Definisikan model transformer
class TransformerModel(nn.Module):
    def __init__(self, input_dim, embed_dim, nhead, dim_feedforward, output_dim):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(input_dim, embed_dim)
        self.transformer = nn.Transformer(
            d_model=embed_dim,
            nhead=nhead,
            num_encoder_layers=1,
            num_decoder_layers=1,
            dim_feedforward=dim_feedforward
        )
        self.fc_out = nn.Linear(embed_dim, output_dim)

    def forward(self, src, tgt):
        src = self.embedding(src)
        tgt = self.embedding(tgt)
        src = src.permute(1, 0, 2)  # (seq_len, batch_size, embed_dim)
        tgt = tgt.permute(1, 0, 2)  # (seq_len, batch_size, embed_dim)
        output = self.transformer(src, tgt)
        output = output.permute(1, 0, 2)  # (batch_size, seq_len, embed_dim)
        output = self.fc_out(output)
        return output

# Hyperparameters
input_dim = 10  # jumlah token dalam input
embed_dim = 32  # dimensi embedding
nhead = 2  # jumlah heads dalam multihead attention
dim_feedforward = 64  # dimensi feedforward network
output_dim = 10  # jumlah token dalam output
seq_length = 5  # panjang sequence
batch_size = 2  # ukuran batch
num_epochs = 100  # jumlah epochs
learning_rate = 0.001  # laju pembelajaran

# Data dummy untuk training
src = torch.randint(0, input_dim, (batch_size, seq_length))
tgt = torch.randint(0, output_dim, (batch_size, seq_length))
target_output = tgt.clone()

# Model, loss function, dan optimizer
model = TransformerModel(input_dim, embed_dim, nhead, dim_feedforward, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    output = model(src, tgt[:, :-1])
    loss = criterion(output.reshape(-1, output_dim), target_output[:, 1:].reshape(-1))
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Model inference (autoregressive decoding)
model.eval()
with torch.no_grad():
    src = torch.randint(0, input_dim, (1, seq_length))  # input baru untuk prediksi
    tgt = torch.zeros((1, seq_length), dtype=torch.long)  # inisialisasi output sequence
    for i in range(seq_length):
        output = model(src, tgt[:, :i+1])
        tgt[:, i] = output.argmax(dim=-1)[:, i]

    print("Predicted sequence:", tgt)



Epoch [10/100], Loss: 1.7158
Epoch [20/100], Loss: 1.1568
Epoch [30/100], Loss: 0.8491
Epoch [40/100], Loss: 0.5094
Epoch [50/100], Loss: 0.3378
Epoch [60/100], Loss: 0.2680
Epoch [70/100], Loss: 0.1942
Epoch [80/100], Loss: 0.1575
Epoch [90/100], Loss: 0.1190
Epoch [100/100], Loss: 0.1050
Predicted sequence: tensor([[6, 6, 6, 6, 6]])
