In [1]:
# imports
!pip install datasets tokenizers
import os
import math
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from pathlib import Path
from datasets import load_dataset
from tqdm import tqdm
from tokenizers import Tokenizer
from tokenizers.models import BPE
from tokenizers.trainers import BpeTrainer
from tokenizers.pre_tokenizers import Whitespace

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [2]:
# Output directories: model checkpoints plus one folder per trained tokenizer
dirs = ["./malaygpt", "./tokenizer_en", "./tokenizer_my"]
for dir_path in dirs:
  # exist_ok keeps the cell re-runnable without a separate existence check,
  # and the loop variable no longer shadows the builtin dir()
  os.makedirs(dir_path, exist_ok=True)

In [3]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cuda


# Download Dataset

In [4]:
# English/Malay pairs from HuggingFace
train_dataset = load_dataset("Helsinki-NLP/opus-100", "en-ms", split='train')
validation_dataset = load_dataset("Helsinki-NLP/opus-100", "en-ms", split='validation')

# Limit the amount of data for training purposes.
# The split is seeded so the same subsets are drawn after a kernel restart; otherwise the
# tokenizers would be retrained on different sentences and no longer match saved checkpoints.
split_generator = torch.Generator().manual_seed(42)
raw_train_dataset, rt_to_skip = random_split(train_dataset, [1500, len(train_dataset) - 1500], generator=split_generator)
raw_validation_dataset, vt_to_skip = random_split(validation_dataset, [50, len(validation_dataset) - 50], generator=split_generator)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/65.4k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/132k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/57.1M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/132k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/1000000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2000 [00:00<?, ? examples/s]

# Tokenizer

In [5]:
# Generator over the sentences of one language in a translation dataset
def get_ds_iterator(raw_train_dataset, lang):
  """Lazily yield row["translation"][lang] for every row of raw_train_dataset, in order."""
  yield from (row["translation"][lang] for row in raw_train_dataset)

def _fit_bpe_tokenizer(lang, save_path):
  """Train a BPE tokenizer on the `lang` side of raw_train_dataset and save it to save_path.

  Returns the (tokenizer, trainer) pair so both stay available at notebook level.
  """
  bpe_tokenizer = Tokenizer(BPE(unk_token="[UNK]"))
  bpe_trainer = BpeTrainer(min_frequency=2, special_tokens=["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"])
  # Pre-tokenizer to split input into words
  bpe_tokenizer.pre_tokenizer = Whitespace()
  bpe_tokenizer.train_from_iterator(get_ds_iterator(raw_train_dataset, lang), trainer=bpe_trainer)
  bpe_tokenizer.save(save_path)
  return bpe_tokenizer, bpe_trainer

# English (source language) tokenizer
tokenizer_en, trainer_en = _fit_bpe_tokenizer("en", "./tokenizer_en/tokenizer_en.json")

# Malay (target language) tokenizer
tokenizer_my, trainer_my = _fit_bpe_tokenizer("ms", "./tokenizer_my/tokenizer_my.json")

In [6]:
# Reload both tokenizers from the JSON files written by the training cell
tokenizer_en, tokenizer_my = (
    Tokenizer.from_file(tokenizer_path)
    for tokenizer_path in ("./tokenizer_en/tokenizer_en.json", "./tokenizer_my/tokenizer_my.json")
)

# Vocabulary sizes of the source (English) and target (Malay) tokenizers
source_vocab_size, target_vocab_size = tokenizer_en.get_vocab_size(), tokenizer_my.get_vocab_size()

In [7]:
# Token counts (source, target) for every training pair, gathered in a single pass
length_pairs = [
    (
        len(tokenizer_en.encode(pair["translation"]["en"]).ids),
        len(tokenizer_my.encode(pair["translation"]["ms"]).ids),
    )
    for pair in raw_train_dataset
]

# Longest tokenized sentence per language (0 if the dataset is empty)
max_seq_len_source = max((source_len for source_len, _ in length_pairs), default=0)
max_seq_len_target = max((target_len for _, target_len in length_pairs), default=0)

print("Source vocab max sequence length:", max_seq_len_source)
print("Target vocab max sequence length:", max_seq_len_target)

Source vocab max sequence length: 80
Target vocab max sequence length: 127


In [8]:
# Fixed length that every encoded sequence is padded to. It has to cover the longest tokenized
# sentence plus the special tokens added per example ([CLS] and [SEP]); the rest is headroom.
max_seq_len = 155

# Dataset and Dataloader

In [9]:
# Causal mask to hide future tokens
def causal_mask(size):
  """Return a 1 x size x size boolean mask that is True where column <= row.

  Row i marks the positions token i may attend to: itself and every earlier position.
  """
  # Keep the diagonal and everything below it, then turn the ones into booleans
  lower_triangle = torch.tril(torch.ones(1, size, size))
  return lower_triangle != 0

In [10]:
# Encode raw dataset to be processed by the model
class EncodeDataset(Dataset):
  """Turns raw English/Malay pairs into fixed-length tensors for the transformer.

  Args:
    raw_dataset: Indexable collection whose items look like
      {"translation": {"en": <source sentence>, "ms": <target sentence>}}.
    max_seq_len: Length of every returned sequence. Shorter sentences are padded;
      longer ones are truncated so the special tokens still fit.
    source_tokenizer: Tokenizer for the English side. Defaults to the notebook-level
      tokenizer_en (resolved when the dataset is constructed).
    target_tokenizer: Tokenizer for the Malay side. Defaults to the notebook-level
      tokenizer_my (resolved when the dataset is constructed).

  Raises:
    ValueError: If max_seq_len is smaller than 2, which leaves no room for [CLS] and [SEP].
  """

  def __init__(self, raw_dataset, max_seq_len, source_tokenizer=None, target_tokenizer=None):
    super().__init__()
    if max_seq_len < 2:
      raise ValueError("max_seq_len must be at least 2 to hold the [CLS] and [SEP] tokens")
    self.raw_dataset = raw_dataset
    self.max_seq_len = max_seq_len
    self.source_tokenizer = tokenizer_en if source_tokenizer is None else source_tokenizer
    self.target_tokenizer = tokenizer_my if target_tokenizer is None else target_tokenizer

    # Look the special-token ids up once instead of on every item. The encoder side uses the
    # source vocabulary and the decoder side the target vocabulary, so the ids stay correct
    # even if the two tokenizers number their special tokens differently.
    self.source_cls_id = self.source_tokenizer.token_to_id("[CLS]")
    self.source_sep_id = self.source_tokenizer.token_to_id("[SEP]")
    self.source_pad_id = self.source_tokenizer.token_to_id("[PAD]")
    self.target_cls_id = self.target_tokenizer.token_to_id("[CLS]")
    self.target_sep_id = self.target_tokenizer.token_to_id("[SEP]")
    self.target_pad_id = self.target_tokenizer.token_to_id("[PAD]")

  def __len__(self):
    return len(self.raw_dataset)

  def __getitem__(self, index):
    """Return the encoded tensors, masks and original strings for one sentence pair."""
    # Fetch data (in both English and Malay) for the given index
    raw_text = self.raw_dataset[index]

    # Separate text into source and target
    source_text = raw_text["translation"]["en"]
    target_text = raw_text["translation"]["ms"]

    # Encode text; truncate so that [CLS] + tokens + [SEP] (source) and [CLS] + tokens /
    # tokens + [SEP] (target) never exceed max_seq_len. Without this, an over-long sentence
    # produced a negative padding count and tensors longer than max_seq_len.
    source_ids = list(self.source_tokenizer.encode(source_text).ids)[:self.max_seq_len - 2]
    target_ids = list(self.target_tokenizer.encode(target_text).ids)[:self.max_seq_len - 1]

    # Amount to pad the encoded text
    num_source_padding = self.max_seq_len - len(source_ids) - 2
    num_target_padding = self.max_seq_len - len(target_ids) - 1
    encoder_padding = [self.source_pad_id] * num_source_padding
    decoder_padding = [self.target_pad_id] * num_target_padding

    # CLS + source encoding + SEP + padding
    encoder_input = torch.tensor(
        [self.source_cls_id] + source_ids + [self.source_sep_id] + encoder_padding, dtype=torch.int64)
    # CLS + target encoding + padding
    decoder_input = torch.tensor(
        [self.target_cls_id] + target_ids + decoder_padding, dtype=torch.int64)
    # target encoding + SEP + padding (decoder input shifted left by one position)
    target_label = torch.tensor(
        target_ids + [self.target_sep_id] + decoder_padding, dtype=torch.int64)

    # Mask to ignore padding: 1 x 1 x seq_len
    encoder_mask = (encoder_input != self.source_pad_id).unsqueeze(0).unsqueeze(0).int()
    # Padding mask combined with the causal mask so the decoder can't see future tokens
    # when predicting the next token: 1 x seq_len x seq_len
    decoder_mask = (decoder_input != self.target_pad_id).unsqueeze(0).unsqueeze(0).int() & causal_mask(decoder_input.size(0))

    return {
        "encoder_input": encoder_input,
        "decoder_input": decoder_input,
        "target_label": target_label,
        "encoder_mask": encoder_mask,
        "decoder_mask": decoder_mask,
        "source_text": source_text,
        "target_text": target_text
    }

In [11]:
# Wrap the raw train/validation subsets in the encoding dataset
train_ds, val_ds = (
    EncodeDataset(raw_split, max_seq_len)
    for raw_split in (raw_train_dataset, raw_validation_dataset)
)

# Shuffled loaders: mini-batches of 5 for training, single examples for validation
train_dataloader = DataLoader(dataset=train_ds, batch_size=5, shuffle=True)
val_dataloader = DataLoader(dataset=val_ds, batch_size=1, shuffle=True)

In [12]:
# Inspect one encoded training example. Each item is a dict with:
#   encoder_input: Encoded source text wrapped in start/end of sentence tokens, then padding
#   decoder_input: Start of sentence token followed by the encoded target text, then padding
#   target_label:  Encoded target text followed by the end of sentence token, then padding
#   encoder_mask:  Mask to ignore padding in the encoder input
#   decoder_mask:  Padding mask for the decoder input combined with the causal mask
#   source_text:   Original source text
#   target_text:   Original target text
train_ds[0]

{'encoder_input': tensor([  2, 660,  31,   3,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0]),
 'decoder_input': tensor([  2, 901,  30,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  

# Input Embedding and Positional Encoding

In [13]:
# Token embedding layer whose output is scaled by sqrt(d_model)
class EmbeddingLayer(nn.Module):
  """Maps token ids to learned d_model-sized vectors."""

  def __init__(self, d_model: int, vocab_size: int):
    super().__init__()
    self.d_model = d_model
    # Lookup table of shape vocab_size x d_model
    self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=d_model)

  def forward(self, input):
    # Look up the vectors, then multiply them by sqrt(d_model)
    scale = math.sqrt(self.d_model)
    token_vectors = self.embedding(input)
    return token_vectors * scale

In [14]:
# Positional encoding layer
class PositionalEncoding(nn.Module):
  """Adds fixed sinusoidal position information to token embeddings, then applies dropout.

  Args:
    d_model: Size of each embedding vector (even or odd).
    max_seq_len: Number of positions to precompute encodings for.
    dropout_rate: Dropout probability applied to the summed embeddings.
  """

  def __init__(self, d_model: int, max_seq_len: int, dropout_rate: float):
    super().__init__()
    self.dropout = nn.Dropout(dropout_rate)

    # Init positional encodings, positions
    pe = torch.zeros(max_seq_len, d_model)
    pos = torch.arange(0, max_seq_len, dtype=torch.float).unsqueeze(1)
    # 1 / (10000 ** (2 * i / d_model))
    div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
    angles = pos * div_term

    # Sine on even dimensions, cosine on odd dimensions
    pe[:, 0::2] = torch.sin(angles)
    # With an odd d_model there is one fewer odd column than even columns, so trim to fit
    pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

    # Add batch dimension
    # pe: 1 x seq_len x d_model
    pe = pe.unsqueeze(0)
    # Ensure that the positional encodings are a part of the model, but not trainable
    self.register_buffer("pe", pe)

  def forward(self, input_embedding):
    # input_embedding: batch_size x seq_len x d_model
    # Buffers never require gradients, so the slice can be added directly
    input_embedding = input_embedding + self.pe[:, :input_embedding.shape[1], :]
    return self.dropout(input_embedding)

# Multi-Head Attention

In [15]:
# Multihead attention block to get context
class MultiHeadAttention(nn.Module):
  """Scaled dot-product attention computed in parallel over num_heads heads.

  Args:
    d_model: Size of the input/output vectors; must be divisible by num_heads.
    num_heads: Number of attention heads.
    dropout_rate: Dropout probability applied to the attention weights.
  """

  def __init__(self, d_model: int, num_heads: int, dropout_rate: float):
    super().__init__()
    # d_model must be divisible by the number of heads
    assert d_model % num_heads == 0, "d_model must be divisible by num_heads"

    self.dropout = nn.Dropout(dropout_rate)
    self.num_heads = num_heads

    # Dimension of each self attention head
    self.d_k = d_model // num_heads

    # Init weight matrices
    self.W_q = nn.Linear(d_model, d_model, bias=False)
    self.W_k = nn.Linear(d_model, d_model, bias=False)
    self.W_v = nn.Linear(d_model, d_model, bias=False)
    self.W_o = nn.Linear(d_model, d_model, bias=False)

  def _split_heads(self, projected):
    """Reshape batch_size x seq_len x d_model into batch_size x num_heads x seq_len x d_k."""
    batch_size, seq_len = projected.shape[0], projected.shape[1]
    return projected.view(batch_size, seq_len, self.num_heads, self.d_k).transpose(1, 2)

  def forward(self, q, k, v, encoder_mask):
    """Attend from q to k/v.

    Args:
      q, k, v: batch_size x seq_len x d_model tensors.
      encoder_mask: Padding or causal mask broadcastable to the attention scores
        (batch_size x num_heads x seq_len x seq_len); positions equal to 0 are hidden.
        Pass None to attend everywhere.

    Returns:
      batch_size x seq_len x d_model tensor.
    """
    # Multiply input embeddings by weights, then divide into heads
    query = self._split_heads(self.W_q(q))
    key = self._split_heads(self.W_k(k))
    value = self._split_heads(self.W_v(v))

    # Attention score based on the similarity between the query and key
    # attention_score: batch_size x num_heads x seq_len x seq_len
    attention_score = (query @ key.transpose(-2, -1)) / math.sqrt(self.d_k)

    # Apply encoder/causal mask
    if encoder_mask is not None:
      # Fill with the most negative finite value of the score dtype rather than a fixed -1e9,
      # which does not fit in float16; the fill is out-of-place so autograd inputs stay intact
      masked_value = torch.finfo(attention_score.dtype).min
      attention_score = attention_score.masked_fill(encoder_mask == 0, masked_value)

    # Softmax over the key dimension, then dropout on the attention weights
    attention_score = attention_score.softmax(dim=-1)
    attention_score = self.dropout(attention_score)

    # Multiply attention score with the value
    # attention_output: batch_size x num_heads x seq_len x d_k
    attention_output = attention_score @ value

    # Concatenate all the output heads
    # attention_output: batch_size x seq_len x d_model
    attention_output = attention_output.transpose(1, 2).contiguous().view(attention_output.shape[0], -1, self.num_heads * self.d_k)

    # Multiply attention output by output weights
    return self.W_o(attention_output)

# Feedforward, Layer Normalization, and AddAndNorm

In [16]:
# Position-wise feed-forward network: Linear -> ReLU -> Dropout -> Linear
class FeedForward(nn.Module):
  """Expands each vector from d_model to d_ff features and projects it back to d_model."""

  def __init__(self, d_model: int, d_ff: int, dropout_rate: float):
    super().__init__()
    self.dropout = nn.Dropout(p=dropout_rate)
    self.layer_1 = nn.Linear(in_features=d_model, out_features=d_ff)
    self.layer_2 = nn.Linear(in_features=d_ff, out_features=d_model)

  def forward(self, input):
    hidden = self.layer_1(input)
    hidden = torch.relu(hidden)
    hidden = self.dropout(hidden)
    return self.layer_2(hidden)

In [17]:
# Layer normalization with scaling (gamma) and shifting (beta)
class LayerNorm(nn.Module):
  """Normalizes the last dimension to zero mean / unit std, then applies a learned scale and shift.

  Args:
    eps: Small constant added to the standard deviation to avoid division by zero.
    d_model: Size of the last dimension of the inputs. Defaults to 512, the value that was
      previously hard-coded, so existing LayerNorm() calls behave as before.
  """

  def __init__(self, eps: float = 1e-5, d_model: int = 512):
    super().__init__()
    # Epsilon is for divide-by-zero errors
    self.eps = eps
    # Extra learning params to scale and shift embedding values; one weight per feature
    self.gamma = nn.Parameter(torch.ones(d_model))
    self.beta = nn.Parameter(torch.zeros(d_model))

  def forward(self, input):
    # Statistics are taken over the feature (last) dimension of each position
    mean = input.mean(dim=-1, keepdim=True)
    std = input.std(dim=-1, keepdim=True)
    return self.gamma * (input - mean) / (std + self.eps) + self.beta

In [18]:
# Residual (skip) connection around a layer-normalized sub-layer
class AddAndNorm(nn.Module):
  """Computes input + dropout(sub_layer(layer_norm(input)))."""

  def __init__(self, dropout_rate: float):
    super().__init__()
    self.dropout = nn.Dropout(p=dropout_rate)
    self.layer_norm = LayerNorm()

  def forward(self, input, sub_layer):
    # The sub-layer sees the normalized input; the skip path carries the raw input
    normalized = self.layer_norm(input)
    residual_update = self.dropout(sub_layer(normalized))
    return input + residual_update

# Encoder Block and Encoder

In [19]:
# One encoder layer: self-attention then feed-forward, each wrapped in add-and-norm
class EncoderBlock(nn.Module):
  """Applies self-attention and a feed-forward network, each inside a residual connection."""

  def __init__(self, multihead_attention: MultiHeadAttention, feed_forward: FeedForward, dropout_rate: float) -> None:
    super().__init__()
    self.multihead_attention = multihead_attention
    self.feed_forward = feed_forward
    self.addnorm_1 = AddAndNorm(dropout_rate)
    self.addnorm_2 = AddAndNorm(dropout_rate)

  def forward(self, encoder_input, encoder_mask):
    def self_attention(normed_input):
      # Query, key and value all come from the same (normalized) encoder input
      return self.multihead_attention(normed_input, normed_input, normed_input, encoder_mask)

    # Skip connection around self-attention, then around the feed-forward block
    attended = self.addnorm_1(encoder_input, self_attention)
    return self.addnorm_2(attended, self.feed_forward)

In [20]:
# Stack of encoder blocks followed by a final layer normalization
class Encoder(nn.Module):
  """Runs the input through every encoder block in order, then normalizes the result."""

  def __init__(self, encoderblocklist: nn.ModuleList) -> None:
    super().__init__()
    self.encoderblocklist = encoderblocklist
    self.layer_norm = LayerNorm()

  def forward(self, encoder_input, encoder_mask):
    hidden = encoder_input
    # Each block consumes the previous block's output
    for block in self.encoderblocklist:
      hidden = block(hidden, encoder_mask)
    # Normalize the final encoder block output
    return self.layer_norm(hidden)

# Decoder Block, Decoder, and Projection Layer

In [21]:
# One decoder layer: masked self-attention, cross-attention over the encoder output,
# and feed-forward, each wrapped in add-and-norm
class DecoderBlock(nn.Module):
  """Applies masked self-attention, encoder cross-attention and a feed-forward network."""

  def __init__(self, masked_multihead_attention: MultiHeadAttention, cross_multihead_attention: MultiHeadAttention, feed_forward: FeedForward, dropout_rate: float) -> None:
    super().__init__()
    # Self-attention over the decoder input, restricted by the causal mask
    self.masked_multihead_attention = masked_multihead_attention
    # Attention whose keys and values come from the encoder output
    self.cross_multihead_attention = cross_multihead_attention
    self.feed_forward = feed_forward
    self.addnorm_1 = AddAndNorm(dropout_rate)
    self.addnorm_2 = AddAndNorm(dropout_rate)
    self.addnorm_3 = AddAndNorm(dropout_rate)

  def forward(self, decoder_input, encoder_output, encoder_mask, decoder_mask):
    def masked_self_attention(normed):
      return self.masked_multihead_attention(normed, normed, normed, decoder_mask)

    def cross_attention(normed):
      # Queries from the decoder; keys and values from the encoder output
      return self.cross_multihead_attention(normed, encoder_output, encoder_output, encoder_mask)

    # Three residual sub-layers applied in sequence
    hidden = self.addnorm_1(decoder_input, masked_self_attention)
    hidden = self.addnorm_2(hidden, cross_attention)
    return self.addnorm_3(hidden, self.feed_forward)

In [22]:
# Stack of decoder blocks followed by a final layer normalization
class Decoder(nn.Module):
  """Runs the input through every decoder block in order, then normalizes the result."""

  def __init__(self, decoderblocklist: nn.ModuleList) -> None:
    super().__init__()
    self.decoderblocklist = decoderblocklist
    self.layer_norm = LayerNorm()

  def forward(self, decoder_input, encoder_output, encoder_mask, decoder_mask):
    hidden = decoder_input
    # Every block receives the same encoder output and masks
    for block in self.decoderblocklist:
      hidden = block(hidden, encoder_output, encoder_mask, decoder_mask)
    # Normalize the final decoder block output
    return self.layer_norm(hidden)

In [23]:
# Linear layer mapping decoder vectors to vocabulary logits (no softmax is applied here)
class ProjectionLayer(nn.Module):
  """Projects d_model-sized vectors onto vocab_size scores."""

  def __init__(self, d_model: int, vocab_size: int) -> None:
    super().__init__()
    self.projection_layer = nn.Linear(in_features=d_model, out_features=vocab_size)

  def forward(self, decoder_output):
    # batch_size x seq_len x d_model -> batch_size x seq_len x vocab_size
    return self.projection_layer(decoder_output)

# Transformer

In [24]:
# Full transformer model: embeds and encodes the source, decodes the target, projects to the vocabulary
class Transformer(nn.Module):
  """Bundles the embedding, positional, encoder, decoder and projection modules.

  encode, decode and project are exposed separately so the encoder output can be computed
  once and reused across several decode calls.
  """

  def __init__(self, encoder: Encoder, decoder: Decoder, source_embed: EmbeddingLayer, target_embed: EmbeddingLayer, source_pos: PositionalEncoding, target_pos: PositionalEncoding, projection_layer: ProjectionLayer) -> None:
    super().__init__()
    # Source side
    self.source_embed = source_embed
    self.source_pos = source_pos
    self.encoder = encoder

    # Target side
    self.target_embed = target_embed
    self.target_pos = target_pos
    self.decoder = decoder

    # Maps decoder output to vocabulary
    self.projection_layer = projection_layer

  def encode(self, encoder_input, encoder_mask):
    # Token ids -> embeddings -> embeddings with positions -> encoder stack
    embedded_source = self.source_pos(self.source_embed(encoder_input))
    return self.encoder(embedded_source, encoder_mask)

  def decode(self, encoder_output, encoder_mask, decoder_input, decoder_mask):
    # Token ids -> embeddings -> embeddings with positions -> decoder stack
    embedded_target = self.target_pos(self.target_embed(decoder_input))
    return self.decoder(embedded_target, encoder_output, encoder_mask, decoder_mask)

  def project(self, decoder_output):
    return self.projection_layer(decoder_output)

In [25]:
def build_model(source_vocab_size, target_vocab_size, source_seq_len, target_seq_len, d_model=512, num_blocks=6, num_heads=8, dropout_rate=0.1, d_ff=2048):
  """Assemble a Transformer and Xavier-initialize every parameter with more than one dimension.

  Args:
    source_vocab_size, target_vocab_size: Vocabulary sizes for the two embedding tables;
      the target size is also the width of the projection layer.
    source_seq_len, target_seq_len: Number of positions precomputed by each positional encoding.
    d_model: Width of the embeddings and of every attention/feed-forward block.
    num_blocks: Number of encoder blocks and of decoder blocks.
    num_heads: Attention heads per attention module.
    dropout_rate: Dropout probability used throughout.
    d_ff: Hidden width of the feed-forward blocks.

  Returns:
    The initialized Transformer.
  """
  # Embedding layers
  source_embed = EmbeddingLayer(d_model, source_vocab_size)
  target_embed = EmbeddingLayer(d_model, target_vocab_size)

  # Positional encoding layers
  source_pos = PositionalEncoding(d_model, source_seq_len, dropout_rate)
  target_pos = PositionalEncoding(d_model, target_seq_len, dropout_rate)

  # Encoder: num_blocks independent blocks, each with its own attention and feed-forward modules
  encoder = Encoder(nn.ModuleList([
      EncoderBlock(
          MultiHeadAttention(d_model, num_heads, dropout_rate),
          FeedForward(d_model, d_ff, dropout_rate),
          dropout_rate,
      )
      for _ in range(num_blocks)
  ]))

  # Decoder: num_blocks independent blocks with masked self-attention, cross-attention and feed-forward
  decoder = Decoder(nn.ModuleList([
      DecoderBlock(
          MultiHeadAttention(d_model, num_heads, dropout_rate),
          MultiHeadAttention(d_model, num_heads, dropout_rate),
          FeedForward(d_model, d_ff, dropout_rate),
          dropout_rate,
      )
      for _ in range(num_blocks)
  ]))

  # Projection layer
  projection_layer = ProjectionLayer(d_model, target_vocab_size)

  # Transformer
  model = Transformer(encoder, decoder, source_embed, target_embed, source_pos, target_pos, projection_layer)

  # Init model params: only tensors with more than one dimension (weight matrices, embedding tables)
  for weight in (param for param in model.parameters() if param.dim() > 1):
    nn.init.xavier_uniform_(weight)

  return model

In [26]:
# Build the transformer with both positional encodings sized to max_seq_len, then move it to the device
model = build_model(
    source_vocab_size=tokenizer_en.get_vocab_size(),
    target_vocab_size=tokenizer_my.get_vocab_size(),
    source_seq_len=max_seq_len,
    target_seq_len=max_seq_len,
    d_model=512,
)
model = model.to(device)

print(model)

Transformer(
  (source_embed): EmbeddingLayer(
    (embedding): Embedding(1939, 512)
  )
  (source_pos): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): Encoder(
    (encoderblocklist): ModuleList(
      (0-5): 6 x EncoderBlock(
        (multihead_attention): MultiHeadAttention(
          (dropout): Dropout(p=0.1, inplace=False)
          (W_q): Linear(in_features=512, out_features=512, bias=False)
          (W_k): Linear(in_features=512, out_features=512, bias=False)
          (W_v): Linear(in_features=512, out_features=512, bias=False)
          (W_o): Linear(in_features=512, out_features=512, bias=False)
        )
        (feed_forward): FeedForward(
          (dropout): Dropout(p=0.1, inplace=False)
          (layer_1): Linear(in_features=512, out_features=2048, bias=True)
          (layer_2): Linear(in_features=2048, out_features=512, bias=True)
        )
        (addnorm_1): AddAndNorm(
          (dropout): Dropout(p=0.1, inplace=False)
         

# Validation

In [27]:
def run_validation(model, validation_ds, tokenizer_en, tokenizer_my, max_seq_len, device, print_msg, global_step, num_examples=2):
  """Greedily translate a few validation examples and report them through print_msg.

  Args:
    model: Object exposing eval(), encode(), decode() and project().
    validation_ds: Iterable of batches holding exactly one example each, with the keys
      "encoder_input", "encoder_mask", "source_text" and "target_text".
    tokenizer_en: Unused; kept so existing callers keep working.
    tokenizer_my: Target tokenizer, used for the [CLS]/[SEP] ids and for decoding predictions.
    max_seq_len: Maximum number of tokens (including [CLS]) to generate per example.
    device: Device the tensors are moved to.
    print_msg: Callable that receives each output line.
    global_step: Unused; kept so existing callers keep working.
    num_examples: Number of examples to translate before stopping (expected to be positive;
      defaults to the previously hard-coded 2).

  Note:
    The model is switched to eval mode and left there; the caller decides when to call train().
  """
  # Change model to only evaluate
  model.eval()

  # Begin and end of sentence tokens (identical for every batch, so looked up once)
  cls_id = tokenizer_my.token_to_id("[CLS]")
  sep_id = tokenizer_my.token_to_id("[SEP]")

  # Don't calculate gradients during evaluation
  with torch.no_grad():
    for count, batch in enumerate(validation_ds, start=1):
      # Get input and mask
      encoder_input = batch["encoder_input"].to(device)
      encoder_mask = batch["encoder_mask"].to(device)

      # Calculate output of the encoder from the val sequence (reused for every decoding step)
      encoder_output = model.encode(encoder_input, encoder_mask)

      # Decoder input first token is the beginning of sentence token
      decoder_input = torch.full((1, 1), cls_id, dtype=encoder_input.dtype, device=device)

      # Iteratively add tokens until the maximum length is reached
      while decoder_input.size(1) < max_seq_len:
        # Recreate causal mask for token prediction with a new decoder input
        decoder_mask = causal_mask(decoder_input.size(1)).type_as(encoder_mask).to(device)

        # Get scores for the next token from the last decoder position
        out = model.decode(encoder_output, encoder_mask, decoder_input, decoder_mask)
        prob = model.project(out[:, -1])

        # Greedily get the next token with the highest score
        _, next_word = torch.max(prob, dim=1)
        next_token = int(next_word.item())

        # Add predicted token to the decoder input
        next_column = torch.full((1, 1), next_token, dtype=encoder_input.dtype, device=device)
        decoder_input = torch.cat([decoder_input, next_column], dim=1)

        # Next token is the end of sentence token
        if next_token == sep_id:
          break

      # Get source text, target text, and predicted text.
      # The ids are handed to the tokenizer as a plain list of Python ints.
      source_text = batch["source_text"][0]
      target_text = batch["target_text"][0]
      model_out_text = tokenizer_my.decode(decoder_input.squeeze(0).tolist())

      print_msg("-" * 55)
      print_msg(f"Source Text: {source_text}")
      print_msg(f"Target Text: {target_text}")
      print_msg(f"Predicted by MalayGPT: {model_out_text}")

      if count >= num_examples:
        break

# Training

In [28]:
def train_model(preload_epoch=None):
  """Train the notebook-level `model`, validating and checkpointing after every epoch.

  Uses the notebook-level `model`, `train_dataloader`, `val_dataloader`, `tokenizer_en`,
  `tokenizer_my`, `max_seq_len` and `device`.

  Args:
    preload_epoch: Epoch number of a checkpoint in ./malaygpt to resume from. When given,
      model weights, optimizer state and the step counter are restored and training
      continues with the following epoch. None starts from scratch.
  """
  EPOCHS = 100
  initial_epoch = 0
  global_step = 0

  def checkpoint_path(epoch_index):
    # Single source of truth for the file name, so saving and loading always agree
    return f"./malaygpt/model_{epoch_index}.pt"

  optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-9)

  # Start at preloaded epoch, weights, and optimizer
  if preload_epoch is not None:
    # Load model; map_location lets a checkpoint written on the GPU be restored on the CPU
    # (the previous ".pth" suffix never matched the ".pt" files written below)
    state = torch.load(checkpoint_path(preload_epoch), map_location=device)
    model.load_state_dict(state["model_state_dict"])
    # Get initial epoch
    initial_epoch = state["epoch"] + 1
    # Get initial optimizer
    optimizer.load_state_dict(state["optimizer_state_dict"])
    global_step = state["global_step"]

  # The labels are target-language token ids, so the ignored padding id must come from
  # the target tokenizer
  loss_fn = nn.CrossEntropyLoss(ignore_index=tokenizer_my.token_to_id("[PAD]"), label_smoothing=0.1).to(device)

  for epoch in range(initial_epoch, EPOCHS):
    # Change model to train (run_validation leaves it in eval mode)
    model.train()
    # Load dataset batches
    batch_iterator = tqdm(train_dataloader, desc=f"Processing Epoch {epoch:02d}")
    for batch in batch_iterator:
      # batch_size x seq_len
      encoder_input = batch["encoder_input"].to(device)
      # batch_size x seq_len
      decoder_input = batch["decoder_input"].to(device)
      # batch_size x 1 x 1 x seq_len
      encoder_mask = batch["encoder_mask"].to(device)
      # batch_size x 1 x seq_len x seq_len
      decoder_mask = batch["decoder_mask"].to(device)
      # batch_size x seq_len
      target_label = batch["target_label"].to(device)

      # batch_size x seq_len x d_model
      encoder_output = model.encode(encoder_input, encoder_mask)
      decoder_output = model.decode(encoder_output, encoder_mask, decoder_input, decoder_mask)
      # batch_size x seq_len x vocab_size
      projection_output = model.project(decoder_output)

      # Calculate loss of the batch over all (position, vocabulary) scores
      loss = loss_fn(projection_output.view(-1, tokenizer_my.get_vocab_size()), target_label.view(-1))
      batch_iterator.set_postfix({"loss": f"{loss.item():6.3f}"})
      loss.backward()

      optimizer.step()
      optimizer.zero_grad(set_to_none=True)

      global_step += 1

    # Run validation after every epoch; messages go through tqdm so the progress bar stays intact
    run_validation(model, val_dataloader, tokenizer_en, tokenizer_my, max_seq_len, device, lambda msg: batch_iterator.write(msg), global_step)

    # Save everything needed to resume after this epoch
    torch.save({
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "global_step": global_step
    }, checkpoint_path(epoch))

In [None]:
# Train from scratch; pass the epoch number of a saved checkpoint to resume from it instead
train_model(preload_epoch=None)

Processing Epoch 00: 100%|██████████| 300/300 [00:31<00:00,  9.41it/s, loss=1.514]


-------------------------------------------------------
Source Text: Case Sens
Target Text: Sensi Kata
Predicted by MalayGPT: C anti
-------------------------------------------------------
Source Text: Some kid gave to me.
Target Text: Ada budak berikannya pada saya.
Predicted by MalayGPT: - Siapa punya ini .


Processing Epoch 01: 100%|██████████| 300/300 [00:30<00:00,  9.70it/s, loss=1.544]


-------------------------------------------------------
Source Text: WOMAN: There's a mix-up with my reservation.
Target Text: Ada kesilapan dalam tempahan saya.
Predicted by MalayGPT: Aku akan menembak nya ! tapi pen cu I di sini .
-------------------------------------------------------
Source Text: How is Young Do doing?
Target Text: Yeong Do. okeykah?
Predicted by MalayGPT: Bagaimana dengan An dy ?


Processing Epoch 02: 100%|██████████| 300/300 [00:31<00:00,  9.66it/s, loss=1.365]


-------------------------------------------------------
Source Text: Too bad I'm not younger, or I'd lend you a hand.
Target Text: Aku tak akan menolong kamu
Predicted by MalayGPT: Aku tak nampak kamu mati di sini utk memas tikan awak .
-------------------------------------------------------
Source Text: I'll kill him, the useless little sewer rat!
Target Text: Aku akan bunuh dia!
Predicted by MalayGPT: Aku dah sedia , aku takkan berharap pada O re os hi m yang men el efon nya .


Processing Epoch 03: 100%|██████████| 300/300 [00:31<00:00,  9.57it/s, loss=1.387]


-------------------------------------------------------
Source Text: -Hey, Angela.
Target Text: - Hei, Angela.
Predicted by MalayGPT: Oh , baw akan en am .
-------------------------------------------------------
Source Text: How is Young Do doing?
Target Text: Yeong Do. okeykah?
Predicted by MalayGPT: Adakah peguam nya membuat apa - apa ke ma juan ?


Processing Epoch 04: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.252]


-------------------------------------------------------
Source Text: What do you mean?
Target Text: Apa maksud kamu?
Predicted by MalayGPT: Apa yang awak maksudkan dengan L ana ?
-------------------------------------------------------
Source Text: You don't have to write anything down to be a poet.
Target Text: [Arthur] Anda dont mempunyai untuk menulis sesuatu bawah menjadi seorang penyair.
Predicted by MalayGPT: Anda tidak boleh meletakkan ia dalam minuman .


Processing Epoch 05: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.300]


-------------------------------------------------------
Source Text: Your condolences are appreciated but your help is unnecessary unless you can name the Americans responsible.
Target Text: Takziah kamu dihargai tetapi bantuan kamu tidak perlu melainkan kamu boleh menamakan rakyat Amerika yang bertanggungjawab.
Predicted by MalayGPT: Hanya beberapa lelaki yang membunuh untuk yang masuk ke per ga d uhan bar hidup .
-------------------------------------------------------
Source Text: Password _type:
Target Text: _Katalaluan:
Predicted by MalayGPT: P eg awai par k ing hanya ber tugas di bahagian barat s


Processing Epoch 06: 100%|██████████| 300/300 [00:31<00:00,  9.62it/s, loss=1.603]


-------------------------------------------------------
Source Text: There are leafs in the way.
Target Text: Ada reranting dan daun.
Predicted by MalayGPT: - Ada yang mau beli makanan .
-------------------------------------------------------
Source Text: You don't have to write anything down to be a poet.
Target Text: [Arthur] Anda dont mempunyai untuk menulis sesuatu bawah menjadi seorang penyair.
Predicted by MalayGPT: Kau tak buat salah .


Processing Epoch 07: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.323]


-------------------------------------------------------
Source Text: #There is a budding flower And I'm mad for her#
Target Text: # Terdapat bunga tunas dan saya gila padanya #
Predicted by MalayGPT: G el in tar pemacu pen cetak untuk di muat turun
-------------------------------------------------------
Source Text: You're pulling my leg, aren't you?
Target Text: Kau nak menentang aku?
Predicted by MalayGPT: Kau katakan pada mereka aku melakukan ini , okey ?


Processing Epoch 08: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.359]


-------------------------------------------------------
Source Text: Then stop right now!
Target Text: Dah, berhenti...
Predicted by MalayGPT: Sekarang kau nak sekarang !
-------------------------------------------------------
Source Text: -Hey, Angela.
Target Text: - Hei, Angela.
Predicted by MalayGPT: - Dia juga cuba tengok .


Processing Epoch 09: 100%|██████████| 300/300 [00:31<00:00,  9.53it/s, loss=1.285]


-------------------------------------------------------
Source Text: This guy is going to have a lot of surveillance.
Target Text: This guy is going to have a lot of surveillance.
Predicted by MalayGPT: Orang ini mempunyai pulau dan j et sendiri , he bat sekali .
-------------------------------------------------------
Source Text: Chris Ryan instead opts to take a knee.
Target Text: Chris Ryan malah melutut.
Predicted by MalayGPT: F le k si bil iti M ini mal di laluan pen dekat an .


Processing Epoch 10: 100%|██████████| 300/300 [00:31<00:00,  9.66it/s, loss=1.249]


-------------------------------------------------------
Source Text: But the past few days.. ...we've been doing rounds of the visa and passport office.
Target Text: Tapi sejak beberapa hari... ..kami sibuk mengurus visa dan passport.
Predicted by MalayGPT: Tapi k ini , kami mem an dang ke hada pan bahawa sebagai rak ya t kita tidak tunduk .
-------------------------------------------------------
Source Text: Mr Raj, folks I like can call me Chutki
Target Text: Raj, orang yang saya suka boleh panggil saya Chutki
Predicted by MalayGPT: U ni ah aku nak be benang carian CD


Processing Epoch 11: 100%|██████████| 300/300 [00:31<00:00,  9.62it/s, loss=1.245]


-------------------------------------------------------
Source Text: Too bad I'm not younger, or I'd lend you a hand.
Target Text: Aku tak akan menolong kamu
Predicted by MalayGPT: Aku tak tahu macam mana kau , tapi takkan bertanya tentang awak .
-------------------------------------------------------
Source Text: Mr Raj, folks I like can call me Chutki
Target Text: Raj, orang yang saya suka boleh panggil saya Chutki
Predicted by MalayGPT: U ni , In di i put nya .


Processing Epoch 12: 100%|██████████| 300/300 [00:31<00:00,  9.62it/s, loss=1.260]


-------------------------------------------------------
Source Text: Who are you? I am Genus.
Target Text: Baiklah.
Predicted by MalayGPT: Siapakah kamu tahu macam mana ?
-------------------------------------------------------
Source Text: Take their statement.
Target Text: Ambil kenyataan mereka.
Predicted by MalayGPT: L en ny .


Processing Epoch 13: 100%|██████████| 300/300 [00:31<00:00,  9.60it/s, loss=1.326]


-------------------------------------------------------
Source Text: The black queen.
Target Text: Ratu hitam.
Predicted by MalayGPT: Ser onok tengok dua orang ni berg om o i lagi .
-------------------------------------------------------
Source Text: I'll kill him, the useless little sewer rat!
Target Text: Aku akan bunuh dia!
Predicted by MalayGPT: Aku dah sedia untuk kembali ber kerja .


Processing Epoch 14: 100%|██████████| 300/300 [00:31<00:00,  9.65it/s, loss=1.374]


-------------------------------------------------------
Source Text: Case Sens
Target Text: Sensi Kata
Predicted by MalayGPT: C os s
-------------------------------------------------------
Source Text: WOMAN: There's a mix-up with my reservation.
Target Text: Ada kesilapan dalam tempahan saya.
Predicted by MalayGPT: ( ME N G E L U H ) s untuk hal yang akan menyak it iku .


Processing Epoch 15: 100%|██████████| 300/300 [00:31<00:00,  9.62it/s, loss=1.267]


-------------------------------------------------------
Source Text: Man, hold the bag, stupid.
Target Text: Pegang beg itu, bodoh.
Predicted by MalayGPT: En . Ye ag er , saya James Sa v o y .
-------------------------------------------------------
Source Text: I drive the squirrel!
Target Text: Aku mengendarai tupai.
Predicted by MalayGPT: S ana !


Processing Epoch 16: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.237]


-------------------------------------------------------
Source Text: Man, hold the bag, stupid.
Target Text: Pegang beg itu, bodoh.
Predicted by MalayGPT: G a ben or , Rick , sesiapa yang memerlukan bal di Kencing mereka dik osong kan dan anda .
-------------------------------------------------------
Source Text: Case Sens
Target Text: Sensi Kata
Predicted by MalayGPT: C as s .


Processing Epoch 17: 100%|██████████| 300/300 [00:31<00:00,  9.63it/s, loss=1.211]


-------------------------------------------------------
Source Text: Some kid gave to me.
Target Text: Ada budak berikannya pada saya.
Predicted by MalayGPT: Awak selamat sekarang .
-------------------------------------------------------
Source Text: This guy is going to have a lot of surveillance.
Target Text: This guy is going to have a lot of surveillance.
Predicted by MalayGPT: Orang ini mempunyai pulau dan j et sendiri , he bat sekali .


Processing Epoch 18: 100%|██████████| 300/300 [00:31<00:00,  9.63it/s, loss=1.276]


-------------------------------------------------------
Source Text: If it's after school hours, you're just gonna lay there all night until 7:00 a.m., when I'll come and save your life.
Target Text: Kalau selepas waktu sekolah kau perlu teruskan sehingga 7:00 pagi. Ketika itu, baru aku datang selamatkan kau.
Predicted by MalayGPT: Jika itu tak kau suka i , tapi akan pastikan dan betul .
-------------------------------------------------------
Source Text: I've never seen John Stockton smile so much.
Target Text: Aku tak pernah Lihat John Stockton segembira sebegini.
Predicted by MalayGPT: Aku tak pernah melihat v am pir baru berada p tasi sungguh cepat


Processing Epoch 19: 100%|██████████| 300/300 [00:31<00:00,  9.68it/s, loss=1.224]


-------------------------------------------------------
Source Text: Small
Target Text: Kecil
Predicted by MalayGPT: S ir
-------------------------------------------------------
Source Text: He gives victory to whom He wills, and He is the Exalted in Might, the Merciful.
Target Text: Ia memberi kemenangan kepada sesiapa yang dikehendakiNya, dan Dia lah jua yang Maha Kuasa, lagi Maha Mengasihani.
Predicted by MalayGPT: Dia pernah member inya , tapi saya tidak menerimanya ..


Processing Epoch 20: 100%|██████████| 300/300 [00:31<00:00,  9.61it/s, loss=1.202]


-------------------------------------------------------
Source Text: But the past few days.. ...we've been doing rounds of the visa and passport office.
Target Text: Tapi sejak beberapa hari... ..kami sibuk mengurus visa dan passport.
Predicted by MalayGPT: Tapi saya dengar ada beberapa kedai yang masih terima selepas tamat tem p oh .
-------------------------------------------------------
Source Text: Some kid gave to me.
Target Text: Ada budak berikannya pada saya.
Predicted by MalayGPT: Budak p anda i .


Processing Epoch 21: 100%|██████████| 300/300 [00:30<00:00,  9.69it/s, loss=1.317]


-------------------------------------------------------
Source Text: Who are you? I am Genus.
Target Text: Baiklah.
Predicted by MalayGPT: Siapakah kamu sar apan .
-------------------------------------------------------
Source Text: But the past few days.. ...we've been doing rounds of the visa and passport office.
Target Text: Tapi sejak beberapa hari... ..kami sibuk mengurus visa dan passport.
Predicted by MalayGPT: Tapi cuma ada satu au to dok .


Processing Epoch 22: 100%|██████████| 300/300 [00:31<00:00,  9.66it/s, loss=1.195]


-------------------------------------------------------
Source Text: He's gone, Barry.
Target Text: Dia sudah pergi, Barry.
Predicted by MalayGPT: Beliau telah .
-------------------------------------------------------
Source Text: You're pulling my leg, aren't you?
Target Text: Kau nak menentang aku?
Predicted by MalayGPT: Kau tak tahu apa yang kau hada p i .


Processing Epoch 23: 100%|██████████| 300/300 [00:31<00:00,  9.59it/s, loss=1.201]


-------------------------------------------------------
Source Text: The special names @DEFAULT_SINK@, @DEFAULT_SOURCE@ and @DEFAULT_MONITOR@ can be used to specify the default sink, source and monitor.
Target Text: Nama khas @DEFAULT_SINK@, @DEFAULT_SOURCE@ and @DEFAULT_MONITOR@ boleh digunakan untuk nyatakan sinki, sumber dan monitor lalai.
Predicted by MalayGPT: Di sini kom an der ( Be c ker ) ber bi cara
-------------------------------------------------------
Source Text: - This isn't the moon!
Target Text: - Kita tak tinggal di Bulan!
Predicted by MalayGPT: - Ini dapat membuat perasa anmu !


Processing Epoch 24: 100%|██████████| 300/300 [00:31<00:00,  9.63it/s, loss=1.209]


-------------------------------------------------------
Source Text: Take their statement.
Target Text: Ambil kenyataan mereka.
Predicted by MalayGPT: Mem perlahan kan denyutan jantung sehingga se denyutan per - minit .
-------------------------------------------------------
Source Text: WOMAN: There's a mix-up with my reservation.
Target Text: Ada kesilapan dalam tempahan saya.
Predicted by MalayGPT: L eka s , cepat lakukan , Bar b .


Processing Epoch 25: 100%|██████████| 300/300 [00:30<00:00,  9.68it/s, loss=1.259]


-------------------------------------------------------
Source Text: Some kid gave to me.
Target Text: Ada budak berikannya pada saya.
Predicted by MalayGPT: Bukan per iba di
-------------------------------------------------------
Source Text: WOMAN: There's a mix-up with my reservation.
Target Text: Ada kesilapan dalam tempahan saya.
Predicted by MalayGPT: G aya bing ka i sudah mempunyai butang untuk f ung si % s keadaan % s keadaan


Processing Epoch 26: 100%|██████████| 300/300 [00:31<00:00,  9.66it/s, loss=1.203]


-------------------------------------------------------
Source Text: What do you mean?
Target Text: Apa maksud kamu?
Predicted by MalayGPT: Apa yang kamu semua lakukan ?
-------------------------------------------------------
Source Text: The black queen.
Target Text: Ratu hitam.
Predicted by MalayGPT: Mak hluk an eh membawa


Processing Epoch 27: 100%|██████████| 300/300 [00:31<00:00,  9.65it/s, loss=1.193]


-------------------------------------------------------
Source Text: "So we went to my room, and guess what he showed me?"
Target Text: "Jadi kami pergi ke bilik saya, dan teka apa yang dia tunjukkan kepada saya?"
Predicted by MalayGPT: Dan apa jua perintah yang dibawa oleh Ras u lu l lah ( s . a . w ) kepada kamu maka ter im alah serta amal kan , dan apa jua yang di larang Nya kamu melakukannya maka patuh ilah lar angan Nya .
-------------------------------------------------------
Source Text: Case Sens
Target Text: Sensi Kata
Predicted by MalayGPT: K al en dar


Processing Epoch 28: 100%|██████████| 300/300 [00:31<00:00,  9.66it/s, loss=1.219]


-------------------------------------------------------
Source Text: Some kid gave to me.
Target Text: Ada budak berikannya pada saya.
Predicted by MalayGPT: Entah bagaimana saya tidak merasa selamat seperti sebelum ini .
-------------------------------------------------------
Source Text: Who are you? I am Genus.
Target Text: Baiklah.
Predicted by MalayGPT: Siapakah kamu tahu bahawa aku ni ?


Processing Epoch 29: 100%|██████████| 300/300 [00:31<00:00,  9.61it/s, loss=1.253]


-------------------------------------------------------
Source Text: I'll tell you like I tell them...
Target Text: Saya akan beritahu kamu seperti saya beritahu yg lain.
Predicted by MalayGPT: Saya m amal ia akan te saya ...
-------------------------------------------------------
Source Text: Too bad I'm not younger, or I'd lend you a hand.
Target Text: Aku tak akan menolong kamu
Predicted by MalayGPT: Aku tak tahu macam mana tang g apan kau , tapi ban d yang aku ken da li kan untuk J V C , pem a in d rum aku tak se su ai .


Processing Epoch 30: 100%|██████████| 300/300 [00:31<00:00,  9.66it/s, loss=1.211]


-------------------------------------------------------
Source Text: Then stop right now!
Target Text: Dah, berhenti...
Predicted by MalayGPT: Sekarang dia kembali ke P agar Tiga !
-------------------------------------------------------
Source Text: The crowds are very enthusiastic about this contest.
Target Text: Penonton sekali teruja dengan pertandingan ini.
Predicted by MalayGPT: Jadi bawa turun ping gan m ang ku k dan mula mem bas uh .


Processing Epoch 31: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.260]


-------------------------------------------------------
Source Text: Mr Raj, folks I like can call me Chutki
Target Text: Raj, orang yang saya suka boleh panggil saya Chutki
Predicted by MalayGPT: En . Ye ag er , saya James Sa v o y .
-------------------------------------------------------
Source Text: What do you mean?
Target Text: Apa maksud kamu?
Predicted by MalayGPT: Apa pandangan kamu ?


Processing Epoch 32: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.190]


-------------------------------------------------------
Source Text: Gopi, listen to me!
Target Text: GoPi, dengarkan aku!
Predicted by MalayGPT: Pergi , pergi ke saya !
-------------------------------------------------------
Source Text: Picked up a tail.
Target Text: Mengambil ekor.
Predicted by MalayGPT: R upa - rupanya be g ini .


Processing Epoch 33: 100%|██████████| 300/300 [00:31<00:00,  9.66it/s, loss=1.193]


-------------------------------------------------------
Source Text: The black queen.
Target Text: Ratu hitam.
Predicted by MalayGPT: The F o ur Se as ons of n d .
-------------------------------------------------------
Source Text: Password _type:
Target Text: _Katalaluan:
Predicted by MalayGPT: P indah atau salin fail yang dipilih oleh arahan P ot ong Fail atau Salin Fail sebelum ini


Processing Epoch 34: 100%|██████████| 300/300 [00:31<00:00,  9.66it/s, loss=1.227]


-------------------------------------------------------
Source Text: WOMAN: There's a mix-up with my reservation.
Target Text: Ada kesilapan dalam tempahan saya.
Predicted by MalayGPT: ( ME N G E L U H )
-------------------------------------------------------
Source Text: I didn't say anything, but I can talk to him if you want.
Target Text: Saya tak ada cakap apa-apa. Tapi saya boleh cakap dengan dia kalau kamu mahu.
Predicted by MalayGPT: Saya tak tanya apa yang awak cakap , .. tapi awak tidak di rumah kamu .


Processing Epoch 35: 100%|██████████| 300/300 [00:30<00:00,  9.68it/s, loss=1.208]


-------------------------------------------------------
Source Text: There are leafs in the way.
Target Text: Ada reranting dan daun.
Predicted by MalayGPT: S au dara selam anya !
-------------------------------------------------------
Source Text: You don't have to write anything down to be a poet.
Target Text: [Arthur] Anda dont mempunyai untuk menulis sesuatu bawah menjadi seorang penyair.
Predicted by MalayGPT: Anda tidak boleh meletakkan ia dalam minuman .


Processing Epoch 36: 100%|██████████| 300/300 [00:30<00:00,  9.68it/s, loss=1.217]


-------------------------------------------------------
Source Text: I've never seen John Stockton smile so much.
Target Text: Aku tak pernah Lihat John Stockton segembira sebegini.
Predicted by MalayGPT: Aku tak pernah melihat v am pir baru berada p tasi sungguh cepat
-------------------------------------------------------
Source Text: I'll tell you like I tell them...
Target Text: Saya akan beritahu kamu seperti saya beritahu yg lain.
Predicted by MalayGPT: Saya beritahu pesan an itu , " saya cin takan awak ."


Processing Epoch 37: 100%|██████████| 300/300 [00:31<00:00,  9.62it/s, loss=1.221]


-------------------------------------------------------
Source Text: I didn't say anything, but I can talk to him if you want.
Target Text: Saya tak ada cakap apa-apa. Tapi saya boleh cakap dengan dia kalau kamu mahu.
Predicted by MalayGPT: Saya tak tahu , tapi awak tahu , saya akan cari tahu esok .. saya akan men el efon nya .
-------------------------------------------------------
Source Text: I've never seen John Stockton smile so much.
Target Text: Aku tak pernah Lihat John Stockton segembira sebegini.
Predicted by MalayGPT: Aku pernah melihat v am pir baru berada p tasi sungguh cepat


Processing Epoch 38: 100%|██████████| 300/300 [00:31<00:00,  9.65it/s, loss=1.200]


-------------------------------------------------------
Source Text: -Hey, Angela.
Target Text: - Hei, Angela.
Predicted by MalayGPT: - Hey , lebih baik .
-------------------------------------------------------
Source Text: Some kid gave to me.
Target Text: Ada budak berikannya pada saya.
Predicted by MalayGPT: Entah bagaimana saya tidak merasa selamat seperti sebelum ini untuk bersama .


Processing Epoch 39: 100%|██████████| 300/300 [00:31<00:00,  9.63it/s, loss=1.194]


-------------------------------------------------------
Source Text: - This isn't the moon!
Target Text: - Kita tak tinggal di Bulan!
Predicted by MalayGPT: - Ini satu arahan !
-------------------------------------------------------
Source Text: Case Sens
Target Text: Sensi Kata
Predicted by MalayGPT: C a se y .


Processing Epoch 40: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.242]


-------------------------------------------------------
Source Text: Animatable
Target Text: Animatable
Predicted by MalayGPT: A ni mas i
-------------------------------------------------------
Source Text: How is Young Do doing?
Target Text: Yeong Do. okeykah?
Predicted by MalayGPT: Adakah anda benar - benar mahu pada mkan .


Processing Epoch 41: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.210]


-------------------------------------------------------
Source Text: He gives victory to whom He wills, and He is the Exalted in Might, the Merciful.
Target Text: Ia memberi kemenangan kepada sesiapa yang dikehendakiNya, dan Dia lah jua yang Maha Kuasa, lagi Maha Mengasihani.
Predicted by MalayGPT: Dia curi h alam an upacara ter larang itu .
-------------------------------------------------------
Source Text: He's gone, Barry.
Target Text: Dia sudah pergi, Barry.
Predicted by MalayGPT: Dia curi h alam an .


Processing Epoch 42: 100%|██████████| 300/300 [00:31<00:00,  9.62it/s, loss=1.163]


-------------------------------------------------------
Source Text: Some kid gave to me.
Target Text: Ada budak berikannya pada saya.
Predicted by MalayGPT: Awak ambil risiko ini ... ketika awak mula tidur dengan orang lain .
-------------------------------------------------------
Source Text: I'll tell aunt that you went to Masakei with colleague last week.
Target Text: Aku akan memberitahu bibi bahawa anda pergi ke Masakei dengan rakan minggu lalu.
Predicted by MalayGPT: Aku dah sedia untuk kembali ber kerja .


Processing Epoch 43: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.185]


-------------------------------------------------------
Source Text: Good news, Caroline.
Target Text: Berita baik, Caroline.
Predicted by MalayGPT: S um p ah , C 1 , 2 un di ...
-------------------------------------------------------
Source Text: Too bad I'm not younger, or I'd lend you a hand.
Target Text: Aku tak akan menolong kamu
Predicted by MalayGPT: Aku tak tahu macam mana tang g apan kau , tapi ban d yang aku ken da li kan untuk J V C , pem a in d rum aku tak se su ai .


Processing Epoch 44: 100%|██████████| 300/300 [00:31<00:00,  9.59it/s, loss=1.209]


-------------------------------------------------------
Source Text: I'll tell you like I tell them...
Target Text: Saya akan beritahu kamu seperti saya beritahu yg lain.
Predicted by MalayGPT: Saya beritahu pesan an itu , " saya cin takan awak ."
-------------------------------------------------------
Source Text: Picked up a tail.
Target Text: Mengambil ekor.
Predicted by MalayGPT: Di sini kom an der ( Be c ker ) ber bi cara


Processing Epoch 45: 100%|██████████| 300/300 [00:31<00:00,  9.60it/s, loss=1.248]


-------------------------------------------------------
Source Text: I drive the squirrel!
Target Text: Aku mengendarai tupai.
Predicted by MalayGPT: S um p ah !
-------------------------------------------------------
Source Text: I'll tell you like I tell them...
Target Text: Saya akan beritahu kamu seperti saya beritahu yg lain.
Predicted by MalayGPT: Saya beritahu pesan an itu , " saya cin takan awak ."


Processing Epoch 46: 100%|██████████| 300/300 [00:31<00:00,  9.59it/s, loss=1.209]


-------------------------------------------------------
Source Text: The special names @DEFAULT_SINK@, @DEFAULT_SOURCE@ and @DEFAULT_MONITOR@ can be used to specify the default sink, source and monitor.
Target Text: Nama khas @DEFAULT_SINK@, @DEFAULT_SOURCE@ and @DEFAULT_MONITOR@ boleh digunakan untuk nyatakan sinki, sumber dan monitor lalai.
Predicted by MalayGPT: W ish " Be bo la D ag ing P is s ing " lebih berjaya !
-------------------------------------------------------
Source Text: Then stop right now!
Target Text: Dah, berhenti...
Predicted by MalayGPT: Sekarang !


Processing Epoch 47: 100%|██████████| 300/300 [00:31<00:00,  9.61it/s, loss=1.163]


-------------------------------------------------------
Source Text: There are leafs in the way.
Target Text: Ada reranting dan daun.
Predicted by MalayGPT: Tidak ada pak saan dalam ugama .
-------------------------------------------------------
Source Text: He gives victory to whom He wills, and He is the Exalted in Might, the Merciful.
Target Text: Ia memberi kemenangan kepada sesiapa yang dikehendakiNya, dan Dia lah jua yang Maha Kuasa, lagi Maha Mengasihani.
Predicted by MalayGPT: Ia mempunyai S an ta , s ku ter di curi dan gu dang ter beng kal ai .


Processing Epoch 48: 100%|██████████| 300/300 [00:31<00:00,  9.65it/s, loss=1.171]


-------------------------------------------------------
Source Text: This guy is going to have a lot of surveillance.
Target Text: This guy is going to have a lot of surveillance.
Predicted by MalayGPT: Orang - orang ini harus mati .
-------------------------------------------------------
Source Text: -Hey, Angela.
Target Text: - Hei, Angela.
Predicted by MalayGPT: Hei , perik sa jalan ini .


Processing Epoch 49: 100%|██████████| 300/300 [00:31<00:00,  9.51it/s, loss=1.163]


-------------------------------------------------------
Source Text: How is Young Do doing?
Target Text: Yeong Do. okeykah?
Predicted by MalayGPT: Awak kenal dia atau cuma ?
-------------------------------------------------------
Source Text: I'll tell you like I tell them...
Target Text: Saya akan beritahu kamu seperti saya beritahu yg lain.
Predicted by MalayGPT: Saya beritahu pesan an itu , " saya cin takan awak ."


Processing Epoch 50: 100%|██████████| 300/300 [00:31<00:00,  9.65it/s, loss=1.191]


-------------------------------------------------------
Source Text: Too bad I'm not younger, or I'd lend you a hand.
Target Text: Aku tak akan menolong kamu
Predicted by MalayGPT: Aku tak tahu macam mana tang g apan kau , tapi ban d yang aku ken da li kan untuk J V C , pem a in d rum aku tak se su ai .
-------------------------------------------------------
Source Text: Take their statement.
Target Text: Ambil kenyataan mereka.
Predicted by MalayGPT: Ma gu ir e .


Processing Epoch 51: 100%|██████████| 300/300 [00:31<00:00,  9.62it/s, loss=1.165]


-------------------------------------------------------
Source Text: Come on!
Target Text: Ayuh!
Predicted by MalayGPT: Ayuh !
-------------------------------------------------------
Source Text: The black queen.
Target Text: Ratu hitam.
Predicted by MalayGPT: Mak hluk an eh membawa T angan Putih .


Processing Epoch 52: 100%|██████████| 300/300 [00:31<00:00,  9.62it/s, loss=1.161]


-------------------------------------------------------
Source Text: But the past few days.. ...we've been doing rounds of the visa and passport office.
Target Text: Tapi sejak beberapa hari... ..kami sibuk mengurus visa dan passport.
Predicted by MalayGPT: Tapi cuma ada satu au to dok .
-------------------------------------------------------
Source Text: Gopi, listen to me!
Target Text: GoPi, dengarkan aku!
Predicted by MalayGPT: D en yutan jantung se mak in perlahan Pergi kepada me dik dengan Segera .


Processing Epoch 53: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.205]


-------------------------------------------------------
Source Text: KID ZONE
Target Text: ZON KANAK-KANAK
Predicted by MalayGPT: S E M U A Ro ar
-------------------------------------------------------
Source Text: Too bad I'm not younger, or I'd lend you a hand.
Target Text: Aku tak akan menolong kamu
Predicted by MalayGPT: Aku tak tahu macam mana tang g apan kau , tapi ban d yang aku ken da li kan untuk J V C , pem a in d rum aku tak se su ai .


Processing Epoch 54: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.166]


-------------------------------------------------------
Source Text: Take their statement.
Target Text: Ambil kenyataan mereka.
Predicted by MalayGPT: Encik P ick les , saya rasa sudah cukup untuk hari ini , bos .
-------------------------------------------------------
Source Text: The crowds are very enthusiastic about this contest.
Target Text: Penonton sekali teruja dengan pertandingan ini.
Predicted by MalayGPT: Orang - orang ini harus mati .


Processing Epoch 55: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.176]


-------------------------------------------------------
Source Text: Your condolences are appreciated but your help is unnecessary unless you can name the Americans responsible.
Target Text: Takziah kamu dihargai tetapi bantuan kamu tidak perlu melainkan kamu boleh menamakan rakyat Amerika yang bertanggungjawab.
Predicted by MalayGPT: Hanya s anya orang - orang yang meny ahu t seru anmu itu ialah mereka yang mendengar ( yang mahu menurut kebenaran ); sedang orang - orang yang mati Allah bangkit kan mereka semula ( pada hari kiamat kel ak ), kemudian mereka dikembalikan kepadaNya untuk menerima balasan .
-------------------------------------------------------
Source Text: - But I think you already know that.
Target Text: - Tapi, tentu awak sudah tahu.
Predicted by MalayGPT: - Tapi awak perlukan bantuan .


Processing Epoch 56: 100%|██████████| 300/300 [00:31<00:00,  9.63it/s, loss=1.166]


-------------------------------------------------------
Source Text: WOMAN: There's a mix-up with my reservation.
Target Text: Ada kesilapan dalam tempahan saya.
Predicted by MalayGPT: W ah ... yang ber nasib baik ..
-------------------------------------------------------
Source Text: The special names @DEFAULT_SINK@, @DEFAULT_SOURCE@ and @DEFAULT_MONITOR@ can be used to specify the default sink, source and monitor.
Target Text: Nama khas @DEFAULT_SINK@, @DEFAULT_SOURCE@ and @DEFAULT_MONITOR@ boleh digunakan untuk nyatakan sinki, sumber dan monitor lalai.
Predicted by MalayGPT: or ton , H Jeff re e 8 7 4 27 39 9 3 p ra ju rit .


Processing Epoch 57: 100%|██████████| 300/300 [00:30<00:00,  9.69it/s, loss=1.165]


-------------------------------------------------------
Source Text: If it's after school hours, you're just gonna lay there all night until 7:00 a.m., when I'll come and save your life.
Target Text: Kalau selepas waktu sekolah kau perlu teruskan sehingga 7:00 pagi. Ketika itu, baru aku datang selamatkan kau.
Predicted by MalayGPT: Jika itu tak kau suka i , berdo alah
-------------------------------------------------------
Source Text: I didn't say anything, but I can talk to him if you want.
Target Text: Saya tak ada cakap apa-apa. Tapi saya boleh cakap dengan dia kalau kamu mahu.
Predicted by MalayGPT: Saya tak tahu , tapi awak tahu , saya akan cari tahu esok .. saya akan men el efon nya .


Processing Epoch 58: 100%|██████████| 300/300 [00:31<00:00,  9.59it/s, loss=1.176]


-------------------------------------------------------
Source Text: If it's after school hours, you're just gonna lay there all night until 7:00 a.m., when I'll come and save your life.
Target Text: Kalau selepas waktu sekolah kau perlu teruskan sehingga 7:00 pagi. Ketika itu, baru aku datang selamatkan kau.
Predicted by MalayGPT: Jika itu tak kau suka i , berdo alah
-------------------------------------------------------
Source Text: I've never seen John Stockton smile so much.
Target Text: Aku tak pernah Lihat John Stockton segembira sebegini.
Predicted by MalayGPT: Aku tak pernah melihat v am pir baru berada p tasi sungguh cepat


Processing Epoch 59: 100%|██████████| 300/300 [00:31<00:00,  9.61it/s, loss=1.183]


-------------------------------------------------------
Source Text: Then stop right now!
Target Text: Dah, berhenti...
Predicted by MalayGPT: Lakukan sekarang !
-------------------------------------------------------
Source Text: & View
Target Text: & Lihat
Predicted by MalayGPT: & Ok


Processing Epoch 60: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.181]


-------------------------------------------------------
Source Text: -Hey, Angela.
Target Text: - Hei, Angela.
Predicted by MalayGPT: - Hey , dan lebih dulu .
-------------------------------------------------------
Source Text: WOMAN: There's a mix-up with my reservation.
Target Text: Ada kesilapan dalam tempahan saya.
Predicted by MalayGPT: RO ME : budak anda .


Processing Epoch 61: 100%|██████████| 300/300 [00:31<00:00,  9.48it/s, loss=1.168]


-------------------------------------------------------
Source Text: I've never seen John Stockton smile so much.
Target Text: Aku tak pernah Lihat John Stockton segembira sebegini.
Predicted by MalayGPT: Aku tak pernah melihat v am pir baru berada p tasi sungguh cepat
-------------------------------------------------------
Source Text: cannot create DHCP BPF socket: %s
Target Text: tidak dapat cipta soket DHCP BPF: %s
Predicted by MalayGPT: % s telah mem bat alkan tugas .


Processing Epoch 62: 100%|██████████| 300/300 [00:30<00:00,  9.68it/s, loss=1.196]


-------------------------------------------------------
Source Text: I didn't say anything, but I can talk to him if you want.
Target Text: Saya tak ada cakap apa-apa. Tapi saya boleh cakap dengan dia kalau kamu mahu.
Predicted by MalayGPT: Saya tak tahu , tapi awak tahu , saya akan cari tahu esok .. saya akan men el efon nya .
-------------------------------------------------------
Source Text: Too bad I'm not younger, or I'd lend you a hand.
Target Text: Aku tak akan menolong kamu
Predicted by MalayGPT: Aku tak tahu macam mana tang g apan kau , tapi ban d yang aku ken da li kan untuk J V C , pem a in d rum aku tak se su ai .


Processing Epoch 63: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.157]


-------------------------------------------------------
Source Text: I'll tell aunt that you went to Masakei with colleague last week.
Target Text: Aku akan memberitahu bibi bahawa anda pergi ke Masakei dengan rakan minggu lalu.
Predicted by MalayGPT: Aku dah sedia untuk kembali ber kerja di V er dan tidak yang menikam kedu anya .
-------------------------------------------------------
Source Text: I didn't say anything, but I can talk to him if you want.
Target Text: Saya tak ada cakap apa-apa. Tapi saya boleh cakap dengan dia kalau kamu mahu.
Predicted by MalayGPT: saya tak tahu , apa yang awak cakap .


Processing Epoch 64: 100%|██████████| 300/300 [00:31<00:00,  9.65it/s, loss=1.172]


-------------------------------------------------------
Source Text: Animatable
Target Text: Animatable
Predicted by MalayGPT: A ni mas i
-------------------------------------------------------
Source Text: I didn't say anything, but I can talk to him if you want.
Target Text: Saya tak ada cakap apa-apa. Tapi saya boleh cakap dengan dia kalau kamu mahu.
Predicted by MalayGPT: saya tak tahu , tapi awak tahu , saya akan cari tahu esok .. saya akan men el efon nya .


Processing Epoch 65: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.153]


-------------------------------------------------------
Source Text: Gopi, listen to me!
Target Text: GoPi, dengarkan aku!
Predicted by MalayGPT: G e org e , kalau kau boleh .
-------------------------------------------------------
Source Text: Take their statement.
Target Text: Ambil kenyataan mereka.
Predicted by MalayGPT: Kamu memiliki 39 yen .


Processing Epoch 66: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.162]


-------------------------------------------------------
Source Text: The special names @DEFAULT_SINK@, @DEFAULT_SOURCE@ and @DEFAULT_MONITOR@ can be used to specify the default sink, source and monitor.
Target Text: Nama khas @DEFAULT_SINK@, @DEFAULT_SOURCE@ and @DEFAULT_MONITOR@ boleh digunakan untuk nyatakan sinki, sumber dan monitor lalai.
Predicted by MalayGPT: Jadi men ag apa aku harus melihat gambar orang - orang yang tidak aku kenal ?
-------------------------------------------------------
Source Text: I didn't say anything, but I can talk to him if you want.
Target Text: Saya tak ada cakap apa-apa. Tapi saya boleh cakap dengan dia kalau kamu mahu.
Predicted by MalayGPT: Saya tak jika aku beritahu kau sekarang .


Processing Epoch 67: 100%|██████████| 300/300 [00:30<00:00,  9.68it/s, loss=1.154]


-------------------------------------------------------
Source Text: Ang Lee is a mentor to me and my family
Target Text: Ang Lee adalah mentor saya dan juga keluarga saya
Predicted by MalayGPT: Lit erasi pel bagai daripada p rim itif untuk ke dok tor an .
-------------------------------------------------------
Source Text: - This isn't the moon!
Target Text: - Kita tak tinggal di Bulan!
Predicted by MalayGPT: - Jangan sentuh itu !


Processing Epoch 68: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.160]


-------------------------------------------------------
Source Text: Animatable
Target Text: Animatable
Predicted by MalayGPT: A ni mas i
-------------------------------------------------------
Source Text: Your condolences are appreciated but your help is unnecessary unless you can name the Americans responsible.
Target Text: Takziah kamu dihargai tetapi bantuan kamu tidak perlu melainkan kamu boleh menamakan rakyat Amerika yang bertanggungjawab.
Predicted by MalayGPT: Hanya s anya orang - orang yang meny ahu t seru anmu itu ialah mereka yang mendengar ( yang mahu menurut kebenaran ); sedang orang - orang yang mati Allah bangkit kan mereka semula ( pada hari kiamat kel ak ), kemudian mereka dikembalikan kepadaNya untuk menerima balasan .


Processing Epoch 69: 100%|██████████| 300/300 [00:30<00:00,  9.68it/s, loss=1.166]


-------------------------------------------------------
Source Text: #There is a budding flower And I'm mad for her#
Target Text: # Terdapat bunga tunas dan saya gila padanya #
Predicted by MalayGPT: Se bagai seorang Jen dr al , tidak harus selalu menggunakan m il it er di ban ding tak tik , menggunakan org harus lo ya l .
-------------------------------------------------------
Source Text: "So we went to my room, and guess what he showed me?"
Target Text: "Jadi kami pergi ke bilik saya, dan teka apa yang dia tunjukkan kepada saya?"
Predicted by MalayGPT: Jadi ke selur uhan nya .


Processing Epoch 70: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.161]


-------------------------------------------------------
Source Text: WOMAN: There's a mix-up with my reservation.
Target Text: Ada kesilapan dalam tempahan saya.
Predicted by MalayGPT: G a ben or , Rick , sesiapa yang memerlukan bal di Kencing mereka dik osong kan dan anda datang berjalan .
-------------------------------------------------------
Source Text: You don't have to write anything down to be a poet.
Target Text: [Arthur] Anda dont mempunyai untuk menulis sesuatu bawah menjadi seorang penyair.
Predicted by MalayGPT: Anda tidak boleh meletakkan ia dalam minuman .


Processing Epoch 71: 100%|██████████| 300/300 [00:31<00:00,  9.68it/s, loss=1.166]


-------------------------------------------------------
Source Text: "So we went to my room, and guess what he showed me?"
Target Text: "Jadi kami pergi ke bilik saya, dan teka apa yang dia tunjukkan kepada saya?"
Predicted by MalayGPT: Jadi men ag apa aku harus melihat gambar orang - orang yang tidak aku kenal ?
-------------------------------------------------------
Source Text: Ang Lee is a mentor to me and my family
Target Text: Ang Lee adalah mentor saya dan juga keluarga saya
Predicted by MalayGPT: Aku punya perminta an bagi yang men on ton video ini .


Processing Epoch 72: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.174]


-------------------------------------------------------
Source Text: I'll tell you like I tell them...
Target Text: Saya akan beritahu kamu seperti saya beritahu yg lain.
Predicted by MalayGPT: Saya beritahu pesan an itu , " saya cin takan awak ."
-------------------------------------------------------
Source Text: - This isn't the moon!
Target Text: - Kita tak tinggal di Bulan!
Predicted by MalayGPT: - P om pe i i !


Processing Epoch 73: 100%|██████████| 300/300 [00:31<00:00,  9.59it/s, loss=1.176]


-------------------------------------------------------
Source Text: I've never seen John Stockton smile so much.
Target Text: Aku tak pernah Lihat John Stockton segembira sebegini.
Predicted by MalayGPT: Tak pernah aku tengok perkara macam ini .
-------------------------------------------------------
Source Text: Tried something new?
Target Text: Mencuba sesuatu yang baru?
Predicted by MalayGPT: S ut Gro u p war e


Processing Epoch 74: 100%|██████████| 300/300 [00:31<00:00,  9.65it/s, loss=1.157]


-------------------------------------------------------
Source Text: He gives victory to whom He wills, and He is the Exalted in Might, the Merciful.
Target Text: Ia memberi kemenangan kepada sesiapa yang dikehendakiNya, dan Dia lah jua yang Maha Kuasa, lagi Maha Mengasihani.
Predicted by MalayGPT: Dia pernah member inya , tapi saya tidak menerimanya ..
-------------------------------------------------------
Source Text: How is Young Do doing?
Target Text: Yeong Do. okeykah?
Predicted by MalayGPT: Adakah peguam nya membuat apa - apa ?


Processing Epoch 75: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.178]


-------------------------------------------------------
Source Text: Good news, Caroline.
Target Text: Berita baik, Caroline.
Predicted by MalayGPT: P in ky , P amma , Bu bb ly , Lo vel y , S we et .
-------------------------------------------------------
Source Text: I'll kill him, the useless little sewer rat!
Target Text: Aku akan bunuh dia!
Predicted by MalayGPT: Aku dah sedia untuk kembali ber kerja .


Processing Epoch 76: 100%|██████████| 300/300 [00:31<00:00,  9.66it/s, loss=1.162]


-------------------------------------------------------
Source Text: Come on!
Target Text: Ayuh!
Predicted by MalayGPT: Ayuh !
-------------------------------------------------------
Source Text: Animatable
Target Text: Animatable
Predicted by MalayGPT: A ni mas i


Processing Epoch 77: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.164]


-------------------------------------------------------
Source Text: Small
Target Text: Kecil
Predicted by MalayGPT: Peng is ih
-------------------------------------------------------
Source Text: Take their statement.
Target Text: Ambil kenyataan mereka.
Predicted by MalayGPT: Orang juga hilang .


Processing Epoch 78: 100%|██████████| 300/300 [00:31<00:00,  9.65it/s, loss=1.159]


-------------------------------------------------------
Source Text: I drive the squirrel!
Target Text: Aku mengendarai tupai.
Predicted by MalayGPT: S um p ah !
-------------------------------------------------------
Source Text: Gopi, listen to me!
Target Text: GoPi, dengarkan aku!
Predicted by MalayGPT: G aya bing ka i sudah mempunyai !


Processing Epoch 79: 100%|██████████| 300/300 [00:30<00:00,  9.70it/s, loss=1.154]


-------------------------------------------------------
Source Text: Small
Target Text: Kecil
Predicted by MalayGPT: Peng is ih
-------------------------------------------------------
Source Text: You're pulling my leg, aren't you?
Target Text: Kau nak menentang aku?
Predicted by MalayGPT: Kau tak tahu apa yang akan kau hada p i .


Processing Epoch 80: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.156]


-------------------------------------------------------
Source Text: I'll tell aunt that you went to Masakei with colleague last week.
Target Text: Aku akan memberitahu bibi bahawa anda pergi ke Masakei dengan rakan minggu lalu.
Predicted by MalayGPT: Aku tak tahu macam mana tang g apan kau , tapi ban d yang aku ken da li kan untuk J V C , pem a in d rum aku tak se su ai .
-------------------------------------------------------
Source Text: The crowds are very enthusiastic about this contest.
Target Text: Penonton sekali teruja dengan pertandingan ini.
Predicted by MalayGPT: Orang - orang ini harus mati .


Processing Epoch 81: 100%|██████████| 300/300 [00:30<00:00,  9.69it/s, loss=1.156]


-------------------------------------------------------
Source Text: You're pulling my leg, aren't you?
Target Text: Kau nak menentang aku?
Predicted by MalayGPT: Kau sedang membaca kemudian j at uh ter tidur .
-------------------------------------------------------
Source Text: I've never seen John Stockton smile so much.
Target Text: Aku tak pernah Lihat John Stockton segembira sebegini.
Predicted by MalayGPT: Tak pernah aku tengok perkara macam ini .


Processing Epoch 82: 100%|██████████| 300/300 [00:30<00:00,  9.71it/s, loss=1.160]


-------------------------------------------------------
Source Text: So where's it been hiding the last 60 million years?
Target Text: Dimana dia bersembunyi selama 60 juta tahun terakhir?
Predicted by MalayGPT: Jadi ianya ke utara , dalam erti kata lain .
-------------------------------------------------------
Source Text: I've never seen John Stockton smile so much.
Target Text: Aku tak pernah Lihat John Stockton segembira sebegini.
Predicted by MalayGPT: Tak pernah aku tengok perkara macam ini .


Processing Epoch 83: 100%|██████████| 300/300 [00:31<00:00,  9.59it/s, loss=1.159]


-------------------------------------------------------
Source Text: Chris Ryan instead opts to take a knee.
Target Text: Chris Ryan malah melutut.
Predicted by MalayGPT: R 4 " is the ab bre vi ation for " Re gi s ter 4
-------------------------------------------------------
Source Text: The special names @DEFAULT_SINK@, @DEFAULT_SOURCE@ and @DEFAULT_MONITOR@ can be used to specify the default sink, source and monitor.
Target Text: Nama khas @DEFAULT_SINK@, @DEFAULT_SOURCE@ and @DEFAULT_MONITOR@ boleh digunakan untuk nyatakan sinki, sumber dan monitor lalai.
Predicted by MalayGPT: Dengan nama B apa , Anak dan Ro h Ku du s , A min .


Processing Epoch 84: 100%|██████████| 300/300 [00:31<00:00,  9.67it/s, loss=1.164]


-------------------------------------------------------
Source Text: cannot create DHCP BPF socket: %s
Target Text: tidak dapat cipta soket DHCP BPF: %s
Predicted by MalayGPT: % s telah mem bat alkan tugas .
-------------------------------------------------------
Source Text: If it's after school hours, you're just gonna lay there all night until 7:00 a.m., when I'll come and save your life.
Target Text: Kalau selepas waktu sekolah kau perlu teruskan sehingga 7:00 pagi. Ketika itu, baru aku datang selamatkan kau.
Predicted by MalayGPT: Jika kita tak melawan hari ini , akan ada lagi gen erasi peng an as , dan para De wa akan melepaskan k eng er ian dengan lama .


Processing Epoch 85: 100%|██████████| 300/300 [00:31<00:00,  9.64it/s, loss=1.161]


-------------------------------------------------------
Source Text: Take their statement.
Target Text: Ambil kenyataan mereka.
Predicted by MalayGPT: Mem perlahan kan denyutan jantung sehingga se denyutan per - minit .
-------------------------------------------------------
Source Text: - This isn't the moon!
Target Text: - Kita tak tinggal di Bulan!
Predicted by MalayGPT: - Ini adalah ke empat - empat .


Processing Epoch 86: 100%|██████████| 300/300 [00:31<00:00,  9.66it/s, loss=1.157]


-------------------------------------------------------
Source Text: Take their statement.
Target Text: Ambil kenyataan mereka.
Predicted by MalayGPT: Mem anfaatkan tenaga dari angkasa lepas .
-------------------------------------------------------
Source Text: cannot create DHCP BPF socket: %s
Target Text: tidak dapat cipta soket DHCP BPF: %s
Predicted by MalayGPT: dapat cari perintah '% s '


Processing Epoch 87: 100%|██████████| 300/300 [00:31<00:00,  9.65it/s, loss=1.169]


-------------------------------------------------------
Source Text: The crowds are very enthusiastic about this contest.
Target Text: Penonton sekali teruja dengan pertandingan ini.
Predicted by MalayGPT: Yang dis ak sikan oleh se kumpulan mal aik at , yang di dampingkan Tuhan di sis inya .
-------------------------------------------------------
Source Text: I'll tell aunt that you went to Masakei with colleague last week.
Target Text: Aku akan memberitahu bibi bahawa anda pergi ke Masakei dengan rakan minggu lalu.
Predicted by MalayGPT: Aku tak tahu macam mana tang g apan kau , tapi ban d yang aku ken da li kan untuk J V C , pem a in d rum aku tak se su ai .


Processing Epoch 88: 100%|██████████| 300/300 [00:31<00:00,  9.58it/s, loss=1.146]


-------------------------------------------------------
Source Text: There are leafs in the way.
Target Text: Ada reranting dan daun.
Predicted by MalayGPT: Ada yang mau beli makanan .
-------------------------------------------------------
Source Text: Your condolences are appreciated but your help is unnecessary unless you can name the Americans responsible.
Target Text: Takziah kamu dihargai tetapi bantuan kamu tidak perlu melainkan kamu boleh menamakan rakyat Amerika yang bertanggungjawab.
Predicted by MalayGPT: Hanya s anya orang - orang yang meny ahu t seru anmu itu ialah mereka yang mendengar ( yang mahu menurut kebenaran ); sedang orang - orang yang mati Allah bangkit kan mereka semula ( pada hari kiamat kel ak ), kemudian mereka dikembalikan kepadaNya untuk menerima balasan .


Processing Epoch 89: 100%|██████████| 300/300 [00:31<00:00,  9.61it/s, loss=1.163]


-------------------------------------------------------
Source Text: There are leafs in the way.
Target Text: Ada reranting dan daun.
Predicted by MalayGPT: Ada apa yang bi jak juga .
-------------------------------------------------------
Source Text: "So we went to my room, and guess what he showed me?"
Target Text: "Jadi kami pergi ke bilik saya, dan teka apa yang dia tunjukkan kepada saya?"
Predicted by MalayGPT: Jadi kita akan keluar melalui Ar men ia .


Processing Epoch 90:  99%|█████████▊| 296/300 [00:30<00:00,  8.93it/s, loss=1.146]

# Testing

In [None]:
def malaygpt(user_input_text, checkpoint_number=19):
    """Translate an English sentence into Malay with a saved MalayGPT checkpoint.

    The tokenizers are read from ``./tokenizer_en`` and ``./tokenizer_my``, the
    model is rebuilt with ``build_model`` and its weights are restored from
    ``./malaygpt/model_{checkpoint_number}.pt``. The translation is produced by
    greedy decoding: at every step the highest-scoring next token is appended
    until ``[SEP]`` is generated or ``max_seq_len`` tokens have been produced.

    Args:
        user_input_text: English text to translate. It is converted with
            ``str()`` and stripped of surrounding whitespace.
        checkpoint_number: Index of the checkpoint file to load. Defaults to
            19, the checkpoint that was previously hard-coded here.

    Returns:
        The string obtained by decoding the generated token ids with the
        Malay tokenizer.

    Raises:
        ValueError: If the tokenized input plus ``[CLS]``/``[SEP]`` does not
            fit into ``max_seq_len`` tokens.
    """
    # Accept any printable object and ignore leading/trailing whitespace.
    user_input_text = str(user_input_text).strip()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer_en = Tokenizer.from_file("./tokenizer_en/tokenizer_en.json")
    tokenizer_my = Tokenizer.from_file("./tokenizer_my/tokenizer_my.json")

    # Tokenize first so that an over-long input fails before the (expensive)
    # model construction and checkpoint load.
    source_ids = tokenizer_en.encode(user_input_text).ids
    num_padding = max_seq_len - len(source_ids) - 2  # 2 = [CLS] + [SEP]
    if num_padding < 0:
        # A negative count would silently produce *no* padding and a sequence
        # longer than max_seq_len.
        raise ValueError(
            f"Input is too long: {len(source_ids)} tokens, "
            f"but at most {max_seq_len - 2} are supported."
        )

    # Rebuild the architecture, then restore the trained weights.
    model = build_model(
        tokenizer_en.get_vocab_size(),
        tokenizer_my.get_vocab_size(),
        max_seq_len,
        max_seq_len,
        d_model=512,
    ).to(device)

    model_filename = f"./malaygpt/model_{checkpoint_number}.pt"
    # map_location lets a checkpoint that was saved on a GPU be loaded on a
    # CPU-only machine (the device fallback above would otherwise be useless).
    state = torch.load(model_filename, map_location=device)
    model.load_state_dict(state['model_state_dict'])

    pad_en = tokenizer_en.token_to_id('[PAD]')
    cls_my = tokenizer_my.token_to_id('[CLS]')
    sep_my = tokenizer_my.token_to_id('[SEP]')

    model.eval()
    with torch.no_grad():
        # Encoder input layout: [CLS] tokens... [SEP] [PAD]...
        # NOTE(review): as in the original code, the source is passed without
        # a batch dimension - confirm that model.encode accepts this shape.
        source_text_encoding = torch.tensor(
            [tokenizer_en.token_to_id('[CLS]')]
            + source_ids
            + [tokenizer_en.token_to_id('[SEP]')]
            + [pad_en] * num_padding,
            dtype=torch.int64,
            device=device,
        )
        # 1 marks a real token, 0 marks padding.
        source_mask = (source_text_encoding != pad_en).unsqueeze(0).unsqueeze(0).int().to(device)

        # The encoder output is computed once and reused for every decoding step.
        encoder_output = model.encode(source_text_encoding, source_mask)

        # The decoder starts from the start-of-sentence token only.
        decoder_input = torch.full((1, 1), cls_my, dtype=torch.int64, device=device)

        # Greedy generation, one token per iteration.
        while decoder_input.size(1) < max_seq_len:
            current_len = decoder_input.size(1)
            # Causal mask with the same convention as source_mask (1 = may be
            # attended): each position sees itself and everything before it.
            # The previous triu(diagonal=1) mask set 1 on the *future*
            # positions only, i.e. the inverse of that convention.
            # NOTE(review): assumes model.decode interprets both masks with
            # the same 1 = keep convention - confirm against the attention code.
            decoder_mask = torch.tril(
                torch.ones((1, current_len, current_len), dtype=torch.int, device=device)
            )
            out = model.decode(encoder_output, source_mask, decoder_input, decoder_mask)

            # Only the last position is needed to predict the next token.
            prob = model.project(out[:, -1])
            next_word = torch.argmax(prob, dim=1).item()
            decoder_input = torch.cat(
                [decoder_input, torch.full((1, 1), next_word, dtype=torch.int64, device=device)],
                dim=1,
            )

            # Stop as soon as the end-of-sentence token is produced.
            if next_word == sep_my:
                break

    # Convert the generated ids back to text.
    return tokenizer_my.decode(decoder_input[0].tolist())

In [None]:
# Quick smoke test: translate one short English sentence.
user_input_text = "Good Morning."
translated_text = malaygpt(user_input_text)

# Show the input next to the model's translation.
print(f"English input: {user_input_text}")
print(f"Malay translation: {translated_text}")