# TITLE: Batak Toba-Indonesian machine translation

## Requirements

In [None]:
# ! pip install transformers datasets evaluate sacrebleu tensorflow ipywidgets ipykernel transformers[torch] # If running Jupyter Notebook locally
! pip install transformers datasets==2.14.6 evaluate sacrebleu transformers[torch] # If running using Google Colab

In [None]:
# Add new secret (environment variable) by opening up the "Secrets" tab in the left-side panel
# Name: "HF_TOKEN"
# Value: Your HuggingFace access token (get it here https://huggingface.co/settings/tokens)

# Or another way below:

In [None]:
# from huggingface_hub import login
# login(
#     token="YOUR_HUGGING_FACE_TOKEN_HERE",
#     add_to_git_credential=True
#     )

# # Disable caching on a global scale with disable_caching():
# from datasets import disable_caching

# disable_caching()
# # When you disable caching, 🤗 Datasets will no longer reload cached files when applying transforms to datasets. Any transform you apply to your dataset will need to be reapplied.

In [None]:
# To be able to use indonlp/nusatranslation_mt, you need to install the following dependency: nusacrowd
! pip install nusacrowd

## 1. Batak Toba to Indonesian

In [None]:
from datasets import load_dataset

# Load the Batak Toba-Indonesian translation dataset from the Hugging Face Hub.
# The notebook installs `nusacrowd` in the cell above because this dataset
# needs it to load.
dataset_btk_to_ind = load_dataset(
    "indonlp/nusatranslation_mt",
    "nusatranslation_mt_btk_ind_source"
    )

In [None]:
dataset_btk_to_ind['train'][0]

In [None]:
# Map the raw column names onto translation roles in a single call:
# 'text' becomes the model input ('source') and 'label' the reference ('target')
dataset_btk_to_ind = dataset_btk_to_ind.rename_columns(
    {"text": "source", "label": "target"}
    )

In [None]:
# Verify the column names
dataset_btk_to_ind['train'][0]

### Dataset and preprocessing

The next step is to load an AutoTokenizer to process the Batak Toba-Indonesian language pairs:

In [None]:
! pip install sentencepiece # For Facebook's NLLB models

In [None]:
# # Run this to create and publish a new tokenizer, otherwise use the already published one.
# # Add new src_lang code, e.g., "btk_Latn" to the model

# # First, create a new tokenizer by loading the existing one and adding the new language code
# from transformers import AutoTokenizer

# tokenizer = AutoTokenizer.from_pretrained(
#     "facebook/nllb-200-distilled-600M",
#     src_lang="btk_Latn",
#     tgt_lang="ind_Latn",
#     additional_special_tokens=['btk_Latn']
#     )

# # Publish the new tokenizer to the Hub
# tokenizer.push_to_hub(
#     "bbc-batak-toba-as-src-tokenizer"
#     )

# # # Example of how others would use it:

# # # Load the tokenizer from the Hub
# # tokenizer = AutoTokenizer.from_pretrained(
# #     "kepinsam/bbc-batak-toba-as-src-tokenizer"
# #     )

# # # Use the tokenizer to tokenize a Batak Toba sentence
# # sentence = "Horas ma ho!"
# # tokens = tokenizer(
# #     sentence
# #     )
# # print(
# #     tokens
# #     )

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer published on the Hub for the Batak Toba -> Indonesian
# direction. The commented-out cell above shows how such a tokenizer is built:
# the NLLB tokenizer plus an additional 'btk_Latn' special token.
# src_lang / tgt_lang select the language codes used for inputs and targets.
tokenizer = AutoTokenizer.from_pretrained(
    "kepinsam/bbc-batak-toba-as-src-tokenizer",
    src_lang="btk_Latn",
    tgt_lang="ind_Latn"
    )

In [None]:
def preprocess_function(examples):
    """Tokenize a batch of source/target sentence pairs for seq2seq training.

    Args:
        examples: Batch mapping with "source" and "target" entries, each a
            list of sentences (the form `map(..., batched=True)` passes in).

    Returns:
        The tokenizer output for the batch. Inputs and labels are padded or
        truncated to 64 tokens, and padded label positions are set to -100.
    """
    inputs = list(examples["source"])
    targets = list(examples["target"])

    # No return_tensors here: `map` stores the result as plain columns either
    # way, and Python lists make the label masking below straightforward.
    model_inputs = tokenizer(
        inputs,
        text_target=targets,
        max_length=64,
        padding="max_length",
        truncation=True
        )

    # padding="max_length" also pads the labels with pad_token_id. Left as-is,
    # the loss would be computed on those padding positions too. -100 is the
    # label value the loss ignores; compute_metrics in this notebook already
    # converts -100 back to the pad id before decoding.
    pad_token_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [token_id if token_id != pad_token_id else -100 for token_id in label_ids]
        for label_ids in model_inputs["labels"]
    ]
    return model_inputs

# Tokenize the dataset with preprocess_function. batched=True passes many
# examples per call, which speeds up map(). The raw "id", "source" and "target"
# columns are removed so only the tokenizer outputs remain.
tokenized_dataset = dataset_btk_to_ind.map(
    preprocess_function,
    batched=True,
    remove_columns=["id", "source", "target"]
    )

In [None]:
import numpy as np
import evaluate

sacrebleu = evaluate.load("sacrebleu")

def postprocess_text(preds, labels):
    """Strip surrounding whitespace and shape the texts for SacreBLEU.

    Args:
        preds: Iterable of decoded prediction strings.
        labels: Iterable of decoded reference strings.

    Returns:
        A tuple ``(preds, labels)``: a flat list of stripped predictions and a
        list holding one single-element list per stripped reference.
    """
    cleaned_preds = []
    for prediction in preds:
        cleaned_preds.append(prediction.strip())

    # Each prediction gets a list of references; here there is exactly one.
    wrapped_labels = []
    for reference in labels:
        wrapped_labels.append([reference.strip()])

    return cleaned_preds, wrapped_labels


def compute_metrics(eval_preds):
    """Compute SacreBLEU and the mean generated length for an evaluation pass.

    Args:
        eval_preds: Pair ``(preds, labels)`` of token-id arrays. ``preds`` may
            also be a tuple, in which case its first element is used.

    Returns:
        Dict with "sacrebleu" (corpus score) and "gen_len" (mean number of
        non-padding tokens per prediction), both rounded to 4 decimals.
    """
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]

    # -100 is a padding/ignore marker, not a real token id, so it cannot be
    # decoded. The official translation example replaces it in the predictions
    # as well as the labels. Doing so also keeps -100 padding out of gen_len.
    pad_token_id = tokenizer.pad_token_id
    preds = np.where(preds != -100, preds, pad_token_id)
    labels = np.where(labels != -100, labels, pad_token_id)

    decoded_preds = tokenizer.batch_decode(
        preds,
        skip_special_tokens=True
        )
    decoded_labels = tokenizer.batch_decode(
        labels,
        skip_special_tokens=True
        )

    decoded_preds, decoded_labels = postprocess_text(
        decoded_preds,
        decoded_labels
        )

    result = sacrebleu.compute(
        predictions=decoded_preds,
        references=decoded_labels
        )

    result = {"sacrebleu": result["score"]}
    # Generated length = number of non-padding tokens in each prediction
    prediction_lens = [np.count_nonzero(pred != pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result

In [None]:
from transformers import AutoModelForSeq2SeqLM

# Load the pretrained "facebook/nllb-200-distilled-600M" checkpoint as a
# seq2seq model; this `model` object is what the Seq2SeqTrainer in the
# Training section fine-tunes.
model = AutoModelForSeq2SeqLM.from_pretrained(
    "facebook/nllb-200-distilled-600M"
    )

### Evaluation before training (Zero-shot translation)

In [None]:
# # Data collator is optional
# from transformers import DataCollatorForSeq2Seq

# data_collator = DataCollatorForSeq2Seq(
#     tokenizer=tokenizer,
#     model="facebook/nllb-200-distilled-600M"
#     )

In [None]:
# from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments

# training_args = Seq2SeqTrainingArguments(
#     output_dir="bbc-to-ind-nmt-v1",
#     eval_strategy="epoch",
#     per_device_eval_batch_size=16,

#     # For optimizing resources usage (GPU, RAM, etc.)
#     # ---
#     predict_with_generate=True,
#     optim="adamw_bnb_8bit",
#     fp16=True,
#     save_total_limit=3,
#     eval_accumulation_steps=4,
#     # ---

#     push_to_hub=False,
#     )

# # Create a Seq2SeqTrainer for evaluation only
# trainer = Seq2SeqTrainer(
#     model=model,
#     args=training_args,
#     train_dataset=None,  # Set train_dataset to None for evaluation
#     # data_collator=data_collator,
#     compute_metrics=compute_metrics,
#     )

# trainer.evaluate(
#     eval_dataset=tokenized_dataset["test"]
#     )

In [None]:
from transformers import pipeline
import evaluate

# Metric used to score each prediction against its reference translation
sacrebleu = evaluate.load("sacrebleu")

# Translation pipeline on the base checkpoint, i.e. before any fine-tuning
translator = pipeline("translation", model="facebook/nllb-200-distilled-600M")

# Small fixed set of sentences: "source" is the input text, "target" the reference
test_sentences = {
    "text1": {
        "source": "Unang godang manungkun ho.",
        "target": "Jangan banyak bertanya kau.",
    },
    "text2": {
        "source": "Asa adong hepengmu tu manuhor boras muse.",
        "target": "Supaya ada uangmu untuk membeli beras lagi.",
    },
    "text3": {
        "source": "Holong hian rohang hu tu ho dohot masihol hian au tu ho.",
        "target": "Cinta sekali hatiku samamu dan rindu sekali aku samamu.",
    },
    "text4": {
        "source": "Molo adong na salah, manang na hurang pambahenan ki, sai anju ma au.",
        "target": "Kalau ada yang salah, atau yang kurang di perbuatan ku, selalu maafkanlah aku.",
    },
    "text5": {
        "source": "Tarsongon bunga naung malos di ladang i, songon i ma rohang ki nunga malala, ndang hu rimpu songon i, dibahen ho holong ki gabe meam-meammu.",
        "target": "Seperti bunga yang sudah layu di ladang, seperti itulah hatiku yang sudah hancur, tidak ku kira seperti ini, kau buat cintaku jadi main-mainmu.",
    },
}

translated_sentences = []

# Translate every test sentence and score it on its own
for text_dict in test_sentences.values():
    source_text = text_dict["source"]
    correct_translation = text_dict["target"]

    translated_text = translator(source_text, src_lang="btk_Latn", tgt_lang="ind_Latn")
    translation = translated_text[0]["translation_text"]

    # Sentence-level SacreBLEU: one prediction against one reference
    sacrebleu_score = sacrebleu.compute(
        predictions=[translation],
        references=[[correct_translation]],
    )["score"]

    translated_sentences.append(
        (source_text, translation, correct_translation, sacrebleu_score)
    )

# Print the results
for number, (source, translation, correct_translation, sacrebleu_score) in enumerate(
    translated_sentences, start=1
):
    print(f"Sentence {number}:")
    print("Source: ", source)
    print("Target: ", correct_translation)
    print("Prediction: ", translation)
    print("SacreBLEU Score: ", sacrebleu_score)
    print()

### Training

In [None]:
import locale
# Monkeypatch: make locale.getpreferredencoding always report "UTF-8".
# NOTE(review): presumably a workaround so the `! pip install` shell cell that
# follows runs in the hosted notebook environment; confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"

In [None]:
! pip install bitsandbytes # For 'adamw_bnb_8bit' optimizer

In [None]:
model.config.to_dict()

In [None]:
# # Data collator is optional
# from transformers import DataCollatorForSeq2Seq

# data_collator = DataCollatorForSeq2Seq(
#     tokenizer=tokenizer,
#     model="facebook/nllb-200-distilled-600M"
#     )

In [None]:
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer

# Fine-tuning configuration for the Batak Toba -> Indonesian model.
training_args = Seq2SeqTrainingArguments(
    output_dir="bbc-to-ind-nmt-v2",
    # Evaluate, log and checkpoint once per epoch. load_best_model_at_end needs
    # the save and eval strategies to match, which they do here.
    eval_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    # NOTE(review): no metric_for_best_model is set, so per the Trainer docs
    # "best" is judged by evaluation loss rather than SacreBLEU; confirm intended.
    load_best_model_at_end=True,

    # Hyperparameters to tune
    # ---

    # More influential hyperparameters
    num_train_epochs=5, # iterate on [5, 10] # Higher means longer training time
    per_device_train_batch_size=8, # iterate on [4, 8, 16, 32] # Higher needs higher GPU RAM

    # Less influential hyperparameters
    learning_rate=5e-5,
    weight_decay=0.3,
    warmup_ratio=0.1,
    per_device_eval_batch_size=16,
    # ---


    # For optimizing resources usage (System RAM, GPU RAM, Disk, etc.)
    # ---
    predict_with_generate=True,  # evaluate on generated sequences so compute_metrics can decode them
    optim="adamw_bnb_8bit",  # 8-bit AdamW; needs the bitsandbytes package installed above
    fp16=True,  # mixed-precision training
    save_total_limit=1,  # cap the number of checkpoints kept on disk
    eval_accumulation_steps=4,  # offload accumulated predictions every 4 eval steps
    # ---

    push_to_hub=True,
)

# The trainer evaluates on the "test" split every epoch.
# NOTE(review): that split therefore also drives best-checkpoint selection;
# if the dataset provides a validation split, consider using it here instead.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    # NOTE(review): newer transformers releases deprecate `tokenizer=` in favour
    # of `processing_class=`; confirm against the installed version.
    tokenizer=tokenizer,
    # data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Run fine-tuning
trainer.train()

In [None]:
trainer.push_to_hub()

### Evaluation after training

In [None]:
from transformers import pipeline, AutoTokenizer
import evaluate

# tokenizer = AutoTokenizer.from_pretrained(
#     "kepinsam/bbc-batak-toba-as-src-tokenizer",
#     src_lang="btk_Latn",
#     tgt_lang="ind_Latn"
#     )

# Translation pipeline backed by the fine-tuned checkpoint on the Hub
translator = pipeline(
    "translation",
    model="kepinsam/bbc-to-ind-nmt-v2",
    # decoder_start_token_id=tokenizer.bos_token_id,
)

# Metric used to score each prediction against its reference translation
sacrebleu = evaluate.load("sacrebleu")

# Same fixed sentences as the zero-shot check: "source" is the input, "target" the reference
test_sentences = {
    "text1": {
        "source": "Unang godang manungkun ho.",
        "target": "Jangan banyak bertanya kau.",
    },
    "text2": {
        "source": "Asa adong hepengmu tu manuhor boras muse.",
        "target": "Supaya ada uangmu untuk membeli beras lagi.",
    },
    "text3": {
        "source": "Holong hian rohang hu tu ho dohot masihol hian au tu ho.",
        "target": "Cinta sekali hatiku samamu dan rindu sekali aku samamu.",
    },
    "text4": {
        "source": "Molo adong na salah, manang na hurang pambahenan ki, sai anju ma au.",
        "target": "Kalau ada yang salah, atau yang kurang di perbuatan ku, selalu maafkanlah aku.",
    },
    "text5": {
        "source": "Tarsongon bunga naung malos di ladang i, songon i ma rohang ki nunga malala, ndang hu rimpu songon i, dibahen ho holong ki gabe meam-meammu.",
        "target": "Seperti bunga yang sudah layu di ladang, seperti itulah hatiku yang sudah hancur, tidak ku kira seperti ini, kau buat cintaku jadi main-mainmu.",
    },
}

translated_sentences = []

# Translate every test sentence and score it on its own
for text_dict in test_sentences.values():
    source_text = text_dict["source"]
    correct_translation = text_dict["target"]

    translated_text = translator(source_text, src_lang="btk_Latn", tgt_lang="ind_Latn")
    translation = translated_text[0]["translation_text"]

    # Sentence-level SacreBLEU: one prediction against one reference
    sacrebleu_score = sacrebleu.compute(
        predictions=[translation],
        references=[[correct_translation]],
    )["score"]

    translated_sentences.append(
        (source_text, translation, correct_translation, sacrebleu_score)
    )

# Print the results
for number, (source, translation, correct_translation, sacrebleu_score) in enumerate(
    translated_sentences, start=1
):
    print(f"Sentence {number}:")
    print("Source: ", source)
    print("Target: ", correct_translation)
    print("Prediction: ", translation)
    print("SacreBLEU Score: ", sacrebleu_score)
    print()

## 2. Indonesian to Batak Toba

In [None]:
from datasets import load_dataset

# Load the dataset from the Hugging Face Hub. This is the same "btk_ind"
# configuration that section 1 loads; the translation direction is reversed
# afterwards by swapping which column is renamed to 'source' and which to 'target'.
dataset_ind_to_btk = load_dataset(
    "indonlp/nusatranslation_mt",
    "nusatranslation_mt_btk_ind_source"
    )

In [None]:
dataset_ind_to_btk['train'][0]

In [None]:
# Map the raw column names onto translation roles in a single call:
# 'label' becomes the model input ('source') and 'text' the reference ('target')
dataset_ind_to_btk = dataset_ind_to_btk.rename_columns(
    {"label": "source", "text": "target"}
    )

In [None]:
# Verify the column names
dataset_ind_to_btk['train'][0]

### Dataset and preprocessing

The next step is to load an AutoTokenizer to process the Batak Toba-Indonesian language pairs:

In [None]:
! pip install sentencepiece # For Facebook's NLLB models

In [None]:
# # Run this to create and publish a new tokenizer, otherwise use the already published one.
# # Add new tgt_lang code, e.g., "btk_Latn" to the model

# # First, create a new tokenizer by loading the existing one and adding the new language code
# from transformers import AutoTokenizer

# tokenizer = AutoTokenizer.from_pretrained(
#     "facebook/nllb-200-distilled-600M",
#     src_lang="ind_Latn",
#     tgt_lang="btk_Latn",
#     additional_special_tokens=['btk_Latn']
#     )

# # Publish the new tokenizer to the Hub
# tokenizer.push_to_hub(
#     "bbc-batak-toba-as-tgt-tokenizer"
#     )

# # # Example of how others would use it:

# # # Load the tokenizer from the Hub
# # tokenizer = AutoTokenizer.from_pretrained(
# #     "kepinsam/bbc-batak-toba-as-tgt-tokenizer"
# #     )

# # # Use the tokenizer to tokenize a Batak Toba sentence
# # sentence = "Horas ma ho!"
# # tokens = tokenizer(
# #     sentence
# #     )
# # print(
# #     tokens
# #     )

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer published on the Hub for the Indonesian -> Batak Toba
# direction. The commented-out cell above shows how such a tokenizer is built:
# the NLLB tokenizer plus an additional 'btk_Latn' special token.
# src_lang / tgt_lang select the language codes used for inputs and targets.
tokenizer = AutoTokenizer.from_pretrained(
    "kepinsam/bbc-batak-toba-as-tgt-tokenizer",
    src_lang="ind_Latn",
    tgt_lang="btk_Latn"
    )

In [None]:
def preprocess_function(examples):
    """Tokenize a batch of source/target sentence pairs for seq2seq training.

    Args:
        examples: Batch mapping with "source" and "target" entries, each a
            list of sentences (the form `map(..., batched=True)` passes in).

    Returns:
        The tokenizer output for the batch. Inputs and labels are padded or
        truncated to 64 tokens, and padded label positions are set to -100.
    """
    inputs = list(examples["source"])
    targets = list(examples["target"])

    # No return_tensors here: `map` stores the result as plain columns either
    # way, and Python lists make the label masking below straightforward.
    model_inputs = tokenizer(
        inputs,
        text_target=targets,
        max_length=64,
        padding="max_length",
        truncation=True
        )

    # padding="max_length" also pads the labels with pad_token_id. Left as-is,
    # the loss would be computed on those padding positions too. -100 is the
    # label value the loss ignores; compute_metrics in this notebook already
    # converts -100 back to the pad id before decoding.
    pad_token_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [token_id if token_id != pad_token_id else -100 for token_id in label_ids]
        for label_ids in model_inputs["labels"]
    ]
    return model_inputs

# Tokenize the dataset with preprocess_function. batched=True passes many
# examples per call, which speeds up map(). The raw "id", "source" and "target"
# columns are removed so only the tokenizer outputs remain.
tokenized_dataset = dataset_ind_to_btk.map(
    preprocess_function,
    batched=True,
    remove_columns=["id", "source", "target"]
    )

In [None]:
import numpy as np
import evaluate

sacrebleu = evaluate.load("sacrebleu")

def postprocess_text(preds, labels):
    """Strip surrounding whitespace and shape the texts for SacreBLEU.

    Args:
        preds: Iterable of decoded prediction strings.
        labels: Iterable of decoded reference strings.

    Returns:
        A tuple ``(preds, labels)``: a flat list of stripped predictions and a
        list holding one single-element list per stripped reference.
    """
    cleaned_preds = []
    for prediction in preds:
        cleaned_preds.append(prediction.strip())

    # Each prediction gets a list of references; here there is exactly one.
    wrapped_labels = []
    for reference in labels:
        wrapped_labels.append([reference.strip()])

    return cleaned_preds, wrapped_labels


def compute_metrics(eval_preds):
    """Compute SacreBLEU and the mean generated length for an evaluation pass.

    Args:
        eval_preds: Pair ``(preds, labels)`` of token-id arrays. ``preds`` may
            also be a tuple, in which case its first element is used.

    Returns:
        Dict with "sacrebleu" (corpus score) and "gen_len" (mean number of
        non-padding tokens per prediction), both rounded to 4 decimals.
    """
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]

    # -100 is a padding/ignore marker, not a real token id, so it cannot be
    # decoded. The official translation example replaces it in the predictions
    # as well as the labels. Doing so also keeps -100 padding out of gen_len.
    pad_token_id = tokenizer.pad_token_id
    preds = np.where(preds != -100, preds, pad_token_id)
    labels = np.where(labels != -100, labels, pad_token_id)

    decoded_preds = tokenizer.batch_decode(
        preds,
        skip_special_tokens=True
        )
    decoded_labels = tokenizer.batch_decode(
        labels,
        skip_special_tokens=True
        )

    decoded_preds, decoded_labels = postprocess_text(
        decoded_preds,
        decoded_labels
        )

    result = sacrebleu.compute(
        predictions=decoded_preds,
        references=decoded_labels
        )

    result = {"sacrebleu": result["score"]}

    # Generated length = number of non-padding tokens in each prediction
    prediction_lens = [np.count_nonzero(pred != pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result

In [None]:
from transformers import AutoModelForSeq2SeqLM

# Load a fresh copy of the pretrained "facebook/nllb-200-distilled-600M"
# checkpoint as a seq2seq model; this `model` object is what the
# Seq2SeqTrainer in the Training section of this part fine-tunes.
model = AutoModelForSeq2SeqLM.from_pretrained(
    "facebook/nllb-200-distilled-600M"
    )


### Evaluation before training (Zero-shot translation)

In [None]:
# # Data collator is optional
# from transformers import DataCollatorForSeq2Seq

# data_collator = DataCollatorForSeq2Seq(
#     tokenizer=tokenizer,
#     model="facebook/nllb-200-distilled-600M",
#     )

In [None]:
# from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments

# training_args = Seq2SeqTrainingArguments(
#     output_dir="ind-to-bbc-nmt-v2",
#     eval_strategy="epoch",
#     per_device_eval_batch_size=8,

#     # For optimizing resources usage (GPU, RAM, etc.)
#     # ---
#     predict_with_generate=True,
#     optim="adamw_bnb_8bit",
#     fp16=True,
#     save_total_limit=3,
#     eval_accumulation_steps=4,
#     # ---

#     push_to_hub=False,
# )

# # Create a Seq2SeqTrainer for evaluation only
# trainer = Seq2SeqTrainer(
#     model=model,
#     args=training_args,
#     train_dataset=None,  # Set train_dataset to None for evaluation
#     # data_collator=data_collator,
#     compute_metrics=compute_metrics,
#     )

# trainer.evaluate(
#     eval_dataset=tokenized_dataset["test"]
#     )

In [None]:
# from transformers import pipeline
# import evaluate

# sacrebleu = evaluate.load("sacrebleu")

# translator = pipeline(
#     "translation",
#     model="facebook/nllb-200-distilled-600M"
#     )

# test_sentences = {
#     "text1": {
#         "source": "Turun disini lah kita.",
#         "target": "Turun dison ma hita.",
#         },
#     "text2": {
#         "source": "Kalau tidak memasak kau, ayolah makan ke luar kita.",
#         "target": "Molo ndang mangalompa ho, eta ma mangan tu kaluar hita.",
#     },
#     "text3": {
#         "source": "Kalau pergi kau, hatiku pergi bersamamu.",
#         "target": "Molo lao ho, rohakku dohot mai lao.",
#     },
#     "text4": {
#         "source": "Biarpun meminta kau, tidak mau aku.",
#         "target": "Agia mangido pe ho, ndang olo au.",
#     },
#     "text5": {
#         "source": "Dahulu hiduplah seorang saja yang Bernama Raja Rahat yang berkuasa di Samosir.",
#         "target": "Na jolo adong ma raja na margoar Raja Rahat na marhuaso i Samosir.",
#     },
#     "text6": {
#         "source": "Biarlah orang lain memuji engkau dan bukan mulutmu, orang yang tidak kau kenal dan bukan bibirmu sendiri. Batu adalah berat dan pasir pun ada beratnya, tetapi lebih berat dari kedua-duanya adalah sakit hati terhadap orang bodoh.",
#         "target": "Halak na asing tagonan mamuji ho, unang tung pamanganmu sandiri, halak sileban tagonan, unang bibirmu sandiri. Dokdok do batu, jala borat horsik, alai dumokdok sian duansa do anggo hamurhingon ni halak na oto.",
#     },
# }

# translated_sentences = []

# # Perform translation and evaluation for each test sentence
# for key, text_dict in test_sentences.items():
#     source_text, translation, correct_translation = text_dict["source"], "", text_dict["target"]

#     # Translate the source text
#     translated_text = translator(
#         source_text,
#         src_lang="ind_Latn",
#         tgt_lang="btk_Latn"
#         )
#     translation = translated_text[0]["translation_text"]

#     # Calculate the SacreBLEU score
#     sacrebleu_score = sacrebleu.compute(
#         predictions=[translation],
#         references=[[correct_translation]]
#         )

#     sacrebleu_score = sacrebleu_score["score"]

#     translated_sentences.append((
#         source_text,
#         translation,
#         correct_translation,
#         sacrebleu_score
#         ))

# # Print the results
# for i, (
#     source,
#     translation,
#     correct_translation,
#     sacrebleu_score
#     ) in enumerate(translated_sentences):
#     print(
#         f"Sentence {i + 1}:"
#         )
#     print(
#         "Source: ",
#         source
#         )
#     print(
#         "Target: ",
#         correct_translation
#         )
#     print(
#         "Prediction: ",
#         translation
#         )
#     print(
#         "SacreBLEU Score: ",
#         sacrebleu_score
#         )
#     print()

### Training

In [None]:
import locale
# Monkeypatch: make locale.getpreferredencoding always report "UTF-8".
# NOTE(review): presumably a workaround so the `! pip install` shell cell that
# follows runs in the hosted notebook environment; confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"

In [None]:
! pip install bitsandbytes # For 'adamw_bnb_8bit' optimizer

In [None]:
model.config.to_dict()

In [None]:
# # Data collator is optional
# from transformers import DataCollatorForSeq2Seq

# data_collator = DataCollatorForSeq2Seq(
#     tokenizer=tokenizer,
#     model="facebook/nllb-200-distilled-600M"
#     )

In [None]:
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer

# Fine-tuning configuration for the Indonesian -> Batak Toba model.
training_args = Seq2SeqTrainingArguments(
    output_dir="ind-to-bbc-nmt-v2",
    # Evaluate, log and checkpoint once per epoch. load_best_model_at_end needs
    # the save and eval strategies to match, which they do here.
    eval_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    # NOTE(review): no metric_for_best_model is set, so per the Trainer docs
    # "best" is judged by evaluation loss rather than SacreBLEU; confirm intended.
    load_best_model_at_end=True,

    # Hyperparameters to tune
    # ---

    # More influential hyperparameters
    num_train_epochs=5, # iterate on [5, 10] # Higher means longer training time
    per_device_train_batch_size=8, # iterate on [4, 8, 16, 32] # Higher needs higher GPU RAM

    # Less influential hyperparameters
    learning_rate=5e-5,
    weight_decay=0.3,
    warmup_ratio=0.1,
    per_device_eval_batch_size=16,
    # ---

    # For optimizing resources usage (System RAM, GPU RAM, Disk, etc.)
    # ---
    predict_with_generate=True,  # evaluate on generated sequences so compute_metrics can decode them
    optim="adamw_bnb_8bit",  # 8-bit AdamW; needs the bitsandbytes package installed above
    fp16=True,  # mixed-precision training
    save_total_limit=1,  # cap the number of checkpoints kept on disk
    eval_accumulation_steps=4,  # offload accumulated predictions every 4 eval steps
    # ---

    push_to_hub=True,
)

# The trainer evaluates on the "test" split every epoch.
# NOTE(review): that split therefore also drives best-checkpoint selection;
# if the dataset provides a validation split, consider using it here instead.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    # NOTE(review): newer transformers releases deprecate `tokenizer=` in favour
    # of `processing_class=`; confirm against the installed version.
    tokenizer=tokenizer,
    # data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Run fine-tuning
trainer.train()


In [None]:
# from transformers import GenerationConfig

# # Create a GenerationConfig object with your desired parameters
# generation_config = GenerationConfig(
#     max_length=200,
#     push_to_hub=True
#     # Add other generation parameters as needed
# )

# # Save the GenerationConfig with your model
# model.generation_config = generation_config
# model.save_pretrained(
#     "ind-to-bbc-nmt-v2",
#     generation_config=generation_config
#     )

In [None]:
trainer.push_to_hub()

### Evaluation after training

In [None]:
from transformers import pipeline, AutoTokenizer
import evaluate

# tokenizer = AutoTokenizer.from_pretrained(
#     "kepinsam/bbc-batak-toba-as-tgt-tokenizer",
#     src_lang="ind_Latn",
#     tgt_lang="btk_Latn"
#     )

# Translation pipeline backed by the fine-tuned checkpoint on the Hub
translator = pipeline(
    "translation",
    model="kepinsam/ind-to-bbc-nmt-v2",
    # decoder_start_token_id=tokenizer.bos_token_id,
)

# Metric used to score each prediction against its reference translation
sacrebleu = evaluate.load("sacrebleu")

# Small fixed set of sentences: "source" is the input text, "target" the reference
test_sentences = {
    "text1": {
        "source": "Turun disini lah kita.",
        "target": "Turun dison ma hita.",
    },
    "text2": {
        "source": "Kalau tidak memasak kau, ayolah makan ke luar kita.",
        "target": "Molo ndang mangalompa ho, eta ma mangan tu kaluar hita.",
    },
    "text3": {
        "source": "Kalau pergi kau, hatiku pergi bersamamu.",
        "target": "Molo lao ho, rohakku dohot mai lao.",
    },
    "text4": {
        "source": "Biarpun meminta kau, tidak mau aku.",
        "target": "Agia mangido pe ho, ndang olo au.",
    },
    "text5": {
        "source": "Dahulu hiduplah seorang saja yang Bernama Raja Rahat yang berkuasa di Samosir.",
        "target": "Na jolo adong ma raja na margoar Raja Rahat na marhuaso i Samosir.",
    },
    "text6": {
        "source": "Biarlah orang lain memuji engkau dan bukan mulutmu, orang yang tidak kau kenal dan bukan bibirmu sendiri. Batu adalah berat dan pasir pun ada beratnya, tetapi lebih berat dari kedua-duanya adalah sakit hati terhadap orang bodoh.",
        "target": "Halak na asing tagonan mamuji ho, unang tung pamanganmu sandiri, halak sileban tagonan, unang bibirmu sandiri. Dokdok do batu, jala borat horsik, alai dumokdok sian duansa do anggo hamurhingon ni halak na oto.",
    },
}

translated_sentences = []

# Translate every test sentence and score it on its own
for text_dict in test_sentences.values():
    source_text = text_dict["source"]
    correct_translation = text_dict["target"]

    translated_text = translator(source_text, src_lang="ind_Latn", tgt_lang="btk_Latn")
    translation = translated_text[0]["translation_text"]

    # Sentence-level SacreBLEU: one prediction against one reference
    sacrebleu_score = sacrebleu.compute(
        predictions=[translation],
        references=[[correct_translation]],
    )["score"]

    translated_sentences.append(
        (source_text, translation, correct_translation, sacrebleu_score)
    )

# Print the results
for number, (source, translation, correct_translation, sacrebleu_score) in enumerate(
    translated_sentences, start=1
):
    print(f"Sentence {number}:")
    print("Source: ", source)
    print("Target: ", correct_translation)
    print("Prediction: ", translation)
    print("SacreBLEU Score: ", sacrebleu_score)
    print()