# Import dependencies

In [None]:
import warnings
import time
import logging

import sacrebleu
import itertools

from transformers import TFMarianMTModel, MarianTokenizer

Configuring logger and warnings

In [None]:
# Ignore every warning so library notices stay out of the cell output
warnings.filterwarnings(action="ignore")

# Show log records of level INFO and above
logging.basicConfig(level=logging.INFO)

# Logger shared by the cells below; its name is the literal ":"
logger = logging.getLogger(name=":")

# Getting model, observations
Get model

In [None]:
# Name of the checkpoint pretrained to translate from English to Ukrainian
model_name = "Helsinki-NLP/opus-mt-en-uk"

# Load the tokenizer that belongs to the checkpoint
tokenizer = MarianTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name,
)

# Load the pretrained TensorFlow Marian model from the same checkpoint
model = TFMarianMTModel.from_pretrained(
    pretrained_model_name_or_path=model_name,
)

# Print the model summary
model.summary()

# Translate clause example

In [None]:
# Source sentence to translate
input_text = "What's your problem?"

# Start the wall-clock timer; tokenization, generation and decoding are timed
start_time = time.time()

# Encode the sentence as a batch of one, requesting TensorFlow tensors
input_ids = tokenizer([input_text], return_tensors="tf", padding=True)

# Generate the translation from the encoded token ids
translated_tokens = model.generate(inputs=input_ids["input_ids"])

# Convert the first (and only) generated sequence back into text
first_sequence = translated_tokens.numpy()[0]
translated_text = tokenizer.decode(
    token_ids=first_sequence,
    skip_special_tokens=True,
)

elapsed_seconds = time.time() - start_time
logger.info(f"Runtime: {elapsed_seconds:.2f} seconds.")

print(f"Input Text: {input_text}.")
print(f"Translated Text: {translated_text}.")

The result looks good!

Get scores

In [None]:
# Score the example translation against a human-written reference using three
# sacrebleu corpus-level metrics.
# The source sentence is not passed to any of these metrics, so no variable is
# bound for it here (which also leaves the built-in `input` untouched).

# Machine translation from the previous cell, wrapped as a one-sentence corpus
generated_output = [translated_text]

# Human-generated translation of the same sentence
human_output = ["У чому твоя проблема?"]

# All three metrics receive the same list holding the single reference corpus
references = [human_output]

# Evaluate the quality of the machine-generated translation by comparing it
# to the human-generated reference translation
bleu_score = sacrebleu.corpus_bleu(
    hypotheses=generated_output,
    references=references,
).score

# Quantify the similarity between the machine-generated translation and the
# human-generated reference
chrf_score = sacrebleu.corpus_chrf(
    hypotheses=generated_output,
    references=references,
).score

# Measure the edit distance between the machine-generated translation and the
# reference translation
ter_score = sacrebleu.corpus_ter(
    hypotheses=generated_output,
    references=references,
).score

print(f"BLEU Score: {bleu_score:.2f}.")
print(f"chrF Score: {chrf_score:.2f}.")
print(f"TER Score: {ter_score:.2f}.")

Look up model configuration

In [None]:
# Keep a handle on the configuration object attached to the loaded model
config = model.config

# Bare expression as the last statement of the cell: the notebook renders it
# as the cell output
config

# Tune pretrained model

In [None]:
# Set the reference result for the tuning section.
# NOTE(review): this string appears to mean "Hi, how are you?", which does not
# correspond to `input_text` ("What's your problem?"), and the visible search
# loop scores against `human_output` instead — confirm which reference is
# intended before relying on this value.
reference_translation = "Привіт, як справи?"

Set search parameters

In [None]:
# Candidate values for each setting explored by the search
max_lengths = [128, 256, 512]
num_beams_list = [4, 8, 16]
num_hidden_layers_list = [2, 4, 6]
max_position_embeddings_list = [64, 128, 256]

# Cartesian product of the four value lists (3 * 3 * 3 * 3 = 81 tuples), each
# ordered as (max_length, num_beams, num_hidden_layers, max_position_embeddings)
parameter_grid = (
    max_lengths,
    num_beams_list,
    num_hidden_layers_list,
    max_position_embeddings_list,
)
configurations = [*itertools.product(*parameter_grid)]

logger.info(f"Configuration amount: {len(configurations)}.")

Search over the parameter combinations

In [None]:
# Grid search: translate `input_text` once per configuration, score each
# translation with BLEU against `human_output`, and remember both the
# best-scoring and the fastest configuration.
best_bleu_score = 0
best_configuration = None
time_of_best_translation = None

# Duration of the very first run, logged once as a baseline
reference_time = None

# Fastest run seen so far; bound up front so the names always exist after the
# loop
fast_configuration = None
time_of_fast_translation = None

# The input sentence never changes, so tokenize it once outside the loop
input_ids = tokenizer.encode(
    text=input_text,
    return_tensors="tf",
)

# The loop variable is deliberately not called `config`, so the handle on the
# model configuration created in an earlier cell is not overwritten
for current_configuration in configurations:
    (
        max_length,
        num_beams,
        num_hidden_layers,
        max_pos_emb,
    ) = current_configuration
    print(f"Current configuration: {current_configuration}.")

    # Update the loaded configuration in place. Building a new configuration
    # from only these four keys would drop every other value that came with
    # the pretrained checkpoint.
    # NOTE(review): `num_hidden_layers` and `max_position_embeddings` are
    # written to the config of an already-instantiated model; presumably this
    # does not rebuild the network — confirm these two settings actually
    # influence generation.
    model.config.update(
        {
            "max_length": max_length,
            "num_beams": num_beams,
            "num_hidden_layers": num_hidden_layers,
            "max_position_embeddings": max_pos_emb,
        }
    )

    start_time = time.time()

    # Translate input text
    translated_tokens = model.generate(
        inputs=input_ids,
        max_length=max_length,
        num_beams=num_beams,
    )

    # Decode translated tokens
    translated_text = tokenizer.decode(
        token_ids=translated_tokens.numpy()[0],
        skip_special_tokens=True,
    )

    translation_time = time.time() - start_time
    print(f"translation time: {translation_time:.2f}.")

    # Compute BLEU for THIS configuration's translation (not the translation
    # produced by the earlier example cell)
    hypotheses = [translated_text]
    bleu_score = sacrebleu.corpus_bleu(
        hypotheses=hypotheses,
        references=[human_output],
    ).score

    if reference_time is None:
        # NOTE(review): the first generate call may include one-off framework
        # warm-up cost — confirm before trusting it as a baseline.
        reference_time = translation_time

        logger.info(f"Reference time:{reference_time:.2f}.")

    # Track the fastest run overall by comparing against the current minimum
    if (
        time_of_fast_translation is None
        or translation_time < time_of_fast_translation
    ):
        time_of_fast_translation = translation_time
        fast_configuration = current_configuration

    # Update the best configuration if the BLEU score is higher; the first
    # configuration is always recorded so the result names are never unset,
    # even when every score is 0
    if best_configuration is None or bleu_score > best_bleu_score:
        best_bleu_score = bleu_score
        best_configuration = current_configuration
        time_of_best_translation = translation_time

In [None]:
# Report the highest-scoring configuration with its BLEU score and runtime
print(f"Best Configuration: {best_configuration}")
print(f"Best BLEU Score: {best_bleu_score:.2f}.")
print(f"Time: {time_of_best_translation:.2f} seconds.\n")

# Report the configuration recorded as fast, with its runtime
print(f"Fast Configuration: {fast_configuration}")
print(f"Fast time: {time_of_fast_translation:.2f}")

# Summary
1. A pretrained MarianMT model was used to translate a clause from English to Ukrainian.
2. A reasonable number of configuration parameters were chosen to tune the pretrained model in terms of score and time.
3. While the best-score search shows a reasonable result, the speed search does not show a clear pattern, which means that it should be investigated separately.