<a href="https://colab.research.google.com/github/davidandw190/faas-dl-inference/blob/main/notebooks/machine_translation_it-en.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Machine Translation (IT-EN)

In [None]:
!pip install transformers onnx onnxruntime optimum

In [34]:
import os
import shutil

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from optimum.onnxruntime import ORTModelForSeq2SeqLM
from onnxruntime.quantization import quantize_dynamic, QuantType
import onnxruntime as ort
import numpy as np

In [35]:
# Checkpoint identifier for the Italian-to-English translation model; the same
# identifier is used to load both the tokenizer and the seq2seq model.
model_name = 'Helsinki-NLP/opus-mt-it-en'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [36]:
# Italian sample sentences used as inputs when testing the PyTorch model and
# the ONNX models in this notebook.
test_sentences = [
    "Ciao, come stai?",
    "Mi piace la pasta.",
    "Dove si trova la stazione?",
    "Che bella giornata!",
    "Vorrei prenotare un tavolo per due persone, per favore."
]

In [37]:
# Select the first CUDA device when torch reports CUDA as available,
# otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
# Place the PyTorch model on the selected device.
model = model.to(device)

Using device: cuda:0


In [38]:
def translate_text(text, model):
    """Translate ``text`` with a PyTorch seq2seq ``model``.

    Args:
        text: Source sentence; it is encoded with the notebook-level
            ``tokenizer``.
        model: Model object exposing ``device`` and ``generate`` (for example
            the ``AutoModelForSeq2SeqLM`` instance loaded in this notebook).

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # Follow the device of the model that was passed in instead of the
    # notebook-level `device` variable, so inputs and weights always match.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():  # inference only: no gradient tracking
        outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [39]:
# Run the PyTorch model over every sample sentence and show each
# source/translation pair followed by a blank line.
print("Testing PyTorch model:")
for sentence in test_sentences:
    translated = translate_text(sentence, model)
    print(
        f"Original (Italian): {sentence}",
        f"Translated (English): {translated}",
        "",
        sep="\n",
    )


Testing PyTorch model:
Original (Italian): Ciao, come stai?
Translated (English): Hi, how are you?

Original (Italian): Mi piace la pasta.
Translated (English): I like pasta.

Original (Italian): Dove si trova la stazione?
Translated (English): Where is the station?

Original (Italian): Che bella giornata!
Translated (English): What a beautiful day!

Original (Italian): Vorrei prenotare un tavolo per due persone, per favore.
Translated (English): I'd like to book a table for two people, please.



In [None]:
# Export the checkpoint to ONNX while loading it, then save the exported model
# under "onnx_model". `export=True` is the current spelling of the deprecated
# `from_transformers=True` flag.
ort_model = ORTModelForSeq2SeqLM.from_pretrained(model_name, export=True)
ort_model.save_pretrained("onnx_model")

In [41]:
# Directory holding the exported model; collect the names of the .onnx files
# found directly inside it.
onnx_model_dir = "onnx_model"
onnx_files = [
    file_name
    for file_name in os.listdir(onnx_model_dir)
    if file_name.endswith('.onnx')
]

In [None]:
# Dynamically quantize every exported ONNX file and write the result, prefixed
# with "quantized_", into a separate output directory.
# NOTE(review): the directory name spells "transaltion" (sic). The same literal
# is used by the other cells that touch this directory, so any rename has to be
# done in all of them together.
os.makedirs("machine_transaltion_it-en_quantized_model", exist_ok=True)
for onnx_file in onnx_files:
    input_file = os.path.join(onnx_model_dir, onnx_file)
    output_file = os.path.join("machine_transaltion_it-en_quantized_model", f"quantized_{onnx_file}")
    # Weights are quantized to the QUInt8 type.
    quantize_dynamic(input_file, output_file, weight_type=QuantType.QUInt8)
    print(f"Quantized {onnx_file} to {output_file}")

In [None]:
# Copy the configuration files next to the quantized graphs and save the
# tokenizer into the same directory, so that directory holds everything the
# later loading cell reads. `shutil` is imported in the notebook's import cell.
for config_name in ("config.json", "generation_config.json"):
    shutil.copy(os.path.join(onnx_model_dir, config_name), "machine_transaltion_it-en_quantized_model")
tokenizer.save_pretrained("machine_transaltion_it-en_quantized_model")

In [50]:
# Resolve which quantized file plays which role, based on its file name.
# os.listdir returns entries in arbitrary order, so the listing is sorted
# before the first match is picked, keeping the selection reproducible.
quantized_onnx_files = sorted(
    f for f in os.listdir("machine_transaltion_it-en_quantized_model") if f.endswith('.onnx')
)
encoder_file = next((f for f in quantized_onnx_files if 'encoder' in f), None)
decoder_file = next((f for f in quantized_onnx_files if 'decoder' in f and 'with_past' not in f), None)
decoder_with_past_file = next((f for f in quantized_onnx_files if 'decoder' in f and 'with_past' in f), None)

# The encoder and the plain decoder are mandatory for loading; stop here with
# a clear message instead of handing None to the loader. The with-past decoder
# is allowed to stay None.
if encoder_file is None or decoder_file is None:
    raise FileNotFoundError(
        "Expected quantized encoder and decoder .onnx files in "
        "'machine_transaltion_it-en_quantized_model', found: "
        f"{quantized_onnx_files}"
    )


In [None]:
# Load the quantized graphs from the output directory using the file names
# resolved above in this notebook.
ort_quantized_model = ORTModelForSeq2SeqLM.from_pretrained(
    "machine_transaltion_it-en_quantized_model",
    encoder_file_name=encoder_file,
    decoder_file_name=decoder_file,
    decoder_with_past_file_name=decoder_with_past_file or None,
    # `use_cache` defaults to True, which needs the with-past decoder; only
    # keep it enabled when that file was actually found.
    use_cache=bool(decoder_with_past_file),
)

In [52]:
def translate_text_onnx(text, ort_model):
    """Translate ``text`` with an ONNX Runtime seq2seq model.

    The sentence is encoded with the notebook-level ``tokenizer``, passed to
    ``ort_model.generate``, and the first generated sequence is decoded with
    special tokens skipped.
    """
    encoded = tokenizer(text, return_tensors="pt")
    generated = ort_model.generate(**encoded)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

# Translate every sample sentence with the exported ONNX model and with its
# quantized counterpart, printing both results under the source sentence.
print("Testing ONNX models:")
for sentence in test_sentences:
    print(f"Original (Italian): {sentence}")
    print(f"Translated (ONNX): {translate_text_onnx(sentence, ort_model)}")
    # end="\n\n" also emits the blank separator line after each sentence.
    print(f"Translated (ONNX quantized): {translate_text_onnx(sentence, ort_quantized_model)}", end="\n\n")

Testing ONNX models:
Original (Italian): Ciao, come stai?
Translated (ONNX): Hi, how are you?
Translated (ONNX quantized): Hi, how are you?

Original (Italian): Mi piace la pasta.
Translated (ONNX): I like pasta.
Translated (ONNX quantized): I like pasta.

Original (Italian): Dove si trova la stazione?
Translated (ONNX): Where is the station?
Translated (ONNX quantized): Where is the station?

Original (Italian): Che bella giornata!
Translated (ONNX): What a beautiful day!
Translated (ONNX quantized): What a beautiful day!

Original (Italian): Vorrei prenotare un tavolo per due persone, per favore.
Translated (ONNX): I'd like to book a table for two people, please.
Translated (ONNX quantized): I'd like to book a table for two people, please.

