#### Libraries

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


#### Init

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_id = "pigpig1524/ml-translator"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained("vinai/vinai-translate-en2vi-v2", src_lang="vi_VN")
model.to(device)

MBartForConditionalGeneration(
  (model): MBartModel(
    (shared): MBartScaledWordEmbedding(66773, 1024, padding_idx=1)
    (encoder): MBartEncoder(
      (embed_tokens): MBartScaledWordEmbedding(66773, 1024, padding_idx=1)
      (embed_positions): MBartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x MBartEncoderLayer(
          (self_attn): MBartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)


#### Gradio

In [6]:
def translate(text, model=model, batch_size=16):
    if not text.strip():
        return "Vui lòng nhập văn bản."

    texts = [text]
    translated_texts = []

    for i in range(0, len(texts), batch_size):
        batch = texts[i : i + batch_size]
        input_ids = tokenizer(batch, padding=True, return_tensors="pt").to(device)
        output_ids = model.generate(
            **input_ids,
            decoder_start_token_id=tokenizer.lang_code_to_id.get("vi_VN", None),
            num_return_sequences=1,
            num_beams=5,
            early_stopping=True
        )
        vi = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
        translated_texts.extend(vi)

    return translated_texts[0]

iface = gr.Interface(
    fn=translate,
    inputs=gr.Textbox(lines=4, placeholder="Nhập văn bản tiếng Anh ở đây ..."),
    outputs="text",
    title="English to Vietnamese Academic Text Translator",
)

iface.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


