### Többnyelvű fordító (NYTK M2M100)


In [None]:
%pip install gradio transformers torch sentencepiece

In [None]:
import gradio as gr
import torch
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

'''
Source languages: Bulgarian (bg), Czech (cs), German (de), English (en), Croatian (hr), Polish (pl), Romanian (ro), Russian (ru), Slovak (sk),
Slovene (sl), Serbian (sr), Ukrainian (uk)
'''
# Checkpoint identifier passed to both from_pretrained() calls below.
mname = "NYTK/translation-m2m100-1.2B-multi12-hungarian"
# Alternative checkpoint, kept for quick switching:
# mname = "facebook/m2m100_418M"

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the tokenizer and the model from the same checkpoint, then move the
# model's weights onto the selected device.
tokenizer = M2M100Tokenizer.from_pretrained(mname)
model = M2M100ForConditionalGeneration.from_pretrained(mname)
model = model.to(device)

def predict(text, source_lang):
    """Translate *text* from *source_lang* into Hungarian.

    Args:
        text: The text to translate.
        source_lang: Language code of ``text`` (e.g. ``"en"``), as selected
            in the UI dropdown.

    Returns:
        The decoded translation of the input, or an empty string when
        ``text`` is empty or contains only whitespace.

    Raises:
        gr.Error: If no source language was provided.
    """
    # Nothing to translate: skip the model call instead of letting it
    # generate output from an empty prompt.
    if not text or not text.strip():
        return ""
    # The dropdown can yield None when nothing is selected; report that to
    # the user explicitly rather than failing inside the tokenizer.
    if not source_lang:
        raise gr.Error("Válasszon forrásnyelvet!")

    tokenizer.src_lang = source_lang
    encoded_text = tokenizer(text, return_tensors="pt").to(device)
    # Forcing the first generated token to the "hu" language id selects
    # Hungarian as the target language.
    generated_tokens = model.generate(
        **encoded_text,
        forced_bos_token_id=tokenizer.get_lang_id("hu"),
    )
    # A single input string yields a batch of one; return its only element.
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

# Gradio UI: pick a source language, enter text, and show the translation
# returned by predict().
with gr.Blocks() as demo:
    label = gr.Label("Többnyelvű fordító (NYTK M2M100)")
    source_language = gr.Dropdown(
        # "uk" added so the choices match the documented source-language list.
        choices=["bg", "cs", "de", "en", "hr", "pl", "ro", "ru", "sk", "sl", "sr", "uk"],
        # Start with a selection so the callback never receives None from an
        # untouched dropdown.
        value="en",
        type="value",
        label="Forrás nyelv",
    )
    input_text = gr.Textbox(label="Bemenet")
    output_text = gr.Textbox(label="Fordítás")
    translate_btn = gr.Button("Fordítás")
    # Clicking the button sends (text, language code) to predict() and writes
    # its return value into the output textbox.
    translate_btn.click(
        fn=predict,
        inputs=[input_text, source_language],
        outputs=output_text,
    )
    reflink = gr.HTML('<a href="https://github.com/nytud/machine-translation">GitHub Repo</a>')

demo.title = "Többnyelvű fordító"
demo.launch()