In [34]:
from transformers import AutoTokenizer, AutoModelForCausalLM


In [None]:
from transformers import pipeline

# Build a chat-capable text-generation pipeline around the 1.5B instruct checkpoint.
pipe = pipeline(task="text-generation", model="Qwen/Qwen2.5-1.5B-Instruct")

# Single user turn carrying the task description, the scene context and the Korean line.
messages = [
    dict(
        role="user",
        content=(
            "Task: Translate the following Korean Manhwa dialogue into natural English. Return just english\n"
            "    Context: A dramatic scene involving a vow/oath.\n"
            "    Korean: 네 맹세의 ‘뒷편’에... 그림자에 숨은 ‘뒷편’에... ‘속임수’와 ‘배신’이 숨어있지 않다는걸\n"
            "    English"
        ),
    ),
]

# Bare last expression: the notebook renders the conversation returned by the pipeline.
pipe(messages)

Loading weights: 100%|██████████| 338/338 [00:00<00:00, 1101.11it/s, Materializing param=model.norm.weight]                              
Both `max_new_tokens` (=256) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[{'generated_text': [{'role': 'user',
    'content': 'Task: Translate the following Korean Manhwa dialogue into natural English. Return just english\n    Context: A dramatic scene involving a vow/oath.\n    Korean: 네 맹세의 ‘뒷편’에... 그림자에 숨은 ‘뒷편’에... ‘속임수’와 ‘배신’이 숨어있지 않다는걸\n    English'},
   {'role': 'assistant',
    'content': "Yes, there's a hidden 'backstage' behind that oath... And it also hides a 'trickery' and 'betrayal'."}]}]

In [32]:
# Second prompt variant: the localisation rules live in a system message and the
# user turn carries nothing but the Korean line.
messages = [
    dict(
        role="system",
        content=(
            "You are a professional Manhwa (Korean Manga) Localizer. \n"
            "        Your goal is to translate dialogue so it sounds natural, dramatic, and impactful in English.\n"
            "        - Avoid literal translations like 'backside' unless it fits the context.\n"
            "        - Use localized idioms (e.g., 'hidden agenda', 'ulterior motives').\n"
            "        - Maintain the poetic rhythm of the original text.\n"
            "        - OUTPUT ONLY THE TRANSLATED TEXT. No explanations. No quotes."
        ),
    ),
    dict(
        role="user",
        content="네 맹세의 ‘뒷편’에... 그림자에 숨은 ‘뒷편’에... ‘속임수’와 ‘배신’이 숨어있지 않다는걸",
    ),
]

res = pipe(messages)

# The pipeline hands back the whole chat; the reply is the last message of the first result.
translation = res[0]["generated_text"][-1]["content"]

print(translation)

Both `max_new_tokens` (=256) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


I swear by your dark secrets... There's hidden "backends" there too... "Backends" where tricks and betrayals lurk!


In [36]:
# Load the 3B instruct tokenizer and model by their hub id.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="Qwen/Qwen2.5-3B-Instruct"
)
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path="Qwen/Qwen2.5-3B-Instruct"
)

# Write both to sibling folders under ./backend/models/Qwen.
tokenizer.save_pretrained(save_directory="./backend/models/Qwen/tokenizer")
model.save_pretrained(save_directory="./backend/models/Qwen/model")

Fetching 2 files: 100%|██████████| 2/2 [03:14<00:00, 97.05s/it] 
Loading weights: 100%|██████████| 434/434 [00:00<00:00, 1118.53it/s, Materializing param=model.norm.weight]                              
Writing model shards: 100%|██████████| 1/1 [00:09<00:00,  9.01s/it]


In [None]:
def translate_with_llm(text, max_new_tokens=40):
    """Translate a line of Korean manhwa dialogue into English with the loaded chat model.

    Builds a system + user conversation, renders it through the tokenizer's chat
    template, generates a reply and decodes only the newly generated tokens.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        text: Korean dialogue to translate.
        max_new_tokens: Upper bound on the number of generated tokens. Defaults
            to 40, the value that used to be hard-coded; raise it for longer
            dialogue so the translation is not cut off mid-sentence.

    Returns:
        The decoded reply with special tokens skipped, or ``""`` when ``text``
        is ``None``, empty or whitespace-only.
    """
    # Nothing to translate: skip the model call instead of letting it invent
    # output for an empty prompt.
    if not text or not text.strip():
        return ""

    messages = [
        {
            "role": "system",
            "content": (
                "You are a professional Manhwa (Korean Manga) Localizer. \n"
                "        Your goal is to translate dialogue so it sounds natural, dramatic, and impactful in English.\n"
                "        - Avoid literal translations like 'backside' unless it fits the context.\n"
                "        - Use localized idioms (e.g., 'hidden agenda', 'ulterior motives').\n"
                "        - Maintain the poetic rhythm of the original text.\n"
                "        - OUTPUT ONLY THE TRANSLATED TEXT. No explanations. No quotes."
            ),
        },
        {
            "role": "user",
            "content": text,
        },
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # generate() returns prompt + completion; slice off the prompt tokens so
    # only the model's reply is decoded.
    prompt_length = inputs["input_ids"].shape[-1]
    output_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return output_text

In [38]:
# Try the helper on the same sample line used in the pipeline cells.
translate_with_llm(
    text="네 맹세의 ‘뒷편’에... 그림자에 숨은 ‘뒷편’에... ‘속임수’와 ‘배신’이 숨어있지 않다는걸"
)

'In the hidden back of his oath… In the shadow where deceit and treachery should be lurking… There is nothing but truth.<|im_end|>'