diff --git a/docs/changes.rst b/docs/changes.rst index ed8f2560c536..0f818f8746da 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -12,6 +12,8 @@ Not yet released. **Bug fixes** +* Improved handling of placeables in :ref:`mt-openai`. + **Compatibility** **Upgrading** diff --git a/weblate/machinery/base.py b/weblate/machinery/base.py index 055cba4aef1f..be48067693e1 100644 --- a/weblate/machinery/base.py +++ b/weblate/machinery/base.py @@ -112,6 +112,8 @@ class BatchMachineTranslation: settings_form: None | type[BaseMachineryForm] = None request_timeout = 5 is_available = True + replacement_start = "[X" + replacement_end = "X]" @classmethod def get_rank(cls): @@ -317,7 +319,7 @@ def format_replacement( self, h_start: int, h_end: int, h_text: str, h_kind: None | Unit ) -> str: """Generate a single replacement.""" - return f"[X{h_start}X]" + return f"{self.replacement_start}{h_start}{self.replacement_end}" def get_highlights( self, text: str, unit diff --git a/weblate/machinery/openai.py b/weblate/machinery/openai.py index bbd8cd7660ca..711ad078ef4d 100644 --- a/weblate/machinery/openai.py +++ b/weblate/machinery/openai.py @@ -41,7 +41,7 @@ You always reply with translated string only. You do not include transliteration. {separator} -You treat strings like {placeable_1} or {placeable_2} as placeables for user input and keep them intact. +{placeables} {glossary} """ SEPARATOR = "\n==WEBLATE_PART==\n" @@ -53,6 +53,9 @@ Use the following glossary during the translation: {} """ +PLACEABLES_PROMPT = """ +You treat strings like {placeable_1} or {placeable_2} as placeables for user input and keep them intact. 
+""" class OpenAITranslation(BatchMachineTranslation): @@ -97,7 +100,7 @@ def format_prompt_part(self, name: Literal["style", "persona"]): return text def get_prompt( - self, source_language: str, target_language: str, units: list + self, source_language: str, target_language: str, texts: list[str], units: list ) -> str: glossary = "" if any(units): @@ -107,6 +110,13 @@ def get_prompt( if glossary: glossary = GLOSSARY_PROMPT.format(glossary) separator = SEPARATOR_PROMPT if len(units) > 1 else "" + placeables = "" + if any(self.replacement_start in text for text in texts): + placeables = PLACEABLES_PROMPT.format( + placeable_1=self.format_replacement(0, -1, "", None), + placeable_2=self.format_replacement(123, -1, "", None), + ) + return PROMPT.format( source_language=source_language, target_language=target_language, @@ -114,8 +124,7 @@ def get_prompt( style=self.format_prompt_part("style"), glossary=glossary, separator=separator, - placeable_1=self.format_replacement(0, -1, "", None), - placeable_2=self.format_replacement(123, -1, "", None), + placeables=placeables, ) def download_multiple_translations( @@ -128,7 +137,7 @@ def download_multiple_translations( ) -> DownloadMultipleTranslations: texts = [text for text, _unit in sources] units = [unit for _text, unit in sources] - prompt = self.get_prompt(source, language, units) + prompt = self.get_prompt(source, language, texts, units) messages = [ ChatCompletionSystemMessageParam(role="system", content=prompt), ChatCompletionUserMessageParam(role="user", content=SEPARATOR.join(texts)),