Skip to content

Commit

Permalink
fix(machinery): better handle placeholders in OpenAI
Browse files Browse the repository at this point in the history
Do not confuse the model by talking about placeholders when there are none;
it then tends to hallucinate and add some.
  • Loading branch information
nijel committed May 6, 2024
1 parent 15021fb commit b6f6da6
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 6 deletions.
2 changes: 2 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Not yet released.

**Bug fixes**

* Improved handling of placeables in :ref:`mt-openai`.

**Compatibility**

**Upgrading**
Expand Down
4 changes: 3 additions & 1 deletion weblate/machinery/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ class BatchMachineTranslation:
settings_form: None | type[BaseMachineryForm] = None
request_timeout = 5
is_available = True
replacement_start = "[X"
replacement_end = "X]"

@classmethod
def get_rank(cls):
Expand Down Expand Up @@ -317,7 +319,7 @@ def format_replacement(
self, h_start: int, h_end: int, h_text: str, h_kind: None | Unit
) -> str:
"""Generate a single replacement."""
return f"[X{h_start}X]"
return f"{self.replacement_start}{h_start}{self.replacement_end}"

def get_highlights(
self, text: str, unit
Expand Down
19 changes: 14 additions & 5 deletions weblate/machinery/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
You always reply with translated string only.
You do not include transliteration.
{separator}
You treat strings like {placeable_1} or {placeable_2} as placeables for user input and keep them intact.
{placeables}
{glossary}
"""
SEPARATOR = "\n==WEBLATE_PART==\n"
Expand All @@ -53,6 +53,9 @@
Use the following glossary during the translation:
{}
"""
PLACEABLES_PROMPT = """
You treat strings like {placeable_1} or {placeable_2} as placeables for user input and keep them intact.
"""


class OpenAITranslation(BatchMachineTranslation):
Expand Down Expand Up @@ -97,7 +100,7 @@ def format_prompt_part(self, name: Literal["style", "persona"]):
return text

def get_prompt(
self, source_language: str, target_language: str, units: list
self, source_language: str, target_language: str, texts: list[str], units: list
) -> str:
glossary = ""
if any(units):
Expand All @@ -107,15 +110,21 @@ def get_prompt(
if glossary:
glossary = GLOSSARY_PROMPT.format(glossary)
separator = SEPARATOR_PROMPT if len(units) > 1 else ""
placeables = ""
if any(self.replacement_start in text for text in texts):
placeables = PLACEABLES_PROMPT.format(
placeable_1=self.format_replacement(0, -1, "", None),
placeable_2=self.format_replacement(123, -1, "", None),
)

return PROMPT.format(
source_language=source_language,
target_language=target_language,
persona=self.format_prompt_part("persona"),
style=self.format_prompt_part("style"),
glossary=glossary,
separator=separator,
placeable_1=self.format_replacement(0, -1, "", None),
placeable_2=self.format_replacement(123, -1, "", None),
placeables=placeables,
)

def download_multiple_translations(
Expand All @@ -128,7 +137,7 @@ def download_multiple_translations(
) -> DownloadMultipleTranslations:
texts = [text for text, _unit in sources]
units = [unit for _text, unit in sources]
prompt = self.get_prompt(source, language, units)
prompt = self.get_prompt(source, language, texts, units)
messages = [
ChatCompletionSystemMessageParam(role="system", content=prompt),
ChatCompletionUserMessageParam(role="user", content=SEPARATOR.join(texts)),
Expand Down

0 comments on commit b6f6da6

Please sign in to comment.