diff --git a/README.md b/README.md
index ad5cf4e..6b2c13f 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,33 @@ Optional parameters,
 - `YESGLOT_PER_ITEM_OUTPUT`: 100 (default)
 - `YESGLOT_LLM_MODEL_TEMPERATURE`: 0 (default)
 
+### System Prompt
+
+It is preconfigured, though you may override it to tailor the behavior of your translation.
+
+- `YESGLOT_SYSTEM_PROMPT_FUNCTION`: for example, `"myproject.myapp.utils.get_system_prompt"`
+- `YESGLOT_SYSTEM_PROMPT`: string
+
+Default:
+
+> You are a professional translator. Translate into the target language.
+> - Keep placeholders like {name} / {{handlebars}} unchanged.
+> - Keep URLs and emails unchanged.
+> - Return ONLY a JSON array of strings in the same order.
+
+
+### Preamble Template
+
+It’s already configured, but you can override it to adjust how your translation behaves.
+
+- `YESGLOT_PREAMBLE_TEMPLATE_FUNCTION`: for example, `"myproject.myapp.utils.get_preamble"`
+- `YESGLOT_PREAMBLE_TEMPLATE`: string
+
+Default:
+
+> Translate these items into {language}. Return ONLY a JSON array:
+
+
 ## Usage
 
 A typical workflow with Django translations:
diff --git a/tests/test_prompts.py b/tests/test_prompts.py
new file mode 100644
index 0000000..4a41ee5
--- /dev/null
+++ b/tests/test_prompts.py
@@ -0,0 +1,49 @@
+from django.test import SimpleTestCase
+from django.test.utils import override_settings
+
+from yesglot.exceptions import YesGlotException
+from yesglot.llm import get_system_prompt, get_preamble_template
+from yesglot.settings import DEFAULT_SYSTEM_PROMPT, DEFAULT_PREAMBLE_TEMPLATE
+
+
+def dummy_function():
+    return "dummy"
+
+
+class SystemPromptTests(SimpleTestCase):
+    @override_settings(YESGLOT_SYSTEM_PROMPT_FUNCTION="tests.test_prompts.dummy_function", YESGLOT_SYSTEM_PROMPT="literal")
+    def test_conflict_raises(self):
+        with self.assertRaises(YesGlotException):
+            get_system_prompt()
+
+    @override_settings(YESGLOT_SYSTEM_PROMPT_FUNCTION="tests.test_prompts.dummy_function")
+    def test_from_function(self):
+        
self.assertEqual(get_system_prompt(), "dummy")
+
+    @override_settings(YESGLOT_SYSTEM_PROMPT="literal-sp")
+    def test_literal(self):
+        self.assertEqual(get_system_prompt(), "literal-sp")
+
+    def test_default(self):
+        self.assertEqual(get_system_prompt(), DEFAULT_SYSTEM_PROMPT)
+
+
+class PreambleTemplateTests(SimpleTestCase):
+    @override_settings(YESGLOT_PREAMBLE_TEMPLATE_FUNCTION="tests.test_prompts.dummy_function", YESGLOT_PREAMBLE_TEMPLATE="literal")
+    def test_conflict_raises(self):
+        with self.assertRaises(YesGlotException):
+            get_preamble_template()
+
+    @override_settings(YESGLOT_PREAMBLE_TEMPLATE_FUNCTION="tests.test_prompts.dummy_function", YESGLOT_PREAMBLE_TEMPLATE="")
+    def test_from_function(self):
+        self.assertEqual(get_preamble_template(), "dummy")
+
+    @override_settings(YESGLOT_PREAMBLE_TEMPLATE_FUNCTION="",
+                       YESGLOT_PREAMBLE_TEMPLATE="Hi in {language}: ")
+    def test_literal(self):
+        self.assertEqual(get_preamble_template(), "Hi in {language}: ")
+
+    @override_settings(YESGLOT_PREAMBLE_TEMPLATE_FUNCTION="",
+                       YESGLOT_PREAMBLE_TEMPLATE="")
+    def test_default(self):
+        self.assertEqual(get_preamble_template(), DEFAULT_PREAMBLE_TEMPLATE)
diff --git a/yesglot/exceptions.py b/yesglot/exceptions.py
new file mode 100644
index 0000000..18b0229
--- /dev/null
+++ b/yesglot/exceptions.py
@@ -0,0 +1,4 @@
+
+
+class YesGlotException(Exception):
+    pass
diff --git a/yesglot/llm.py b/yesglot/llm.py
index e2fba5e..f478c9c 100644
--- a/yesglot/llm.py
+++ b/yesglot/llm.py
@@ -4,18 +4,11 @@
 import tiktoken
 
 from litellm import completion, completion_cost, get_max_tokens
 
-from yesglot.settings import API_KEY, LLM_MODEL, PER_ITEM_OUTPUT, SAFETY_MARGIN, LLM_MODEL_TEMPERATURE
+from yesglot.prompts import get_system_prompt, get_preamble_template
+from yesglot.settings import yesglot_settings
 
 logging.getLogger("LiteLLM").setLevel(logging.ERROR)
 
-SYSTEM_PROMPT = (
-    "You are a 
professional translator. Translate into the target language.\n" - "- Keep placeholders like {name} / {{handlebars}} unchanged.\n" - "- Keep URLs and emails unchanged.\n" - "- Return ONLY a JSON array of strings in the same order." -) -PREAMBLE_TEMPLATE = "Translate these items into {language}. Return ONLY a JSON array:\n" - def get_encoder(model): try: @@ -33,8 +26,8 @@ def make_batches(items, target_language, model): encoder = get_encoder(model) # Precompute stable token costs - system_tokens = count_tokens(SYSTEM_PROMPT, encoder) - user_preamble = PREAMBLE_TEMPLATE.format(language=target_language) + system_tokens = count_tokens(get_system_prompt(), encoder) + user_preamble = get_preamble_template().format(language=target_language) preamble_tokens = count_tokens(user_preamble, encoder) batches = [] @@ -48,13 +41,13 @@ def make_batches(items, target_language, model): items_tokens = count_tokens(items_json, encoder) # Estimated output tokens - est_output_tokens = PER_ITEM_OUTPUT * len(candidate) + est_output_tokens = yesglot_settings.PER_ITEM_OUTPUT * len(candidate) # Total estimated tokens total_tokens = system_tokens + preamble_tokens + items_tokens + est_output_tokens - max_context_tokens = get_max_tokens(LLM_MODEL) - if current and total_tokens >= (max_context_tokens - SAFETY_MARGIN): + max_context_tokens = get_max_tokens(yesglot_settings.LLM_MODEL) + if current and total_tokens >= (max_context_tokens - yesglot_settings.SAFETY_MARGIN): batches.append(current) current = [item] else: @@ -67,15 +60,15 @@ def make_batches(items, target_language, model): def translate_batch(batch, target_language, model): - user_prompt = PREAMBLE_TEMPLATE.format(language=target_language) + json.dumps(batch, ensure_ascii=False) + user_prompt = get_preamble_template().format(language=target_language) + json.dumps(batch, ensure_ascii=False) response = completion( model=model, messages=[ - {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "system", "content": get_system_prompt()}, 
{"role": "user", "content": user_prompt}, ], - temperature=LLM_MODEL_TEMPERATURE, - api_key=API_KEY, + temperature=yesglot_settings.LLM_MODEL_TEMPERATURE, + api_key=yesglot_settings.API_KEY, ) cost = completion_cost(completion_response=response) @@ -83,7 +76,7 @@ def translate_batch(batch, target_language, model): return json.loads(content), cost -def translate_items(items, target_language, model=LLM_MODEL): +def translate_items(items, target_language, model=yesglot_settings.LLM_MODEL): results = [] total_cost = 0 for batch in make_batches(items, target_language, model): diff --git a/yesglot/management/commands/translatemessages.py b/yesglot/management/commands/translatemessages.py index a948519..ad27467 100644 --- a/yesglot/management/commands/translatemessages.py +++ b/yesglot/management/commands/translatemessages.py @@ -3,7 +3,7 @@ from django.core.management.base import BaseCommand from yesglot.llm import translate_items -from yesglot.settings import LLM_MODEL +from yesglot.settings import yesglot_settings from yesglot.translation_files import fill_translations, parse_empty_translations from yesglot.utils import get_language_name, get_project_po_files @@ -14,7 +14,7 @@ class Command(BaseCommand): def handle(self, **options): # noqa start = perf_counter() self.stdout.write(self.style.SUCCESS("▶ Translation run started.")) - self.stdout.write(f"Using translation model: {self.style.NOTICE(LLM_MODEL)}\n") + self.stdout.write(f"Using translation model: {self.style.NOTICE(yesglot_settings.LLM_MODEL)}\n") project_po_files = get_project_po_files() if not project_po_files: diff --git a/yesglot/prompts.py b/yesglot/prompts.py new file mode 100644 index 0000000..2078385 --- /dev/null +++ b/yesglot/prompts.py @@ -0,0 +1,29 @@ +from django.utils.module_loading import import_string + +from yesglot.exceptions import YesGlotException +from yesglot.settings import yesglot_settings +from yesglot.settings import DEFAULT_SYSTEM_PROMPT, DEFAULT_PREAMBLE_TEMPLATE + + +def 
get_system_prompt():
+    if yesglot_settings.SYSTEM_PROMPT_FUNCTION and yesglot_settings.SYSTEM_PROMPT:
+        raise YesGlotException("You can't use system prompt function and system prompt at the same time.")
+    elif yesglot_settings.SYSTEM_PROMPT_FUNCTION:
+        func = import_string(yesglot_settings.SYSTEM_PROMPT_FUNCTION)
+        return func()
+    elif yesglot_settings.SYSTEM_PROMPT:
+        return yesglot_settings.SYSTEM_PROMPT
+    else:
+        return DEFAULT_SYSTEM_PROMPT
+
+
+def get_preamble_template():
+    if yesglot_settings.PREAMBLE_TEMPLATE_FUNCTION and yesglot_settings.PREAMBLE_TEMPLATE:
+        raise YesGlotException("You can't use preamble template function and preamble template at the same time.")
+    elif yesglot_settings.PREAMBLE_TEMPLATE_FUNCTION:
+        func = import_string(yesglot_settings.PREAMBLE_TEMPLATE_FUNCTION)
+        return func()
+    elif yesglot_settings.PREAMBLE_TEMPLATE:
+        return yesglot_settings.PREAMBLE_TEMPLATE
+    else:
+        return DEFAULT_PREAMBLE_TEMPLATE
\ No newline at end of file
diff --git a/yesglot/settings.py b/yesglot/settings.py
index 26b1075..3d2f05b 100644
--- a/yesglot/settings.py
+++ b/yesglot/settings.py
@@ -1,15 +1,59 @@
 from django.conf import settings as django_settings
 
-LLM_MODEL = django_settings.YESGLOT_LLM_MODEL
-API_KEY = django_settings.YESGLOT_API_KEY
-# cushion to avoid hitting the hard limit
-SAFETY_MARGIN = getattr(django_settings, "YESGLOT_SAFETY_MARGIN", 1000)
+# Default values
+DEFAULT_SYSTEM_PROMPT = (
+    "You are a professional translator. Translate into the target language.\n"
+    "- Keep placeholders like {name} / {{handlebars}} unchanged.\n"
+    "- Keep URLs and emails unchanged.\n"
+    "- Return ONLY a JSON array of strings in the same order."
+)
+DEFAULT_PREAMBLE_TEMPLATE = "Translate these items into {language}. 
Return ONLY a JSON array:\n" -# rough estimate of tokens per translated item -PER_ITEM_OUTPUT = getattr(django_settings, "YESGLOT_PER_ITEM_OUTPUT", 100) -# Lower temperature makes outputs more predictable and factual, while higher temperature -# increases randomness for more diverse and creative results. -# https://www.youtube.com/shorts/XsLK3tPy9SI -LLM_MODEL_TEMPERATURE = getattr(django_settings, "YESGLOT_LLM_MODEL_TEMPERATURE", 0) +class YesglotSettings: + """Lazy loading of settings so override_settings works in tests""" + + @property + def LLM_MODEL(self): + return django_settings.YESGLOT_LLM_MODEL + + @property + def API_KEY(self): + return django_settings.YESGLOT_API_KEY + + @property + def LLM_MODEL_TEMPERATURE(self): + # Lower temperature makes outputs more predictable and factual, while higher temperature + # increases randomness for more diverse and creative results. + # https://www.youtube.com/shorts/XsLK3tPy9SI + return getattr(django_settings, "YESGLOT_LLM_MODEL_TEMPERATURE", 0) + + @property + def SYSTEM_PROMPT_FUNCTION(self): + return getattr(django_settings, 'YESGLOT_SYSTEM_PROMPT_FUNCTION', None) + + @property + def SYSTEM_PROMPT(self): + return getattr(django_settings, 'YESGLOT_SYSTEM_PROMPT', None) + + @property + def PREAMBLE_TEMPLATE_FUNCTION(self): + return getattr(django_settings, 'YESGLOT_PREAMBLE_TEMPLATE_FUNCTION', None) + + @property + def PREAMBLE_TEMPLATE(self): + return getattr(django_settings, 'YESGLOT_PREAMBLE_TEMPLATE', None) + + @property + def SAFETY_MARGIN(self): + # cushion to avoid hitting the hard limit + return getattr(django_settings, "YESGLOT_SAFETY_MARGIN", 1000) + + @property + def PER_ITEM_OUTPUT(self): + # rough estimate of tokens per translated item + return getattr(django_settings, "YESGLOT_PER_ITEM_OUTPUT", 100) + + +yesglot_settings = YesglotSettings()