Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,33 @@ Optional parameters,
- `YESGLOT_PER_ITEM_OUTPUT`: 100 (default)
- `YESGLOT_LLM_MODEL_TEMPERATURE`: 0 (default)

### System Prompt

It is preconfigured, though you may override it to tailor the behavior of your translation.

- `SYSTEM_PROMPT_FUNCTION`: for example, `"myproject.myapp.utils.get_system_prompt"`
- `SYSTEM_PROMPT`: string

Default:

> You are a professional translator. Translate into the target language.
> - Keep placeholders like {name} / {{handlebars}} unchanged.
> - Keep URLs and emails unchanged.
> - Return ONLY a JSON array of strings in the same order.


### Preamble Template

It is preconfigured, though you may override it to tailor the behavior of your translation.

- `PREAMBLE_TEMPLATE_FUNCTION`: for example, `"myproject.myapp.utils.get_preamble"`
- `PREAMBLE_TEMPLATE`: string

Default:

> Translate these items into {language}. Return ONLY a JSON array:


## Usage

A typical workflow with Django translations:
Expand Down
50 changes: 50 additions & 0 deletions tests/test_prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from unittest import mock
from django.test import SimpleTestCase
from django.test.utils import override_settings

from yesglot.exceptions import YesGlotException
from yesglot.llm import get_system_prompt, get_preamble_template
from yesglot.settings import DEFAULT_SYSTEM_PROMPT, DEFAULT_PREAMBLE_TEMPLATE


def dummy_function():
    """Module-level callable used as an import-string target in prompt tests."""
    value = "dummy"
    return value


class SystemPromptTests(SimpleTestCase):
    """Exercise get_system_prompt() resolution: function path, literal, default."""

    def test_conflict_raises(self):
        # Configuring both a dotted function path and a literal prompt is
        # ambiguous, so the library must refuse to pick one.
        with override_settings(
            YESGLOT_SYSTEM_PROMPT_FUNCTION="tests.test_prompts.dummy_function",
            YESGLOT_SYSTEM_PROMPT="literal",
        ):
            with self.assertRaises(YesGlotException):
                get_system_prompt()

    def test_from_function(self):
        # The configured callable's return value is used verbatim.
        with override_settings(
            YESGLOT_SYSTEM_PROMPT_FUNCTION="tests.test_prompts.dummy_function",
        ):
            self.assertEqual(get_system_prompt(), "dummy")

    def test_literal(self):
        # A literal setting wins when no function path is configured.
        with override_settings(YESGLOT_SYSTEM_PROMPT="literal-sp"):
            self.assertEqual(get_system_prompt(), "literal-sp")

    def test_default(self):
        # With nothing configured, the packaged default prompt is returned.
        self.assertEqual(get_system_prompt(), DEFAULT_SYSTEM_PROMPT)


class PreambleTemplateTests(SimpleTestCase):
    """Exercise get_preamble_template() resolution: function path, literal, default.

    NOTE: earlier revisions passed ``DEFAULT_PREAMBLE_TEMPLATE=...`` to
    ``override_settings``; that only sets a Django setting nothing reads —
    the code imports the module-level constant directly — so those
    ineffective overrides have been removed.
    """

    @override_settings(YESGLOT_PREAMBLE_TEMPLATE_FUNCTION="tests.test_prompts.dummy_function",
                       YESGLOT_PREAMBLE_TEMPLATE="literal")
    def test_conflict_raises(self):
        # Both sources configured at once is ambiguous and must raise.
        with self.assertRaises(YesGlotException):
            get_preamble_template()

    @override_settings(YESGLOT_PREAMBLE_TEMPLATE_FUNCTION="tests.test_prompts.dummy_function",
                       YESGLOT_PREAMBLE_TEMPLATE="")
    def test_from_function(self):
        # An empty-string literal is falsy, so the function path is used.
        self.assertEqual(get_preamble_template(), "dummy")

    @override_settings(YESGLOT_PREAMBLE_TEMPLATE_FUNCTION="",
                       YESGLOT_PREAMBLE_TEMPLATE="Hi in {language}: ")
    def test_literal(self):
        self.assertEqual(get_preamble_template(), "Hi in {language}: ")

    @override_settings(YESGLOT_PREAMBLE_TEMPLATE_FUNCTION="", YESGLOT_PREAMBLE_TEMPLATE="")
    def test_default(self):
        # With neither source configured, the packaged default is used.
        self.assertEqual(get_preamble_template(), DEFAULT_PREAMBLE_TEMPLATE)
4 changes: 4 additions & 0 deletions yesglot/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@


class YesGlotException(Exception):
    """Base exception for yesglot-specific errors.

    Raised, for example, when conflicting prompt settings (a function path
    and a literal) are configured at the same time.
    """
    pass
31 changes: 12 additions & 19 deletions yesglot/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,11 @@
import tiktoken
from litellm import completion, completion_cost, get_max_tokens

from yesglot.settings import API_KEY, LLM_MODEL, PER_ITEM_OUTPUT, SAFETY_MARGIN, LLM_MODEL_TEMPERATURE
from yesglot.prompts import get_system_prompt, get_preamble_template
from yesglot.settings import yesglot_settings

logging.getLogger("LiteLLM").setLevel(logging.ERROR)

SYSTEM_PROMPT = (
"You are a professional translator. Translate into the target language.\n"
"- Keep placeholders like {name} / {{handlebars}} unchanged.\n"
"- Keep URLs and emails unchanged.\n"
"- Return ONLY a JSON array of strings in the same order."
)
PREAMBLE_TEMPLATE = "Translate these items into {language}. Return ONLY a JSON array:\n"


def get_encoder(model):
try:
Expand All @@ -33,8 +26,8 @@ def make_batches(items, target_language, model):
encoder = get_encoder(model)

# Precompute stable token costs
system_tokens = count_tokens(SYSTEM_PROMPT, encoder)
user_preamble = PREAMBLE_TEMPLATE.format(language=target_language)
system_tokens = count_tokens(get_system_prompt(), encoder)
user_preamble = get_preamble_template().format(language=target_language)
preamble_tokens = count_tokens(user_preamble, encoder)

batches = []
Expand All @@ -48,13 +41,13 @@ def make_batches(items, target_language, model):
items_tokens = count_tokens(items_json, encoder)

# Estimated output tokens
est_output_tokens = PER_ITEM_OUTPUT * len(candidate)
est_output_tokens = yesglot_settings.PER_ITEM_OUTPUT * len(candidate)

# Total estimated tokens
total_tokens = system_tokens + preamble_tokens + items_tokens + est_output_tokens

max_context_tokens = get_max_tokens(LLM_MODEL)
if current and total_tokens >= (max_context_tokens - SAFETY_MARGIN):
max_context_tokens = get_max_tokens(yesglot_settings.LLM_MODEL)
if current and total_tokens >= (max_context_tokens - yesglot_settings.SAFETY_MARGIN):
batches.append(current)
current = [item]
else:
Expand All @@ -67,23 +60,23 @@ def make_batches(items, target_language, model):


def translate_batch(batch, target_language, model):
user_prompt = PREAMBLE_TEMPLATE.format(language=target_language) + json.dumps(batch, ensure_ascii=False)
user_prompt = get_preamble_template().format(language=target_language) + json.dumps(batch, ensure_ascii=False)
response = completion(
model=model,
messages=[
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "system", "content": get_system_prompt()},
{"role": "user", "content": user_prompt},
],
temperature=LLM_MODEL_TEMPERATURE,
api_key=API_KEY,
temperature=yesglot_settings.LLM_MODEL_TEMPERATURE,
api_key=yesglot_settings.API_KEY,
)
cost = completion_cost(completion_response=response)

content = response["choices"][0]["message"]["content"].strip()
return json.loads(content), cost


def translate_items(items, target_language, model=LLM_MODEL):
def translate_items(items, target_language, model=yesglot_settings.LLM_MODEL):
results = []
total_cost = 0
for batch in make_batches(items, target_language, model):
Expand Down
4 changes: 2 additions & 2 deletions yesglot/management/commands/translatemessages.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from django.core.management.base import BaseCommand

from yesglot.llm import translate_items
from yesglot.settings import LLM_MODEL
from yesglot.settings import yesglot_settings
from yesglot.translation_files import fill_translations, parse_empty_translations
from yesglot.utils import get_language_name, get_project_po_files

Expand All @@ -14,7 +14,7 @@ class Command(BaseCommand):
def handle(self, **options): # noqa
start = perf_counter()
self.stdout.write(self.style.SUCCESS("▶ Translation run started."))
self.stdout.write(f"Using translation model: {self.style.NOTICE(LLM_MODEL)}\n")
self.stdout.write(f"Using translation model: {self.style.NOTICE(yesglot_settings.LLM_MODEL)}\n")

project_po_files = get_project_po_files()
if not project_po_files:
Expand Down
29 changes: 29 additions & 0 deletions yesglot/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from django.utils.module_loading import import_string

from yesglot.exceptions import YesGlotException
from yesglot.settings import yesglot_settings
from yesglot.settings import DEFAULT_SYSTEM_PROMPT, DEFAULT_PREAMBLE_TEMPLATE


def get_system_prompt():
    """Resolve the system prompt.

    Precedence: configured dotted-path function, then literal setting,
    then the packaged default.

    Raises:
        YesGlotException: if both a function path and a literal prompt are
            configured — the two sources would conflict.
    """
    func_path = yesglot_settings.SYSTEM_PROMPT_FUNCTION
    literal = yesglot_settings.SYSTEM_PROMPT
    if func_path and literal:
        raise YesGlotException("You can't use system prompt function and system prompt at the same time.")
    if func_path:
        return import_string(func_path)()
    if literal:
        return literal
    return DEFAULT_SYSTEM_PROMPT


def get_preamble_template():
    """Resolve the user-message preamble template.

    Precedence: configured dotted-path function, then literal setting,
    then the packaged default. The result is expected to contain a
    ``{language}`` placeholder (callers format it with the target language).

    Raises:
        YesGlotException: if both a function path and a literal template are
            configured — the two sources would conflict.
    """
    if yesglot_settings.PREAMBLE_TEMPLATE_FUNCTION and yesglot_settings.PREAMBLE_TEMPLATE:
        # Fixed: message previously said "system prompt", copy-pasted from
        # get_system_prompt(); it must name the preamble-template settings.
        raise YesGlotException("You can't use preamble template function and preamble template at the same time.")
    elif yesglot_settings.PREAMBLE_TEMPLATE_FUNCTION:
        func = import_string(yesglot_settings.PREAMBLE_TEMPLATE_FUNCTION)
        return func()
    elif yesglot_settings.PREAMBLE_TEMPLATE:
        return yesglot_settings.PREAMBLE_TEMPLATE
    else:
        return DEFAULT_PREAMBLE_TEMPLATE
64 changes: 54 additions & 10 deletions yesglot/settings.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,59 @@
from django.conf import settings as django_settings

LLM_MODEL = django_settings.YESGLOT_LLM_MODEL
API_KEY = django_settings.YESGLOT_API_KEY

# cushion to avoid hitting the hard limit
SAFETY_MARGIN = getattr(django_settings, "YESGLOT_SAFETY_MARGIN", 1000)
# Default values
DEFAULT_SYSTEM_PROMPT = (
"You are a professional translator. Translate into the target language.\n"
"- Keep placeholders like {name} / {{handlebars}} unchanged.\n"
"- Keep URLs and emails unchanged.\n"
"- Return ONLY a JSON array of strings in the same order."
)
DEFAULT_PREAMBLE_TEMPLATE = "Translate these items into {language}. Return ONLY a JSON array:\n"

# rough estimate of tokens per translated item
PER_ITEM_OUTPUT = getattr(django_settings, "YESGLOT_PER_ITEM_OUTPUT", 100)

# Lower temperature makes outputs more predictable and factual, while higher temperature
# increases randomness for more diverse and creative results.
# https://www.youtube.com/shorts/XsLK3tPy9SI
LLM_MODEL_TEMPERATURE = getattr(django_settings, "YESGLOT_LLM_MODEL_TEMPERATURE", 0)
class YesglotSettings:
    """Lazy accessors for YESGLOT_* Django settings.

    Each value is read from ``django.conf.settings`` on attribute access
    rather than at import time, so ``override_settings`` works in tests.
    """

    @staticmethod
    def _optional(name, default=None):
        # Read an optional setting at access time, falling back to default.
        return getattr(django_settings, name, default)

    @property
    def LLM_MODEL(self):
        # Required setting — no fallback.
        return django_settings.YESGLOT_LLM_MODEL

    @property
    def API_KEY(self):
        # Required setting — no fallback.
        return django_settings.YESGLOT_API_KEY

    @property
    def LLM_MODEL_TEMPERATURE(self):
        # Lower temperature makes outputs more predictable and factual, while
        # higher temperature increases randomness for more diverse results.
        # https://www.youtube.com/shorts/XsLK3tPy9SI
        return self._optional("YESGLOT_LLM_MODEL_TEMPERATURE", 0)

    @property
    def SYSTEM_PROMPT_FUNCTION(self):
        return self._optional("YESGLOT_SYSTEM_PROMPT_FUNCTION")

    @property
    def SYSTEM_PROMPT(self):
        return self._optional("YESGLOT_SYSTEM_PROMPT")

    @property
    def PREAMBLE_TEMPLATE_FUNCTION(self):
        return self._optional("YESGLOT_PREAMBLE_TEMPLATE_FUNCTION")

    @property
    def PREAMBLE_TEMPLATE(self):
        return self._optional("YESGLOT_PREAMBLE_TEMPLATE")

    @property
    def SAFETY_MARGIN(self):
        # Cushion to avoid hitting the model's hard context limit.
        return self._optional("YESGLOT_SAFETY_MARGIN", 1000)

    @property
    def PER_ITEM_OUTPUT(self):
        # Rough estimate of tokens per translated item.
        return self._optional("YESGLOT_PER_ITEM_OUTPUT", 100)


yesglot_settings = YesglotSettings()