# Using implementation from 2025-02-23

The experimentation from 2025-02-22 (kept further down in this notebook) gave us enough experience with the providers' SDKs to implement the translator tool in the `elelem` library.

Here's a brief summary of how to use it.

In [28]:
# Spanish tongue twister used as the sample input for every provider below.
# The fragments are joined with single spaces into one sentence.
txt = " ".join(
    [
        "El rey de parangaricutirimicuaro se quiere",
        "desparangaricutirimicuar, quien logre",
        "desparangaricutirimicuarlo un gran",
        "desparangaricutirimicuador será",
    ]
)

In [29]:
from elelem.google.translator import GoogleTranslator
from elelem.anthropic.translator import AnthropicTranslator
from elelem.openai.translator import OpenAITranslator
from elelem.mistral.translator import MistralTranslator
from elelem.ollama.translator import OllamaTranslator
from elelem.types import ElelemError, ElelemAPIResponseError

In [30]:
# One translator per provider, each constructed without arguments.
# These are the classes imported from the `elelem` package above.
googleT = GoogleTranslator()
anthropicT = AnthropicTranslator()
openaiT = OpenAITranslator()
mistralT = MistralTranslator()
ollamaT = OllamaTranslator()

In [31]:
# Detect the language of `txt` with the Google translator, then pretty-print
# the returned result model followed by the request metadata model.
print("GOOGLE DETECTING LANGUAGE...")
res, metadata = googleT.detect(txt)
for heading, payload in (("RESPONSE:", res), ("\nMETADATA:", metadata)):
    print(heading)
    print(payload.model_dump_json(indent=2))

GOOGLE DETECTING LANGUAGE...
RESPONSE:
{
  "src_lang": "Spanish",
  "src_text": "El rey de parangaricutirimicuaro se quiere desparangaricutirimicuar, quien logre desparangaricutirimicuarlo un gran desparangaricutirimicuador será"
}

METADATA:
{
  "provider": "google",
  "model": "gemini-2.0-flash-lite-preview-02-05",
  "operation": "models/generate_content",
  "duration": 0.8520453480005017,
  "input_tokens": 128,
  "output_tokens": 13,
  "timestamp": "2025-03-01T00:51:57.721688Z"
}


In [32]:
# Translate `txt` into English with the Google translator, then pretty-print
# the returned result model followed by the request metadata model.
print("GOOGLE TRANSLATING LANGUAGE...")
res, metadata = googleT.translate(txt, "English")
for heading, payload in (("RESPONSE:", res), ("\nMETADATA:", metadata)):
    print(heading)
    print(payload.model_dump_json(indent=2))

GOOGLE TRANSLATING LANGUAGE...
RESPONSE:
{
  "src_lang": "Spanish",
  "src_text": "El rey de parangaricutirimicuaro se quiere desparangaricutirimicuar, quien logre desparangaricutirimicuarlo un gran desparangaricutirimicuador será",
  "tgt_lang": "English",
  "tgt_text": "The king of Parangaricutirimicuaro wants to unparangaricutirimicuar himself; whoever manages to unparangaricutirimicuar him will be a great unparangaricutirimicuator."
}

METADATA:
{
  "provider": "google",
  "model": "gemini-2.0-flash-lite-preview-02-05",
  "operation": "models/generate_content",
  "duration": 1.12276910699984,
  "input_tokens": 154,
  "output_tokens": 70,
  "timestamp": "2025-03-01T00:53:38.765087Z"
}


In [33]:
# Detect the language of `txt` with the Anthropic translator, then pretty-print
# the returned result model followed by the request metadata model.
print("ANTHROPIC DETECTING LANGUAGE...")
res, metadata = anthropicT.detect(txt)
for heading, payload in (("RESPONSE:", res), ("\nMETADATA:", metadata)):
    print(heading)
    print(payload.model_dump_json(indent=2))

ANTHROPIC DETECTING LANGUAGE...
RESPONSE:
{
  "src_lang": "es",
  "src_text": "El rey de parangaricutirimicuaro se quiere desparangaricutirimicuar, quien logre desparangaricutirimicuarlo un gran desparangaricutirimicuador será"
}

METADATA:
{
  "provider": "anthropic",
  "model": "claude-3-5-haiku-latest",
  "operation": "messages/create",
  "duration": 1.5549830390000352,
  "input_tokens": 749,
  "output_tokens": 35,
  "timestamp": "2025-03-01T00:54:05.559974Z"
}


In [34]:
# Translate `txt` into English with the Anthropic translator, then pretty-print
# the returned result model followed by the request metadata model.
print("ANTHROPIC TRANSLATING LANGUAGE...")
res, metadata = anthropicT.translate(txt, "English")
for heading, payload in (("RESPONSE:", res), ("\nMETADATA:", metadata)):
    print(heading)
    print(payload.model_dump_json(indent=2))

ANTHROPIC TRANSLATING LANGUAGE...
RESPONSE:
{
  "src_lang": "es",
  "src_text": "El rey de parangaricutirimicuaro se quiere desparangaricutirimicuar, quien logre desparangaricutirimicuarlo un gran desparangaricutirimicuador será",
  "tgt_lang": "English",
  "tgt_text": "The king of Parangaricutirimicuaro wants to un-Parangaricutirimicuate himself, whoever manages to un-Parangaricutirimicuate him will be a great un-Parangaricutirimicuator"
}

METADATA:
{
  "provider": "anthropic",
  "model": "claude-3-5-haiku-latest",
  "operation": "messages/create",
  "duration": 2.5343684600002234,
  "input_tokens": 760,
  "output_tokens": 115,
  "timestamp": "2025-03-01T00:54:18.013313Z"
}


In [35]:
# Detect the language of `txt` with the OpenAI translator, then pretty-print
# the returned result model followed by the request metadata model.
print("OPENAI DETECTING LANGUAGE...")
res, metadata = openaiT.detect(txt)
for heading, payload in (("RESPONSE:", res), ("\nMETADATA:", metadata)):
    print(heading)
    print(payload.model_dump_json(indent=2))

OPENAI DETECTING LANGUAGE...
RESPONSE:
{
  "src_lang": "Spanish",
  "src_text": "El rey de parangaricutirimicuaro se quiere desparangaricutirimicuar, quien logre desparangaricutirimicuarlo un gran desparangaricutirimicuador será"
}

METADATA:
{
  "provider": "openai",
  "model": "gpt-4o-mini",
  "operation": "beta/chat/completions/parse",
  "duration": 0.9883059069998126,
  "input_tokens": 173,
  "output_tokens": 7,
  "timestamp": "2025-03-01T00:54:26.430353Z"
}


In [36]:
# Translate `txt` into English with the OpenAI translator, then pretty-print
# the returned result model followed by the request metadata model.
print("OPENAI TRANSLATING LANGUAGE...")
res, metadata = openaiT.translate(txt, "English")
for heading, payload in (("RESPONSE:", res), ("\nMETADATA:", metadata)):
    print(heading)
    print(payload.model_dump_json(indent=2))

OPENAI TRANSLATING LANGUAGE...
RESPONSE:
{
  "src_lang": "Spanish",
  "src_text": "El rey de parangaricutirimicuaro se quiere desparangaricutirimicuar, quien logre desparangaricutirimicuarlo un gran desparangaricutirimicuador será",
  "tgt_lang": "English",
  "tgt_text": "The king of Parangaricutirimícuaro wants to un-parangaricutirimícuaro himself; whoever manages to un-parangaricutirimícuaro him will be a great un-parangaricutirimícuador."
}

METADATA:
{
  "provider": "openai",
  "model": "gpt-4o-mini",
  "operation": "beta/chat/completions/parse",
  "duration": 3.1394193249998352,
  "input_tokens": 220,
  "output_tokens": 59,
  "timestamp": "2025-03-01T00:54:29.573563Z"
}


In [37]:
# Detect the language of `txt` with the Mistral translator, then pretty-print
# the returned result model followed by the request metadata model.
print("MISTRAL DETECTING LANGUAGE...")
res, metadata = mistralT.detect(txt)
for heading, payload in (("RESPONSE:", res), ("\nMETADATA:", metadata)):
    print(heading)
    print(payload.model_dump_json(indent=2))

MISTRAL DETECTING LANGUAGE...
RESPONSE:
{
  "src_lang": "Spanish",
  "src_text": "El rey de parangaricutirimicuaro se quiere desparangaricutirimicuar, quien logre desparangaricutirimicuarlo un gran desparangaricutirimicuador será"
}

METADATA:
{
  "provider": "mistral",
  "model": "mistral-small-latest",
  "operation": "chat/parse",
  "duration": 1.2410207759994591,
  "input_tokens": 120,
  "output_tokens": 7,
  "timestamp": "2025-03-01T00:54:43.530921Z"
}


In [38]:
# Translate `txt` into English with the Mistral translator, then pretty-print
# the returned result model followed by the request metadata model.
print("MISTRAL TRANSLATING LANGUAGE...")
res, metadata = mistralT.translate(txt, "English")
for heading, payload in (("RESPONSE:", res), ("\nMETADATA:", metadata)):
    print(heading)
    print(payload.model_dump_json(indent=2))

MISTRAL TRANSLATING LANGUAGE...
RESPONSE:
{
  "src_lang": "Spanish",
  "src_text": "El rey de parangaricutirimicuaro se quiere desparangaricutirimicuar, quien logre desparangaricutirimicuarlo un gran desparangaricutirimicuador será",
  "tgt_lang": "English",
  "tgt_text": "The king of Parangaricutirimicuaro wants to be unparangaricutirimicuaro, whoever unparangaricutirimicuaro him will be a great unparangaricutirimicuador."
}

METADATA:
{
  "provider": "mistral",
  "model": "mistral-small-latest",
  "operation": "chat/parse",
  "duration": 1.605148411999835,
  "input_tokens": 127,
  "output_tokens": 62,
  "timestamp": "2025-03-01T00:54:45.139684Z"
}


In [39]:
# Same detection demo as for the other providers, but wrapped so the library's
# error types are shown in use. Handlers are tried top to bottom, so the
# API-response error is listed before the general library error, and the
# catch-all comes last.
try:
    print("OLLAMA DETECTING LANGUAGE...")
    res, metadata = ollamaT.detect(txt)
    for heading, payload in (("RESPONSE:", res), ("\nMETADATA:", metadata)):
        print(heading)
        print(payload.model_dump_json(indent=2))
except ElelemAPIResponseError as err:
    # This error type additionally exposes the provider response.
    print("message:", err)
    print("exception:", err.exception)
    print("Response:", err.response)
except ElelemError as err:
    print("message:", err)
    print("exception:", err.exception)
except Exception as err:
    print(err)

OLLAMA DETECTING LANGUAGE...
RESPONSE:
{
  "src_lang": "Spanish",
  "src_text": "El rey de parangaricutirimicuaro se quiere desparangaricutirimicuar, quien logre desparangaricutirimicuarlo un gran desparangaricutirimicuador será"
}

METADATA:
{
  "provider": "ollama",
  "model": "gemma2:9b",
  "operation": "generate",
  "duration": 4.65059200900032,
  "input_tokens": 126,
  "output_tokens": 14,
  "timestamp": "2025-03-01T00:55:29.548534Z"
}


In [40]:
# Translate `txt` into English with the Ollama translator, then pretty-print
# the returned result model followed by the request metadata model.
print("OLLAMA TRANSLATING LANGUAGE...")
res, metadata = ollamaT.translate(txt, "English")
for heading, payload in (("RESPONSE:", res), ("\nMETADATA:", metadata)):
    print(heading)
    print(payload.model_dump_json(indent=2))

OLLAMA TRANSLATING LANGUAGE...
RESPONSE:
{
  "src_lang": "Spanish",
  "src_text": "El rey de parangaricutirimicuaro se quiere desparangaricutirimicuar, quien logre desparangaricutirimicuarlo un gran desparangaricutirimicuador será",
  "tgt_lang": "English",
  "tgt_text": "The king of Parangaricutirimícuaro wants to be deposed. Whoever deposes him will be a great deposer."
}

METADATA:
{
  "provider": "ollama",
  "model": "gemma2:9b",
  "operation": "generate",
  "duration": 5.246626543000275,
  "input_tokens": 134,
  "output_tokens": 46,
  "timestamp": "2025-03-01T00:55:34.798656Z"
}


# Experimentation 2025-02-22

## Dependencies

In [1]:
import elelem.clients as clients

In [2]:
from pydantic import BaseModel, Field

In [3]:
from typing import Optional, Any, Literal, Tuple

In [4]:
from abc import ABC, abstractmethod

In [5]:
import json

In [6]:
import time

In [7]:
from string import Template

## Basic types

In [8]:
class RequestMetadata(BaseModel):
    # Bookkeeping returned next to every translator result.
    # Documented with comments rather than a docstring so the model's
    # generated JSON schema stays exactly as it was.

    # Backend that served the request; restricted to the five known providers.
    provider: Literal[
        "google",
        "anthropic",
        "openai",
        "mistral",
        "ollama",
    ]
    # Model identifier the translator was configured with.
    model: str
    # Elapsed time of the provider call; the translators in this notebook
    # fill it from a time.perf_counter() difference, i.e. seconds.
    duration: float
    # Token usage as reported by the provider's response.
    input_tokens: int
    output_tokens: int

In [9]:
class LLMDetectLanguageResult(BaseModel):
    # Structured-output schema the model is asked to fill for language detection.
    # Deliberately no docstring: pydantic would copy it into the JSON schema
    # "description", and that schema is what gets sent to the providers.
    src_lang: str = Field(
        description="Language of the source text",
    )
    
class DetectLanguageResult(BaseModel):
    # Detection result handed back to callers: the model's answer plus the
    # text it was asked about.
    src_lang: str = Field(
        description="Language of the source text",
    )
    src_text: str = Field(
        description="Source text",
    )

In [10]:
class LLMTranslateResult(BaseModel):
    # Structured-output schema the model is asked to fill for translation.
    # Deliberately no docstring: pydantic would copy it into the JSON schema
    # "description", and that schema is what gets sent to the providers.
    src_lang: str = Field(
        description="Language of the source text",
    )
    # Required but nullable: there is no default, so the key must be present,
    # yet its value may be null.
    tgt_text: Optional[str] = Field(
        description="Translated text, or null if already in the desired target language",
    )

class TranslateResult(BaseModel):
    # Translation result handed back to callers: the model's answer combined
    # with the inputs of the request (source text and requested language).
    src_lang: str = Field(
        description="Language of the source text",
    )
    src_text: str = Field(
        description="Source text",
    )
    tgt_lang: str = Field(
        description="Language of the target text",
    )
    # Required but nullable (no default is declared).
    tgt_text: Optional[str] = Field(
        description="Target text",
    )

In [11]:
class Translator(ABC):
    """Interface shared by every provider-specific translator in this notebook."""

    @abstractmethod
    def detect_language(self, src_text: str) -> Tuple[DetectLanguageResult, RequestMetadata]:
        """Identify the language ``src_text`` is written in.

        Returns:
            A ``(DetectLanguageResult, RequestMetadata)`` pair.
        """

    @abstractmethod
    def translate(self, src_text: str, tgt_lang: str) -> Tuple[TranslateResult, RequestMetadata]:
        """Translate ``src_text`` into ``tgt_lang``.

        Returns:
            A ``(TranslateResult, RequestMetadata)`` pair.
        """

## Templates & prompts

In [12]:
# System prompt for translation requests, assembled one sentence per fragment.
# The double space after "target language." and the trailing space are part of
# the prompt text as it has always been sent; they are kept verbatim.
translate_system = "".join(
    (
        "You are an expert translator fluent in many languages. ",
        "You will be provided with a source text and a target language.  ",
        "Your task is to accurately translate the source text into the target "
        "language, while preserving the original meaning and style as closely "
        "as possible. ",
        "You will also identify the source language of the text. ",
    )
)

In [13]:
# User prompt for translation requests.
# Placeholders: ${tgt_lang} is the requested language, ${src_text} the text to
# translate (placed after a blank line).
translate_prompt = Template(
    "Please translate the following text into ${tgt_lang} "
    "and tell me the language of the original text:"
    "\n\n"
    "${src_text}"
)

In [14]:
# System prompt for language-detection requests, one sentence per fragment.
# Every fragment but the last ends with a space: the fragments are joined by
# implicit string concatenation, so a missing separator would glue two
# sentences together ("...given text.Use full language names...").
detect_system = (
    "You are an expert linguist specializing in language identification. "
    "You will be provided with a source text. "
    "Your task is to accurately determine the source language of the given text. "
    "Use full language names like 'English', 'Spanish', 'Kazakh', 'Greek', etc."
)

In [15]:
# User prompt for language-detection requests.
# Placeholder: ${src_text} is the text to identify (placed after a blank line).
detect_prompt = Template(
    "Determine the source language of the following text:"
    "\n\n"
    "${src_text}"
)

## Test suite

In [16]:
def test_translator(T):
    total_duration = 0
    print('*' * 80)
    print('T.detect_language("Hello, world")')
    res = T.detect_language("Hello, world")
    print(res[0])
    print(res[1])
    total_duration += res[1].duration
    
    print('*' * 80)
    print('T.translate("Hello, world", "Español")')
    res = T.translate("Hello, world", "Español")
    print(res[0])
    print(res[1])
    total_duration += res[1].duration

    print('*' * 80)
    print('T.translate("Hello mate oi!", "Español")')
    res = T.translate("Hello mate oi!", "Español")
    print(res[0])
    print(res[1])
    total_duration += res[1].duration

    print('*' * 80)
    print('T.translate("Hello mate oi!", "Español de España")')
    res = T.translate("Hello mate oi!", "Español de España")
    print(res[0])
    print(res[1])
    total_duration += res[1].duration

    print('*' * 80)
    print('T.translate("Hello mate oi!", "Español de México")')
    res = T.translate("Hello mate oi!", "Español de México")
    print(res[0])
    print(res[1])
    total_duration += res[1].duration

    print('*' * 80)
    print('T.translate("Hello mate oi!", "Español del norte de México, bien norteño patrón!")')
    res = T.translate("Hello mate oi!", "Español del norte de México, bien norteño patrón!")
    print(res[0])
    print(res[1])
    total_duration += res[1].duration

    print('*' * 80)
    print('T.detect_language("Сәлем, әлем!")')
    res = T.detect_language("Сәлем, әлем!")
    print(res[0])
    print(res[1])
    total_duration += res[1].duration

    print('*' * 80)
    print('T.translate("Сәлем, әлем!", "Español")')
    res = T.translate("Сәлем, әлем!", "Español")
    print(res[0])
    print(res[1])
    total_duration += res[1].duration

    print('*' * 80)
    print('Al lector traducido al español')
    res = T.translate(
        """La sottise, l'erreur, le péché, la lésine,
    Occupent nos esprits et travaillent nos corps,
    Et nous alimentons nos aimables remords,
    Comme les mendiants nourrissent leur vermine.
    
    Nos péchés sont têtus, nos repentirs sont lâches;
    Nous nous faisons payer grassement nos aveux,
    Et nous rentrons gaiement dans le chemin bourbeux,
    Croyant par de vils pleurs laver toutes nos taches.
    
    Sur l'oreiller du mal c'est Satan Trismégiste
    Qui berce longuement notre esprit enchanté,
    Et le riche métal de notre volonté
    Est tout vaporisé par ce savant chimiste.
    
    C'est le Diable qui tient les fils qui nous remuent!
    Aux objets répugnants nous trouvons des appas;
    Chaque jour vers l'Enfer nous descendons d'un pas,
    Sans horreur, à travers des ténèbres qui puent.""",
        "Español",
    )
    print(res[0].tgt_text)
    total_duration += res[1].duration

    print("=" * 80)
    print("TOTAL DURATION:", total_duration)

## Using Google

In [18]:
class GoogleTranslator(Translator):
    """Translator that calls Gemini through ``clients.google``.

    Both operations request JSON output constrained to a pydantic schema and
    read the parsed object from the response's ``parsed`` attribute.
    """

    def __init__(self):
        self.model = "gemini-2.0-flash-lite-preview-02-05"

    def _make_config(self, system, schema):
        """Build the generation config: system prompt, temperature 0, JSON output shaped by ``schema``."""
        settings = {
            "system_instruction": system,
            "temperature": 0,
            "response_mime_type": "application/json",
            "response_schema": schema,
        }
        return clients.google_api.types.GenerateContentConfig(**settings)

    def _make_metadata(self, res, elapsed):
        """Wrap timing and the response's token counts into a ``RequestMetadata``."""
        return RequestMetadata(
            provider="google",
            model=self.model,
            duration=elapsed,
            input_tokens=res.usage_metadata.prompt_token_count,
            output_tokens=res.usage_metadata.candidates_token_count,
        )

    def _prompt(self, config, prompt):
        """Send ``prompt`` to the configured model and return the raw response."""
        return clients.google.models.generate_content(
            model=self.model,
            config=config,
            contents=prompt,
        )

    def _timed_prompt(self, config, prompt):
        """Call ``_prompt`` and return ``(response, elapsed_seconds)``; only the call itself is timed."""
        started = time.perf_counter()
        reply = self._prompt(config, prompt)
        finished = time.perf_counter()
        return reply, finished - started

    def detect_language(self, src_text: str) -> Tuple[DetectLanguageResult, RequestMetadata]:
        """Ask the model for the language of ``src_text``."""
        question = detect_prompt.substitute(src_text=src_text)
        settings = self._make_config(detect_system, LLMDetectLanguageResult)
        reply, elapsed = self._timed_prompt(settings, question)
        # NOTE(review): reply.parsed is used without a None check.
        detection = DetectLanguageResult(
            src_lang=reply.parsed.src_lang,
            src_text=src_text,
        )
        return detection, self._make_metadata(reply, elapsed)

    def translate(self, src_text: str, tgt_lang: str) -> Tuple[TranslateResult, RequestMetadata]:
        """Ask the model to translate ``src_text`` into ``tgt_lang`` and report the source language."""
        question = translate_prompt.substitute(tgt_lang=tgt_lang, src_text=src_text)
        settings = self._make_config(translate_system, LLMTranslateResult)
        reply, elapsed = self._timed_prompt(settings, question)
        translation = TranslateResult(
            src_lang=reply.parsed.src_lang,
            src_text=src_text,
            tgt_lang=tgt_lang,
            tgt_text=reply.parsed.tgt_text,
        )
        return translation, self._make_metadata(reply, elapsed)

The following cell takes about 9.505784358014353 seconds to run; uncomment it to try it out.

In [19]:
# Run the full test_translator battery (nine requests) against Google.
google_translator = GoogleTranslator()
test_translator(google_translator)

********************************************************************************
T.detect_language("Hello, world")
src_lang='English' src_text='Hello, world'
provider='google' model='gemini-2.0-flash-lite-preview-02-05' duration=0.8479017170002408 input_tokens=78 output_tokens=13
********************************************************************************
T.translate("Hello, world", "Español")
src_lang='English' src_text='Hello, world' tgt_lang='Español' tgt_text='Hola, mundo'
provider='google' model='gemini-2.0-flash-lite-preview-02-05' duration=0.8220592180000494 input_tokens=112 output_tokens=25
********************************************************************************
T.translate("Hello mate oi!", "Español")
src_lang='English' src_text='Hello mate oi!' tgt_lang='Español' tgt_text='¡Hola amigo!'
provider='google' model='gemini-2.0-flash-lite-preview-02-05' duration=0.8863348080003561 input_tokens=113 output_tokens=24
********************************************************

## Using Anthropic

In [20]:
class AnthropicTranslator(Translator):
    """Translator that calls Claude through ``clients.anthropic``.

    Structured output is obtained by forcing a tool call: each operation has a
    tool whose input schema is the matching pydantic model, and the tool-call
    input is validated against that model.

    Both public methods always return the declared ``(result, metadata)`` pair.
    If the response has no usable tool call, or its input does not match the
    schema, a ``ValueError`` is raised (pydantic's ``ValidationError`` is a
    ``ValueError`` subclass). The raw SDK response is never returned in place
    of the result.
    """

    def __init__(self):
        self.model = "claude-3-5-haiku-latest"
        # NOTE(review): long translations could hit this output cap and leave
        # the tool input incomplete -- confirm 1024 is enough for the inputs used.
        self.max_tokens = 1024
        self.tools = [
            {
                "name": "detect-language",
                "description": "Accurately identifies the language of the given text.",
                "input_schema": LLMDetectLanguageResult.model_json_schema(),
            },
            {
                "name": "detect-and-translate-language",
                "description": "Identifies the language of the source text and provides an accurate translation to the specified target language.",
                "input_schema": LLMTranslateResult.model_json_schema(),
            },
        ]

    def _make_metadata(self, res, elapsed):
        """Wrap timing and the response's token usage into a ``RequestMetadata``."""
        return RequestMetadata(
            provider="anthropic",
            model=self.model,
            duration=elapsed,
            input_tokens=res.usage.input_tokens,
            output_tokens=res.usage.output_tokens,
        )

    def _prompt(self, system, tool, prompt):
        """Send ``prompt`` and force the model to answer by calling ``tool``."""
        return clients.anthropic.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            system=system,
            temperature=0,
            tools=self.tools,
            tool_choice={"type": "tool", "name": tool},
            messages=[
                {"role": "user", "content": prompt},
            ],
        )

    def _timed_prompt(self, system, tool, prompt):
        """Call ``_prompt`` and return ``(response, elapsed_seconds)``; only the call itself is timed."""
        started = time.perf_counter()
        res = self._prompt(system, tool, prompt)
        return res, time.perf_counter() - started

    def _tool_input(self, res, tool):
        """Return the input of the ``tool`` call found in ``res``.

        Searches every content block rather than assuming the tool call is the
        first one.

        Raises:
            ValueError: if the response contains no call to ``tool``.
        """
        for block in res.content:
            if getattr(block, "type", None) == "tool_use" and getattr(block, "name", None) == tool:
                return block.input
        raise ValueError(
            f"Anthropic response contains no {tool!r} tool call "
            f"(stop_reason={getattr(res, 'stop_reason', None)!r})"
        )

    def detect_language(self, src_text : str) -> Tuple[DetectLanguageResult, RequestMetadata]:
        """Ask the model for the language of ``src_text``.

        Raises:
            ValueError: if the response has no ``detect-language`` tool call or
                its input fails validation.
        """
        prompt = detect_prompt.substitute(src_text=src_text)
        tool = "detect-language"
        res, elapsed = self._timed_prompt(detect_system, tool, prompt)
        metadata = self._make_metadata(res, elapsed)
        parsed = LLMDetectLanguageResult.model_validate(self._tool_input(res, tool))
        return (
            DetectLanguageResult(
                src_text=src_text,
                src_lang=parsed.src_lang,
            ),
            metadata,
        )

    def translate(self, src_text : str, tgt_lang : str) -> Tuple[TranslateResult, RequestMetadata]:
        """Ask the model to translate ``src_text`` into ``tgt_lang`` and report the source language.

        Raises:
            ValueError: if the response has no ``detect-and-translate-language``
                tool call or its input fails validation.
        """
        prompt = translate_prompt.substitute(tgt_lang=tgt_lang, src_text=src_text)
        tool = "detect-and-translate-language"
        res, elapsed = self._timed_prompt(translate_system, tool, prompt)
        metadata = self._make_metadata(res, elapsed)
        parsed = LLMTranslateResult.model_validate(self._tool_input(res, tool))
        return (
            TranslateResult(
                src_lang=parsed.src_lang,
                src_text=src_text,
                tgt_lang=tgt_lang,
                tgt_text=parsed.tgt_text,
            ),
            metadata,
        )

The following cell takes about 13.52147050700296 seconds to run; uncomment it to try it out.

In [21]:
# Run the full test_translator battery (nine requests) against Anthropic.
anthropic_translator = AnthropicTranslator()
test_translator(anthropic_translator)

********************************************************************************
T.detect_language("Hello, world")
src_lang='English' src_text='Hello, world'
provider='anthropic' model='claude-3-5-haiku-latest' duration=1.018118905999927 input_tokens=697 output_tokens=35
********************************************************************************
T.translate("Hello, world", "Español")
src_lang='en' src_text='Hello, world' tgt_lang='Español' tgt_text='Hola, mundo'
provider='anthropic' model='claude-3-5-haiku-latest' duration=1.329876556000272 input_tokens=712 output_tokens=59
********************************************************************************
T.translate("Hello mate oi!", "Español")
src_lang='en' src_text='Hello mate oi!' tgt_lang='Español' tgt_text='¡Hola amigo!'
provider='anthropic' model='claude-3-5-haiku-latest' duration=1.5770502209998085 input_tokens=714 output_tokens=61
********************************************************************************
T.translate("

## Using OpenAI

In [22]:
class OpenAITranslator(Translator):
    """Translator that calls OpenAI through ``clients.openai``.

    Uses the SDK's ``beta.chat.completions.parse`` helper with a pydantic
    model as ``response_format`` and reads the parsed object from the first
    choice.
    """

    def __init__(self):
        self.model = "gpt-4o-mini"

    def _make_metadata(self, res, elapsed):
        """Wrap timing and the response's token usage into a ``RequestMetadata``."""
        return RequestMetadata(
            provider="openai",
            model=self.model,
            duration=elapsed,
            input_tokens=res.usage.prompt_tokens,
            output_tokens=res.usage.completion_tokens,
        )

    def _prompt(self, system, res_format, prompt):
        """Send ``system`` (as the developer message) and ``prompt`` (as the user message)."""

        def text_message(role, text):
            # Both messages use a single text content part.
            return {"role": role, "content": [{"type": "text", "text": text}]}

        conversation = [
            text_message("developer", system),
            text_message("user", prompt),
        ]
        return clients.openai.beta.chat.completions.parse(
            model=self.model,
            temperature=0,
            response_format=res_format,
            messages=conversation,
        )

    def _timed_prompt(self, system, res_format, prompt):
        """Call ``_prompt`` and return ``(response, elapsed_seconds)``; only the call itself is timed."""
        started = time.perf_counter()
        reply = self._prompt(system, res_format, prompt)
        finished = time.perf_counter()
        return reply, finished - started

    def detect_language(self, src_text: str) -> Tuple[DetectLanguageResult, RequestMetadata]:
        """Ask the model for the language of ``src_text``."""
        question = detect_prompt.substitute(src_text=src_text)
        reply, elapsed = self._timed_prompt(detect_system, LLMDetectLanguageResult, question)
        # NOTE(review): message.parsed is used without a None check.
        answer = reply.choices[0].message.parsed
        detection = DetectLanguageResult(
            src_lang=answer.src_lang,
            src_text=src_text,
        )
        return detection, self._make_metadata(reply, elapsed)

    def translate(self, src_text: str, tgt_lang: str) -> Tuple[TranslateResult, RequestMetadata]:
        """Ask the model to translate ``src_text`` into ``tgt_lang`` and report the source language."""
        question = translate_prompt.substitute(tgt_lang=tgt_lang, src_text=src_text)
        reply, elapsed = self._timed_prompt(translate_system, LLMTranslateResult, question)
        answer = reply.choices[0].message.parsed
        translation = TranslateResult(
            src_lang=answer.src_lang,
            src_text=src_text,
            tgt_lang=tgt_lang,
            tgt_text=answer.tgt_text,
        )
        return translation, self._make_metadata(reply, elapsed)

The following cell takes about 9.407515291000891 seconds to run; uncomment it to try it out.

In [23]:
# Run the full test_translator battery (nine requests) against OpenAI.
openai_translator = OpenAITranslator()
test_translator(openai_translator)

********************************************************************************
T.detect_language("Hello, world")
src_lang='English' src_text='Hello, world'
provider='openai' model='gpt-4o-mini' duration=1.1554771430000983 input_tokens=130 output_tokens=7
********************************************************************************
T.translate("Hello, world", "Español")
src_lang='English' src_text='Hello, world' tgt_lang='Español' tgt_text='Hola, mundo'
provider='openai' model='gpt-4o-mini' duration=1.3574547289999828 input_tokens=183 output_tokens=15
********************************************************************************
T.translate("Hello mate oi!", "Español")
src_lang='English' src_text='Hello mate oi!' tgt_lang='Español' tgt_text='¡Hola amigo!'
provider='openai' model='gpt-4o-mini' duration=0.5820824110001013 input_tokens=184 output_tokens=16
********************************************************************************
T.translate("Hello mate oi!", "Español de Españ

## Using Mistral

In [24]:
class MistralTranslator(Translator):
    """Translator that calls Mistral through ``clients.mistral``.

    Uses the SDK's ``chat.parse`` helper with a pydantic model as
    ``response_format`` and reads the parsed object from the first choice.
    """

    def __init__(self):
        self.model = "mistral-small-latest"

    def _make_metadata(self, res, elapsed):
        """Wrap timing and the response's token usage into a ``RequestMetadata``."""
        return RequestMetadata(
            provider="mistral",
            model=self.model,
            duration=elapsed,
            input_tokens=res.usage.prompt_tokens,
            output_tokens=res.usage.completion_tokens,
        )

    def _prompt(self, system, res_format, prompt):
        """Send ``system`` and ``prompt`` as a two-message chat and return the raw response."""
        conversation = [
            {"role": "system", "content": system},
            {"role": "user", "content": prompt},
        ]
        return clients.mistral.chat.parse(
            model=self.model,
            temperature=0,
            response_format=res_format,
            messages=conversation,
        )

    def _timed_prompt(self, system, res_format, prompt):
        """Call ``_prompt`` and return ``(response, elapsed_seconds)``; only the call itself is timed."""
        started = time.perf_counter()
        reply = self._prompt(system, res_format, prompt)
        finished = time.perf_counter()
        return reply, finished - started

    def detect_language(self, src_text: str) -> Tuple[DetectLanguageResult, RequestMetadata]:
        """Ask the model for the language of ``src_text``."""
        question = detect_prompt.substitute(src_text=src_text)
        reply, elapsed = self._timed_prompt(detect_system, LLMDetectLanguageResult, question)
        answer = reply.choices[0].message.parsed
        detection = DetectLanguageResult(
            src_lang=answer.src_lang,
            src_text=src_text,
        )
        return detection, self._make_metadata(reply, elapsed)

    def translate(self, src_text: str, tgt_lang: str) -> Tuple[TranslateResult, RequestMetadata]:
        """Ask the model to translate ``src_text`` into ``tgt_lang`` and report the source language."""
        question = translate_prompt.substitute(tgt_lang=tgt_lang, src_text=src_text)
        reply, elapsed = self._timed_prompt(translate_system, LLMTranslateResult, question)
        answer = reply.choices[0].message.parsed
        translation = TranslateResult(
            src_lang=answer.src_lang,
            src_text=src_text,
            tgt_lang=tgt_lang,
            tgt_text=answer.tgt_text,
        )
        return translation, self._make_metadata(reply, elapsed)

The following cell takes about 12.31103517000156 seconds to run; uncomment it to try it out.

In [25]:
# Run the full test_translator battery (nine requests) against Mistral.
mistral_translator = MistralTranslator()
test_translator(mistral_translator)

********************************************************************************
T.detect_language("Hello, world")
src_lang='English' src_text='Hello, world'
provider='mistral' model='mistral-small-latest' duration=0.9361330310002813 input_tokens=74 output_tokens=7
********************************************************************************
T.translate("Hello, world", "Español")
src_lang='English' src_text='Hello, world' tgt_lang='Español' tgt_text='Hola, mundo'
provider='mistral' model='mistral-small-latest' duration=1.9519102040003418 input_tokens=86 output_tokens=22
********************************************************************************
T.translate("Hello mate oi!", "Español")


SDKError: API error occurred: Status 502
<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>


## Using Ollama

In [26]:
class OllamaTranslator(Translator):
    """Translator that calls a model served by Ollama through ``clients.ollama``.

    The pydantic model's JSON schema is passed as the output ``format``, and
    the response text is validated against that same model.
    """

    def __init__(self):
        self.model = "gemma2:9b"

    def _make_metadata(self, res, elapsed):
        """Wrap timing and the response's token counts into a ``RequestMetadata``."""
        return RequestMetadata(
            provider="ollama",
            model=self.model,
            duration=elapsed,
            input_tokens=res.prompt_eval_count,
            output_tokens=res.eval_count,
        )

    def _prompt(self, system, res_format, prompt):
        """Generate a completion for ``prompt`` constrained to ``res_format``'s JSON schema."""
        output_schema = res_format.model_json_schema()
        sampling = {"temperature": 0}
        return clients.ollama.generate(
            model=self.model,
            system=system,
            format=output_schema,
            options=sampling,
            prompt=prompt,
        )

    def _timed_prompt(self, system, res_format, prompt):
        """Call ``_prompt`` and return ``(response, elapsed_seconds)``; only the call itself is timed."""
        started = time.perf_counter()
        reply = self._prompt(system, res_format, prompt)
        finished = time.perf_counter()
        return reply, finished - started

    def detect_language(self, src_text: str) -> Tuple[DetectLanguageResult, RequestMetadata]:
        """Ask the model for the language of ``src_text``."""
        question = detect_prompt.substitute(src_text=src_text)
        reply, elapsed = self._timed_prompt(detect_system, LLMDetectLanguageResult, question)
        # The response text is JSON; validate it before building the result.
        answer = LLMDetectLanguageResult.model_validate_json(reply.response)
        detection = DetectLanguageResult(
            src_lang=answer.src_lang,
            src_text=src_text,
        )
        return detection, self._make_metadata(reply, elapsed)

    def translate(self, src_text: str, tgt_lang: str) -> Tuple[TranslateResult, RequestMetadata]:
        """Ask the model to translate ``src_text`` into ``tgt_lang`` and report the source language."""
        question = translate_prompt.substitute(tgt_lang=tgt_lang, src_text=src_text)
        reply, elapsed = self._timed_prompt(translate_system, LLMTranslateResult, question)
        answer = LLMTranslateResult.model_validate_json(reply.response)
        translation = TranslateResult(
            src_lang=answer.src_lang,
            src_text=src_text,
            tgt_lang=tgt_lang,
            tgt_text=answer.tgt_text,
        )
        return translation, self._make_metadata(reply, elapsed)

The following cell takes about:
- 75.49605615798646 seconds on "la violenta"
- 49.216149886000494 seconds on "la furiosa"
- ??? seconds on "la pitaya"

Uncomment it to try it out.

In [27]:
# Run the full test_translator battery (nine requests) against Ollama.
ollama_translator = OllamaTranslator()
test_translator(ollama_translator)

********************************************************************************
T.detect_language("Hello, world")
src_lang='English' src_text='Hello, world'
provider='ollama' model='gemma2:9b' duration=3.9903903190001984 input_tokens=76 output_tokens=13
********************************************************************************
T.translate("Hello, world", "Español")
src_lang='English' src_text='Hello, world' tgt_lang='Español' tgt_text='¡Hola mundo!'
provider='ollama' model='gemma2:9b' duration=2.9429496449997714 input_tokens=92 output_tokens=21
********************************************************************************
T.translate("Hello mate oi!", "Español")
src_lang='English' src_text='Hello mate oi!' tgt_lang='Español' tgt_text='¡Hola amigo, oi!'
provider='ollama' model='gemma2:9b' duration=2.6775158430000374 input_tokens=93 output_tokens=25
********************************************************************************
T.translate("Hello mate oi!", "Español de España")