# 1. Instalar dependencias

In [None]:
# Install the required dependencies (IPython shell escape; -q keeps pip output quiet)
!pip install transformers sentencepiece -q

# 2. Función de Detección Dinámica de Términos y Construcción del Prompt

In [None]:
# Imports necesarios para esta sección
import re

def extraer_terminos(texto):
    """
    Collect the technical terms that must be kept untranslated.

    Two sources are combined:
      * every run of two or more ASCII uppercase letters found in ``texto``
        as a whole word (e.g. ``RUN``, ``CALCFIELDS``);
      * a fixed set of known terms, which is always included whether or not
        those terms occur in ``texto``.

    Args:
        texto: Source text to scan.

    Returns:
        A list of unique terms ordered longest first, ties broken
        alphabetically. The order is deterministic, so the ``[TERM_N]``
        numbering derived from it is stable across runs, and multi-word
        terms are handled before any shorter term.
    """
    # Whole words of 2+ uppercase letters; single letters are skipped to avoid
    # capturing ordinary words such as "A" or "I".
    patron = r"\b[A-Z]{2,}\b"
    terminos = set(re.findall(patron, texto))

    # Terms known in advance that must never be translated.
    terminos_adicionales = {"Business Central", "codeunit", "report", "xmlport"}
    terminos |= terminos_adicionales

    # A set has no defined order; sort so callers get a reproducible list.
    return sorted(terminos, key=lambda termino: (-len(termino), termino))

def sustituir_terminos_por_tokens(texto, terminos):
    """
    Replace each technical term in the text with a placeholder token ``[TERM_N]``.

    ``N`` is the position of the term in ``terminos``. A term is replaced only
    where it appears as a whole word, so ``RUN`` is not replaced inside
    ``RUNNING``. Matching is case-sensitive.

    Args:
        texto: Text in which the terms are searched.
        terminos: Iterable of terms to protect.

    Returns:
        A tuple ``(texto_modificado, token_map)`` where ``token_map`` maps each
        token to its original term. Every non-empty term gets an entry, even if
        it does not occur in ``texto``.
    """
    token_map = {}
    texto_modificado = texto
    for i, termino in enumerate(terminos):
        if not termino:
            # An empty term would match between any two non-word characters.
            continue
        token = f"[TERM_{i}]"
        token_map[token] = termino
        # re.escape neutralises regex metacharacters in the term. The
        # lookarounds act as word boundaries and also work when the term
        # itself starts or ends with a non-word character.
        patron = rf"(?<!\w){re.escape(termino)}(?!\w)"
        texto_modificado = re.sub(patron, token, texto_modificado)
    return texto_modificado, token_map

def restaurar_terminos(texto, token_map):
    """
    Put the original terms back in place of their placeholder tokens.

    Each key of ``token_map`` is searched literally in the text and every
    occurrence is replaced by the associated value, one key after another in
    the dictionary's iteration order.

    Args:
        texto: Text containing placeholder tokens.
        token_map: Mapping from token to original term.

    Returns:
        The text with every known token replaced by its term.
    """
    resultado = texto
    for marcador in token_map:
        resultado = resultado.replace(marcador, token_map[marcador])
    return resultado

    """
def construir_prompt(texto, terminos):
    """
    """
    Construye el prompt personalizado para la traducción, incluyendo la instrucción de preservar
    los términos técnicos en inglés.
    """
    """
    prompt_v1 = (
        "# Your Role\n"
        "You are a highly skilled multilingual language expert capable of accurately translating texts from English to Catalan. "
        "Your expertise lies in capturing the nuances and subtleties of both languages, ensuring that the translated text faithfully represents the original meaning, tone, and context.\n\n"
        "# Instructions\n"
        "1. Read the provided English text carefully.\n"
        "2. Analyze the text's context, idioms, and any culturally specific expressions.\n"
        "3. Translate the text into Catalan, ensuring clarity, grammatical correctness, and preservation of the original intent.\n"
        "4. Preserve the following technical terms (methods, keywords, etc.) in their original form: {}.\n"
        "5. Ensure the translation reflects both the literal meaning and the underlying connotations present in the original text.\n"
        "6. Proofread the translated text for accuracy and fluency in Catalan.\n\n"
        "# Constraints\n"
        "1. Translate exclusively from English to Catalan without adding personal commentary.\n"
        "2. Retain the style, tone, and context of the original text.\n"
        "3. Do not include additional annotations or explanations in the output.\n"
        "4. Provide a comprehensive translation that completely captures all details of the original text.\n"
        "5. The translated text must be clear, precise, and sound natural to native Catalan speakers.\n\n"
        "===\n\n"
        "Text:\n"
        "{}"
    )
    prompt_v2 = (
        "# Your Role\n"
        "You are a highly skilled multilingual language expert capable of accurately translating texts "
        "from English to Catalan. Your expertise lies in capturing the nuances and subtleties of both languages, "
        "ensuring that the translated text faithfully represents the original meaning, tone, and context.\n\n"
        "# Instructions\n"
        "1. Read the provided English text carefully.\n"
        "2. Analyze the text's context, idioms, and any culturally specific expressions.\n"
        "3. Translate the text into Catalan, ensuring clarity, grammatical correctness, and preservation of the original intent.\n"
        "4. **Do not translate any word that is written entirely in uppercase. Keep these technical terms as is.**\n"
        "5. Preserve the following technical terms in their original form: {}.\n"
        "6. Ensure the translation reflects both the literal meaning and the underlying connotations present in the original text.\n"
        "7. Proofread the translated text for accuracy and fluency in Catalan.\n\n"
        "# Constraints\n"
        "1. Translate exclusively from English to Catalan without adding personal commentary.\n"
        "2. Retain the style, tone, and context of the original text.\n"
        "3. Do not include additional annotations or explanations in the output.\n"
        "4. Provide a comprehensive translation that completely captures all details of the original text.\n"
        "5. The translated text must be clear, precise, and sound natural to native Catalan speakers.\n\n"
        "===\n\n"
        "Text:\n"
        "{}"
    )
    prompt_v3 = (
        "# Your Role\n"
        "You are a highly skilled multilingual language expert capable of accurately translating texts "
        "from English to Catalan. Your expertise lies in capturing the nuances and subtleties of both languages, "
        "ensuring that the translated text faithfully represents the original meaning, tone, and context.\n\n"
        "# Instructions\n"
        "1. Read the provided English text carefully.\n"
        "2. Analyze the text's context, idioms, and any culturally specific expressions.\n"
        "3. Translate the text into Catalan, ensuring clarity, grammatical correctness, and preservation of the original intent.\n"
        "4. Do not translate any word that is written entirely in uppercase. Keep these technical terms as is.\n"
        "5. Preserve the following technical terms in their original form: {}.\n"
        "6. Ensure the translation reflects both the literal meaning and the underlying connotations present in the original text.\n"
        "7. Proofread the translated text for accuracy and fluency in Catalan.\n\n"
        "# Constraints\n"
        "1. Translate exclusively from English to Catalan without adding personal commentary.\n"
        "2. Retain the style, tone, and context of the original text.\n"
        "3. Do not include additional annotations or explanations in the output.\n"
        "4. Provide a comprehensive translation that completely captures all details of the original text.\n"
        "5. The translated text must be clear, precise, and sound natural to native Catalan speakers.\n\n"
        "===\n\n"
        "___Text:___\n"
        "{}"
    )
    prompt_v4 = (
        "# Your Role\n"
        "You are a highly skilled multilingual language expert capable of accurately translating texts "
        "from English to Catalan. Your expertise lies in capturing the nuances and subtleties of both languages, "
        "ensuring that the translated text faithfully represents the original meaning, tone, and context.\n\n"
        "# Instructions\n"
        "1. Read the provided English text carefully.\n"
        "2. Analyze the text's context, idioms, and any culturally specific expressions.\n"
        "3. Translate the text into Catalan, ensuring clarity, grammatical correctness, and preservation of the original intent.\n"
        "4. Do not translate any word that is written entirely in uppercase. Keep these technical terms as is.\n"
        "5. Preserve the following technical terms in their original form: {}.\n"
        "6. Ensure the translation reflects both the literal meaning and the underlying connotations present in the original text.\n"
        "7. Proofread the translated text for accuracy and fluency in Catalan.\n\n"
        "# Constraints\n"
        "1. Translate exclusively from English to Catalan without adding personal commentary.\n"
        "2. Retain the style, tone, and context of the original text.\n"
        "3. Do not include additional annotations or explanations in the output.\n"
        "4. Provide a comprehensive translation that completely captures all details of the original text.\n"
        "5. The translated text must be clear, precise, and sound natural to native Catalan speakers.\n\n"
        "6. REMEBER: whole capitalized word should not be translated.\n\n"
        "===\n\n"
        "___Text:___\n"
        "{}"
    )
    prompt_v5 = (
        "# Your Role\n"
        "You are a highly skilled multilingual language expert capable of accurately translating texts "
        "from English to Catalan. Your expertise lies in capturing the nuances and subtleties of both languages, "
        "ensuring that the translated text faithfully represents the original meaning, tone, and context.\n\n"
        "# Instructions\n"
        "1. Read the provided English text carefully.\n"
        "2. Analyze the text's context, idioms, and any culturally specific expressions.\n"
        "3. Translate the text into Catalan, ensuring clarity, grammatical correctness, and preservation of the original intent.\n"
        "4. Do not translate any word that is written entirely in uppercase. Keep these technical terms as is.\n"
        "5. Preserve the following technical terms in their original form: {}.\n"
        "6. Ensure the translation reflects both the literal meaning and the underlying connotations present in the original text.\n"
        "7. Proofread the translated text for accuracy and fluency in Catalan.\n\n"
        "# Constraints\n"
        "1. Translate exclusively from English to Catalan without adding personal commentary.\n"
        "2. Retain the style, tone, and context of the original text.\n"
        "3. Do not include additional annotations or explanations in the output.\n"
        "4. Provide a comprehensive translation that completely captures all details of the original text.\n"
        "5. The translated text must be clear, precise, and sound natural to native Catalan speakers.\n\n"
        "6. REMEBER: whole capitalized word must not be translated.\n\n"
        "===\n\n"
        "___Text:___\n"
        "{}"
    )
    prompt_v6 = (
        "# Your Role\n"
        "You are a highly skilled multilingual language expert capable of accurately translating texts "
        "from English to Catalan. Your expertise lies in capturing the nuances and subtleties of both languages, "
        "ensuring that the translated text faithfully represents the original meaning, tone, and context.\n\n"
        "# Instructions\n"
        "1. Read the provided English text carefully.\n"
        "2. Analyze the text's context, idioms, and any culturally specific expressions.\n"
        "3. Translate the text into Catalan, ensuring clarity, grammatical correctness, and preservation of the original intent.\n"
        "4. IMPORTANT: DO NOT translate ANY word that is written entirely in uppercase letters. For example, words such as RUN, GET, SET, etc., MUST remain exactly as is."
        "5. Preserve the following technical terms in their original form: {}.\n"
        "6. Ensure the translation reflects both the literal meaning and the underlying connotations present in the original text.\n"
        "7. Proofread the translated text for accuracy and fluency in Catalan.\n\n"
        "# Constraints\n"
        "1. Translate exclusively from English to Catalan without adding personal commentary.\n"
        "2. Retain the style, tone, and context of the original text.\n"
        "3. Do not include additional annotations or explanations in the output.\n"
        "4. Provide a comprehensive translation that completely captures all details of the original text.\n"
        "5. The translated text must be clear, precise, and sound natural to native Catalan speakers.\n\n"
        "===\n\n"
        "___Text:___\n"
        "{}"
    )
    prompt_v7 = (
        "# Your Role\n"
        "You are a highly skilled multilingual language expert capable of accurately translating texts "
        "from English to Catalan. Your expertise lies in capturing the nuances and subtleties of both languages, "
        "ensuring that the translated text faithfully represents the original meaning, tone, and context.\n\n"
        "# Instructions\n"
        "1. Read the provided English text carefully.\n"
        "2. Analyze the text's context, idioms, and any culturally specific expressions.\n"
        "3. Translate the text into Catalan, ensuring clarity, grammatical correctness, and preservation of the original intent.\n"
        "4. **IMPORTANT: Under no circumstances should you translate any word that is written entirely in uppercase letters.**\n"
        "   - Treat these words as non-translatable and, if possible, enclose them in backticks (e.g., `RUN`, `GET`, `SET`) to emphasize that they must remain unchanged.\n"
        "5. Preserve the following technical terms in their original form: {}.\n"
        "6. Ensure the translation reflects both the literal meaning and the underlying connotations present in the original text.\n"
        "7. Proofread the translated text for accuracy and fluency in Catalan.\n\n"
        "# Constraints\n"
        "1. Translate exclusively from English to Catalan without adding personal commentary.\n"
        "2. Retain the style, tone, and context of the original text.\n"
        "3. Do not include additional annotations or explanations in the output.\n"
        "4. Provide a comprehensive translation that completely captures all details of the original text.\n"
        "5. The translated text must be clear, precise, and sound natural to native Catalan speakers.\n\n"
        "===\n\n"
        "___Text:___\n"
        "{}"
    ).format(", ".join(terminos) if terminos else "[NONE]", texto)

    return prompt

    """

def construir_prompt(texto_prepro, terminos):
    """
    Build the translation prompt around an already pre-processed text.

    The prompt tells the reader that the text contains ``[TERM_N]`` placeholder
    tokens that must be left unchanged, and ends with a ``___Text:___`` marker
    followed by the text itself.

    Args:
        texto_prepro: Text in which technical terms were already replaced by
            placeholder tokens. It is inserted verbatim.
        terminos: Not used by this version of the prompt; kept so the
            signature stays compatible with existing callers.

    Returns:
        The complete prompt as a single string.
    """
    # Real newline escapes are required here: the text after the marker is
    # later recovered with str.strip(), which removes whitespace only.
    prompt = (
        "# Your Role\n"
        "You are a highly skilled multilingual language expert capable of accurately translating texts "
        "from English to Catalan. Your expertise lies in capturing the nuances and subtleties of both languages, "
        "ensuring that the translated text faithfully represents the original meaning, tone, and context.\n\n"
        "# Instructions\n"
        "1. Read the provided English text carefully.\n"
        "2. Analyze the text's context, idioms, and any culturally specific expressions.\n"
        "3. Translate the text into Catalan, ensuring clarity, grammatical correctness, and preservation of the original intent.\n"
        "4. IMPORTANT: The text contains special tokens (e.g., [TERM_0], [TERM_1], ...) representing technical terms that must not be translated. Make sure these tokens remain unchanged.\n"
        "5. Proofread the translated text for accuracy and fluency in Catalan.\n\n"
        "# Constraints\n"
        "1. Translate exclusively from English to Catalan without adding personal commentary.\n"
        "2. Retain the style, tone, and context of the original text.\n"
        "3. Do not include any additional annotations or explanations in the output.\n"
        "4. Provide a comprehensive translation that completely captures all details of the original text.\n"
        "5. The final translated text must be clear, precise, and sound natural to native Catalan speakers.\n\n"
        "===\n\n"
        "___Text:___\n"
        "{}"
    ).format(texto_prepro)

    return prompt


# 3. Función de traducción utilizando Hugging Face Transformers

In [None]:
# Imports necesarios para esta sección
from transformers import MarianMTModel, MarianTokenizer

# Load the English -> Catalan translation checkpoint from Helsinki-NLP.
# The tokenizer and the model are loaded from the same checkpoint id.
MODEL_NAME = "Helsinki-NLP/opus-mt-en-ca"
tokenizer = MarianTokenizer.from_pretrained(MODEL_NAME)
model = MarianMTModel.from_pretrained(MODEL_NAME)

"""
def traducir_texto(prompt):
"""
"""
    Esta función traduce el texto contenido en el prompt del inglés al catalán
    utilizando el modelo real de traducción Helsinki-NLP/opus-mt-en-ca.
"""
"""
    # Se asume que el prompt tiene la sección final 'Text:\n' seguido del texto a traducir.
    # Extraer todo el texto posterior a la primera aparición de "Text:".
    try:
        texto_a_traducir = prompt.split("___Text:___")[-1].strip()
    except Exception as e:
        return "Error extrayendo el texto a traducir: " + str(e)

    # Tokenización y generación de la traducción
    inputs = tokenizer(texto_a_traducir, return_tensors="pt", truncation=True)
    translated = model.generate(**inputs)
    traduccion_resultado = tokenizer.decode(translated[0], skip_special_tokens=True)
    return traduccion_resultado
"""
def traducir_texto(prompt, token_map):
    """
    Translate the text embedded in the prompt and restore the protected terms.

    Only the part of ``prompt`` after the last ``___Text:___`` marker is sent
    to the model; if the marker is missing, the whole prompt is used. The text
    is translated line by line in a single batch, so the line structure of the
    input is kept and ``truncation=True`` applies per line rather than to the
    whole text. Blank lines are passed through untouched.

    Args:
        prompt: Prompt string ending with the ``___Text:___`` section.
        token_map: Mapping from placeholder token to original term, applied to
            the translated text through ``restaurar_terminos``.

    Returns:
        The translated text with tokens replaced by their original terms, or
        an error message string if the text cannot be extracted from ``prompt``.
    """
    try:
        texto_a_traducir = prompt.split("___Text:___")[-1].strip()
    except (AttributeError, TypeError) as e:
        # Raised only when prompt is not a str; reported as text, as before.
        return "Error extrayendo el texto a traducir: " + str(e)

    lineas = texto_a_traducir.splitlines()
    # Positions of the lines that actually contain text to translate.
    indices = [i for i, linea in enumerate(lineas) if linea.strip()]

    if indices:
        # Tokenize all non-blank lines as one padded batch and generate once.
        inputs = tokenizer(
            [lineas[i].strip() for i in indices],
            return_tensors="pt",
            padding=True,
            truncation=True,
        )
        translated = model.generate(**inputs)
        for i, salida in zip(indices, translated):
            lineas[i] = tokenizer.decode(salida, skip_special_tokens=True)

    traduccion_resultado = "\n".join(lineas)

    # Swap the placeholder tokens back to the original terms.
    return restaurar_terminos(traduccion_resultado, token_map)




# 4. Definir la función de traducción y mostrar los widgets

In [None]:
# Imports necesarios para esta sección
import ipywidgets as widgets
from IPython.display import display, Markdown

# Notebook header: title followed by a short description of the PoC
display(Markdown("## PoC - Traducción de Documentación técnica de Business Central"))
display(Markdown("Este notebook permite pegar el texto original en inglés y obtener una traducción a catalán (manteniendo ciertos métodos y palabras clave en inglés). La iteración del prompting se ha realizado de forma manual."))

# Editable text area where the English source text is pasted
texto_original = widgets.Textarea(
    description="Original:",
    placeholder="Paste the text to be translated...",
    value="",
    layout=widgets.Layout(height="150px", width="95%"),
)

# Read-only text area that receives the translation
texto_traducido = widgets.Textarea(
    description="Traducció:",
    placeholder="Aquí apareixerà el text traduit...",
    value="",
    disabled=True,
    layout=widgets.Layout(height="150px", width="95%"),
)

# Button that launches the translation
btn_traducir = widgets.Button(
    button_style="success",
    description="Traducir",
)

def manejar_traduccion(b):
    """
    Click handler for the translate button: run the whole pipeline.

    Reads the text from ``texto_original``, extracts the terms to protect,
    replaces them with placeholder tokens, builds the prompt, translates it
    and writes the result into ``texto_traducido``. The list of protected
    terms is also displayed as Markdown.

    Args:
        b: Argument supplied by the button's ``on_click`` callback; not used.
    """
    entrada = texto_original.value
    terminos_protegidos = extraer_terminos(entrada)

    # An earlier iteration built the prompt from the raw text and called
    # traducir_texto(prompt) directly. That alone did not work, because the
    # model translated even the uppercase words, hence the token substitution.
    texto_con_tokens, mapa_tokens = sustituir_terminos_por_tokens(entrada, terminos_protegidos)

    # The prompt is built from the tokenized text, not from the raw input.
    prompt_personalizado = construir_prompt(texto_con_tokens, terminos_protegidos)

    # Translate and swap the tokens back to the original terms.
    traduccion = traducir_texto(prompt_personalizado, mapa_tokens)

    # Show which terms were protected, then publish the translation.
    display(Markdown("### Términos sin traducir:"))
    display(Markdown(", ".join(terminos_protegidos)))
    texto_traducido.value = traduccion

# Register the click handler on the translate button
btn_traducir.on_click(manejar_traduccion)

# Render the input area, the button and the output area, in that order
widgets_interfaz = (texto_original, btn_traducir, texto_traducido)
display(*widgets_interfaz)


## PoC - Traducción de Documentación técnica de Business Central

Este notebook permite pegar el texto original en inglés y obtener una traducción a catalán (manteniendo ciertos métodos y palabras clave en inglés). La iteración del prompting se ha realizado de forma manual.

Textarea(value='', description='Original:', layout=Layout(height='150px', width='95%'), placeholder='Paste the…

Button(button_style='success', description='Traducir', style=ButtonStyle())

Textarea(value='', description='Traducció:', disabled=True, layout=Layout(height='150px', width='95%'), placeh…

### Términos sin traducir:

Business Central, codeunit, report, AL, xmlport

# 5. Ejemplos de Pruebas con Fragmentos de Documentación

In [None]:
# Example 1: documentation fragment containing all-uppercase terms
ejemplo1 = " ".join([
    "The function CALCFIELDS calculates the necessary fields for a record.",
    "Ensure that RUN is executed after GET and before SET to maintain data integrity.",
])

# Example 2: another documentation fragment, with mixed-case keywords
ejemplo2 = " ".join([
    "Utilize the Codeunit methods to carry out the procedures.",
    "The XMLPort must be configured correctly to transform the data.",
])

# Example 3: excerpt from a documentation page (multi-paragraph text; the
# literal keeps its leading space, blank lines and trailing indentation)
ejemplo3 = (
    """ Programming in AL

AL is the programming language that is used for manipulating data such as retrieving, inserting, and modifying records in a Business Central database. It controls the execution of the various application objects, such as pages, reports, or codeunits.

With AL, you can create business rules to ensure that the data, which is stored in the database is meaningful and consistent with the way customers do business. Through AL programming, you can:

Add new data or transfer data from one table to another, for example, from a journal table to a ledger table.

Combine data from multiple tables into one report or display it on one page.

Perform calculations, such as calculating the total amount of a sales order.

Control the flow of the application, for example, by showing a message box when a certain condition is met.

Create new or modify existing objects, such as a page or a report.

Where to write AL code

Almost every object in Business Central contains triggers where you can add your AL code. Triggers exist for the following objects.
    """
)

# Dropdown listing the available examples; each option is a (label, text)
# pair, so the widget's value is the example text itself
ejemplo_dropdown = widgets.Dropdown(
    description="Ejemplo:",
    options=[
        ("Ejemplo 1", ejemplo1),
        ("Ejemplo 2", ejemplo2),
        ("Ejemplo 3", ejemplo3),
    ],
)

# Button that copies the selected example into the input text area
btn_cargar_ejemplo = widgets.Button(
    button_style="info",
    description="Cargar Ejemplo",
)

def cargar_ejemplo(b):
    """
    Copy the example currently selected in the dropdown into the input area.

    Args:
        b: Argument supplied by the button's ``on_click`` callback; not used.
    """
    seleccionado = ejemplo_dropdown.value
    texto_original.value = seleccionado

# Register the handler first so the button is already wired when rendered
btn_cargar_ejemplo.on_click(cargar_ejemplo)

# Section title followed by the example selector and its load button
display(Markdown("### Càrrega d'Exemples"))
display(ejemplo_dropdown, btn_cargar_ejemplo)


### Càrrega d'Exemples

Dropdown(description='Ejemplo:', options=(('Ejemplo 1', 'The function CALCFIELDS calculates the necessary fiel…

Button(button_style='info', description='Cargar Ejemplo', style=ButtonStyle())

# 6. Conclusiones
Después de varias iteraciones con este modelo, no he conseguido traducir correctamente textos aparentemente fáciles.

Una de las dificultades encontradas es la inconsistencia con la que aparecen los keywords en la documentación: mayúsculas, camelCase, singular, plural...

Seguramente, modelos más avanzados, como GPT en sus versiones mini o nano, bastarían para procesar correctamente el prompt original (sin preprocesar los keywords técnicos).

Como primera experiencia de desarrollo en Colab y Python 🆗

Pero el resultado dista mucho de lo deseado 🙅

## Mejoras para iterar en el proyecto:
- Usar un modelo más actual
- Hacer fine-tuning de un modelo con un dataset (sintético) de documentación traducida
