In [1]:
import os
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
import openai

# Claude
anthropic_chat = ChatAnthropic()

# NOTE(review): this prints the client's `model_name` attribute, which is presumably the
# single model the client is configured with rather than a list of models - confirm.
print("Available Anthropic models:", anthropic_chat.model_name)

# For ChatGPT-4
openai.api_key = os.getenv("OPENAI_API_KEY")
# alternatively set LANGCHAIN_API_KEY
os.environ["LANGCHAIN_TRACING_V2"] = ""  # "true"

# Listing the models is used as a cheap way to check that the API key is accepted.
try:
    models = openai.models.list()
    print("API key is valid. Available models:", [model.id for model in models.data])
# `openai.models.list()` is the openai>=1.0 interface; in that version the exception
# classes live at the package top level (the old `openai.error` module no longer exists).
except openai.AuthenticationError as e:
    print("API key is invalid:", str(e))

openai_llm = ChatOpenAI(model="gpt-3.5-turbo", api_key=os.getenv("OPENAI_API_KEY"))  # gpt-4-turbo - more correct grammar info


ModuleNotFoundError: No module named 'langchain_anthropic'

In [3]:
def llama3_request(prompt):
    """Send ``prompt`` to a local Llama 3 model through the Ollama CLI.

    Args:
        prompt: The text to pass to the model.

    Returns:
        Everything the ``ollama`` process wrote to standard output, as a string.

    Raises:
        RuntimeError: If ``ollama`` exits with a non-zero status; the message
            carries the exit status and the process's standard error.
        FileNotFoundError: If the ``ollama`` executable is not on ``PATH``
            (raised by ``subprocess.run``).
    """
    import subprocess

    # `ollama run` takes the prompt as a positional argument after the model name;
    # it has no `--prompt` option.
    result = subprocess.run(
        ["ollama", "run", "llama3", prompt],
        capture_output=True,
        text=True,
    )
    # Surface CLI failures instead of silently returning an empty reply.
    if result.returncode != 0:
        raise RuntimeError(
            f"ollama exited with status {result.returncode}: {result.stderr.strip()}"
        )
    return result.stdout


In [44]:
import os
from functools import lru_cache
from typing import Any, Dict

from pydantic import Field
import json
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
import openai
from langchain.schema import BaseOutputParser

class SimpleOutputParser(BaseOutputParser[str]):
    """Pass-through parser: the model output is handed back untouched."""

    def parse(self, text: str) -> str:
        """Return ``text`` exactly as it was received."""
        return text

class SentenceOutputParser(BaseOutputParser[str]):
    """Turn the JSON reply of the "Sentence" prompt into an HTML fragment.

    The reply is expected to be a JSON object with the optional keys
    ``translation``, ``text_language`` and ``comments``. Anything that is not
    a JSON object (invalid JSON, or a bare number/string/list/null) is treated
    as if the whole reply were the translation.
    """

    # Kept so existing `SentenceOutputParser(retry_count=...)` calls keep working.
    # Decoding the same string again cannot give a different result, so the value
    # is not consulted while parsing.
    retry_count: int = Field(default=1, description="Number of retry attempts for JSON parsing")

    def parse(self, text: str) -> str:
        """Format the model reply as ``<translation><hr><comments>``.

        Args:
            text: Raw model output, ideally a JSON object.

        Returns:
            The translation (prefixed with ``(<text_language>) `` when the reply
            carries a truthy ``text_language``), then ``<hr>``, then the
            comments (empty when absent).
        """
        try:
            payload = json.loads(text)
        except json.JSONDecodeError:
            payload = None
        # Valid JSON is not necessarily an object; only a dict has the fields we need.
        if not isinstance(payload, dict):
            payload = {}

        # A reply without a "translation" field is shown in full as the translation.
        translation = payload.get("translation", text)
        text_language = payload.get("text_language")
        if text_language:
            translation = f"({text_language}) {translation}"
        return f"""{translation}<hr>{payload.get("comments", "")}"""
            
def _create_article_templates(self) -> Dict[str, Any]:
    """Build the prompt template and output parser for each article type.

    ``self`` is accepted so the function has a method signature, but it is
    not read.

    Returns:
        A dict keyed by article name ("Translate", "Dictionary", "Examples",
        "Explain", "Sentence"). Each value is a dict with:
        "template" - the ``ChatPromptTemplate`` for that article;
        "parser" - the output parser for the reply (a ``SentenceOutputParser``
        for "Sentence", a ``SimpleOutputParser`` for everything else).

        The "Translate", "Explain" and "Sentence" prompts take ``{text}``; the
        "Dictionary" and "Examples" prompts take ``{term}``. All of them take
        ``{text_language}`` and ``{user_language}``.
    """
    parser = SimpleOutputParser()
    sentence_parser = SentenceOutputParser(retry_count=1)

    translation_system_template = """Given the following text in {text_language},
translate the term marked with <span class="highlighted-term"> tag to {user_language}. 
Give translation of the term in the exact sentence where the term is (and not all occurrences in the text).
Put into result only the term translation.
If the text is not in {text_language}, prefix the result with the text language name in parentheses, like (Latin).
"""

    translation_prompt_template = ChatPromptTemplate.from_messages(
        [("system", translation_system_template), ("user", "{text}")]
    )

    # Grammar terminology matters here: the model is asked for the word's
    # grammatical gender and its declension table.
    dictionary_system_template = """Given the term in {text_language},
write {text_language} - {user_language} dictionary article for the term.
The article should be in {user_language}.
Include grammar attributes - part of speech, gender, number, countability and other grammar attributes.
All grammar attributes should be on one line in a compact way with abbreviations like in good dictionaries.
Include different meanings, declension table and other information you expect to see in a good dictionary
like Oxford, but do not include examples.
If you are sure the text is in a different language, write the article based on that language and indicate
it by starting the result with the detected language name in parentheses.
Give the result in HTML formatting, without any block marks."""

    dictionary_prompt_template = ChatPromptTemplate.from_messages(
        [("system", dictionary_system_template), ("user", "The term is: {term}")]
    )

    # The closing quotes stay indented by eight spaces: that trailing whitespace
    # is part of the prompt text and is kept as is.
    examples_system_template = """Given the term in {text_language},
provide up to seven examples in {text_language} of sentences with the term.
After each example, provide the translation of the example to {user_language} in a separate paragraph 
using the <p> tag.
Do not prefix the translation with ({user_language}) or with "Translation." 
Separate examples with <hr> tags. 
Provide the result in HTML formatting, without any block marks.

Ensure your response adheres strictly to these instructions:
- Do not repeat examples.
- If you detect a language different from {text_language}, mention that, 
but do not mention the language if it is {text_language}.
- Do not mark the translation with "Translation" or similar terms.
        """

    examples_prompt_template = ChatPromptTemplate.from_messages(
        [("system", examples_system_template), ("user", "The term is: {term}")]
    )

    explain_system_template = """Explain using only {text_language}, the usage
of the term marked in the text with a <span class="highlighted-term"> tag.
Explain the usage only in the sentence where the term
is marked with the <span class="highlighted-term"> tag, and not in other occurrences of the text.
Only if the text is not in {text_language}, start with the detected language in parentheses.
After <hr> provide the translation of the explanation to {user_language}.
Give the result in HTML formatting without any additional block marks or labels.

Ensure your response adheres strictly to these instructions:
- Use {text_language} for the initial explanation.
- Never put into the result names of the languages {text_language} or {user_language}.
- Explain the exact sentence where the term is marked with the <span class="highlighted-term"> tag.
Do not mention usage of the term in sentences where it is not marked
with the <span class="highlighted-term"> tag.
"""

    explain_prompt_template = ChatPromptTemplate.from_messages(
        [("system", explain_system_template), ("user", "{text}")]
    )

    # The reply to this prompt is JSON; it is consumed by `sentence_parser`.
    sentence_system_template = """Given following text translate to
{user_language} only the sentence with term marked with a <span class="highlighted-term"> tag inside.
Translate only the sentence and not other parts of the text.
Expected text language is {text_language} but you should detect the actual language if it does not fit.
Give comments in {user_language} about parts that can be difficult to understand by
{user_language} student learning {text_language} - difficult words, forms and expressions etc.
Return result in json without any additional block marks or labels:
"translation" - translation;
"sentence" - the original sentence with the marked term;
"text_language" - ISO639 set-1 code of the actual text language;
"comments" - comments.
"""

    sentence_prompt_template = ChatPromptTemplate.from_messages(
        [("system", sentence_system_template), ("user", "The text is: {text}")]
    )

    return {
        "Translate": {"template": translation_prompt_template, "parser": parser},
        "Dictionary": {"template": dictionary_prompt_template, "parser": parser},
        "Examples": {"template": examples_prompt_template, "parser": parser},
        "Explain": {"template": explain_prompt_template, "parser": parser},
        "Sentence": {"template": sentence_prompt_template, "parser": sentence_parser},
    }


In [56]:
import os
from functools import lru_cache
from typing import Any, Dict

from pydantic import Field
import json
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
import openai
from langchain.schema import BaseOutputParser

class SimpleOutputParser(BaseOutputParser[str]):
    """Pass-through parser: the model output is handed back untouched."""

    def parse(self, text: str) -> str:
        """Return ``text`` exactly as it was received."""
        return text

class SentenceOutputParser(BaseOutputParser[str]):
    """Turn the JSON reply of the "Sentence" prompt into an HTML fragment.

    The reply is expected to be a JSON object with the optional keys
    ``translation``, ``text_language`` and ``comments``. Anything that is not
    a JSON object (invalid JSON, or a bare number/string/list/null) is treated
    as if the whole reply were the translation.
    """

    # Kept so existing `SentenceOutputParser(retry_count=...)` calls keep working.
    # Decoding the same string again cannot give a different result, so the value
    # is not consulted while parsing.
    retry_count: int = Field(default=1, description="Number of retry attempts for JSON parsing")

    def parse(self, text: str) -> str:
        """Format the model reply as ``<translation><hr><comments>``.

        Args:
            text: Raw model output, ideally a JSON object.

        Returns:
            The translation (prefixed with ``(<text_language>) `` when the reply
            carries a truthy ``text_language``), then ``<hr>``, then the
            comments (empty when absent).
        """
        try:
            payload = json.loads(text)
        except json.JSONDecodeError:
            payload = None
        # Valid JSON is not necessarily an object; only a dict has the fields we need.
        if not isinstance(payload, dict):
            payload = {}

        # A reply without a "translation" field is shown in full as the translation.
        translation = payload.get("translation", text)
        text_language = payload.get("text_language")
        if text_language:
            translation = f"({text_language}) {translation}"
        return f"""{translation}<hr>{payload.get("comments", "")}"""
            
def _create_article_templates(self) -> Dict[str, Any]:
    """Build the prompt template and output parser for each article type.

    ``self`` is accepted so the function has a method signature, but it is
    not read.

    Returns:
        A dict keyed by article name ("Translate", "Dictionary", "Examples",
        "Explain", "Sentence"). Each value is a dict with:
        "template" - the ``ChatPromptTemplate`` for that article;
        "parser" - the output parser for the reply (a ``SentenceOutputParser``
        for "Sentence", a ``SimpleOutputParser`` for everything else).

        The "Translate", "Explain" and "Sentence" prompts take ``{text}``; the
        "Dictionary" and "Examples" prompts take ``{term}``. All of them take
        ``{text_language}`` and ``{user_language}``.
    """
    parser = SimpleOutputParser()
    sentence_parser = SentenceOutputParser(retry_count=1)

    translation_system_template = """Given the following text in {text_language},
translate the term marked with <span class="highlighted-term"> tag to {user_language}. 
Give translation of the term in the exact sentence where the term is (and not all occurrences in the text).
Put into result only the term translation.
If the text is not in {text_language}, prefix the result with the text language name in parentheses, like (Latin).
"""

    translation_prompt_template = ChatPromptTemplate.from_messages(
        [("system", translation_system_template), ("user", "{text}")]
    )

    # Grammar terminology matters here: the model is asked for the word's
    # grammatical gender and its declension table.
    dictionary_system_template = """Given the term in {text_language},
write {text_language} - {user_language} dictionary article for the term.
The article should be in {user_language}.
Include grammar attributes - part of speech, gender, number, countability and other grammar attributes.
All grammar attributes should be on one line in a compact way with abbreviations like in good dictionaries.
Include different meanings, declension table and other information you expect to see in a good dictionary
like Oxford, but do not include examples.
If you are sure the text is in a different language, write the article based on that language and indicate
it by starting the result with the detected language name in parentheses.
Give the result in HTML formatting, without any block marks."""

    dictionary_prompt_template = ChatPromptTemplate.from_messages(
        [("system", dictionary_system_template), ("user", "The term is: {term}")]
    )

    # The closing quotes stay indented by eight spaces: that trailing whitespace
    # is part of the prompt text and is kept as is.
    examples_system_template = """Given the term in {text_language},
provide up to seven examples in {text_language} of sentences with the term.
After each example, provide the translation of the example to {user_language} in a separate paragraph 
using the <p> tag.
Do not prefix the translation with ({user_language}) or with "Translation." 
Separate examples with <hr> tags. 
Provide the result in HTML formatting, without any block marks.

Ensure your response adheres strictly to these instructions:
- Do not repeat examples.
- If you detect a language different from {text_language}, mention that, 
but do not mention the language if it is {text_language}.
- Do not mark the translation with "Translation" or similar terms.
        """

    examples_prompt_template = ChatPromptTemplate.from_messages(
        [("system", examples_system_template), ("user", "The term is: {term}")]
    )

    explain_system_template = """Explain using only {text_language}, the usage
of the term marked in the text with a <span class="highlighted-term"> tag.
Explain the usage only in the sentence where the term
is marked with the <span class="highlighted-term"> tag, and not in other occurrences of the text.
Only if the text is not in {text_language}, start with the detected language in parentheses.
After <hr> provide the translation of the explanation to {user_language}.
Give the result in HTML formatting without any additional block marks or labels.

Ensure your response adheres strictly to these instructions:
- Use {text_language} for the initial explanation.
- Never put into the result names of the languages {text_language} or {user_language}.
- Explain the exact sentence where the term is marked with the <span class="highlighted-term"> tag.
Do not mention usage of the term in sentences where it is not marked
with the <span class="highlighted-term"> tag.
"""

    explain_prompt_template = ChatPromptTemplate.from_messages(
        [("system", explain_system_template), ("user", "{text}")]
    )

    # The reply to this prompt is JSON; it is consumed by `sentence_parser`, which
    # prefixes the translation with "text_language" whenever that field is truthy -
    # hence the request for null when the language is the expected one.
    sentence_system_template = """Given following text translate to
{user_language} only the sentence with term marked with a <span class="highlighted-term"> tag inside.
Translate only the sentence and not other parts of the text.
Expected text language is {text_language} but you should detect the actual language if it does not fit.
Give comments in {user_language} about parts that can be difficult to understand by
{user_language} student learning {text_language} - difficult words, forms and expressions etc.
Return result in json without any additional block marks or labels:
"translation" - the sentence translation to {user_language};
"sentence" - the original sentence with the marked term;
"text_language" - the actual text language. Null in this field if the text language is {text_language};
"comments" - comments.
"""

    sentence_prompt_template = ChatPromptTemplate.from_messages(
        [("system", sentence_system_template), ("user", "The text is: {text}")]
    )

    return {
        "Translate": {"template": translation_prompt_template, "parser": parser},
        "Dictionary": {"template": dictionary_prompt_template, "parser": parser},
        "Examples": {"template": examples_prompt_template, "parser": parser},
        "Explain": {"template": explain_prompt_template, "parser": parser},
        "Sentence": {"template": sentence_prompt_template, "parser": sentence_parser},
    }


In [9]:
import os
import os.path
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

import types
import importlib
from IPython.display import display, HTML
import lexiflux.llm

importlib.reload(lexiflux.llm)
from lexiflux.llm import Llm

# serbian_text = "Ljubav je najlepša stvar na svetu. Ljubav pokreće sve."
# word_to_translate = "Ljubav"

# Llm._create_article_templates = types.MethodType(_create_article_templates, Llm)
llm = Llm()


article_name = "Explain"
article_params = {
    "model_name": "gpt-3.5-turbo"  # "gpt-4-turbo"  # Specify the model to use
}

# Request fields shared by every sample. Each entry of `texts` overrides
# "text", "term", "text_language" and "user_language" for its own request.
data = {
    "text": 'pokreće sve. <selected>List</selected> sa drveta je pao na zemlju. Na stolu je bio list papira sa važnim beleškama. Ljubav je najlepša',
    "term": "list",
    "text_language": "Serbian",
    "user_language": "Russian",
    "word_ids": (0, ),
    "book_code": "my-book",
    "page_number": 1,
}

# Sample inputs; the term under test is wrapped in ** ** inside "text" and
# repeated in "term" so that both fields always describe the same sample.
texts = [
    {
        "text": 'pokreće sve. **List** sa drveta je pao na zemlju. Na stolu je bio list papira sa važnim beleškama. Ljubav je najlepša',
        "term": "list",
        "text_language": "Serbian",
        "user_language": "Russian",
    },
    {
        "text": 'pokreće sve. **List** sa drveta je pao na zemlju. Na stolu je bio list papira sa važnim beleškama. Ljubav je najlepša',
        "term": "list",
        "text_language": "Serbian",
        "user_language": "English",
    },
    {
        "text": 'Abbati, medico, patronoque **intima pande**.',
        "term": "intima pande",
        "text_language": "Serbian",
        "user_language": "Russian",
    },
    {
        "text": '**Deus** ex machina.',
        "term": "Deus",
        "text_language": "Serbian",
        "user_language": "Russian",
    },
    {
        "text": 'na brzinu sam uradio jutarnju gimnastiku i obukao se. To što **se dešavalo** delovalo mi je krajnje interesantno. Telefonski fonogram',
        "term": "se dešavalo",
        "text_language": "Serbian",
        "user_language": "Russian",
    },
]

# Only the first sample is requested; widen the slice to run more of them.
for sample in texts[:1]:
    # Merge into a fresh dict so the shared defaults are never mutated and one
    # sample's fields cannot leak into the next request.
    request = {**data, **sample}
    # print(llm._detect_sentence("", llm._hashable_dict(article_params), llm._hashable_dict(request)))

    article = llm.get_article(article_name, article_params, request)
    display(HTML(article))
    print("*" * 50)
    print()


Detected sentence: {'text': 'pokreće sve. ||**List** sa drveta je pao na zemlju.|| Na stolu je bio list papira sa važnim beleškama. Ljubav je najlepša', 'detected_language': 'Serbian'}


**************************************************

