In [1]:
import os
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
import openai

# OpenAI credentials are read from the environment; never hard-code the key here.
openai.api_key = os.getenv("OPENAI_API_KEY")
# LangSmith tracing is disabled while this is empty. Set it to "true" to enable
# tracing (that additionally requires LANGCHAIN_API_KEY to be set).
os.environ["LANGCHAIN_TRACING_V2"] = ""  # "true"

# Smoke-test the key by listing the models it can access.
try:
    models = openai.models.list()
    print("API key is valid. Available models:", [model.id for model in models.data])
# `openai.models.list()` is the openai>=1.0 API; in that version the exception
# classes live at the package top level (the old `openai.error` module is gone),
# so `openai.error.AuthenticationError` would itself fail with AttributeError.
except openai.AuthenticationError as e:
    print("API key is invalid:", str(e))

# Chat model used by the rest of the notebook.
openai_llm = ChatOpenAI(model="gpt-3.5-turbo", api_key=os.getenv("OPENAI_API_KEY"))  # gpt-4-turbo - more correct grammar info


API key is valid. Available models: ['whisper-1', 'tts-1', 'dall-e-2', 'tts-1-hd-1106', 'tts-1-hd', 'gpt-4-turbo-2024-04-09', 'gpt-4-turbo', 'gpt-3.5-turbo-1106', 'dall-e-3', 'gpt-4o-2024-05-13', 'gpt-4-0125-preview', 'gpt-4o', 'gpt-4-turbo-preview', 'text-embedding-3-small', 'text-embedding-3-large', 'gpt-3.5-turbo-16k', 'gpt-4-1106-preview', 'babbage-002', 'gpt-4', 'gpt-4-0613', 'gpt-3.5-turbo-0125', 'tts-1-1106', 'gpt-3.5-turbo', 'gpt-3.5-turbo-instruct', 'gpt-3.5-turbo-instruct-0914', 'text-embedding-ada-002', 'davinci-002']


In [3]:
def llama3_request(prompt, model="llama3"):
    """Send ``prompt`` to a local Ollama model and return its raw stdout.

    The prompt is passed as the positional argument of ``ollama run MODEL PROMPT``.
    The Ollama CLI has no ``--prompt`` flag, so passing one makes the command fail
    and produce no answer.

    Args:
        prompt: Text to send to the model.
        model: Name of the Ollama model to run. Defaults to ``"llama3"``.

    Returns:
        Whatever the ``ollama`` process wrote to stdout, as text.

    Raises:
        FileNotFoundError: If the ``ollama`` executable is not on PATH.
        RuntimeError: If ``ollama`` exits with a non-zero status; the message
            carries the process's stderr.
    """
    import subprocess

    result = subprocess.run(
        ['ollama', 'run', model, prompt],
        capture_output=True,
        text=True,
    )
    # Surface failures instead of silently returning an empty string.
    if result.returncode != 0:
        raise RuntimeError(
            f"ollama exited with status {result.returncode}: {result.stderr.strip()}"
        )
    return result.stdout


In [12]:
import os
import os.path
import sys
# Put the parent of the current working directory at the front of sys.path so
# that `import lexiflux` below can resolve from there (presumably the repository
# root when the notebook is run from its own folder - confirm).
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

import types
import importlib
from IPython.display import display, HTML
import lexiflux.language.llm

# Re-execute the module so edits made to it on disk are picked up without
# restarting the kernel; the from-import must come after the reload so that
# `Llm` is bound to the freshly reloaded class.
importlib.reload(lexiflux.language.llm)
from lexiflux.language.llm import Llm

# serbian_text = "Ljubav je najlepša stvar na svetu. Ljubav pokreće sve."
# word_to_translate = "Ljubav"

# Llm._create_article_templates = types.MethodType(_create_article_templates, Llm)
llm = Llm()

book_code = "my-book"
page_number = 1

# Passage shared by the first two cases; they differ only in the user's language.
list_passage = 'pokreće sve. List sa drveta je pao na zemlju. Na stolu je bio list papira sa važnim beleškama. Ljubav je najlepša'

# (text, term, user_language) for every scenario. Each one targets the first
# occurrence of the term and declares the text language as "Serbian".
case_specs = [
    (list_passage, "List", "Russian"),
    (list_passage, "List", "English"),
    ('Abbati, medico, patronoque intima pande.', "intima pande", "Russian"),
    ('Deus ex machina.', "Deus", "Russian"),
    (
        'na brzinu sam uradio jutarnju gimnastiku i obukao se. To što se dešavalo delovalo mi je krajnje interesantno. Telefonski fonogram',
        "se dešavalo",
        "Russian",
    ),
]

cases = [
    {
        "text": text,
        "term": term,
        "term_occurence": 1,
        "text_language": "Serbian",
        "user_language": user_language,
    }
    for text, term, user_language in case_specs
]

article_name = "Explain"
params = {"model": "gpt-3.5-turbo"}  # "gpt-4-turbo"

# For each case: locate the term's words in the text, then generate and render
# the article, followed by a separator line.
for case in cases:
    words_info = llm.prepare_data(case["text"], case["term"], case["term_occurence"])
    print(words_info)

    # The article input is the case itself plus the word positions found above.
    data = {
        **case,
        "word_slices": words_info["word_slices"],
        "term_word_ids": words_info["term_word_ids"],
    }

    article = llm.generate_article(article_name, params, data)
    display(HTML(article))
    print("*" * 50, end="\n\n")


Term: List (13, 17)
Word 0: pokreće (0, 7)
Word 1: sve (8, 11)
Word 2: List (13, 17)
Term word: 2
Word 3: sa (18, 20)
Word 4: drveta (21, 27)
Word 5: je (28, 30)
Word 6: pao (31, 34)
Word 7: na (35, 37)
Word 8: zemlju (38, 44)
Word 9: Na (46, 48)
Word 10: stolu (49, 54)
Word 11: je (55, 57)
Word 12: bio (58, 61)
Word 13: list (62, 66)
Word 14: papira (67, 73)
Word 15: sa (74, 76)
Word 16: važnim (77, 83)
Word 17: beleškama (84, 93)
Word 18: Ljubav (95, 101)
Word 19: je (102, 104)
Word 20: najlepša (105, 113)
{'word_slices': [(0, 7), (8, 11), (13, 17), (18, 20), (21, 27), (28, 30), (31, 34), (35, 37), (38, 44), (46, 48), (49, 54), (55, 57), (58, 61), (62, 66), (67, 73), (74, 76), (77, 83), (84, 93), (95, 101), (102, 104), (105, 113)], 'term_word_ids': [2]}
NLTK punkt tokenizer not available for Serbian. Using default.
{'text': 'pokreće sve. ||**List** sa drveta je pao na zemlju||. Na stolu je bio list papira sa važnim beleškama. Ljubav je najlepša', 'detected_language': 'Serbian'}


**************************************************

Term: List (13, 17)
Word 0: pokreće (0, 7)
Word 1: sve (8, 11)
Word 2: List (13, 17)
Term word: 2
Word 3: sa (18, 20)
Word 4: drveta (21, 27)
Word 5: je (28, 30)
Word 6: pao (31, 34)
Word 7: na (35, 37)
Word 8: zemlju (38, 44)
Word 9: Na (46, 48)
Word 10: stolu (49, 54)
Word 11: je (55, 57)
Word 12: bio (58, 61)
Word 13: list (62, 66)
Word 14: papira (67, 73)
Word 15: sa (74, 76)
Word 16: važnim (77, 83)
Word 17: beleškama (84, 93)
Word 18: Ljubav (95, 101)
Word 19: je (102, 104)
Word 20: najlepša (105, 113)
{'word_slices': [(0, 7), (8, 11), (13, 17), (18, 20), (21, 27), (28, 30), (31, 34), (35, 37), (38, 44), (46, 48), (49, 54), (55, 57), (58, 61), (62, 66), (67, 73), (74, 76), (77, 83), (84, 93), (95, 101), (102, 104), (105, 113)], 'term_word_ids': [2]}
NLTK punkt tokenizer not available for Serbian. Using default.
{'text': 'pokreće sve. ||**List** sa drveta je pao na zemlju||. Na stolu je bio list papira sa važnim beleškama. Ljubav

**************************************************

Term: intima pande (27, 39)
Word 0: Abbati (0, 6)
Word 1: medico (8, 14)
Word 2: patronoque (16, 26)
Word 3: intima (27, 33)
Term word: 3
Word 4: pande (34, 39)
Term word: 4
{'word_slices': [(0, 6), (8, 14), (16, 26), (27, 33), (34, 39)], 'term_word_ids': [3, 4]}
NLTK punkt tokenizer not available for Serbian. Using default.
{'text': '||Abbati, medico, patronoque **intima pande**||.', 'detected_language': 'Serbian'}


**************************************************

Term: Deus (0, 4)
Word 0: Deus (0, 4)
Term word: 0
Word 1: ex (5, 7)
Word 2: machina (8, 15)
{'word_slices': [(0, 4), (5, 7), (8, 15)], 'term_word_ids': [0]}
NLTK punkt tokenizer not available for Serbian. Using default.
{'text': '||**Deus** ex machina||.', 'detected_language': 'Serbian'}


**************************************************

Term: se dešavalo (61, 72)
Word 0: na (0, 2)
Word 1: brzinu (3, 9)
Word 2: sam (10, 13)
Word 3: uradio (14, 20)
Word 4: jutarnju (21, 29)
Word 5: gimnastiku (30, 40)
Word 6: i (41, 42)
Word 7: obukao (43, 49)
Word 8: se (50, 52)
Word 9: To (54, 56)
Word 10: što (57, 60)
Word 11: se (61, 63)
Term word: 11
Word 12: dešavalo (64, 72)
Term word: 12
Word 13: delovalo (73, 81)
Word 14: mi (82, 84)
Word 15: je (85, 87)
Word 16: krajnje (88, 95)
Word 17: interesantno (96, 108)
Word 18: Telefonski (110, 120)
Word 19: fonogram (121, 129)
{'word_slices': [(0, 2), (3, 9), (10, 13), (14, 20), (21, 29), (30, 40), (41, 42), (43, 49), (50, 52), (54, 56), (57, 60), (61, 63), (64, 72), (73, 81), (82, 84), (85, 87), (88, 95), (96, 108), (110, 120), (121, 129)], 'term_word_ids': [11, 12]}
NLTK punkt tokenizer not available for Serbian. Using default.
{'text': 'na brzinu sam uradio jutarnju gimnastiku i obukao se. ||To što **se dešavalo** delovalo mi je k

**************************************************

