# Detailed usage of the library

**Setup**

In [6]:
import knex

# Select the LLM backend used by knex. Alternatives, kept for reference:
#   knex.init('ollama', 'llama3.1', 'http://127.0.0.1:5000')  -> local LLM (through proxy)
#   knex.init('openai', 'gpt-4o')                              -> OpenAI LLM
knex.init(
    'ollama',
    'llama3.1',
    'http://localhost:11434',
)  # Local LLM (direct)

# Sample biography used by the cells below; strip() drops the newlines that
# surround the triple-quoted literal.
text = """
René Dubois naît le 17.8.1905 aux Verrières, et meurt le 16.11.1976 à Marin-Epagnier, protestant, de Buttes.
Fils d'Henri Adolphe, fonctionnaire postal.
Marié(e) à Suzanne Mina Arnoux, Française, fille de Jules Cyprien.
Licence en sciences économiques à Neuchâtel.
Officier instructeur dès 1929.
Chef d'état-major des troupes d'aviation et de DCA (1953-1955).
Commandant de la brigade légère 1, de la division 2 (1958-1961), du corps d'armée de campagne 1 (1962-1967).
""".strip()

**Extraction part**

In [None]:
# Run the extraction step on the raw text
information = knex.extraction(text)

# Display every extracted table, each one preceded by its key
info_dfs = information.dataframes()
for table_name in info_dfs.keys():
    table_frame = info_dfs[table_name]
    print(table_name)
    display(table_frame)

**Knowledge part**

In [None]:
# Build the graph from the previously extracted information
graph = knex.knowledge(information)

# Retrieve the entities, the triples and the ontology (classes and properties)
entities, triples, classes, properties = graph.dataframes()

# Write each of those tables to its own CSV file, without the index column
csv_targets = (
    (entities, 'entities.csv'),
    (triples, 'triples.csv'),
    (classes, 'classes.csv'),
    (properties, 'properties.csv'),
)
for frame, csv_path in csv_targets:
    frame.to_csv(csv_path, index=False)

# Export the full generated graph as a single dataframe
graph_df = graph.to_dataframe()
graph_df.to_csv('./graph.csv', index=False)

# Visualize the generated graph
graph.get_visuals('./graph.html')

# Save the graph on disk
graph.save('./graph.pkl')

# Full pipeline of the library

In [None]:
import knex

# Select the LLM backend used by knex. Alternatives, kept for reference:
#   knex.init('ollama', 'llama3.1', 'http://127.0.0.1:5000')  -> local LLM (through proxy)
#   knex.init('openai', 'gpt-4o')                              -> OpenAI LLM
knex.init(
    'ollama',
    'llama3.1',
    'http://localhost:11434',
)  # Local LLM (direct)

# Input biography; strip() drops the newlines that surround the literal
text = """
René Dubois naît le 17.8.1905 aux Verrières, et meurt le 16.11.1976 à Marin-Epagnier, protestant, de Buttes.
Fils d'Henri Adolphe, fonctionnaire postal.
Marié(e) à Suzanne Mina Arnoux, Française, fille de Jules Cyprien.
Licence en sciences économiques à Neuchâtel.
Officier instructeur dès 1929.
Chef d'état-major des troupes d'aviation et de DCA (1953-1955).
Commandant de la brigade légère 1, de la division 2 (1958-1961), du corps d'armée de campagne 1 (1962-1967).
""".strip()

# Single call running the whole pipeline, with verbose output disabled
graph = knex.knowledge_extraction(text, verbose=False)

# Results: CSV export of the graph, visualization, and the saved graph itself
full_graph_df = graph.to_dataframe()
full_graph_df.to_csv('./graph.csv', index=False)
graph.get_visuals('./graph.html')
graph.save('./graph.pkl')

# Batch executions

**Setup**

In [4]:
import contextlib
import os
import sys
from datetime import datetime

from langchain_community.callbacks.manager import get_openai_callback

import geovpylib.database as db
import gmpykit as kit
import knex

# Progress tracker driven by the batch loop (begin / iter / end)
eta = kit.Eta()

# Select the LLM backend used by knex. Alternatives, kept for reference:
#   knex.init('ollama', 'llama3.1', 'http://127.0.0.1:5000')  -> local LLM (through proxy)
#   knex.init('openai', 'gpt-4o')                              -> OpenAI LLM
knex.init(
    'ollama',
    'llama3.1',
    'http://localhost:11434',
)  # Local LLM (direct)

# Open the database connection used to fetch the notices
db.connect_yellow('switzerland_and_beyond')

**Text gathering**

In [5]:
# Fetch the url, name and notice of every person
table = db.query('select url, name, notice from hls.person')

# Draw a reproducible sample of 2 persons, prefix each notice with the person's
# name, and renumber the rows from 0.
texts = (
    table
    .sample(2, random_state=42)
    .assign(notice=lambda rows: rows['name'] + ':\n' + rows['notice'])
    .reset_index(drop=True)
)

**Execution**

In [6]:
# Run the full KNEX pipeline on every sampled notice. Each person gets a folder
# holding log.txt (source notice + verbose pipeline output), graph.html and
# graph.csv. Token usage, cost and timing are reported at the end.
begin_time = datetime.now()
with get_openai_callback() as cb:
    eta.begin(len(texts), 'Run KNEX')
    for _, text in texts.iterrows():

        # Prepare disk. makedirs also creates the parent folder on a first run
        # and accepts an already existing folder on a re-run (os.mkdir does neither).
        dir_name = 'knex_poc_hls_extract'
        file_name = kit.to_snake_case(text['name'])
        root_path = f'./{dir_name}/{file_name}/'
        os.makedirs(root_path, exist_ok=True)

        # The notices are accented French text: force UTF-8 rather than relying
        # on the platform default encoding.
        with open(root_path + 'log.txt', 'w', encoding='utf-8') as file:

            # Initialize logs
            file.write(text['url'])
            file.write('\n\n' + kit.wrap(text['notice']))
            file.write('\n\n---------------\n\n')

            # Redirect the verbose output to the log file. redirect_stdout puts
            # the previous stdout back on exit, even when the extraction raises,
            # so the notebook output is never left pointing at a closed file.
            with contextlib.redirect_stdout(file):
                # Pass the prepared notice string, not the whole row
                graph = knex.knowledge_extraction(text['notice'], verbose=True)

        # Save the result. Anything get_visuals prints is discarded, and the
        # devnull handle is closed as soon as the call returns.
        with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
            graph.get_visuals(root_path + 'graph.html')
        df = graph.to_dataframe()
        # index=False keeps this file consistent with the other graph.csv exports
        df.to_csv(root_path + 'graph.csv', index=False)
        eta.iter()
    eta.end()
    end_time = datetime.now()

    # Values shared by the three report sections below
    elapsed = end_time - begin_time
    n_texts = len(texts)
    n_total = len(table)

    print()
    print(f'FOR {n_texts} PERSONS:')
    print(f"Total Tokens: {cb.total_tokens}")
    print(f"Prompt Tokens: {cb.prompt_tokens}")
    print(f"Completion Tokens: {cb.completion_tokens}")
    print(f"Total Cost (USD): ${cb.total_cost}")
    print(f"Total execution time: {elapsed}")
    print()
    print('AVERAGE FOR 1 PERSON')
    print(f"Total Tokens: {cb.total_tokens / n_texts}")
    print(f"Prompt Tokens: {cb.prompt_tokens / n_texts}")
    print(f"Completion Tokens: {cb.completion_tokens / n_texts}")
    print(f"Total Cost (USD): ${cb.total_cost / n_texts}")
    print(f"Total execution time: {elapsed / n_texts}")
    print()
    # Linear extrapolation of the per-person averages to every row of `table`
    print('ESTIMATION FOR THE FULL HLS')
    print(f"Total Tokens: {(cb.total_tokens / n_texts) * n_total}")
    print(f"Prompt Tokens: {(cb.prompt_tokens / n_texts) * n_total}")
    print(f"Completion Tokens: {(cb.completion_tokens / n_texts) * n_total}")
    print(f"Total Cost (USD): ${(cb.total_cost / n_texts) * n_total}")
    print(f"Total execution time: {(elapsed / n_texts) * n_total}")
