In [1]:
import os
from pathlib import Path

from llama_cpp import Llama
from llama_cpp_agent.providers import LlamaCppPythonProvider

# Resolve the GGUF model file. The LLAMA_MODEL_PATH environment variable takes
# precedence; otherwise fall back to the LM Studio cache under the current
# user's home directory, so the notebook is not tied to one user account.
DEFAULT_MODEL_PATH = (
    Path.home()
    / ".cache/lm-studio/models/NousResearch/Hermes-3-Llama-3.1-8B-GGUF"
    / "Hermes-3-Llama-3.1-8B.Q4_K_M.gguf"
)
model_path = os.environ.get("LLAMA_MODEL_PATH", str(DEFAULT_MODEL_PATH))

# Create an instance of the Llama class and load the model
llama_model = Llama(model_path, n_batch=1024, n_threads=10, n_gpu_layers=40)

# Create the provider by passing the Llama class instance to the LlamaCppPythonProvider class
provider = LlamaCppPythonProvider(llama_model)

llama_load_model_from_file: using device Metal (Apple M2 Pro) - 21845 MiB free
llama_model_loader: loaded meta data with 27 key-value pairs and 292 tensors from /Users/benedikt/.cache/lm-studio/models/NousResearch/Hermes-3-Llama-3.1-8B-GGUF/Hermes-3-Llama-3.1-8B.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Hermes 3 Llama 3.1 8B
llama_model_loader: - kv   3:                       general.organization str              = NousResearch
llama_model_loader: - kv   4:                           general.basename str              = Hermes-3-Llama-3.1
llama_model_loader: - kv   5:                         general.size_label s

In [2]:
import sys
import os

# Make the repository root, the backend package directory and the notebook's
# working directory importable. Written to be idempotent: re-running the cell
# does not pile up duplicate sys.path entries.
_repo_root = os.path.abspath("..")
if _repo_root in sys.path:
    # Drop the earlier entry so the root can be re-inserted with top priority.
    sys.path.remove(_repo_root)
sys.path.insert(0, _repo_root)

for _extra_dir in (os.path.abspath("../backend"), os.path.abspath("")):
    if _extra_dir not in sys.path:
        sys.path.append(_extra_dir)

In [4]:
from backend.ontology import *
# Location of the ontology file (Turtle syntax) inside the data directory.
base_path = "../data"
onto_path = f"{base_path}/hero-ontology/hereditary_clinical.ttl"

# Parse the ontology into a graph and register the "bto" namespace prefix on it.
# NOTE(review): the classes listed by the query cell live under
# https://w3id.org/brainteaser/ontology/schema/ — confirm this prefix IRI is the
# one intended.
brainteaser_graph = Graph().parse(onto_path, format="turtle")
brainteaser_graph.bind("bto", "http://www.semanticweb.org/ontologies/2020/3/bto#")

# Hand the graph to the project's OntologyManager, with an OntologyConfig built
# without arguments.
onto_config = OntologyConfig()
oman = OntologyManager(onto_config, brainteaser_graph)

In [14]:
# Select every subject typed as owl:Class and load the result into a DataFrame.
# The rdf prefix is declared explicitly so the query is self-contained and does
# not rely on the graph or query helper pre-binding it.
classes = oman.q_to_df("""
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?s
WHERE {
    ?s rdf:type owl:Class.
}
""")
# Keep a copy of the result on disk next to the notebook.
classes.to_csv("classes.csv")

In [19]:
# Show the column labels of the query result as a plain list.
[*classes.columns]

[0]

In [22]:
# Print every class identifier returned by the query, one per line. The result
# frame has a single column labelled 0. A plain loop replaces the earlier list
# comprehension, which ran only for its print() side effect and left
# `classes_enriched` holding a list of None; the name now keeps the identifiers.
classes_enriched = classes[0].tolist()
for class_iri in classes_enriched:
    print(class_iri)

n1c9b84795635445d9e1594d522dba40cb1
http://ontology.eil.utoronto.ca/GCI/Environment/Pollution.owl#Air_pollution_concentration
http://purl.oclc.org/NET/ssnx/ssn#SensingDevice
http://www.w3.org/2000/10/swap/pim/contact#Person
http://www.w3.org/2006/time#Instant
http://www.wurvoc.org/vocabularies/om-1.8/Unit_of_measure
https://w3id.org/brainteaser/ontology/schema/ALSFRS
https://w3id.org/brainteaser/ontology/schema/Activity
https://w3id.org/brainteaser/ontology/schema/Administration
https://w3id.org/brainteaser/ontology/schema/AdverseDrugReaction
https://w3id.org/brainteaser/ontology/schema/AnatomicalSite
https://w3id.org/brainteaser/ontology/schema/BeforeOnset
https://w3id.org/brainteaser/ontology/schema/C6H6_concentration
https://w3id.org/brainteaser/ontology/schema/CO_concentration
https://w3id.org/brainteaser/ontology/schema/CSFAnalysis
https://w3id.org/brainteaser/ontology/schema/ClinicalEvaluation
https://w3id.org/brainteaser/ontology/schema/ClinicalTrial
https://w3id.org/brainteaser

In [None]:
from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings
from llama_cpp_agent.llm_agent import LlamaCppAgent
from llama_cpp_agent.chat_history import (
    BasicChatHistory,
    BasicChatMessageStore,
    BasicChatHistoryStrategy,
)
from llama_cpp_agent.messages_formatter import MessagesFormatterType
import math

from typing import Union


def calculate_a_to_the_power_b(a: Union[int, float], b: Union[int, float]):
    """
    Calculates a to the power of b

    Args:
        a: number
        b: exponent

    """
    # The docstring above is kept verbatim: the generated system prompt echoes its
    # summary and argument descriptions in the function list shown to the model.
    power = math.pow(a, b)  # math.pow returns a float, hence "8.0" for 2 and 3
    return f"Result: {power}"


# Build the structured-output settings by calling `from_functions` on
# LlmStructuredOutputSettings with the list of functions the model may call.

output_settings = LlmStructuredOutputSettings.from_functions(
    [calculate_a_to_the_power_b], allow_parallel_function_calling=True
)


# NOTE(review): `chat_history` is constructed here but is not passed to the agent
# or used anywhere else in this cell — confirm whether it should be handed to
# LlamaCppAgent or can be dropped.
chat_history = BasicChatHistory(
    chat_message_store=BasicChatMessageStore(),
    chat_history_strategy=BasicChatHistoryStrategy(),
)

# Create a LlamaCppAgent instance as before, including a system message with information about the tools available for the LLM agent.
llama_cpp_agent = LlamaCppAgent(
    provider,
    debug_output=True,
    # Plain string literal: the text has no placeholders, so no f-prefix is needed.
    system_prompt="You are an advanced AI, tasked to assist the user by calling functions in JSON format.",
    predefined_messages_formatter_type=MessagesFormatterType.CHATML,
)

# Define some user input
user_input = "Calculate a to the power of b: a = 2, b = 3"

# Pass the user input together with output settings to `get_chat_response` method.
# This will print the result of the function the LLM will call, it is a list of dictionaries containing the result.
print(
    llama_cpp_agent.get_chat_response(
        user_input, structured_output_settings=output_settings
    )
)

<|im_start|>system
Read and follow the instructions below:

<system_instructions>
You are an advanced AI, tasked to assist the user by calling functions in JSON format.
</system_instructions>


You can call functions to help you with your tasks and user queries. The available functions are:

<function_list>
Function: calculate_a_to_the_power_b
  Description: Calculates a to the power of b
  Parameters:
    a (int or float): number
    b (int or float): exponent
</function_list>

To call a function, respond with a JSON object (to call one function) or a list of JSON objects (to call multiple functions), with each object containing these fields:

- "function": Put the name of the function to call here. 
- "arguments": Put the arguments to pass to the function here.

The result of each function call will be returned to you before you need to respond again.<|im_end|>
<|im_start|>user
Calculate a to the power of b: a = 2, b = 3<|im_end|>
<|im_start|>assistant

llama_perf_context_print:        load time =    2505.26 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   213 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    41 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    4398.66 ms /   254 tokens



[
  {
    "function":  "calculate_a_to_the_power_b",
    "arguments": {
      "a": 2,
      "b": 3
    }
  }
]
[{'function': 'calculate_a_to_the_power_b', 'arguments': {'a': 2, 'b': 3}, 'return_value': 'Result: 8.0'}]
