# Before starting

Be sure to have installed `langchain>=1.0` and `langchain-openai>=1.1.9` (and, of course, `ToPG`).

In [None]:
import yaml
from topg import Topg

# import langchain agents utilities 
from langchain.agents import AgentState, create_agent
from langchain.chat_models import init_chat_model
from langchain.tools import ToolRuntime, tool
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.store.memory import InMemoryStore
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage

In [None]:
from huggingface_hub import hf_hub_download
import json

# Because we are in a notebook: Jupyter already runs an asyncio event loop, and
# nest_asyncio patches asyncio so that nested event loops are allowed.
# NOTE(review): presumably needed because ToPG runs async code internally — confirm.
import nest_asyncio
nest_asyncio.apply()

# Data loading

First, let's get some pre-processed data from the Hugging Face repository. Here, hyperpropositions (propositions + entities) have already been extracted. We are going to use the HotPotQA dataset as an example, but you could try with any other dataset.


In [None]:
# Dataset repository on the Hugging Face Hub hosting the pre-processed files.
REPO_ID = "mdelmas/Topg-kb"

# hf_hub_download returns the local path of the downloaded file.
# The JSON files are opened as UTF-8 explicitly so that reading them does not
# depend on the platform's default encoding (e.g. cp1252 on Windows).
hyperpropositions_path = hf_hub_download(
    repo_id=REPO_ID,
    filename="hotpotqa_hyperpropositions.json",
    repo_type="dataset",
)
with open(hyperpropositions_path, encoding="utf-8") as f_hyper:
    hyperpropositions = json.load(f_hyper)

passages_path = hf_hub_download(
    repo_id=REPO_ID,
    filename="hotpotqa_passages.json",
    repo_type="dataset",
)
with open(passages_path, encoding="utf-8") as f_passages:
    passages = json.load(f_passages)

To keep things fast, we are only going to load the first 100 passages. Of course, you can try with more if you want!

In [None]:
# Keep only the first 100 passages. Slicing (instead of indexing 0..99) also
# works when fewer than 100 passages are available.
selected_passages = passages[:100]
selected_passages_ids = {p["passage_id"] for p in selected_passages}

# Keep only the hyperpropositions that come from one of the selected passages.
selected_hyperpropositions = [
    hp
    for hp in hyperpropositions
    if hp["metadata"]["passage_id"] in selected_passages_ids
]

# Save both subsets as JSON files.
with open("selected_passages.json", "w") as f_p_out:
    json.dump(selected_passages, f_p_out, indent=4)
with open("selected_hyperpropositions.json", "w") as f_hp_out:
    json.dump(selected_hyperpropositions, f_hp_out, indent=4)

In [None]:
# FIRST RUN ONLY: create a new KB at the given path and keep the path of the
# generated config file.
# Do NOT re-run this cell afterwards: it will return an error because the
# directory already exists.
config_path = Topg.initialize(base_path="test", collection_name="MyKB")

# Load the system

In [None]:
# Read the generated YAML config. You could manually edit the config before
# mounting the system.
with open(config_path) as config_file:
    system_config = yaml.safe_load(config_file)

# Mount the system from that configuration
system = Topg(config=system_config)

In [None]:
# Import the selected passages from the JSON file written earlier in this notebook
system.load_passages_from_json(json_passages="selected_passages.json")

# Import the hyperpropositions (propositions + entities) of those passages
# NOTE(review): passages are loaded before hyperpropositions; presumably the order matters — confirm
system.load_hyperpropositions_from_json(json_hyperpropositions="selected_hyperpropositions.json")


In [None]:
# NOTE(review): presumably builds/loads the graph structures over the imported
# passages and hyperpropositions so that queries can use them — confirm.
system.store.load_graphs()

# Inference

Let's try running a single query!

In [None]:
# Quick sanity check: ask one question directly through the "local" mode
test_question = "The Distribution of Industry Act was passed by a man who was prime minister when?"
answer, memory = system.query(question=test_question, mode="local")

# Show the answer first, then the memory returned alongside it
print(answer)
print(memory)

`ToPG` does not have a routing mechanism by default to decide which mode should be called (naive, local or global). To mimic one, we can assign each ToPG mode to a tool (`@tool`) and equip an agent with them.
First, let's define the 3 tools:

In [None]:
@tool(
    description="Use for simple factual, single-hop questions requiring direct factual retrieval. Produces short, fact-based answers."
)
def naive_qa(query: str) -> str:
    # Forward the query to ToPG in "naive" mode. Only the answer is handed
    # back to the agent; the memory returned alongside it is dropped.
    answer, _ = system.query(mode="naive", question=query)
    return answer

@tool(
    description="Use for complex questions requiring multi-hop retrieval. Produces short, fact-based answers."
)
def local_qa(query: str) -> str:
    # Forward the query to ToPG in "local" mode. Only the answer is handed
    # back to the agent; the memory returned alongside it is dropped.
    answer, _ = system.query(mode="local", question=query)
    return answer

@tool(
    description="Use this tool for broad, analytical, or conceptual questions that require synthesis, explanation, or multi-factor reasoning. Produces long-form, comprehensive answers. Not for factual lookup."
)
def global_qa(query: str) -> str:
    # Forward the query to ToPG in "global" mode. Only the answer is handed
    # back to the agent; the memory returned alongside it is dropped.
    answer, _ = system.query(mode="global", question=query)
    return answer


Now, let's define an agent.

In [None]:
# Chat model that drives the agent.
# NOTE(review): the provider is presumably inferred from the model name (OpenAI
# here, hence the `langchain-openai` requirement) and needs valid API
# credentials in the environment — confirm.
model = init_chat_model("gpt-4o-mini")

In [68]:

# System prompt describing the three tools and when to use each one.
# Adjacent string literals are concatenated as-is, so every sentence except
# the last ends with a space to avoid gluing sentences together.
system_prompt = (
    "You are a retrieval agent specialised in Question Answering. "
    "You have access to three tools that can help you answer potential related question from the user: 'naive_qa', 'local_qa' and 'global_qa'. "
    "The 'naive_qa' tool is for specific factual questions. "
    "The 'local_qa' is used for more complex, multi-hop questions. "
    "The 'global_qa' is used for abstract/conceptual questions when a long-form answer (mini report) is expected. "
    "Otherwise, keep a normal flow of conversation with the user as a polite assistant."
)


# Agent state extended with a `user_id` field.
# NOTE: this class deliberately keeps the name `AgentState`, which shadows the
# class imported from `langchain.agents`; re-running this cell therefore
# subclasses the previous definition.
class AgentState(AgentState):
    user_id: str


# A simple agent with the 3 tools
agent = create_agent(
    model,
    tools=[naive_qa, local_qa, global_qa],
    system_prompt=system_prompt,
    state_schema=AgentState,
)


Let's try some query messages. For instance:
- Tell me everything about the history of Loch Leven Castle (should call `global_qa`)
- The Distribution of Industry Act was passed by a man who was prime minister when? (should call `naive_qa` or `local_qa`)
- In which county is the town in which Raymond Robertsen was born ? (should call `naive_qa` or `local_qa`)


These should trigger different tools !

Below, we display the different messages to see the sequence of events (HumanMessage, ToolCall, ToolResponse, AssistantResponse).


In [None]:
# Send one user message to the agent and collect the resulting conversation.
result = agent.invoke({
    "messages": [{"role": "user", "content": "Tell me everything about the history of Loch Leven Castle"}]
})

# Then print every step of the conversation in order:
# human message -> tool call(s) -> tool response(s) -> assistant answer.
for msg in result["messages"]:
    if isinstance(msg, HumanMessage):
        print(f"\n\n> Human message: {msg.content}")
    elif isinstance(msg, AIMessage):
        # An AI message may carry text, tool calls, or both. The two are
        # handled independently so that tool calls are still displayed when
        # the model also returns some text in the same message.
        if msg.content:
            print(f"\n\n> Assistant Message: {msg.content}")
        for tool_call in msg.tool_calls:
            tool_name = tool_call.get("name", "unknown")
            tool_args = tool_call.get("args", {})
            args_str = "; ".join(f"{k}: {v}" for k, v in tool_args.items())
            print(f"\n\n<< Calling Tool: {tool_name} with arguments: {args_str} >>")
    elif isinstance(msg, ToolMessage):
        # Output returned by one of the tools
        print(f"\n\n<< Tool {msg.name} response: {msg.content} >>")
    # Any other message type is ignored.