#### Test for connecting LlamaIndex retrieval outputs with LangGraph DAGs

Define the LlamaIndex models.

In [None]:
from src.model import get_llamaindex_model, get_llamaindex_model_mini, get_huggingface_embedding_model
from llama_index.core import Settings

# Instantiate both LLM variants; only the full model (`llm2`) is registered
# on the global LlamaIndex Settings below.
llm = get_llamaindex_model_mini()
llm2 = get_llamaindex_model()

# Register the embedding model and the full LLM as the Settings defaults.
Settings.embed_model = embed_model = get_huggingface_embedding_model()
Settings.llm = llm2

Define the parser, vector store, and knowledge graph (KG).

In [None]:
from src.parser import markdownParser

# Parse the markdown files under ../kgdata/ into nodes.
nodes = markdownParser(input_dir="../kgdata/")
print(f"Processed {len(nodes)} nodes.")
# The preview reads index 1, so at least two nodes are required; a plain
# truthiness check would still allow an IndexError on a single-node corpus.
# NOTE(review): the label says "First" while index 1 is shown — confirm whether
# index 0 was intended.
if len(nodes) > 1:
    print("First node text:", nodes[1].text)

Processed 945 nodes.
First node text: ## criteria
- critical | unconscious adult, not breathing normally


In [58]:
from llama_index.core.schema import MetadataMode

# Show the node text together with all of its metadata. The enum member is
# passed explicitly; the bare string "All" is not one of the MetadataMode values.
nodes[1].get_content(MetadataMode.ALL)

'file_path: c:\\Users\\newac\\OneDrive\\Desktop\\Master\\final_structure\\..\\kgdata\\01.md\nfile_name: 01.md\nfile_size: 8702\ncreation_date: 2025-04-21\nlast_modified_date: 2025-04-21\nheader_path: /01 Unconscious adult – not breathing normally/CRITERIA\n\n## criteria\n- critical | unconscious adult, not breathing normally'

In [5]:
# Build a FAISS-backed vector index over the first 100 parsed nodes.
from src import create_faiss_vector_store_and_context
from llama_index.core import VectorStoreIndex

# One subset feeds both the store/context helper and the index, so the two
# cannot disagree about which nodes are stored.
vector_nodes = nodes[:100]
vector_store, storage_context = create_faiss_vector_store_and_context(vector_nodes)
vector_index = VectorStoreIndex(vector_nodes, storage_context=storage_context)

In [None]:
from src.dynamicKG_builder import knowledge_graph_construction
import asyncio

# Build the knowledge-graph index through the async builder, pointing it at
# the persisted graph file.
kg_persist_path = "./kg_index_storage_v1/pg_store_v2_custom.json"
kg_build_task = knowledge_graph_construction(extractor="Custom", load_persist=kg_persist_path)
kg_index = asyncio.run(kg_build_task)

🔄 Loading persisted knowledge graph from ./kg_index_storage_v1/pg_store_v2_custom.json
✅ Successfully loaded persisted knowledge graph!


In [None]:
from pathlib import Path

# The target is assembled from path components so that no backslash separators
# (and the invalid string escapes they form, e.g. "\k" and "\p") are needed.
# NOTE(review): `persist_dir` ends in ".json" and differs from the
# "./kg_index_storage_v1/..." path the graph is loaded from — confirm that this
# is the intended location.
kg_persist_dir = Path("final_structure") / "kg_index_storage_v1" / "pg_store_v2_custom.json"
kg_index.storage_context.persist(persist_dir=str(kg_persist_dir))

In [None]:
# Design notes on controlling retrieval depth, kept as a bare string literal
# rather than executable code.
# NOTE(review): the snippets inside are illustrative only. The RecursiveRetriever
# that this notebook actually constructs takes a root id, `retriever_dict` and
# `node_dict`, not `graph_store`/`max_depth` — verify the sketched calls against
# the library documentation before reusing them.
"""An alternative approach to specifying the depth for a knowledge graph retriever is to use metadata filtering or recursive retrieval techniques to dynamically control the depth based on query requirements. This provides more flexibility compared to static depth settings.

Alternative Suggestion: Use Recursive Retrieval
Instead of setting a fixed depth, you can use recursive retrieval techniques to dynamically explore the graph based on the query. For example:

python

from llama_index.core.retrievers import RecursiveRetriever
recursive_retriever = RecursiveRetriever(
    graph_store=graph_store,
    max_depth=2,  # Maximum depth for recursive retrieval
    similarity_top_k=5  # Number of nodes to retrieve at each level
)
nodes = recursive_retriever.retrieve("Your query text here")
for node in nodes:
    print(node.text)
This approach allows the retriever to explore the graph recursively, fetching nodes and their relationships up to the specified depth.

Alternative Suggestion: Use Metadata Filtering
If your graph nodes have metadata indicating their depth or relationships, you can use metadata filters to control the retrieval depth dynamically:

python

from llama_index.core.vector_stores import MetadataFilter, MetadataFilters
filters = MetadataFilters(filters=[
    MetadataFilter(key="depth", operator="LE", value=2)  # Retrieve nodes with depth <= 2
])
retriever = graph_store.as_retriever(filters=filters)
nodes = retriever.retrieve("Your query text here")
for node in nodes:
    print(node.text)
This approach is particularly useful if your graph is annotated with metadata that specifies the depth or other hierarchical information.

Tradeoffs
Recursive Retrieval: Provides dynamic control over depth but may introduce additional computational overhead.
Metadata Filtering: Offers precise control based on node attributes but requires well-structured metadata in the graph.
"""
# Example of using the knowledge graph index to retrieve nodes

In [14]:
## The retrievers can be used independently, or combined with QueryFusionRetriever.

# The plain vector-index retriever is disabled here:
# vector_retriever = vector_index.as_retriever(similarity_top_k=10)

# Options for the knowledge-graph retriever, collected in one place.
kg_retriever_options = {
    "retriever_mode": "embedding",
    "include_text": True,
    "similarity_top_k": 10,
    "response_mode": "compact",
}
kg_retriever = kg_index.as_retriever(**kg_retriever_options)



In [15]:
# Query the knowledge-graph retriever. The vector-store comparison is disabled,
# so only its header line is printed.
kg_query = "help me revive a unconcious person"
kg_results = kg_retriever.retrieve(kg_query)

print("Vector Store Results:")
kg_hit_count = len(kg_results)
print(f"\nKnowledge Graph Results:{kg_hit_count} nodes retrieved")
for kg_hit in kg_results:
    print(kg_hit)
    

Vector Store Results:

Knowledge Graph Results:19 nodes retrieved
Node ID: 0507dbba-58c6-445f-bdcc-07203456c566
Text: Here are some facts extracted from the provided text:  Tidlig
HLR -> provides -> Veiledning We -> provides -> Instructions  ##
tidlig hlr tidlig hlr kan redde liv, men mange er usikre pÃ¥ hvordan
det skal gjÃ¸res og redde for skade. vi vil derfor gi instruksjoner,
veiledning og oppmuntring til alle innringere. innringere som vet
hvordan de kan b...
Score:  0.000

Node ID: a96384ea-843a-48fe-9ebf-9ddbc1abdae2
Text: Here are some facts extracted from the provided text:  Hypoksi
-> prevents -> normal pusting  ## sirkulasjonsstans hos barn
sirkulasjonsstans hos barn skyldes langt oftere hypoksi enn akutt
hjertesykdom. derfor bÃ¸r du alltid instruere i bÃ¥de innblÃ¥singer og
brystkompresjoner. hvis du er usikker pÃ¥ om barnet puster normalt,
start med Ã¥ sikre...
Score:  0.000

Node ID: c926eeb1-1339-41c5-8659-57310ad82361
Text: Here are some facts extracted from the provide

In [13]:
# Lookup table from node id to node, covering every parsed node.
all_nodes_dict = dict((parsed.node_id, parsed) for parsed in nodes)


In [14]:
from llama_index.core.retrievers import RecursiveRetriever

# Build the chunk retriever in this cell instead of relying on a
# `vector_retriever` global: that name is only assigned in a later cell, so a
# fresh top-to-bottom run would otherwise fail with NameError here.
chunk_vector_retriever = vector_index.as_retriever(similarity_top_k=10)

# Recursive retriever rooted at the "vector" entry; `all_nodes_dict` supplies
# the id -> node lookup.
retriever_chunk = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": chunk_vector_retriever},
    node_dict=all_nodes_dict,
    verbose=True,
)

In [None]:
# Run the recursive retriever and print the text of each hit.
rr_query = "help me revive an unconcious person"
rrnodes = retriever_chunk.retrieve(rr_query)
for rr_hit in rrnodes:
    print(rr_hit.text)

In [43]:
from llama_index.core.indices.property_graph import VectorContextRetriever

# Vector-context sub-retriever over the knowledge graph's property graph store.
vector_retriever = VectorContextRetriever(
    kg_index.property_graph_store,
    vector_store=kg_index.vector_store,  # only needed when the graph store doesn't support vector queries
    include_text=True,  # include source chunk text with retrieved paths
    similarity_top_k=10,  # the number of nodes to fetch
    path_depth=2,  # the depth of relations to follow after node retrieval
)

# Expose it as the only sub-retriever of the index-level retriever.
kg_sub_retrievers = [vector_retriever]
kgretriever = kg_index.as_retriever(sub_retrievers=kg_sub_retrievers)

In [None]:
# Query the graph retriever, report the hit count, then print every hit.
kg_query_text = "help me revive an unconcious person"
kgr = kgretriever.retrieve(kg_query_text)
print(f"{len(kgr)} nodes retrieved from knowledge graph retriever")
for kg_node in kgr:
    print(kg_node)

#### This is cool and all, but the focus is on connecting LangGraph (lg) and LlamaIndex (li) (from self_eval.ipynb)

In [48]:
from src import get_azure_openai_model, get_azure_openai_chat_model, get_azure_openai_mini_model

# Azure OpenAI chat model and its "mini" counterpart, created in that order.
model, model_mini = get_azure_openai_chat_model(), get_azure_openai_mini_model()


In [54]:
#### Grader retrieval
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser


# Data model
# Structured-output schema for the retrieval grader: one string field holding
# the relevance verdict.
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the LLM as part of the schema — treat them as prompt text when
# editing.
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # Expected to be 'yes' or 'no' per the description; the type itself is a plain str.
    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )


# Grader prompt: system instructions plus a human turn carrying the retrieved
# document and the user question.
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.  \n"""
grader_messages = [
    ("system", system),
    ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
]
grade_prompt = ChatPromptTemplate.from_messages(grader_messages)

# Chat model wrapped so that its answer is returned as a GradeDocuments object.
llm = model
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Pipeline: prompt -> structured grader (no string output parser is applied).
retrieval_grader = grade_prompt | structured_llm_grader
question = "how to revive an unconscious person"
docs = vector_retriever.retrieve(question)

# Grade only the first two hits, one call per document, using each hit's text.
# The grader is never invoked on the whole `docs` list: that would place the
# list's repr in the prompt, and the verdict would not be used.
sample_doc = docs[:2]
for doc in sample_doc:
    print(doc.text)
    response = retrieval_grader.invoke({"question": question, "document": doc.text})
    print("Grader response:", response)











Here are some facts extracted from the provided text:

caller -> watches -> unconscious person
medics -> arrives -> unconscious person

### advice 1. important information to the caller
- help is on the way. i may need to phone you back, so keep this phone free until the medics arrive.
- watch the person all the time. let me know immediately if anything changes.
Grader response: binary_score='no'
Here are some facts extracted from the provided text:

monitoring -> requires -> unconscious person
monitoring -> applies_to -> unconscious person

### if yes
- help the person to find a comfortable position.
- keep an eye on (monitor) him / her continuously.
- help is on the way.
- call back immediately if s/he deteriorates (gets worse)
- keep in touch with the caller if necessary
Grader response: binary_score='yes'
