In [None]:
import os
import logging
import sys
import json
from dotenv import load_dotenv

from llama_index.core import (
    SimpleDirectoryReader,
    KnowledgeGraphIndex,
    StorageContext,
    load_index_from_storage,
    Settings,
    Document,
    QueryBundle,
    VectorStoreIndex, # We might need this temporarily if we want node parsing
    PromptTemplate,
)
from llama_index.core.graph_stores import SimpleGraphStore
from llama_index.core.vector_stores.simple import SimpleVectorStore
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import TextNode
from llama_index.core.program import LLMTextCompletionProgram
from llama_index.core.output_parsers import PydanticOutputParser
from llama_index.llms.openai import OpenAI


# --- Property Graph Imports ---
from llama_index.core.graph_stores.simple_labelled import SimplePropertyGraphStore
from llama_index.core.indices.property_graph import PropertyGraphIndex

# --- Query Engine and Retriever ---
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import BaseRetriever # For type hinting
from llama_index.core import get_response_synthesizer # Factory for response synthesizer

from pyvis.network import Network
from tqdm.notebook import tqdm # Or standard tqdm if not in notebook

from pydantic import BaseModel, Field
from typing import List, Tuple, Any, Dict

In [2]:
import nest_asyncio
nest_asyncio.apply()

In [3]:
from dotenv import load_dotenv
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
load_dotenv()


endpoint = "https://d-ais-eus-ais-chatbots.openai.azure.com/"
model_name = "o1-mini"
deployment = "o1-mini"
subscription_key = os.getenv("AZURE_OPENAI_API_KEY")
api_version = "2024-12-01-preview" # Use a valid API version

llm = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=subscription_key,
    api_version=api_version,
    deployment_name=deployment,
    model_name=model_name,
    temperature=1.0
)

embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-m3"
    )

Settings.embed_model = embed_model

llm2 = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=subscription_key,
    api_version="2024-05-01-preview",
    deployment_name="gpt-4o-mini-test",
    model_name="gpt-4o-mini-test",
    temperature=1.0
)

#### Graph store
the raw file is stored here: "storage_with_embeddings_in_kg.json"

the SimpleGraphStore is here: "/storage"

In [24]:
kg_raw = r'storage_with_embeddings_in_kg.json'

kg_index, node_embeddings = load_knowledge_graph_from_json(kg_raw)
# kg_index = load_index_from_storage(storage_context, index_id="knowledge_graph")

AttributeError: 'SimpleGraphStore' object has no attribute 'add_node'

In [164]:

from llama_index.core.graph_stores.types import (
    EntityNode, Relation, LabelledPropertyGraph
)
from llama_index.core.graph_stores import SimplePropertyGraphStore

def load_knowledge_graph_from_json(json_file_path):
    """
    Load knowledge graph data from a JSON file and initialize LlamaIndex stores 
    using SimplePropertyGraphStore instead of SimpleGraphStore.
    
    Args:
        json_file_path: Path to the JSON file containing nodes and relations
        
    Returns:
        kg_index: Initialized KnowledgeGraphIndex
        node_embeddings: Dictionary mapping node IDs to their embeddings
    """
    # Read the JSON file
    with open(json_file_path, 'r') as f:
        graph_data = json.load(f)
    
    # Extract nodes and relations
    nodes = graph_data.get("nodes", {})
    relations = graph_data.get("relations", {})
    
    # Initialize stores
    property_graph = LabelledPropertyGraph()
    property_graph_store = SimplePropertyGraphStore(graph=property_graph)
    vector_store = SimpleVectorStore()
    
    storage_context = StorageContext.from_defaults(
        graph_store=property_graph_store,
        vector_store=vector_store
    )
    
    # Store node embeddings for later use
    node_embeddings = {}
    
    # Create EntityNodes for the property graph and TextNodes for the vector store
    entity_nodes = []
    text_nodes = []

    
    for node_id, node_data in nodes.items():
        # Extract node properties
        label = node_data.get("label", "node")
        properties = node_data.get("properties", {}) or {}
        name = node_data.get("name", node_id)
        
        # Extract and process embedding
        embedding = node_data.get("embedding", [])
        if isinstance(embedding, str):
            # Handle string-formatted embeddings
            if embedding.startswith("[") and embedding.endswith("]"):
                embedding = json.loads(embedding)
            else:
                embedding = []
        
        # Create an EntityNode for the property graph
        entity_node = EntityNode(
            name=name,
            label=label,
            properties=properties
        )
        entity_nodes.append(entity_node)
        
        # Create a TextNode for the vector store
        node_text = f"{name}: {label}".lower()
        text_node = TextNode(
            text=node_text,
            id_=node_id,
            metadata={
                "name": name,
                "label": label,
                **properties
            }
        )
        
        # Store embedding if available
        if embedding:
            embedding_array = np.array(embedding, dtype=np.float32)
            node_embeddings[node_id] = embedding_array
            text_node.embedding = embedding_array.tolist()
        
        text_nodes.append(text_node)
    
    # Add all EntityNodes to the property graph store
    property_graph_store.upsert_nodes(entity_nodes)
    
    # Add all TextNodes to the vector store
    vector_store.add(text_nodes)
    
    # Create Relations and add them to the property graph store
    relation_objects = []
    for rel_id, rel_data in relations.items():
        source_id = rel_data.get("source_id")
        target_id = rel_data.get("target_id")
        rel_label = rel_data.get("label")
        rel_properties = rel_data.get("properties", {}) or {}
        
        if source_id and target_id and rel_label:
            relation = Relation(
                label=rel_label,
                source_id=source_id,
                target_id=target_id,
                properties=rel_properties
            )
            relation_objects.append(relation)
    
    # Add all relations to the property graph store
    property_graph_store.upsert_relations(relation_objects)
    
    # Create embedding model
    embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-m3"
    )  # Replace with your preferred embedding model
    Settings.embed_model = embed_model # Replace with your preferred model
    
    # Persist the storage context
    # graph_store = SimplePropertyGraphStore.persist(persist_path="./SPGstorage")
    storage_context.persist(persist_dir="./storage")
   
    kg_index = KnowledgeGraphIndex(
        [],  # No documents needed since we built the graph manually
        llm=llm,
        storage_context=storage_context,
        
    )

    # property graph index
    p_kg_index = PropertyGraphIndex.from_existing(
    property_graph_store=property_graph_store,
    vector_store=vector_store,
    embed_model=embed_model,
    storage_context=storage_context,  
    llm=llm                   
)
    
    return kg_index, node_embeddings, p_kg_index

In [149]:
load_knowledge_graph_from_json(kg_raw)

Number of nodes: 6007
Number of relations: 6496
Unique relation labels: 1332
Number of unique node names: 5698
Node ID: Important Information to the Caller
Node Data: {'label': 'subject', 'embedding': [0.016924625262618065, 0.03575928136706352, -0.05892634019255638, 0.004378217272460461, -0.018147598952054977, -0.04115688428282738, 0.006678886711597443, -0.025580881163477898, -0.0076911235228180885, 0.01850573904812336, 0.014300176873803139, 0.0041771866381168365, -0.010296151041984558, -0.0063529484905302525, -0.00010613004269544035, -0.022726163268089294, -0.005216141231358051, -0.02079581469297409, -0.01584470644593239, 0.005753336939960718, -0.01436888612806797, 0.03183547034859657, -0.025949664413928986, 0.003342696465551853, -0.017934715375304222, 0.021911881864070892, -0.012286962009966373, -0.007130986545234919, -0.019872350618243217, -0.008588476106524467, -0.0066472003236413, 0.00037115486338734627, 0.013172182254493237, -0.009251204319298267, -0.009280216880142689, -0.041780

In [165]:
kg_index, node_embeddings, p_kg_index = load_knowledge_graph_from_json(kg_raw)

  kg_index = KnowledgeGraphIndex(


In [166]:
# save and load
p_kg_index.storage_context.persist(persist_dir="./SPGstorage")

In [None]:
# Save as HTML
p_kg_index.property_graph_store.save_networkx_graph(name="property_graph.html")



In [202]:
nodes = p_kg_index._graph_store.get(ids=["Serious suicide attempt by drowning"])
triplets = p_kg_index._graph_store.get_rel_map(nodes, depth=2, limit=50)
print(len(triplets))
for triple in triplets:
    # print(triple)
    s,r,o = triple

    relation_info = {
        'relation_label': r.label,
        'source_id': r.source_id,
        'target_id': r.target_id,
        'subject_name': s.name,
        'object_name': s.name
    }

    print(f"{r.source_id} -> {r.label} -> {r.target_id}")






50
Serious suicide attempt by drowning -> has_criteria -> Critical
Serious suicide attempt by drowning -> has_code -> 1.2.7
Critical -> has_symptom -> Unwell and very frail
Major blood loss -> has_level -> Critical
Unnormal Bleeding in Mother After Delivery -> can become -> Critical
Chest pain or discomfort in the chest - Unwell, nauseous -> has_level -> Critical
Critical -> has_symptom -> Alert but decreasing consciousness
Fall from a height and electricity passing through the body at the same time -> has_criteria_level -> Critical
Pregnant and contractions between weeks 20 and 36 -> has_criteria_level -> Critical
Has an injury, high risk patient -> has_code -> 1.2.7
Act of war -> has_criteria -> Critical
Critical -> has_symptom -> Alert, but decreasing consciousness
Paralysis -> has_criteria_level -> Critical
Serious suicide attempt by Involving other method of injury or other damaging situation -> has_criteria -> Critical
May have a serious injury due to fire, smoke, scalding or ele

In [128]:
q ="Important Information to the Caller" #node_id

# triplets = kg_index._graph_store.get_triplets(ids=[q])
# print(triplets)
# for source, relation, target in triplets:
#     print(f"(source.name) -[{relation.label}]-> {target.name}")

#embed query
query_embedding = embed_model.get_text_embedding(q)
# query_embedding_array = np.array(query_embedding)
vector_store = p_kg_index._vector_store

# Query the vector store for top-k similar nodes
from llama_index.core.vector_stores.types import VectorStoreQuery
query = VectorStoreQuery(query_embedding=query_embedding, similarity_top_k=5)
result = vector_store.query(query)
print(result)


VectorStoreQueryResult(nodes=None, similarities=[1.0, 0.9913299963160538, 0.9825727771385746, 0.9674328471836776, 0.9624834402397919], ids=['Important Information to the Caller', 'Important Information to Caller', 'Important information to the Caller', 'Important information to the caller', 'Important information to caller'])


In [210]:
from llama_index.core.indices.property_graph import VectorContextRetriever

# Initialize the retriever
vector_retriever = VectorContextRetriever(
    graph_store=kg_index._graph_store,
    vector_store=kg_index._vector_store,
    embed_model=embed_model,
    similarity_top_k=3,  # Retrieve top 5 similar nodes
    path_depth=2,        # Traverse 1 level of relationships
    include_text=True,   # Include node text in the results
    similarity_threshold=0.8,  # Set a similarity threshold
    limit=100,  # Limit the number of nodes to retrieve
)

# Retrieve nodes based on user query
nodes = vector_retriever.retrieve("drowning adult ") # TODO: pass the user query to llm first to get kinda make the cypher query and reranking using llm or something else......

# Process and print the results
print(len(nodes))
for node in nodes:
    # print(f"Node Text: {node.node.get_content(metadata_mode='llm')}")
    print(node.get_text())


97
Serious suicide attempt by drowning -> has_criteria -> Critical
Serious suicide attempt by drowning -> has_code -> 1.2.7
Unconscious adult -> has_code -> 1.2
Unconscious adult -> has_criteria_level -> Critical
Drowning -> has_criterion -> Could have inhaled water, no other symptoms
Drowning -> has_criterion -> Diving accident, possible neck injury
Drowning -> has_criterion -> Danger of (or suspected) moderate, severe or profound hypothermia
Drowning -> requires_action -> Start giving breaths quickly
Drowning -> has_criterion -> Seems cold, unable to find shelter and warmth
Drowning -> has_criterion -> Shortness of breath or difficulty breathing
Drowning -> has_criterion -> Fallen into water from a height
Break in gas supply or sudden illness in diver -> can_cause -> Drowning
Drowning -> has_criterion -> Exhausted, but alert and breathing normally
Most common cause of respiratory problems in divers -> is -> Drowning
Drowning -> has_criterion -> Missing persons in or by water
Drowning

In [162]:
from llama_index.core.indices.property_graph import TextToCypherRetriever

cypher_retriever = TextToCypherRetriever(
    p_kg_index.property_graph_store,
    llm=llm,
    text_to_cypher_template="MATCH (n)-[r]->(m) WHERE n.name = '(query)' RETURN n, r, m LIMIT 10",
)

nodes = cypher_retriever.retrieve("Drowning")
for node in nodes:
    print(node.text)


ValueError: The provided graph store does not support cypher queries.

In [163]:
from llama_index.core.indices.property_graph import CypherTemplateRetriever

cypher_template_retriever = CypherTemplateRetriever(
    p_kg_index.property_graph_store,
    template="MATCH (n)-[r]->(m) WHERE n.name = '{query}' RETURN n, r, m LIMIT 10",
)

nodes = cypher_template_retriever.retrieve("Drowning")
for node in nodes:
    print(node.text)


TypeError: CypherTemplateRetriever.__init__() missing 2 required positional arguments: 'output_cls' and 'cypher_query'

In [153]:
nodes = vector_retriever.retrieve("drowning adult")
# print(nodes)
context_str = "\n\n".join([node.node.get_content() for node in nodes])
# print("context_str\n",context_str)

prompt = f"""Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.


Only answer the question based on the context information. Say "I don't know" if the context information does not provide enough information to answer the question.
Query: i found a drowning adult, what should I do?
Answer:"""
# print(prompt)
response = llm.complete(prompt)
response.text


'You should start giving breaths quickly.'

In [92]:
prompt = f"""Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, provide a comprehensive step-by-step response to the query.

This is an emergency situation, so include ALL relevant actions, precautions, and critical information from the context.
Organize your answer into:
1. Immediate life-saving actions
2. Assessment criteria to check
3. Additional important considerations
4. Level of criticality

Query: i found a drowning adult, what should I do?
Answer:"""

In [93]:
response = llm.complete(prompt)
response.text

"If you have found a drowning adult, it is crucial to act swiftly and effectively. Follow the comprehensive steps below to maximize the chances of a successful rescue and ensure the safety of both you and the victim.\n\n---\n\n### 1. Immediate Life-Saving Actions\n\n- **Ensure Your Safety First:**\n  - Before attempting a rescue, assess the situation to ensure you are not putting yourself in danger. Use a reaching tool (like a pole or a rope) if available.\n  \n- **Rescue the Person Safely:**\n  - If it is safe, carefully pull the person out of the water to prevent both of you from drowning.\n  \n- **Check Responsiveness and Breathing:**\n  - **Unconsciousness:** If the person is unconscious, check for breathing.\n  - **Start Giving Breaths Quickly:** Begin rescue breathing immediately if the person is not breathing or only gasping.\n  \n- **Call Emergency Services:**\n  - Dial emergency services (e.g., 911) immediately to get professional medical help on the way.\n  \n- **Begin CPR if

In [81]:
def transform_triplets_to_natural_language(context_str):
    lines = context_str.strip().split('\n\n')
    transformed_lines = []
    
    for line in lines:
        if "->" in line:
            parts = line.split("->")
            if len(parts) == 3:
                subject = parts[0].strip()
                predicate = parts[1].strip()
                obj = parts[2].strip()
                
                # Transform triplet into natural language
                if predicate == "has_criterion":
                    transformed_lines.append(f"A person who is drowning may present with: {obj}")
                elif predicate == "requires_action":
                    transformed_lines.append(f"CRITICAL ACTION REQUIRED: {obj}")
                elif predicate == "has_code" or predicate == "has_percentage":
                    # Skip codes and percentages as they're less relevant for emergency response
                    continue
                elif predicate == "is_level_of_criticality":
                    transformed_lines.append(f"This situation is classified as: {obj}")
                else:
                    transformed_lines.append(f"{subject} {predicate.replace('_', ' ')} {obj}.")
        else:
            transformed_lines.append(line)
    
    return "\n".join(transformed_lines)

# Transform the context before using it in the prompt
transformed_context = transform_triplets_to_natural_language(context_str)

print("Transformed Context:\n", transformed_context)

Transformed Context:
 Serious suicide attempt by drowning has criteria Critical.
A person who is drowning may present with: Could have inhaled water, no other symptoms
A person who is drowning may present with: Diving accident, possible neck injury
A person who is drowning may present with: Danger of (or suspected) moderate, severe or profound hypothermia
CRITICAL ACTION REQUIRED: Start giving breaths quickly
A person who is drowning may present with: Seems cold, unable to find shelter and warmth
A person who is drowning may present with: Shortness of breath or difficulty breathing
A person who is drowning may present with: Fallen into water from a height
Break in gas supply or sudden illness in diver can cause Drowning.
A person who is drowning may present with: Exhausted, but alert and breathing normally
Most common cause of respiratory problems in divers is Drowning.
A person who is drowning may present with: Missing persons in or by water
A person who is drowning may present with: 

In [94]:
prompt = f"""Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, provide a comprehensive answer to help someone dealing with a drowning emergency.

IMPORTANT INSTRUCTIONS:
1. Prioritize information that indicates immediate actions to take
2. Include all critical assessment criteria that would help evaluate the severity
3. Mention any specific warnings or precautions from the context
4. Note the level of criticality of this situation
5. Organize your answer in a clear, step-by-step format for someone in an emergency

Only answer the question based on the context information. Say "I don't know" if the context information does not provide enough information to answer the question.

Query: i found a drowning adult, what should I do?
Answer:"""

response = llm.complete(prompt)
response.text  # This will parse the response into a structured format
#structured response




lets try propertyknowledgegraphs and texttocypher query

In [155]:
# querying propertygraphindex
from llama_index.core.indices.property_graph import (
    LLMSynonymRetriever,
    VectorContextRetriever,
    TextToCypherRetriever,
)

sub_retrievers = [
    LLMSynonymRetriever(p_kg_index.property_graph_store, llm=llm),
    VectorContextRetriever(p_kg_index.property_graph_store, embed_model=embed_model),
    # TextToCypherRetriever(p_kg_index.property_graph_store, llm=llm),
]

retriever = p_kg_index.as_retriever(sub_retrievers=sub_retrievers)
nodes = retriever.retrieve("drowning adult")
nodes


[]

In [156]:
from llama_index.core.indices.property_graph import PGRetriever, LLMSynonymRetriever, VectorContextRetriever

# Define sub-retrievers
sub_retrievers = [
    LLMSynonymRetriever(p_kg_index.property_graph_store, llm=llm),
    VectorContextRetriever(p_kg_index.property_graph_store, embed_model=embed_model),
]

# Create a PGRetriever with the sub-retrievers
retriever = PGRetriever(sub_retrievers=sub_retrievers)

# Retrieve nodes using the PGRetriever
query_bundle = QueryBundle(query_str="drowning adult")
nodes = retriever.retrieve(query_bundle)

# Process retrieved nodes
for node in nodes:
    print(node.text)


Myocardial infarction -> can_present_with -> Pain perceived as heartburn
Has had a seizure -> has_condition -> Recent head injury
All emergency services -> evaluate -> HRS inclusion in telephone conference during triple alert
Vasovagal Syncope -> has symptom -> Pallor
Serious burns injury -> has_code -> 1.2.5
Shortness of breath -> has criteria -> Asthma
Normal -> requires_transport_to -> Hospital
Labour has started -> has_advice -> Find lots of clean towels and warm blankets, as quickly as possible.
GHB concentration -> is -> Often unknown
Contact the nearest maternity hospital -> requires_advice -> Relevant situation
Large Internal Bleeding -> is -> Life-threatening
Person cannot sit up -> requires_action -> Observe breathing
Angina Symptoms that Worsen over Days/Weeks -> are called -> Unstable Angina
Severe Pain Coming in Waves -> has condition -> Side
Breathing difficulties -> is_a -> Nonspecific symptom
Urgent -> has_symptom -> Wound infection, abscess or puncture wound from a dir