In [1]:
import nest_asyncio

nest_asyncio.apply()

In [38]:
from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader("./kgdata/").load_data()
# documents = SimpleDirectoryReader("./few_examples/").load_data()

In [3]:
from llama_index.core import PropertyGraphIndex
from langchain_huggingface import HuggingFaceEmbeddings
from llama_index.llms.azure_openai import AzureOpenAI

import os
from dotenv import load_dotenv
load_dotenv()


# For Azure OpenAI
api_key = "<api-key>"
azure_endpoint = "https://<your-resource-name>.openai.azure.com/"
api_version = "2023-07-01-preview"

endpoint = "https://d-ais-eus-ais-chatbots.openai.azure.com/"
model_name = "o1-mini"
deployment = "o1-mini"
subscription_key = os.getenv("AZURE_OPENAI_API_KEY")
api_version = "2024-12-01-preview" # Use a valid API version

llm = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=subscription_key,
    api_version=api_version,
    deployment_name=deployment,
    model_name=model_name,
    temperature=1.0
)


# TO-DO: If you want, you can also customize the prompt and the function used to parse the paths. 
# Here's a simple (but naive) example: 
# prompt = (
#     "Some text is provided below. Given the text, extract up to "
#     "{max_paths_per_chunk} "
#     "knowledge triples in the form of `subject,predicate,object` on each line. Avoid stopwords.\n"
# )


# def parse_fn(response_str: str) -> List[Tuple[str, str, str]]:
#     lines = response_str.split("\n")
#     triples = [line.split(",") for line in lines]
#     return triples


# kg_extractor = SimpleLLMPathExtractor(
#     llm=llm,
#     extract_prompt=prompt,
#     parse_fn=parse_fn,
# )


# Now use this wrapped model with PropertyGraphIndex
# index = PropertyGraphIndex.from_documents(
#     documents,
#     llm=llm,  # Use the wrapped model
#     embed_model=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
#     show_progress=True,
# )

In [4]:
CoT_prompt_template = """
-Goal-
Given a text document containing medical protocols or guidelines, identify all entities of the specified medical types and all relationships among the identified entities to build a knowledge graph.

-Steps-
1.  Identify all entities within the text. For each identified entity, extract the following information:
    *   `entity_name`: Name of the entity, capitalized (e.g., CPR, UNCONSCIOUSNESS, NEWBORN).
    *   `entity_type`: One of the relevant medical types provided below.
    *   `entity_description`: Comprehensive description of the entity's attributes, purpose, or actions as described in the text.
    Format each entity as ("entity"{{tuple_delimiter}}<entity_name>{{tuple_delimiter}}<entity_type>{{tuple_delimiter}}<entity_description>)

2.  From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly and directly related* within the context of the provided text.
    For each pair of related entities, extract the following information:
    *   `source_entity`: name of the source entity, as identified in step 1.
    *   `target_entity`: name of the target entity, as identified in step 1.
    *   `relationship_description`: explanation in English describing the nature of the relationship between the source and target entity based *only* on the text (e.g., "is a symptom of", "is used to treat", "is performed on", "is a type of", "is indicated for", "uses", "should be checked in case of").
    *   `relationship_strength`: an integer score between 1 (weakly related) to 10 (very strongly and explicitly related).
    Format each relationship as ("relationship"{{tuple_delimiter}}<source_entity>{{tuple_delimiter}}<target_entity>{{tuple_delimiter}}<relationship_description>{{tuple_delimiter}}<relationship_strength>)

3.  Return the output as a single list containing all identified entities and relationships. Use **{{record_delimiter}}** as the delimiter between each entity or relationship record. The primary language of the provided text is mixed English and Norwegian.

4.  Translate Norwegian descriptions into English for the `entity_description` and `relationship_description` fields *only*. Keep entity names and types consistent (preferably English where obvious equivalents exist, otherwise use the capitalized term from the text).

5.  When finished, output {{completion_delimiter}}.

-Relevant Medical Entity Types-
[Medical Condition, Symptom, Patient Group, Medical Procedure, Medical Device/Tool, Medication/Substance, Anatomical Location, Guideline Section, Organization/Role, Medical Concept]

-Examples-
######################

Example 1 (Based on 01.md/02.md):

entity_types: [Medical Condition, Symptom, Patient Group, Medical Procedure, Medical Device/Tool, Medication/Substance, Anatomical Location, Guideline Section, Organization/Role, Medical Concept]
text:
# 01 Unconscious adult – not breathing normally
## CRITERIA
- Critical | Unconscious adult, not breathing normally
## SITUATIONAL GUIDANCE & IMPORTANT TO ASCERTAIN
- Help is on the way as I speak to you.
- You must start CPR (reviving the person). I will tell you what to do.
- Don’t hang up, put the phone on speaker if you can.
- If there is defibrillator at hand, get someone else to fetch it. Check Hjertestarterregisteret (the caller must not fetch a defibrillator / AED if alone)
## EMERGENCY RESPONSE
### SCENARIO
- BCPR (CARDIO PULMONARY RESUSCITATION)
### IF YES
- Push down at this rate 30 times.
- Now give rescue breaths.
- Tilt the head back with one hand on the forehead.
- Lift the chin up with the other hand.
- Pinch the nose and give 2 gentle rescue breaths.
- Continue with 30 pushes and 2 rescue breaths until medics take over or the person wakes up.
- Lay the person on the floor, on his / her back.
- Kneel beside the person’s chest.
- Place your hands in the middle of his / her chest... Push down hard...
------------------------
output:
("entity"{{tuple_delimiter}}UNCONSCIOUS ADULT{{tuple_delimiter}}Patient Group{{tuple_delimiter}}An adult patient who is unconscious and not breathing normally, requiring critical intervention.){{record_delimiter}}
("entity"{{tuple_delimiter}}NOT BREATHING NORMALLY{{tuple_delimiter}}Symptom{{tuple_delimiter}}A critical symptom indicating lack of normal respiration, often associated with unconsciousness.){{record_delimiter}}
("entity"{{tuple_delimiter}}CPR{{tuple_delimiter}}Medical Procedure{{tuple_delimiter}}Cardiopulmonary Resuscitation, a life-saving procedure involving chest compressions and rescue breaths, instructed by the call handler.){{record_delimiter}}
("entity"{{tuple_delimiter}}CHEST COMPRESSIONS{{tuple_delimiter}}Medical Procedure{{tuple_delimiter}}A component of CPR involving pushing down hard on the center of the chest 30 times.){{record_delimiter}}
("entity"{{tuple_delimiter}}RESCUE BREATHS{{tuple_delimiter}}Medical Procedure{{tuple_delimiter}}A component of CPR involving tilting the head, lifting the chin, pinching the nose, and giving 2 gentle breaths into the mouth.){{record_delimiter}}
("entity"{{tuple_delimiter}}AED{{tuple_delimiter}}Medical Device/Tool{{tuple_delimiter}}Automated External Defibrillator (also referred to as defibrillator or Hjertestarter), a device to be fetched if available and if someone else is present.){{record_delimiter}}
("entity"{{tuple_delimiter}}CALLER{{tuple_delimiter}}Organization/Role{{tuple_delimiter}}The person calling for help who is instructed to perform CPR.){{record_delimiter}}
("entity"{{tuple_delimiter}}MEDICS{{tuple_delimiter}}Organization/Role{{tuple_delimiter}}Emergency medical personnel who are on their way and will take over CPR upon arrival.){{record_delimiter}}
("entity"{{tuple_delimiter}}CRITERIA{{tuple_delimiter}}Guideline Section{{tuple_delimiter}}Section defining the conditions under which this protocol applies, such as an unconscious adult not breathing normally.){{record_delimiter}}
("entity"{{tuple_delimiter}}CHEST{{tuple_delimiter}}Anatomical Location{{tuple_delimiter}}The location on the body (middle of the chest) where chest compressions are applied during CPR.){{record_delimiter}}
("entity"{{tuple_delimiter}}HEAD{{tuple_delimiter}}Anatomical Location{{tuple_delimiter}}Body part manipulated during rescue breaths (tilt the head back).){{record_delimiter}}
("entity"{{tuple_delimiter}}CHIN{{tuple_delimiter}}Anatomical Location{{tuple_delimiter}}Body part manipulated during rescue breaths (lift the chin up).){{record_delimiter}}
("relationship"{{tuple_delimiter}}UNCONSCIOUS ADULT{{tuple_delimiter}}NOT BREATHING NORMALLY{{tuple_delimiter}}Is a defining symptom for this patient group according to the criteria.{{tuple_delimiter}}9){{record_delimiter}}
("relationship"{{tuple_delimiter}}CPR{{tuple_delimiter}}UNCONSCIOUS ADULT{{tuple_delimiter}}Is the required procedure for an unconscious adult not breathing normally.{{tuple_delimiter}}10){{record_delimiter}}
("relationship"{{tuple_delimiter}}CPR{{tuple_delimiter}}CHEST COMPRESSIONS{{tuple_delimiter}}Incorporates chest compressions as a key component (30 pushes).{{tuple_delimiter}}9){{record_delimiter}}
("relationship"{{tuple_delimiter}}CPR{{tuple_delimiter}}RESCUE BREATHS{{tuple_delimiter}}Incorporates rescue breaths as a key component (2 breaths).{{tuple_delimiter}}9){{record_delimiter}}
("relationship"{{tuple_delimiter}}CALLER{{tuple_delimiter}}CPR{{tuple_delimiter}}Is instructed to perform CPR.{{tuple_delimiter}}8){{record_delimiter}}
("relationship"{{tuple_delimiter}}AED{{tuple_delimiter}}CPR{{tuple_delimiter}}Should be fetched and used during CPR if available and feasible.{{tuple_delimiter}}7){{record_delimiter}}
("relationship"{{tuple_delimiter}}CHEST COMPRESSIONS{{tuple_delimiter}}CHEST{{tuple_delimiter}}Are applied to the middle of the chest.{{tuple_delimiter}}9){{record_delimiter}}
("relationship"{{tuple_delimiter}}RESCUE BREATHS{{tuple_delimiter}}HEAD{{tuple_delimiter}}Involves tilting the head back.{{tuple_delimiter}}8){{record_delimiter}}
("relationship"{{tuple_delimiter}}RESCUE BREATHS{{tuple_delimiter}}CHIN{{tuple_delimiter}}Involves lifting the chin up.{{tuple_delimiter}}8){{record_delimiter}}
("relationship"{{tuple_delimiter}}MEDICS{{tuple_delimiter}}CPR{{tuple_delimiter}}Will take over CPR from the caller upon arrival.{{tuple_delimiter}}8)
{{completion_delimiter}}
#############################

Example 2 (Based on 20.md):

entity_types: [Medical Condition, Symptom, Patient Group, Medical Procedure, Medical Device/Tool, Medication/Substance, Anatomical Location, Guideline Section, Organization/Role, Medical Concept]
text:
# 20 Diabetes
## CRITERIA
- Critical | Drowsy (decreased level of consciousness) - May have a low blood sugar (hypo) | 1.2.3.5.6
## ADVICE
### Advice 5. IF THE PERSON HAS A HYPO
If necessary and the person has a glucagon injection or nasal spray:
– Give one dose (1 mg) Glucagon...
### Advice 7. DROWSY OR DAZED AND UNABLE TO DRINK
– Do not force the person to drink...
– Alternatively, you can put one or two spoonfuls of honey in the mouth. You can also spread honey or granulated sugar on the gums, between the lips and the teeth.
### Advice 8. THE PERSON IS AWAKE ENOUGH TO DRINK
– Give the person several glasses of sugary drink e.g. fizzy drink, cordial, juice or milk.
## INFORMATION
### HYPOGLYKEMI – LAVT BLODSUKKER
Når blodsukkeret synker under 4 mmol/l... Hvis blodsukkeret faller ytterligere under 3 mmol/l, opptrer føling (insulinføling)... Pas. kan hurtig bli sløv, bevisstløs eller få kramper. Behandlingen er rask tilførsel av sukker eller Glukagon®
------------------------
output:
("entity"{{tuple_delimiter}}DIABETES{{tuple_delimiter}}Medical Condition{{tuple_delimiter}}The underlying medical condition being addressed in this protocol.){{record_delimiter}}
("entity"{{tuple_delimiter}}HYPOGLYCEMIA{{tuple_delimiter}}Medical Condition{{tuple_delimiter}}Low blood sugar (føling/insulinføling), defined as blood sugar below 4 mmol/L or 3 mmol/L, potentially causing drowsiness, unconsciousness, or seizures. Referred to as 'hypo'.){{record_delimiter}}
("entity"{{tuple_delimiter}}DROWSINESS{{tuple_delimiter}}Symptom{{tuple_delimiter}}Decreased level of consciousness, listed as a critical criterion possibly indicating hypoglycemia.){{record_delimiter}}
("entity"{{tuple_delimiter}}GLUCAGON{{tuple_delimiter}}Medication/Substance{{tuple_delimiter}}A medication administered via injection (1mg or 0.5mg dose) or nasal spray (3mg) to treat severe hypoglycemia if available.){{record_delimiter}}
("entity"{{tuple_delimiter}}GLUCAGON KIT{{tuple_delimiter}}Medical Device/Tool{{tuple_delimiter}}Refers to the glucagon injection or nasal spray kit the person might have.){{record_delimiter}}
("entity"{{tuple_delimiter}}HONEY{{tuple_delimiter}}Medication/Substance{{tuple_delimiter}}A sugary substance that can be placed in the mouth or on the gums of a drowsy person unable to drink.){{record_delimiter}}
("entity"{{tuple_delimiter}}GRANULATED SUGAR{{tuple_delimiter}}Medication/Substance{{tuple_delimiter}}A sugary substance that can be spread on the gums of a drowsy person unable to drink.){{record_delimiter}}
("entity"{{tuple_delimiter}}SUGARY DRINK{{tuple_delimiter}}Medication/Substance{{tuple_delimiter}}Drinks like fizzy drinks, cordial, juice, or milk given to a person awake enough to drink to raise blood sugar.){{record_delimiter}}
("entity"{{tuple_delimiter}}GUMS{{tuple_delimiter}}Anatomical Location{{tuple_delimiter}}Location in the mouth where honey or granulated sugar can be applied for absorption in a drowsy patient.){{record_delimiter}}
("entity"{{tuple_delimiter}}ADVICE{{tuple_delimiter}}Guideline Section{{tuple_delimiter}}Section providing instructions on how to manage specific situations like hypoglycemia.){{record_delimiter}}
("entity"{{tuple_delimiter}}INFORMATION{{tuple_delimiter}}Guideline Section{{tuple_delimiter}}Section providing background information on conditions like hypoglycemia.){{record_delimiter}}
("relationship"{{tuple_delimiter}}HYPOGLYCEMIA{{tuple_delimiter}}DIABETES{{tuple_delimiter}}Is a potential complication or state related to Diabetes.{{tuple_delimiter}}9){{record_delimiter}}
("relationship"{{tuple_delimiter}}DROWSINESS{{tuple_delimiter}}HYPOGLYCEMIA{{tuple_delimiter}}Is listed as a potential symptom or consequence of Hypoglycemia.{{tuple_delimiter}}8){{record_delimiter}}
("relationship"{{tuple_delimiter}}GLUCAGON{{tuple_delimiter}}HYPOGLYCEMIA{{tuple_delimiter}}Is a treatment for severe Hypoglycemia.{{tuple_delimiter}}10){{record_delimiter}}
("relationship"{{tuple_delimiter}}HONEY{{tuple_delimiter}}HYPOGLYCEMIA{{tuple_delimiter}}Is an alternative treatment for Hypoglycemia in drowsy patients unable to drink.{{tuple_delimiter}}7){{record_delimiter}}
("relationship"{{tuple_delimiter}}GRANULATED SUGAR{{tuple_delimiter}}HYPOGLYCEMIA{{tuple_delimiter}}Is an alternative treatment for Hypoglycemia in drowsy patients unable to drink.{{tuple_delimiter}}7){{record_delimiter}}
("relationship"{{tuple_delimiter}}SUGARY DRINK{{tuple_delimiter}}HYPOGLYCEMIA{{tuple_delimiter}}Is a treatment for Hypoglycemia in patients awake enough to drink.{{tuple_delimiter}}8){{record_delimiter}}
("relationship"{{tuple_delimiter}}GLUCAGON KIT{{tuple_delimiter}}GLUCAGON{{tuple_delimiter}}Is the delivery method for Glucagon medication.{{tuple_delimiter}}9){{record_delimiter}}
("relationship"{{tuple_delimiter}}HONEY{{tuple_delimiter}}GUMS{{tuple_delimiter}}Can be applied to the gums for absorption.{{tuple_delimiter}}7){{record_delimiter}}
("relationship"{{tuple_delimiter}}GRANULATED SUGAR{{tuple_delimiter}}GUMS{{tuple_delimiter}}Can be applied to the gums for absorption.{{tuple_delimiter}}7)
{{completion_delimiter}}
#############################

-Real Data-
######################
entity_types: [Medical Condition, Symptom, Patient Group, Medical Procedure, Medical Device/Tool, Medication/Substance, Anatomical Location, Guideline Section, Organization/Role, Medical Concept]
text: {input_text}
######################
output:
"""

In [41]:
from llama_index.core.indices.property_graph import SimpleLLMPathExtractor, DynamicLLMPathExtractor
from typing import List, Tuple, Dict, Any, Optional
import re

def parse_fn(response_str: str) -> List[Tuple[str, str, str]]:
    """Parse the structured output from the CoT prompt into triples.
    
    Args:
        response_str: The output string from the LLM
        
    Returns:
        List of triples in (subject, predicate, object) format
    """
    # Define the delimiters (same as in the prompt)
    tuple_delimiter = "\{tuple_delimiter\}"
    record_delimiter = "\{record_delimiter\}"
    completion_delimiter = "\{completion_delimiter\}"
    
    # Clean up the response string
    response_str = response_str.replace(completion_delimiter, "").strip()
    
    # Extract records using record delimiter
    records = re.split(record_delimiter, response_str)
    
    triples = []
    
    # Process each record
    for record in records:
        record = record.strip()
        if not record:
            continue
            
        # Check if it's an entity or relationship
        parts = re.split(tuple_delimiter, record.strip('(")'))
        if len(parts) < 3:
            continue
            
        record_type = parts[0]
        
        if record_type == "entity":
            # Entities become self-referential triples with type as predicate
            if len(parts) >= 3:
                entity_name = parts[1].strip()
                entity_type = parts[2].strip()
                triples.append((entity_name, "type", entity_type))
                
                # If there's a description, add it too
                if len(parts) >= 4:
                    description = parts[3].strip()
                    triples.append((entity_name, "description", description))
                    
        elif record_type == "relationship":
            # Relationships become direct triples
            if len(parts) >= 4:
                source = parts[1].strip()
                target = parts[2].strip()
                relation = parts[3].strip()
                triples.append((source, relation, target))
                
                # Also add strength if available
                if len(parts) >= 5:
                    strength = parts[4].strip()
                    triples.append((source + "_to_" + target, "strength", strength))
    
    return triples

# Create the extractor with our parsing function
kg_extractor = SimpleLLMPathExtractor(
    llm=llm,
    extract_prompt=CoT_prompt_template,
    parse_fn=parse_fn,
    max_paths_per_chunk=50
)


kg_extractor_2 = DynamicLLMPathExtractor(
    llm=llm,
    extract_prompt=CoT_prompt_template,
    num_workers=4,
    # Let the LLM infer entities and their labels (types) on the fly
    allowed_entity_types=None,
    # Let the LLM infer relationships on the fly
    allowed_relation_types=None,
    # LLM will generate any entity properties, set `None` to skip property generation (will be faster without)
    allowed_relation_props=[],
    # LLM will generate any relation properties, set `None` to skip property generation (will be faster without)
    allowed_entity_props=[],
)

  tuple_delimiter = "\{tuple_delimiter\}"
  record_delimiter = "\{record_delimiter\}"
  completion_delimiter = "\{completion_delimiter\}"


In [42]:
# Now use this wrapped model with PropertyGraphIndex
index = PropertyGraphIndex.from_documents(
    documents,
    llm=llm,  # Use the wrapped model
    embed_model=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
    kg_extractor=kg_extractor_2,
    show_progress=True
)

Parsing nodes:   0%|          | 0/37 [00:00<?, ?it/s]

Extracting paths from text: 100%|██████████| 145/145 [04:51<00:00,  2.01s/it]
Extracting implicit paths: 100%|██████████| 145/145 [00:00<?, ?it/s]
Generating embeddings: 100%|██████████| 15/15 [00:09<00:00,  1.65it/s]
Generating embeddings: 100%|██████████| 266/266 [01:32<00:00,  2.88it/s]


In [11]:
from llama_index.core.graph_stores import SimplePropertyGraphStore
# from llama_index.core.constants import DEFAULT_PERSIST_DIR # Default directory name
import os

# Assume 'pg_store' is your populated SimplePropertyGraphStore instance

# --- Define the directory to save in ---
save_directory = "./my_graph_persist" # Or use DEFAULT_PERSIST_DIR
# Ensure the directory exists
os.makedirs(save_directory, exist_ok=True)

# --- Persist the graph store state to the directory ---
print(f"Persisting graph store state to directory {save_directory}...")
try:
    # This uses the StorageContext's persist method implicitly if part of an index
    # If using the store standalone, you might call persist directly as above
    # For consistency with index patterns:
    from llama_index.core import StorageContext
    storage_context = StorageContext.from_defaults(property_graph_store=index.property_graph_store)
    storage_context.persist(persist_dir=save_directory)

    # Note: The standalone pg_store doesn't have persist_dir,
    # but when used with StorageContext (common pattern),
    # storage_context.persist handles saving components to the dir.
    # If you only have the store object, use pg_store.persist() as in method 1.

    print("Graph store state saved successfully within the directory.")
except Exception as e:
    print(f"Error persisting graph store via storage context: {e}")

# Now, './my_graph_persist/property_graph.json' (or similar default name)
# should contain the graph data.

Persisting graph store state to directory ./my_graph_persist...
Error persisting graph store via storage context: 'charmap' codec can't encode character '\u2265' in position 867420: character maps to <undefined>


In [None]:
index.property_graph_store.save_networkx_graph(name="./kg_all_v2_dynamic_extractor.html")

In [19]:
all_nodes = index.property_graph_store.get_triplets()

print("All nodes in the graph:")
print(all_nodes)
# Print the nodes
for node in all_nodes:
    print(node)

All nodes in the graph:
[]


In [None]:
list(index.property_graph_store.graph)

In [24]:
index.property_graph_store.graph

LabelledPropertyGraph(nodes={'5cd37a06-f5b1-45ef-b142-a90bb5e78b13': ChunkNode(label='text_chunk', embedding=[-0.06111140549182892, 0.08269305527210236, -0.04113225266337395, -0.021237963810563087, 0.07472843676805496, -0.037213802337646484, 0.01818927191197872, 0.05844612419605255, 0.051417648792266846, -0.04708836227655411, 0.11202287673950195, -0.06777142733335495, -0.002738642506301403, -0.0012083032634109259, -0.04388347268104553, -0.07670344412326813, -0.0226672925055027, 0.004014578182250261, -0.059080109000205994, 0.07477640360593796, 0.0409235954284668, 0.08977816253900528, 0.05949852243065834, -0.037541475147008896, -0.0017264256021007895, 0.03445855900645256, 0.011490818113088608, -0.12018003314733505, -0.028601525351405144, -0.015634480863809586, 0.08584728837013245, -0.08219211548566818, 0.04282643646001816, -0.046176426112651825, 0.02847852185368538, 0.15294092893600464, 0.07927283644676208, 0.0591605119407177, -0.08182580769062042, 0.043484948575496674, -0.01931494846940

In [None]:
list(index.property_graph_store.graph.nodes)

In [33]:
list(index.property_graph_store.graph.nodes.values())[-1]

EntityNode(label='entity', embedding=None, properties={}, name='8d130335-250f-48b7-88b4-b05e62c05c6e')

In [20]:
list(index.property_graph_store.graph.relations.values())[0]

Relation(label='Is', source_id='Unconscious adult', target_id='Not breathing normally', properties={'file_path': 'c:\\Users\\newac\\OneDrive\\Desktop\\Master\\few_examples\\01.md', 'file_name': '01.md', 'file_size': 8702, 'creation_date': '2025-04-30', 'last_modified_date': '2025-04-21', 'triplet_source_id': '5cd37a06-f5b1-45ef-b142-a90bb5e78b13'})

In [22]:
list(index.property_graph_store.graph.relations.values())[0]

Relation(label='Is', source_id='Unconscious adult', target_id='Not breathing normally', properties={'file_path': 'c:\\Users\\newac\\OneDrive\\Desktop\\Master\\kgdata\\01.md', 'file_name': '01.md', 'file_size': 8702, 'creation_date': '2025-04-21', 'last_modified_date': '2025-04-21', 'triplet_source_id': '8a94fbf1-9654-42b9-b939-33548157e8e5'})

In [39]:
import re
import networkx as nx
from graspologic.partition import hierarchical_leiden

from llama_index.core.graph_stores import SimplePropertyGraphStore
from llama_index.core.llms import ChatMessage
from llama_index.llms.openai import OpenAI # Assuming you use OpenAI LLM from LlamaIndex

# --- Your Custom Graph Store Class ---
class GraphRAGStore(SimplePropertyGraphStore):
    # Removed class variables that should be instance variables

    def __init__(self, llm=None, max_cluster_size=5, **kwargs):
        """
        Initialize the custom graph store.

        Args:
            llm: An initialized LlamaIndex LLM instance (e.g., OpenAI()).
                 If None, a default OpenAI() instance will be created.
            max_cluster_size (int): Maximum size for Leiden clustering.
            **kwargs: Additional arguments for the parent SimplePropertyGraphStore.
        """
        super().__init__(**kwargs) # Initialize the parent class
        self.community_summary = {} # Instance variable
        self.max_cluster_size = max_cluster_size # Instance variable
        # Store the LLM instance
        self.llm = llm or OpenAI() # Use provided LLM or create a default one
        print(f"Initialized GraphRAGStore with LLM: {type(self.llm)}")


    def generate_community_summary(self, text):
        """
        Generate summary for a given text using the configured LLM.
        (Handles models that might not support the 'system' role).
        """
        system_instructions = (
            "You are provided with a set of relationships from a knowledge graph, each represented as "
            "entity1->entity2->relation->relationship_description. Your task is to create a summary of these "
            "relationships. The summary should include the names of the entities involved and a concise synthesis "
            "of the relationship descriptions. The goal is to capture the most critical and relevant details that "
            "highlight the nature and significance of each relationship. Ensure that the summary is coherent and "
            "integrates the information in a way that emphasizes the key aspects of the relationships."
        )

        # Combine system instructions with the actual user request into a single user message
        # Use clear separators to help the LLM distinguish instructions from data
        combined_user_content = f"System Instructions:\n---\n{system_instructions}\n---\n\nRelationships to Summarize:\n---\n{text}\n---"

        messages = [
            # Only use the 'user' role
            ChatMessage(role="user", content=combined_user_content),
            # Some models might still expect an alternating user/assistant structure,
            # but often a single detailed user prompt works for summarization.
            # If you get poor results, you might need to add an empty 'assistant' message
            # if the specific LLM requires it to start generation, e.g.:
            # ChatMessage(role="assistant", content="")
        ]

        print(f"DEBUG: Sending message to LLM for community summary:\n{messages}") # Added for debugging

        try:
            # Make sure self.llm is the initialized LLM instance passed during __init__
            response = self.llm.chat(messages)

            # Parse the response content (structure might vary slightly by LLM)
            if hasattr(response, 'message') and hasattr(response.message, 'content'):
                clean_response = str(response.message.content).strip()
            elif isinstance(response, str): # Handle cases where chat might just return string
                clean_response = response.strip()
            else: # Fallback parsing
                clean_response = re.sub(r"^assistant:\s*", "", str(response)).strip()

            print(f"DEBUG: Received LLM summary response:\n{clean_response}") # Added for debugging
            return clean_response

        except Exception as e:
            # Catch potential API errors during the chat call
            print(f"ERROR: LLM chat call failed in generate_community_summary: {e}")
            # Return an error message or raise the exception depending on desired behavior
            # It might be helpful to see the original text that caused the error
            print(f"ERROR: Failed on text: {text[:500]}...") # Log beginning of problematic text
            # Returning an error string instead of raising, to potentially allow other communities to be summarized
            return f"Error generating summary: LLM call failed ({e})"

    def build_communities(self):
        """Builds communities from the graph and summarizes them."""
        if not self.graph.nodes:
             print("Graph is empty. Cannot build communities.")
             return

        nx_graph = self._create_nx_graph()
        if not nx_graph.nodes:
             print("NetworkX graph is empty. Cannot build communities.")
             return
        if not nx_graph.edges:
            print("NetworkX graph has no edges. Skipping community detection.")
            # Handle nodes without edges if necessary, maybe assign each to its own 'community'
            # For now, we just won't generate summaries based on edges.
            # You could potentially summarize node properties instead if desired.
            self.community_summary = {node: f"Node: {node}" for node in nx_graph.nodes()}
            return

        # Ensure graph is suitable for leiden if it requires connected components etc.
        # Handle potential errors from hierarchical_leiden if graph is disconnected or too small
        try:
             community_hierarchical_clusters = hierarchical_leiden(
                 nx_graph, max_cluster_size=self.max_cluster_size
             )
        except Exception as e:
             print(f"Error during Leiden clustering: {e}")
             # Decide how to handle this - maybe skip summarization or use a fallback
             return # Or raise the error if it's critical


        community_info = self._collect_community_info(
            nx_graph, community_hierarchical_clusters
        )
        self._summarize_communities(community_info)
        print(f"Built and summarized {len(self.community_summary)} communities.")


    def _create_nx_graph(self):
        """Converts internal graph representation to NetworkX graph."""
        nx_graph = nx.Graph()
        # Add nodes using their IDs (keys of the nodes dictionary)
        for node_id, node_obj in self.graph.nodes.items():
            # Use node_id which is the primary identifier
            nx_graph.add_node(node_id, label=node_obj.label, properties=node_obj.properties)

        # Add edges using source_id and target_id from relations
        for rel_key, relation in self.graph.relations.items():
            # Ensure nodes exist before adding edge
            if relation.source_id in nx_graph and relation.target_id in nx_graph:
                 # Extract description safely
                 description = relation.properties.get("relationship_description", "N/A")
                 nx_graph.add_edge(
                     relation.source_id,
                     relation.target_id,
                     relationship=relation.label,
                     description=description,
                 )
            else:
                 print(f"Warning: Skipping edge due to missing node(s): {relation.source_id} -> {relation.target_id}")

        return nx_graph


    def _collect_community_info(self, nx_graph, clusters):
        """Collect detailed information for each node based on their community."""
        if not clusters:
             print("No clusters found to collect info from.")
             return {}

        community_mapping = {item.node: item.cluster for item in clusters}
        community_info = {}
        processed_edges = set() # To avoid duplicate edge entries (A->B and B->A)

        for item in clusters:
            cluster_id = item.cluster
            node = item.node # This should be the node_id used in nx_graph

            if cluster_id not in community_info:
                community_info[cluster_id] = []

            # Check neighbors within the same community
            if node in nx_graph: # Ensure node exists in graph
                 for neighbor in nx_graph.neighbors(node):
                     # Ensure neighbor is also in the mapping and in the same cluster
                     if neighbor in community_mapping and community_mapping[neighbor] == cluster_id:
                         # Create a canonical representation for the edge to avoid duplicates
                         edge_tuple = tuple(sorted((node, neighbor)))
                         if edge_tuple not in processed_edges:
                             edge_data = nx_graph.get_edge_data(node, neighbor)
                             if edge_data:
                                 # Use .get for safety
                                 relationship = edge_data.get('relationship', 'unknown_relation')
                                 description = edge_data.get('description', 'no_description')
                                 # Get node labels if available
                                 node_label = nx_graph.nodes[node].get('label', node)
                                 neighbor_label = nx_graph.nodes[neighbor].get('label', neighbor)

                                 detail = f"{node_label} <-> {neighbor_label} :: {relationship} [{description}]"
                                 community_info[cluster_id].append(detail)
                                 processed_edges.add(edge_tuple) # Mark edge as processed
            else:
                 print(f"Warning: Node {node} from cluster data not found in NetworkX graph.")

        # Filter out empty communities if any were created but had no intra-community edges
        community_info = {k: v for k, v in community_info.items() if v}
        return community_info


    def _summarize_communities(self, community_info):
        """Generate and store summaries for each community."""
        self.community_summary = {} # Reset summaries before generating new ones
        if not community_info:
             print("No community information to summarize.")
             return

        for community_id, details in community_info.items():
            if not details: # Skip if a community ended up with no details
                 print(f"Skipping empty community {community_id}")
                 continue
            details_text = "\n".join(details)
            # Add a simple fallback or skip if details_text is empty
            if not details_text.strip():
                summary = f"Community {community_id} has nodes but no descriptive relationships found within it."
            else:
                 try:
                    summary = self.generate_community_summary(details_text + ".") # Ensure it ends with a period
                 except Exception as e:
                    print(f"Error generating summary for community {community_id}: {e}")
                    summary = f"Error summarizing community {community_id}." # Provide fallback summary

            self.community_summary[community_id] = summary


    def get_community_summaries(self):
        """Returns the community summaries, building them if not already done."""
        # Always rebuild for now, or add logic to check if graph changed
        # if not self.community_summary:
        print("Building communities to get summaries...")
        self.build_communities()
        return self.community_summary



In [45]:
# --- How to Use It ---

from llama_index.core import PropertyGraphIndex, StorageContext, Document
from llama_index.embeddings.openai import OpenAIEmbedding # Example embedding model
# Assume necessary API keys (like OPENAI_API_KEY) are set as environment variables

# 1. Initialize your dependencies (LLM, Embed Model)
my_llm = llm
my_embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") # Example embedding model

# 2. Instantiate YOUR custom graph store, passing the LLM
my_custom_graph_store = GraphRAGStore(llm=my_llm, max_cluster_size=50)

# 3. Create a StorageContext using your custom store
storage_context = StorageContext.from_defaults(property_graph_store=my_custom_graph_store)

print(f"DEBUG: Type of store IN storage_context: {type(storage_context.property_graph_store)}")
assert isinstance(storage_context.property_graph_store, GraphRAGStore), "Store in storage_context is not a GraphRAGStore!"

# 4. Create the PropertyGraphIndex, passing the storage_context
#    Make sure you have 'documents' defined (list of LlamaIndex Document objects)
#    Example dummy documents:
# documents = [
#     Document(text="Alice works at Google."),
#     Document(text="Bob works at Microsoft."),
#     Document(text="Alice knows Bob."),
#     Document(text="Charlie lives in London."),
#     Document(text="Google is a tech company."),
#     Document(text="Microsoft is a tech company."),
# ]

index = PropertyGraphIndex.from_documents(
    documents,
    llm=llm,  # Use the wrapped model
    embed_model=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
    kg_extractor=kg_extractor_2,
    show_progress=True,
    storage_context=storage_context,  # Use the custom storage context
)

# # --- Now you can call your custom method ---
# # The object at index.property_graph_store IS NOW your GraphRAGStore instance

# # Ensure the graph has been populated by the index creation first





Initialized GraphRAGStore with LLM: <class 'llama_index.llms.azure_openai.base.AzureOpenAI'>
DEBUG: Type of store IN storage_context: <class '__main__.GraphRAGStore'>


Parsing nodes:   0%|          | 0/37 [00:00<?, ?it/s]

Extracting paths from text: 100%|██████████| 145/145 [04:36<00:00,  1.91s/it]
Extracting implicit paths: 100%|██████████| 145/145 [00:00<00:00, 70553.84it/s]
Generating embeddings: 100%|██████████| 15/15 [00:08<00:00,  1.67it/s]
Generating embeddings: 100%|██████████| 264/264 [01:28<00:00,  2.98it/s]


In [46]:
print(f"Graph Nodes: {len(index.property_graph_store.graph.nodes)}")
print(f"Graph Relations: {len(index.property_graph_store.graph.relations)}")

Graph Nodes: 1840
Graph Relations: 1634


In [None]:
index.property_graph_store.graph.get_triplets()

In [47]:
import networkx as nx
from graspologic.partition import hierarchical_leiden
# Assuming 'index' is your initialized PropertyGraphIndex
# from llama_index.core.schema import EntityNode # Import needed for type check

print("Retrieving triplets...")
if hasattr(index.property_graph_store, 'graph') and hasattr(index.property_graph_store.graph, 'get_triplets'):
    triplets = index.property_graph_store.graph.get_triplets()
else:
    print("Error: Cannot retrieve triplets from the graph store.")
    triplets = []

print(f"Found {len(triplets)} triplets. Building NetworkX graph (Entity Nodes Only)...")
G = nx.Graph()
entity_edges_added = 0
nodes_added = set() # Keep track of nodes added

for source, relation, target in triplets:
    if source.__class__.__name__ == "EntityNode" and target.__class__.__name__ == "EntityNode":
        # Ensure nodes are added explicitly if desired (optional but good practice)
        if source.name not in nodes_added:
             G.add_node(source.name)
             nodes_added.add(source.name)
        if target.name not in nodes_added:
             G.add_node(target.name)
             nodes_added.add(target.name)
        # Add edge
        G.add_edge(source.name, target.name, relationship=relation.label)
        entity_edges_added += 1

print(f"NetworkX graph built with {G.number_of_nodes()} entity nodes and {entity_edges_added} edges between them.")

if G.number_of_edges() > 0:
    print("Applying community detection...")
    try:
        partition_result = hierarchical_leiden(G)

        # --- Corrected Community Processing ---
        community_info = {} # Initialize as a dictionary
        print(f"DEBUG: partition_result type is {type(partition_result)}")

        # Try iterating directly over the result, assuming items have .node and .cluster
        processed_count = 0
        try:
            for item in partition_result:
                # DEBUG: Print the first few items to see their structure
                if processed_count < 5:
                    print(f"DEBUG: Partition item: {item}, type: {type(item)}")
                    # If it has attributes, print them
                    if hasattr(item, '__dict__'):
                         print(f"DEBUG: Item attributes: {item.__dict__}")
                    elif isinstance(item, tuple):
                         print(f"DEBUG: Item is tuple of len {len(item)}")


                # Check if the item has the expected attributes
                if hasattr(item, 'node') and hasattr(item, 'cluster'):
                    node = item.node
                    community_id = item.cluster
                    if community_id not in community_info:
                        community_info[community_id] = []
                    community_info[community_id].append(node)
                    processed_count += 1
                # Add fallback checks if the structure is different (e.g., maybe item is a tuple?)
                elif isinstance(item, tuple) and len(item) == 2:
                     node, community_id = item # Unpack if it's a 2-tuple
                     if community_id not in community_info:
                        community_info[community_id] = []
                     community_info[community_id].append(node)
                     processed_count += 1
                else:
                    # Stop printing warnings after a few times to avoid flooding
                    if processed_count < 10:
                         print(f"Warning: Skipping partition item {item} - does not have .node/.cluster attributes or is not a 2-tuple.")
                    # Increment anyway to eventually stop warnings
                    processed_count +=1


        except TypeError as iter_error:
            # If direct iteration fails, maybe it has a '.partition' dict?
            print(f"DEBUG: Direct iteration failed ({iter_error}), trying .partition attribute...")
            if hasattr(partition_result, 'partition') and isinstance(partition_result.partition, dict):
                print("DEBUG: Accessing partition_result.partition.items()")
                for node, community_id in partition_result.partition.items():
                     if community_id not in community_info:
                         community_info[community_id] = []
                     community_info[community_id].append(node)
                processed_count = len(community_info) # Update count
            else:
                print("ERROR: Cannot process partition result - direct iteration failed and no '.partition' dict found.")


        print(f"\nProcessed {len(community_info)} communities.")
        print("\nCommunity Information (Entities Only):")
        if not community_info:
             print("No communities found or partition result could not be processed.")
        for community_id, nodes in community_info.items():
            print(f"Community {community_id}: {nodes}")

    except Exception as e:
        print(f"Error during community detection or processing: {e}")
        # --- Fix for Issue 2 below ---
        print(f"Graph Info: Nodes={G.number_of_nodes()}, Edges={G.number_of_edges()}")

else:
    print("Skipping community detection as the graph has no edges between entities.")

Retrieving triplets...
Found 1634 triplets. Building NetworkX graph (Entity Nodes Only)...
NetworkX graph built with 1658 entity nodes and 1273 edges between them.
Applying community detection...
DEBUG: partition_result type is <class 'graspologic.partition.leiden.HierarchicalClusters'>
DEBUG: Partition item: HierarchicalCluster(node='Pale clammy skin', cluster=0, parent_cluster=None, level=0, is_final_cluster=True), type: <class 'graspologic.partition.leiden.HierarchicalCluster'>
DEBUG: Item is tuple of len 5
DEBUG: Partition item: HierarchicalCluster(node='Person', cluster=0, parent_cluster=None, level=0, is_final_cluster=True), type: <class 'graspologic.partition.leiden.HierarchicalCluster'>
DEBUG: Item is tuple of len 5
DEBUG: Partition item: HierarchicalCluster(node='With blankets after convulsions stop', cluster=0, parent_cluster=None, level=0, is_final_cluster=True), type: <class 'graspologic.partition.leiden.HierarchicalCluster'>
DEBUG: Item is tuple of len 5
DEBUG: Partition i

    problem here is that the noes nd enteties are N/A

In [48]:
# Check if the graph store actually contains data before proceeding
if not hasattr(index.property_graph_store, 'graph') or not index.property_graph_store.graph.nodes or not index.property_graph_store.graph.relations:
     print("\nWarning: The graph store seems empty or lacks relations after indexing. Community building might not produce results.")
else:
     # Now call your custom method on the correct object type
     print("\nCalling get_community_summaries()...")
     # This call should now work
     summaries = index.property_graph_store.build_communities() # Or call build_communities directly

     print("\nCommunity Summaries:")
     if not summaries:
         print("No community summaries were generated.")
     else:
         for cid, summary in summaries.items():
             print(f"--- Community {cid} ---")
             print(summary)
             print("-" * 20)

# Final verification
print(f"\nFinal type of store in index: {type(index.property_graph_store)}")


Calling get_community_summaries()...
DEBUG: Sending message to LLM for community summary:
[ChatMessage(role=<MessageRole.USER: 'user'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='System Instructions:\n---\nYou are provided with a set of relationships from a knowledge graph, each represented as entity1->entity2->relation->relationship_description. Your task is to create a summary of these relationships. The summary should include the names of the entities involved and a concise synthesis of the relationship descriptions. The goal is to capture the most critical and relevant details that highlight the nature and significance of each relationship. Ensure that the summary is coherent and integrates the information in a way that emphasizes the key aspects of the relationships.\n---\n\nRelationships to Summarize:\n---\ntext_chunk <-> entity :: SOURCE [N/A]\ntext_chunk <-> text_chunk :: PREVIOUS [N/A]\nentity <-> text_chunk :: SOURCE [N/A]\nentity <-> text_chunk :: SOUR

KeyboardInterrupt: 

In [54]:
from src import get_azure_openai_chat_model
llmR = get_azure_openai_chat_model()

my_custom_graph_store = GraphRAGStore(llm=llmR, max_cluster_size=50)

# Check if the graph store actually contains data before proceeding
if not hasattr(index.property_graph_store, 'graph') or not index.property_graph_store.graph.nodes or not index.property_graph_store.graph.relations:
     print("\nWarning: The graph store seems empty or lacks relations after indexing. Community building might not produce results.")
else:
     # Now call your custom method on the correct object type
     print("\nCalling get_community_summaries()...")
     # This call should now work
     summaries = index.property_graph_store.build_communities() # Or call build_communities directly

     print("\nCommunity Summaries:")
     if not summaries:
         print("No community summaries were generated.")
     else:
         for cid, summary in summaries.items():
             print(f"--- Community {cid} ---")
             print(summary)
             print("-" * 20)

# Final verification
print(f"\nFinal type of store in index: {type(index.property_graph_store)}")

Initialized GraphRAGStore with LLM: <class 'langchain_openai.chat_models.azure.AzureChatOpenAI'>

Calling get_community_summaries()...
Error generating summary for community 0: Error code: 400 - {'error': {'message': "Unsupported value: 'messages[0].role' does not support 'system' with this model.", 'type': 'invalid_request_error', 'param': 'messages[0].role', 'code': 'unsupported_value'}}
Error generating summary for community 1: Error code: 400 - {'error': {'message': "Unsupported value: 'messages[0].role' does not support 'system' with this model.", 'type': 'invalid_request_error', 'param': 'messages[0].role', 'code': 'unsupported_value'}}
Error generating summary for community 2: Error code: 400 - {'error': {'message': "Unsupported value: 'messages[0].role' does not support 'system' with this model.", 'type': 'invalid_request_error', 'param': 'messages[0].role', 'code': 'unsupported_value'}}
Error generating summary for community 3: Error code: 400 - {'error': {'message': "Unsuppor

KeyboardInterrupt: 

In [None]:
pg_store = index.property_graph_store

# --- Access ALL triplets directly from the internal graph object ---
if pg_store and hasattr(pg_store, 'graph') and hasattr(pg_store.graph, 'get_triplets'):
    all_triplets = pg_store.graph.get_triplets() # Access the underlying graph's method

    print(f"Successfully retrieved {len(all_triplets)} triplets directly from the graph.")

    
    for i, triplet in enumerate(all_triplets):
        if i >= 5: break
        # Triplet is usually (source_node, relation_node, target_node)
        # Access properties like triplet[0].id, triplet[0].label, triplet[0].properties
        print(f"Triplet {i}: ({triplet[0].id})-[{triplet[1].label}]->({triplet[2].id})")

else:
    print("Could not access the internal graph or its get_triplets method.")



Successfully retrieved 1639 triplets directly from the graph.
Triplet 0: (Smerten)-[Stråler fra]->(Ryggen og flanken ned mot lysken)
Triplet 1: (Lavt blodsukker hos diabetikere)-[Kan føre til]->(Krampeanfall)
Triplet 2: (Critical)-[Includes]->(Decreasing consciousness)
Triplet 3: (Besvimelsestendens)-[Kan føre til]->(Hjertestans)
Triplet 4: (Astmapasienter)-[Er utsatt for]->(Sirkulasjonssvikt)


In [60]:
all_triplets[0][0].id

'Tilstanden'

In [18]:
import networkx as nx
from graspologic.partition import hierarchical_leiden
from llama_index.core.graph_stores.simple_labelled import SimplePropertyGraphStore

# Initialize the graph store
graph_store = SimplePropertyGraphStore()

# Retrieve triplets
triplets = graph_store.get_triplets()

# Create a NetworkX graph
G = nx.Graph()
for source, relation, target in triplets:
    G.add_edge(source.name, target.name, relationship=relation.label)

# Apply community detection using the Leiden algorithm
partition = hierarchical_leiden(G)

# Process and print community information
community_info = ()
for node, community_id in partition.items():
    if community_id not in community_info:
        community_info[community_id] = []
    community_info[community_id].append(node)

for community_id, nodes in community_info.items():
    print(f"Community {community_id}: {nodes}")


EmptyNetworkError: EmptyNetworkError

In [12]:
from llama_index.core import Settings

Settings.llm = llm
Settings.embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [13]:
retriever = index.as_retriever(
    include_text=False,  # include source text, default True
)

nodes = retriever.retrieve("advice for unconcious adult")

for node in nodes:
    print(node.text)

Hypothermic patients with decreased consciousness -> Need -> Establishing open airways
Critical -> Includes -> Unconscious (unresponsive
Critical -> Includes symptom -> Unconscious (unresponsive
Unconscious adult -> Has condition -> Not breathing normally
Unconscious adult -> Has severity -> Critical
Drowsy decreased consciousness -> Classified as -> Critical


In [None]:
retriever = index.as_retriever(
    include_text=True,  # include source text, default True
)

nodes = retriever.retrieve("i have an adult person who is unconcious from scuba diving")

# for node in nodes:
#     print(node.text)

In [13]:
from neo4j import GraphDatabase

# URI examples: "neo4j://localhost", "neo4j+s://xxx.databases.neo4j.io"
URI = "neo4j+s://97570de5.databases.neo4j.io"
AUTH = ("neo4j", "ShJGrnbkb76SVewfRfnl8I9n64BuOI2RY1MHbb-HhT4")

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()

In [None]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core import PropertyGraphIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor
from llama_index.llms.azure_openai import AzureOpenAI

from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

documents = SimpleDirectoryReader("./kgdata/").load_data()
graph_store = Neo4jPropertyGraphStore(
    username="neo4j",
    password="ShJGrnbkb76SVewfRfnl8I9n64BuOI2RY1MHbb-HhT4",
    url="neo4j+s://97570de5.databases.neo4j.io",
)

llm = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=subscription_key,
    api_version=api_version,
    deployment_name=deployment,
    model_name=model_name,
    temperature=1.0,
    system_prompt=""  # Add this line
)


# Now use this wrapped model with PropertyGraphIndex
index = PropertyGraphIndex.from_documents(
    documents,
    llm=llm,  # Use the wrapped model
    embed_model=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
    show_progress=True,
    property_graph_store=graph_store

)



Parsing nodes: 100%|██████████| 37/37 [00:00<00:00, 60.14it/s]
Extracting paths from text: 100%|██████████| 145/145 [04:54<00:00,  2.03s/it]
Extracting implicit paths: 100%|██████████| 145/145 [00:00<00:00, 16496.88it/s]
Generating embeddings: 100%|██████████| 15/15 [00:10<00:00,  1.43it/s]
Generating embeddings: 100%|██████████| 268/268 [01:52<00:00,  2.39it/s]


In [16]:
import re
import networkx as nx
from graspologic.partition import hierarchical_leiden
from collections import defaultdict

from llama_index.core.llms import ChatMessage
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore


class GraphRAGStore(Neo4jPropertyGraphStore):
    community_summary = {}
    entity_info = None
    max_cluster_size = 5

    def generate_community_summary(self, text):
        """Generate summary for a given text using an LLM."""
        messages = [
            ChatMessage(
                role="system",
                content=(
                    "You are provided with a set of relationships from a knowledge graph, each represented as "
                    "entity1->entity2->relation->relationship_description. Your task is to create a summary of these "
                    "relationships. The summary should include the names of the entities involved and a concise synthesis "
                    "of the relationship descriptions. The goal is to capture the most critical and relevant details that "
                    "highlight the nature and significance of each relationship. Ensure that the summary is coherent and "
                    "integrates the information in a way that emphasizes the key aspects of the relationships."
                ),
            ),
            ChatMessage(role="user", content=text),
        ]
        response = OpenAI().chat(messages)
        clean_response = re.sub(r"^assistant:\s*", "", str(response)).strip()
        return clean_response

    def build_communities(self):
        """Builds communities from the graph and summarizes them."""
        nx_graph = self._create_nx_graph()
        community_hierarchical_clusters = hierarchical_leiden(
            nx_graph, max_cluster_size=self.max_cluster_size
        )
        self.entity_info, community_info = self._collect_community_info(
            nx_graph, community_hierarchical_clusters
        )
        self._summarize_communities(community_info)

    def _create_nx_graph(self):
        """Converts internal graph representation to NetworkX graph."""
        nx_graph = nx.Graph()
        triplets = self.get_triplets()
        for entity1, relation, entity2 in triplets:
            nx_graph.add_node(entity1.name)
            nx_graph.add_node(entity2.name)
            nx_graph.add_edge(
                relation.source_id,
                relation.target_id,
                relationship=relation.label,
                description=relation.properties["relationship_description"],
            )
        return nx_graph

    def _collect_community_info(self, nx_graph, clusters):
        """
        Collect information for each node based on their community,
        allowing entities to belong to multiple clusters.
        """
        entity_info = defaultdict(set)
        community_info = defaultdict(list)

        for item in clusters:
            node = item.node
            cluster_id = item.cluster

            # Update entity_info
            entity_info[node].add(cluster_id)

            for neighbor in nx_graph.neighbors(node):
                edge_data = nx_graph.get_edge_data(node, neighbor)
                if edge_data:
                    detail = f"{node} -> {neighbor} -> {edge_data['relationship']} -> {edge_data['description']}"
                    community_info[cluster_id].append(detail)

        # Convert sets to lists for easier serialization if needed
        entity_info = {k: list(v) for k, v in entity_info.items()}

        return dict(entity_info), dict(community_info)

    def _summarize_communities(self, community_info):
        """Generate and store summaries for each community."""
        for community_id, details in community_info.items():
            details_text = (
                "\n".join(details) + "."
            )  # Ensure it ends with a period
            self.community_summary[
                community_id
            ] = self.generate_community_summary(details_text)

    def get_community_summaries(self):
        """Returns the community summaries, building them if not already done."""
        if not self.community_summary:
            self.build_communities()
        return self.community_summary

In [17]:
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

# Initialize the graph store
graph_store = Neo4jPropertyGraphStore(
    username="neo4j",
    password="ShJGrnbkb76SVewfRfnl8I9n64BuOI2RY1MHbb-HhT4",
    url="neo4j+s://97570de5.databases.neo4j.io",
)

# Build and detect communities
graph_store.build_communities()

# Retrieve community summaries
community_summaries = graph_store.get_community_summaries()
for community_id, summary in community_summaries.items():
    print(f"Community (community_id): {summary}")


AttributeError: 'Neo4jPropertyGraphStore' object has no attribute 'build_communities'

In [21]:
from llama_index.core import Settings
# llm = AzureOpenAI(
#     azure_endpoint=endpoint,
#     api_key=subscription_key,
#     api_version=api_version,
#     deployment_name=deployment,
#     model_name=model_name,
#     temperature=1.0,
#     system_prompt=""  # Add this line
# )
from src import get_azure_openai_chat_model
llmR = get_azure_openai_chat_model()

Settings.llm = llmR
Settings.embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")



In [22]:
# Define retriever
retriever = index.as_retriever(
    include_text=False,  # include source text in returned nodes, default True
)
results = retriever.retrieve("advice for unconcious adult")
for record in results:
    print(record.text)

print("--------------------------------------------------")
# Question answering
query_engine = index.as_query_engine()
response = query_engine.query("i rescuded an adult from scuba diving, what can i do till the paramedics arrive?")
print("--------------------------------------------------")
print(str(response))
print("--------------------------------------------------")

Critical -> Has symptom -> Unconscious unresponsive
Unconscious adult -> Condition -> Breathing normally
Unconscious adult -> Classified as -> Critical
Unconscious adult -> Is -> Not breathing normally
Unconscious child over 1 year -> Is -> Critical
Unconscious infant under 1 year -> Is -> Critical
--------------------------------------------------
--------------------------------------------------
If you have rescued an adult from scuba diving, it is important to monitor their condition closely while waiting for paramedics. Here are some steps you can take:

1. **Check Responsiveness**: Ensure the person is conscious and responsive. If they are unresponsive, check their breathing.

2. **Positioning**: If the person is conscious, help them sit up or find a comfortable position. If they are unresponsive, place them on their side to keep the airway clear.

3. **Monitor Breathing**: Observe their breathing. If they stop breathing or are not breathing normally, you must start CPR immediate