In [1]:
# %load_ext autoreload
# %autoreload 2

import os

from neo4j import GraphDatabase
from ms_graphrag_neo4j import CustomGraphRAG

In [2]:
from dotenv import load_dotenv

# Load environment variables from the .env file into the process environment.
load_dotenv()

# Neo4j connection settings. os.getenv returns None for any variable that is
# not set, so the values are validated right below.
neo4j_uri = os.getenv("NEO4J_URI")
neo4j_username = os.getenv("NEO4J_USERNAME")
neo4j_password = os.getenv("NEO4J_PASSWORD")

# Fail fast with a readable message: the connection cell reads the same
# variables through os.environ[...], which would otherwise surface a missing
# setting only later, as a bare KeyError.
_missing_neo4j_vars = [
    name
    for name, value in (
        ("NEO4J_URI", neo4j_uri),
        ("NEO4J_USERNAME", neo4j_username),
        ("NEO4J_PASSWORD", neo4j_password),
    )
    if value is None
]
if _missing_neo4j_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_neo4j_vars)
        + ". Set them in the environment or in a .env file."
    )

In [3]:
# Total logical cores (threads). os.cpu_count() returns None when the count
# cannot be determined, so fall back to 1 to keep max_workers an integer.
# NOTE(review): the CustomGraphRAG constructor call in this notebook passes
# max_workers=0, not this value, so this cell is informational only.
max_workers = os.cpu_count() or 1
print(f"Available logical CPU cores: {max_workers}")

Available logical CPU cores: 8


In [None]:
# Open a Neo4j driver from the URI and credentials held in the process
# environment; a variable that is not set raises KeyError right here.
driver = GraphDatabase.driver(
    os.environ["NEO4J_URI"],
    auth=(
        os.environ["NEO4J_USERNAME"],
        os.environ["NEO4J_PASSWORD"],
    ),
)

# Build the GraphRAG helper on top of that driver.
# NOTE(review): the author labels max_workers=0 as "no concurrent work", but
# the recorded output of the extraction cell reports that no semaphore is used
# to limit concurrent tasks -- 0 may mean "unlimited" rather than "serial";
# confirm against CustomGraphRAG.
ms_graph = CustomGraphRAG(
    driver=driver,
    max_workers=0,
)

In [5]:
# Sample sentences fed to the extraction step.
example_texts = [
    "Tomaz works for Neo4j",
    "Tomaz lives in Grosuplje",
    "Tomaz went to school in Grosuplje",
]

# Entity types passed alongside the texts to the extraction call.
allowed_entities = [
    "Person",
    "Organization",
    "Location",
]

In [6]:
# Echo both inputs on one line, separated by a single space.
print(f"{example_texts} {allowed_entities}")

['Tomaz works for Neo4j', 'Tomaz lives in Grosuplje', 'Tomaz went to school in Grosuplje'] ['Person', 'Organization', 'Location']


In [7]:
# Extract entities and relationships
result = await ms_graph.extract_nodes_and_rels(example_texts, allowed_entities)
print(result)

All Tasks are completely processed.                 
Not Using semaphore to to limit concurrent tasks


Extracting nodes & relationships:  33%|███▎      | 1/3 [00:01<00:03,  1.51s/it]

ChatCompletion(id='gen-1747115236-NpZqvNWoGLeBoyoEjzh9', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='("entity";TOMAZ;PERSON;Tomaz is an individual who attended school in Grosuplje)\n|\n("entity";GROSUPLJE;LOCATION;Grosuplje is a location where Tomaz attended school)', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning=None), native_finish_reason='stop')], created=1747115236, model='openai/gpt-4o', object='chat.completion', service_tier=None, system_fingerprint='fp_d8864f8b6b', usage=CompletionUsage(completion_tokens=48, prompt_tokens=1529, total_tokens=1577, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=1408)), provider='OpenAI')


Extracting nodes & relationships: 100%|██████████| 3/3 [00:01<00:00,  1.60it/s]

ChatCompletion(id='gen-1747115236-swxwtgPNUY9j2TWhmewj', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='("entity";TOMAZ;PERSON;Tomaz is an individual who works for Neo4j)\n|\n("entity";NEO4J;ORGANIZATION;Neo4j is an organization that employs Tomaz)\n|\n("relationship";TOMAZ;NEO4J;Tomaz is employed by Neo4j;8)', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning=None), native_finish_reason='stop')], created=1747115236, model='openai/gpt-4o', object='chat.completion', service_tier=None, system_fingerprint='fp_d8864f8b6b', usage=CompletionUsage(completion_tokens=70, prompt_tokens=1527, total_tokens=1597, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=1408)), provider='OpenAI')
ChatCompletion(id='ge




In [8]:
# Generate summaries for nodes and relationships.
# The awaited return value is bound to `result`, replacing whatever the
# extraction cell stored under that name, and is then printed.
# Top-level `await` relies on the notebook kernel's async support.
result = await ms_graph.summarize_nodes_and_rels()
print(result)

Summarizing nodes:  33%|███▎      | 1/3 [00:01<00:02,  1.00s/it]

ChatCompletion(id='gen-1747115240-uNHU1KcdHxa8LsNAZhmL', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Neo4j is an organization that employs Tomaz.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning=None), native_finish_reason='stop')], created=1747115240, model='openai/gpt-4o', object='chat.completion', service_tier=None, system_fingerprint='fp_d8864f8b6b', usage=CompletionUsage(completion_tokens=12, prompt_tokens=182, total_tokens=194, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=0)), provider='OpenAI')


Summarizing nodes: 100%|██████████| 3/3 [00:01<00:00,  2.15it/s]

ChatCompletion(id='gen-1747115240-K51xizaLhQIqI70w6cY6', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Grosuplje is a location where Tomaz both attended school and currently lives.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning=None), native_finish_reason='stop')], created=1747115240, model='openai/gpt-4o', object='chat.completion', service_tier=None, system_fingerprint='fp_d8864f8b6b', usage=CompletionUsage(completion_tokens=18, prompt_tokens=240, total_tokens=258, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=0)), provider='OpenAI')
ChatCompletion(id='gen-1747115240-Zt7XgQrtzDBB87pV7fol', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(conten

Summarizing nodes: 100%|██████████| 3/3 [00:01<00:00,  1.80it/s]
Summarizing relationships:  50%|█████     | 1/2 [00:01<00:01,  1.04s/it]

ChatCompletion(id='gen-1747115242-cCflSPhdv7j2lbKrwz37', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Tomaz is employed by Neo4j.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning=None), native_finish_reason='stop')], created=1747115242, model='openai/gpt-4o', object='chat.completion', service_tier=None, system_fingerprint='fp_d8864f8b6b', usage=CompletionUsage(completion_tokens=10, prompt_tokens=174, total_tokens=184, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=0)), provider='OpenAI')


Summarizing relationships: 100%|██████████| 2/2 [00:01<00:00,  1.24it/s]

ChatCompletion(id='gen-1747115242-n05uYcifJXJc8iMWZFeV', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Tomaz is a resident of Grosuplje, where he also attended school.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning=None), native_finish_reason='stop')], created=1747115242, model='openai/gpt-4o', object='chat.completion', service_tier=None, system_fingerprint='fp_d8864f8b6b', usage=CompletionUsage(completion_tokens=17, prompt_tokens=181, total_tokens=198, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=0)), provider='OpenAI')
Successfuly summarized nodes and relationships





In [9]:
# Identify and summarize communities.
# The awaited return value again overwrites `result` and is printed; the
# recorded output below reports the community levels found by the Leiden
# algorithm before the summaries are generated.
# Top-level `await` relies on the notebook kernel's async support.
result = await ms_graph.summarize_communities()
print(result)

Leiden algorithm identified 1 community levels with 1 communities on the last level.


Summarizing communities: 100%|██████████| 1/1 [00:08<00:00,  8.85s/it]

ChatCompletion(id='gen-1747115246-dGdGqTDfQknhMGENQUE7', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{\n    "title": "Tomaz, Grosuplje, and Neo4j Community",\n    "summary": "The community is centered around Tomaz, an individual residing in Grosuplje, who is employed by Neo4j. The relationships within this community highlight Tomaz\'s connection to his place of residence and employment, forming a simple yet interconnected network.",\n    "rating": 2.0,\n    "rating_explanation": "The impact severity rating is low due to the limited scope and influence of the community\'s entities.",\n    "findings": [\n        {\n            "summary": "Tomaz\'s Residency in Grosuplje",\n            "explanation": "Tomaz is a resident of Grosuplje, a location where he also attended school. This connection indicates a strong personal and educational tie to the area, suggesting that Grosuplje plays a significant role in his life. The relat




In [11]:
# Close the connection held by the GraphRAG helper.
# NOTE(review): presumably this also closes the `driver` created in the
# connection cell -- confirm against CustomGraphRAG.close(); if it does not,
# `driver.close()` should be called here as well.
ms_graph.close()