# Agentic AI Knowledge Graph - Test Notebook

This notebook tests the Knowledge Graph schema and queries.

**Schema**: Principle -> Method -> Implementation -> StandardVersion

## 1. Setup & Connection

In [1]:
# Put the parent directory first on the import path -- presumably so the
# `src` package imported below resolves when the notebook runs from its own
# sub-directory (TODO confirm the repository layout).
import sys
sys.path.insert(0, '..')

import os
from dotenv import load_dotenv

# Load the .env file
load_dotenv()

# Read the connection settings from environment variables. os.getenv returns
# None for any variable that is not set. These constants are reused by the
# driver.execute_query() examples later in the notebook.
URI = os.getenv("NEO4J_URI")
USERNAME = os.getenv("NEO4J_USERNAME")
PASSWORD = os.getenv("NEO4J_PASSWORD")
DATABASE = os.getenv("NEO4J_DATABASE")


from src.graph import Neo4jClient

# Create client (uses settings from .env)
client = Neo4jClient()
client.connect()
print("Connected to Neo4j!")

Connected to Neo4j!


## 2. Initialize Database (Optional)

This cell clears the database and reloads the seed data when it is enabled. **It is destructive — leave it disabled if data already exists.**

In [None]:
# Destructive step: clears ALL existing data, then reloads the seed data.
# It is disabled by default so that "Restart Kernel -> Run All" never wipes a
# populated database; set RESET_DATABASE = True to run it deliberately.
RESET_DATABASE = False

if RESET_DATABASE:
    stats = client.initialize(clear_first=True)
    print(f"Initialized: {stats.total_nodes} nodes, {stats.total_relationships} relationships")

Clearing all data from database!


Initialized: 61 nodes, 60 relationships


## 3. Database Statistics

In [3]:
stats = client.get_stats()

# Headline totals first.
print(f"Total Nodes: {stats.total_nodes}")
print(f"Total Relationships: {stats.total_relationships}")

# Per-label breakdown, alphabetical by label.
print("\nNodes by Label:")
label_counts = stats.nodes_by_label
for label in sorted(label_counts):
    print(f"  {label}: {label_counts[label]}")

# Per-type breakdown, alphabetical by relationship type.
print("\nRelationships by Type:")
type_counts = stats.relationships_by_type
for rel_type in sorted(type_counts):
    print(f"  {rel_type}: {type_counts[rel_type]}")

Total Nodes: 61
Total Relationships: 60

Nodes by Label:
  Document: 3
  Implementation: 16
  Method: 25
  Principle: 11
  Standard: 3
  StandardVersion: 3

Relationships by Type:
  ADDRESSES: 37
  COMPLIES_WITH: 2
  HAS_VERSION: 3
  IMPLEMENTS: 10
  INTEGRATES_WITH: 3
  PROPOSES: 3
  USES: 2


## 4. Principle -> Method -> Implementation Paths

In [4]:
paths = client.get_principle_method_impl_paths()

print(f"Found {len(paths)} paths\n")

# Show only the first ten paths; each line lists at most three
# implementations and summarises the remainder as "(+N)".
for path in paths[:10]:
    names = path['implementations']
    impls = ", ".join(names[:3])
    hidden = len(names) - 3
    if hidden > 0:
        impls += f" (+{hidden})"
    print(f"{path['principle']} <- {path['method']} <- [{impls}]")

Found 12 paths

Grounding <- GraphRAG <- [microsoft/graphrag]
Grounding <- Retrieval-Augmented Generation <- [LangChain, LlamaIndex]
Memory <- MemGPT <- [Mem0]
Memory <- Temporal KG Memory <- [Zep]
Multi-Agent Collaboration <- CAMEL <- [AutoGen]
Multi-Agent Collaboration <- MetaGPT <- [CrewAI]
Planning <- MemGPT <- [Mem0]
Planning <- MetaGPT <- [CrewAI]
Planning <- Workflow Graph Orchestration <- [LangGraph]
Reasoning <- ReAct <- [LangChain, LangGraph]


## 5. Principles Coverage

In [5]:
coverage = client.get_principles_coverage()

print("Principle Coverage (Methods / Implementations):\n")

# Two text rows per principle: '#' bar for methods, '*' bar for
# implementations, each bar as long as the corresponding count.
for row in coverage:
    n_methods = row['method_count']
    n_impls = row['impl_count']
    print(f"{row['principle']:25} | M:{n_methods:2} {'#' * n_methods}")
    print(f"{'':25} | I:{n_impls:2} {'*' * n_impls}")

Principle Coverage (Methods / Implementations):

Planning                  | M: 7 #######
                          | I: 3 ***
Reasoning                 | M: 7 #######
                          | I: 2 **
Reflection                | M: 5 #####
                          | I: 0 
Grounding                 | M: 5 #####
                          | I: 3 ***
Memory                    | M: 4 ####
                          | I: 2 **
Tool Use & Action         | M: 4 ####
                          | I: 3 ***
Learning                  | M: 3 ###
                          | I: 0 
Multi-Agent Collaboration | M: 2 ##
                          | I: 2 **
Perception                | M: 0 
                          | I: 0 
Guardrails                | M: 0 
                          | I: 0 
Tracing                   | M: 0 
                          | I: 0 


## 6. Method Family Distribution

In [None]:
families = client.get_method_family_distribution()

print("Method Family Distribution:\n")

# One row per family with a '#' bar whose length equals the family's count.
for row in families:
    n_methods = row['count']
    print(f"{row['family']:30} | {n_methods:2} {'#' * n_methods}")

## 7. Standard Compliance

In [None]:
compliance = client.get_standard_compliance()

# Handle the empty case up front, then list every compliance row.
if not compliance:
    print("No standard compliance relationships found")
else:
    print("Standard Compliance:\n")
    for entry in compliance:
        print(f"{entry['implementation']} -> {entry['standard']} v{entry['version']}")
        print(f"  Role: {entry['role']}, Level: {entry['level']}")

## 8. Query Specific Method (ReAct)

In [None]:
# Look up the ReAct method node and print its attributes
react = client.get_node("Method", "m:react")
if react:
    print("ReAct Method:")
    # Fields read by direct key access, in display order.
    for label, key in (
        ("Name", "name"),
        ("Family", "method_family"),
        ("Type", "method_type"),
        ("Granularity", "granularity"),
    ):
        print(f"  {label}: {react[key]}")
    # The introduction year is the only field read with a fallback.
    print(f"  Year: {react.get('year_introduced', 'N/A')}")
    print(f"  Maturity: {react['maturity']}")
    summary_text = react['description'][:100]
    print(f"  Description: {summary_text}...")

# List the implementations linked to ReAct (runs even if the node lookup
# above returned nothing)
print("\nImplementations of ReAct:")
impls = client.get_implementations_by_method("m:react")
for impl in impls:
    impl_name, impl_kind, support = impl['name'], impl['impl_type'], impl['support_level']
    print(f"  - {impl_name} ({impl_kind}) - {support}")

## 9. Composite Methods

In [None]:
composites = client.get_composite_methods()

print("Composite Methods:\n")

# Render each composite as "<composite> = <part> + <part> + ...".
for entry in composites:
    parts = entry['components']
    joined = " + ".join(parts)
    print(f"{entry['composite_method']} = {joined}")

## 10. Data Quality Validation

In [None]:
def _print_named(heading, nodes):
    """Print '<heading>: <count>' followed by one '  - <name>' line per node."""
    print(f"{heading}: {len(nodes)}")
    for node in nodes:
        print(f"  - {node['name']}")


print("Data Quality Checks:\n")

# Methods that have no ADDRESSES link to any Principle
orphan_methods = client.get_orphan_methods()
_print_named("Methods without ADDRESSES -> Principle", orphan_methods)

# Implementations that have no IMPLEMENTS link to any Method
orphan_impls = client.get_orphan_implementations()
_print_named("\nImplementations without IMPLEMENTS -> Method", orphan_impls)

# Principles that no method addresses
uncovered = client.get_uncovered_principles()
_print_named("\nPrinciples with no methods", uncovered)

# Methods lacking a paper reference: only the count is reported
no_paper = client.get_methods_without_paper()
print(f"\nMethods without paper/seminal_source: {len(no_paper)}")

## 11. Custom Cypher Query

In [None]:
# Example: list every method in the 'retrieval_grounding' family
query = """
MATCH (m:Method)
WHERE m.method_family = 'retrieval_grounding'
RETURN m.name AS name, m.description AS description
ORDER BY m.name
"""

results = client.run_cypher(query)

print("Retrieval/Grounding Methods:\n")
for row in results:
    print(f"{row['name']}")
    # Only the first 80 characters of the description are shown.
    preview = row['description'][:80]
    print(f"  {preview}...\n")

## 12. Cleanup

In [None]:
# Close the Neo4jClient connection opened in the setup cell; the
# execute_query() examples that follow create their own driver.
client.close()
print("Connection closed.")

## 13. Using driver.execute_query() 

`driver.execute_query()` is a simplified API for executing Cypher queries without explicit session management. It's ideal for simple, one-off queries.

In [None]:
# Reconnect for driver.execute_query() examples
from neo4j import GraphDatabase

driver = GraphDatabase.driver(URI, auth=(USERNAME, PASSWORD))

# Creating the driver does not by itself prove the server is reachable or that
# the credentials are valid. Verify now so a bad URI/password fails here
# instead of inside the first example query.
driver.verify_connectivity()

print("Driver created for execute_query() examples")

### Example 1: Simple Read Query

Basic query without parameters. Returns `EagerResult` with records, keys, and summary.

In [None]:
# Example 1: Simple read query
query = """
MATCH (p:Principle)
RETURN p.id AS id, p.name AS name
ORDER BY p.name
LIMIT 5
"""

result = driver.execute_query(query, database_=DATABASE)

# The returned object bundles the records, the column keys and a summary
rows = result.records
print(f"Keys: {result.keys}")
print(f"Records count: {len(rows)}\n")

for row in rows:
    print(f"  {row['id']}: {row['name']}")

# Timing metadata comes from the result summary
elapsed_ms = result.summary.result_available_after
print(f"\nQuery completed in {elapsed_ms} ms")

### Example 2: Query with Parameters

Using parameterized queries to prevent injection and improve query plan caching.

In [None]:
# Example 2: Query with parameters
# NOTE: Cypher sorts nulls first under ORDER BY ... DESC, so ADDRESSES
# relationships without a weight (if any exist) appear at the top of the list.
query = """
MATCH (p:Principle {id: $principle_id})<-[a:ADDRESSES]-(m:Method)
RETURN m.name AS method, a.role AS role, a.weight AS weight
ORDER BY a.weight DESC
LIMIT $limit
"""

parameters = {
    "principle_id": "p:reasoning",
    "limit": 5
}

result = driver.execute_query(query, parameters_=parameters, database_=DATABASE)

print(f"Methods addressing '{parameters['principle_id']}':\n")
for record in result.records:
    # role / weight are relationship properties and may be null. Applying the
    # `:10` / `:.1f` format specs to None raises TypeError, so substitute a
    # placeholder. Non-null values render exactly as before.
    role = "N/A" if record['role'] is None else record['role']
    weight = "N/A" if record['weight'] is None else f"{record['weight']:.1f}"
    print(f"  {record['method']:30} | role: {role:10} | weight: {weight}")

print(f"\nFound {len(result.records)} methods")

### Example 3: Write Query (Create Node)

Creating a test node and then deleting it.

In [None]:
# Example 3: Write query - create a test node, verify it, and always remove it
create_query = """
CREATE (m:Method {
    id: $id,
    name: $name,
    method_family: 'test',
    method_type: 'test',
    granularity: 'atomic',
    description: 'Test method for execute_query() example',
    maturity: 'research'
})
RETURN m.id AS id, m.name AS name
"""

params = {
    "id": "m:test-method",
    "name": "Test Method"
}

verify_query = "MATCH (m:Method {id: $id}) RETURN count(m) AS count"

# DETACH DELETE also removes any relationships attached to the node, so the
# cleanup cannot fail on a node that has been linked to something.
delete_query = "MATCH (m:Method {id: $id}) DETACH DELETE m"

try:
    result = driver.execute_query(create_query, parameters_=params, database_=DATABASE)
    created = result.records[0]
    print(f"Created: {created['name']} ({created['id']})")

    # Verify it exists
    result = driver.execute_query(verify_query, parameters_={"id": params["id"]}, database_=DATABASE)
    print(f"Verification: {result.records[0]['count']} node(s) found")
finally:
    # Clean up in `finally` so a failure above never leaves the test node
    # behind in the knowledge graph (it would otherwise pollute later
    # statistics and make the next CREATE produce a duplicate).
    driver.execute_query(delete_query, parameters_={"id": params["id"]}, database_=DATABASE)
    print("Test node deleted")

### Example 4: Aggregation with Result Transformation

Using `result_transformer_` to customize how results are returned.

In [None]:
# Example 4: Aggregation with result_transformer_
from neo4j import Result

# Custom transformer: turn every record of the result into a plain dict
def to_dict_list(result: Result):
    """Return a list holding one ``dict`` per record yielded by ``result``."""
    return list(map(dict, result))

query = """
MATCH (m:Method)
RETURN m.method_family AS family, count(*) AS count
ORDER BY count DESC
"""

# Passing the transformer makes execute_query() return its output directly
families = driver.execute_query(
    query,
    result_transformer_=to_dict_list,
    database_=DATABASE,
)

print("Method families (using custom transformer):\n")
for entry in families:
    n_methods = entry['count']
    print(f"  {entry['family']:30} | {n_methods:2} {'#' * n_methods}")

print(f"\nTotal families: {len(families)}")

### Example 5: Complex Path Query

Finding paths through the knowledge graph.

In [None]:
# Example 5: Complex path query
query = """
MATCH path = (p:Principle)<-[:ADDRESSES]-(m:Method)<-[:IMPLEMENTS]-(i:Implementation)
WHERE p.id = $principle_id
RETURN p.name AS principle,
       m.name AS method,
       collect(DISTINCT i.name) AS implementations,
       length(path) AS path_length
ORDER BY size(implementations) DESC
LIMIT 3
"""

result = driver.execute_query(
    query,
    parameters_={"principle_id": "p:reasoning"},
    database_=DATABASE
)

print("Top 3 Reasoning methods by implementation count:\n")
for record in result.records:
    names = record['implementations']
    total = len(names)
    # Show at most three names and summarise the rest as "(+N more)"
    impls = ", ".join(names[:3])
    if total > 3:
        impls += f" (+{total - 3} more)"

    print(f"Method: {record['method']}")
    print(f"  Implementations ({total}): {impls}")
    print(f"  Path length: {record['path_length']}\n")

# The summary exposes the query type and the update counters
summary = result.summary
print(f"Query type: {summary.query_type}")
counters = summary.counters
print(f"Nodes created: {counters.nodes_created}")
print(f"Relationships created: {counters.relationships_created}")

### Key Features of `execute_query()`

**Advantages:**
- **Simplified API**: No need to manage sessions explicitly
- **Automatic retry**: Built-in retry logic for transient failures
- **Result transformation**: Custom transformers via `result_transformer_`
- **Routing control**: `routing_` parameter for cluster routing

**When to use:**
- Simple, one-off queries
- Read queries that don't need transaction control
- Quick prototyping and testing

**When NOT to use:**
- Complex multi-query transactions
- Workflows that need explicit session/transaction management
- Streaming large result sets (use session with streaming)

In [None]:
# Close the driver that was created for the execute_query() examples
driver.close()
print("Driver closed. All examples completed!")