# Exploring the Knowledge Graph

Peek under the hood: raw Cypher queries, graph statistics, Kùzu quirks,
and visualization with networkx.

**Requirements**: `OPENAI_API_KEY` in `.env`. Optional: `networkx`, `matplotlib` for visualization.

## Setup + Seed Data

In [None]:
import shutil
import tempfile
from pathlib import Path

from dotenv import load_dotenv

from tracemem_core import DefaultResourceExtractor, Message, ToolCall, TraceMem, TraceMemConfig

# Load environment variables from `.env` (this notebook expects `OPENAI_API_KEY` there).
load_dotenv()

# Scratch directory that stands in for the tutorial's project root.
_tmpdir = tempfile.mkdtemp(prefix="tracemem_tutorial_")


def project_path(rel: str) -> str:
    """Return *rel* joined onto the temporary project directory, as a string."""
    project_root = Path(_tmpdir)
    return str(project_root.joinpath(rel))


config = TraceMemConfig(home=Path(_tmpdir) / ".tracemem")
tm = TraceMem(
    config=config,
    resource_extractor=DefaultResourceExtractor(mode="local", home=config.home),
)
await tm.__aenter__()
print("TraceMem ready.")

In [None]:
# Seed: 3 conversations with tool calls and overlapping files
await tm.import_trace("conv-1", [
    Message(role="user", content="Add input validation to the user registration endpoint."),
    Message(role="assistant", content="I'll check the current routes.", tool_calls=[
        ToolCall(id="t1", name="read_file", args={"file_path": project_path("src/routes/users.py")}),
    ]),
    Message(role="tool", content="@router.post('/register')\nasync def register(data: dict): ...\n", tool_call_id="t1"),
    Message(role="assistant", content="I'll add Pydantic validation models.", tool_calls=[
        ToolCall(id="t2", name="edit_file", args={"file_path": project_path("src/routes/users.py")}),
    ]),
    Message(role="tool", content="from pydantic import BaseModel, EmailStr\n\nclass RegisterRequest(BaseModel):\n    email: EmailStr\n    password: str\n\n@router.post('/register')\nasync def register(data: RegisterRequest): ...\n", tool_call_id="t2"),
    Message(role="assistant", content="Added Pydantic models for registration validation with email and password fields."),
])

await tm.import_trace("conv-2", [
    Message(role="user", content="Write tests for user registration."),
    Message(role="assistant", content="Let me read the endpoint first.", tool_calls=[
        ToolCall(id="t3", name="read_file", args={"file_path": project_path("src/routes/users.py")}),
    ]),
    Message(role="tool", content="from pydantic import BaseModel, EmailStr\n# ... registration endpoint ...\n", tool_call_id="t3"),
    Message(role="assistant", content="Writing test file.", tool_calls=[
        ToolCall(id="t4", name="edit_file", args={"file_path": project_path("tests/test_users.py")}),
    ]),
    Message(role="tool", content="import pytest\n\ndef test_register_valid_email(): ...\ndef test_register_invalid_email(): ...\n", tool_call_id="t4"),
    Message(role="assistant", content="Tests cover valid and invalid email scenarios."),
])

await tm.import_trace("conv-3", [
    Message(role="user", content="Add rate limiting to the registration endpoint."),
    Message(role="assistant", content="Reading the route file.", tool_calls=[
        ToolCall(id="t5", name="read_file", args={"file_path": project_path("src/routes/users.py")}),
    ]),
    Message(role="tool", content="from pydantic import BaseModel\n# ... registration with validation ...\n", tool_call_id="t5"),
    Message(role="assistant", content="I'll also check the middleware setup.", tool_calls=[
        ToolCall(id="t6", name="read_file", args={"file_path": project_path("src/middleware.py")}),
    ]),
    Message(role="tool", content="from fastapi import Request\n# empty middleware\n", tool_call_id="t6"),
    Message(role="assistant", content="Adding rate limiter.", tool_calls=[
        ToolCall(id="t7", name="edit_file", args={"file_path": project_path("src/middleware.py")}),
    ]),
    Message(role="tool", content="from slowapi import Limiter\nlimiter = Limiter(key_func=get_remote_address)\n", tool_call_id="t7"),
    Message(role="assistant", content="Rate limiting added using slowapi with per-IP limits."),
])

print("Seeded 3 conversations.")

## Graph Statistics

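Use raw Cypher via `tm._graph_store.execute_cypher()` to inspect the graph. Note that `_graph_store` is a private attribute, so treat this as an exploration aid rather than a stable API.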

In [None]:
# Count nodes by type
for label in ["UserText", "AgentText", "ResourceVersion", "Resource"]:
    rows = await tm._graph_store.execute_cypher(
        f"MATCH (n:{label}) RETURN count(n) as cnt"
    )
    print(f"  {label}: {rows[0]['cnt']}")

In [None]:
# Count relationships
for rel_type in ["MESSAGE", "TOOL_USE", "VERSION_OF"]:
    # Kùzu needs typed endpoints for relationship queries
    if rel_type == "MESSAGE":
        # MESSAGE is a rel group — count each sub-type
        combos = [
            ("UserText", "AgentText"),
            ("AgentText", "UserText"),
            ("AgentText", "AgentText"),
        ]
        total = 0
        for src, tgt in combos:
            rows = await tm._graph_store.execute_cypher(
                f"MATCH (:{src})-[r:{rel_type}]->(:{tgt}) RETURN count(r) as cnt"
            )
            total += rows[0]["cnt"]
        print(f"  {rel_type}: {total}")
    elif rel_type == "TOOL_USE":
        rows = await tm._graph_store.execute_cypher(
            f"MATCH (:AgentText)-[r:{rel_type}]->(:ResourceVersion) RETURN count(r) as cnt"
        )
        print(f"  {rel_type}: {rows[0]['cnt']}")
    elif rel_type == "VERSION_OF":
        rows = await tm._graph_store.execute_cypher(
            f"MATCH (:ResourceVersion)-[r:{rel_type}]->(:Resource) RETURN count(r) as cnt"
        )
        print(f"  {rel_type}: {rows[0]['cnt']}")

In [None]:
# List distinct conversations
rows = await tm._graph_store.execute_cypher(
    "MATCH (u:UserText) RETURN DISTINCT u.conversation_id as conv ORDER BY conv"
)
print("Conversations:")
for r in rows:
    print(f"  {r['conv']}")

## Query the Graph Directly

Raw Cypher examples for common graph queries.

In [None]:
# All user messages in a conversation
rows = await tm._graph_store.execute_cypher(
    "MATCH (u:UserText) WHERE u.conversation_id = $cid "
    "RETURN u.text as question, u.turn_index as turn "
    "ORDER BY u.turn_index",
    {"cid": "conv-3"},
)
print("User messages in conv-3:")
for r in rows:
    print(f"  Turn {r['turn']}: {r['question']}")

In [None]:
# Find agent responses that used a specific tool
rows = await tm._graph_store.execute_cypher(
    "MATCH (a:AgentText)-[r:TOOL_USE]->(v:ResourceVersion) "
    "WHERE r.tool_name = $tool "
    "RETURN a.text as agent_text, v.uri as file, a.conversation_id as conv",
    {"tool": "EDIT_FILE"},
)
print("Agent messages that used edit_file:")
for r in rows:
    print(f"  [{r['conv']}] {r['agent_text'][:60]} -> {r['file']}")

In [None]:
# All files (resources) in the graph
rows = await tm._graph_store.execute_cypher(
    "MATCH (r:Resource) RETURN r.uri as uri, r.current_content_hash as hash ORDER BY r.uri"
)
print("Resources tracked:")
for r in rows:
    print(f"  {r['uri']}  (hash: {r['hash'][:12]}...)")

In [None]:
# Walk a conversation via MESSAGE edges
rows = await tm._graph_store.execute_cypher(
    "MATCH (start:UserText)-[:MESSAGE*0..10]->(n) "
    "WHERE start.conversation_id = $cid AND start.turn_index = 1 "
    "AND n.conversation_id = start.conversation_id "
    "RETURN n.id as id, label(n) as type, n.text as text "
    "ORDER BY n.created_at",
    {"cid": "conv-1"},
)
print("Conversation walk (conv-1):")
for r in rows:
    print(f"  [{r['type']}] {r['text'][:70]}")

## Resource Connectivity

Rank resources by how many conversations touched them. "Hub" resources — files touched by multiple conversations — appear first.

In [None]:
rows = await tm._graph_store.execute_cypher(
    "MATCH (a:AgentText)-[:TOOL_USE]->(v:ResourceVersion)-[:VERSION_OF]->(r:Resource) "
    "WITH r.uri as uri, collect(DISTINCT a.conversation_id) as convs "
    "RETURN uri, convs, size(convs) as num_convs "
    "ORDER BY num_convs DESC"
)
print("Resource connectivity (hub files):")
for r in rows:
    print(f"  {r['uri']}: {r['num_convs']} conversations — {r['convs']}")

## Kùzu vs Neo4j Cypher Differences

| Feature | Kùzu | Neo4j |
|---------|------|-------|
| Node label query | `label(n)` → `"UserText"` | `labels(n)` → `["UserText"]` |
| Multi-label match | Use `UNION ALL` | `WHERE n:A OR n:B` or `(n:A\|B)` |
| Tool relationships | Single `TOOL_USE` table with `tool_name` property | Dynamic rel types (`-[:READ_FILE]->`) |
| Variable-length paths | Max depth 30 | Unlimited |
| Relationship groups | `CREATE REL TABLE GROUP` for multi-type edges | Single rel type per `CREATE` |
| String matching | Functions (`starts_with`, `contains`) | Operators (`STARTS WITH`, `CONTAINS`) |
| Parameters | `$param` syntax | `$param` syntax |

In [None]:
# Example: label(n) returns a string in Kùzu
rows = await tm._graph_store.execute_cypher(
    "MATCH (n:UserText) RETURN label(n) as lbl LIMIT 1"
)
print(f"label() returns: {rows[0]['lbl']!r} (type: {type(rows[0]['lbl']).__name__})")

# Example: UNION ALL to query multiple node types
rows = await tm._graph_store.execute_cypher(
    "MATCH (n:UserText) WHERE n.conversation_id = 'conv-1' "
    "RETURN n.id as id, label(n) as type, n.text as text "
    "UNION ALL "
    "MATCH (n:AgentText) WHERE n.conversation_id = 'conv-1' "
    "RETURN n.id as id, label(n) as type, n.text as text"
)
print(f"\nAll nodes in conv-1 ({len(rows)} nodes):")
for r in rows:
    print(f"  [{r['type']}] {r['text'][:60]}")

In [None]:
# Example: TOOL_USE with tool_name property (instead of dynamic rel types)
rows = await tm._graph_store.execute_cypher(
    "MATCH (a:AgentText)-[r:TOOL_USE]->(v:ResourceVersion) "
    "RETURN r.tool_name as tool, v.uri as file, a.conversation_id as conv "
    "ORDER BY conv, r.created_at"
)
print("All tool uses:")
for r in rows:
    print(f"  [{r['conv']}] {r['tool']}({r['file']})")

## Visualization with networkx

Extract graph data and render with matplotlib.

In [None]:
# The visualization libraries are optional: record whether both imported so
# later cells can skip plotting when they are missing.
try:
    import matplotlib.pyplot as plt
    import networkx as nx
except ImportError:
    HAS_VIZ = False
    print("Install networkx and matplotlib for visualization:")
    print("  uv pip install networkx matplotlib")
else:
    HAS_VIZ = True

In [None]:
if HAS_VIZ:
    G = nx.DiGraph()

    # Add message nodes
    for label, color in [("UserText", "#4CAF50"), ("AgentText", "#2196F3")]:
        rows = await tm._graph_store.execute_cypher(
            f"MATCH (n:{label}) RETURN n.id as id, n.text as text, "
            f"n.conversation_id as conv, label(n) as type"
        )
        for r in rows:
            short_id = r["id"][:6]
            short_text = r["text"][:25] + "..." if len(r["text"]) > 25 else r["text"]
            G.add_node(short_id, label=f"{r['type'][0]}:{short_text}", color=color,
                       node_type=r["type"], conv=r["conv"])

    # Add resource nodes
    rows = await tm._graph_store.execute_cypher(
        "MATCH (r:Resource) RETURN r.id as id, r.uri as uri"
    )
    for r in rows:
        short_id = r["id"][:6]
        G.add_node(short_id, label=r["uri"], color="#FF9800", node_type="Resource")

    # Add MESSAGE edges
    for src, tgt in [("UserText", "AgentText"), ("AgentText", "UserText"), ("AgentText", "AgentText")]:
        rows = await tm._graph_store.execute_cypher(
            f"MATCH (s:{src})-[r:MESSAGE]->(t:{tgt}) RETURN s.id as src, t.id as tgt"
        )
        for r in rows:
            G.add_edge(r["src"][:6], r["tgt"][:6], rel="MSG", color="gray")

    # Add TOOL_USE -> VERSION_OF edges (simplified: agent -> resource)
    rows = await tm._graph_store.execute_cypher(
        "MATCH (a:AgentText)-[t:TOOL_USE]->(v:ResourceVersion)-[:VERSION_OF]->(r:Resource) "
        "RETURN a.id as src, r.id as tgt, t.tool_name as tool"
    )
    for r in rows:
        G.add_edge(r["src"][:6], r["tgt"][:6], rel=r["tool"], color="#FF5722")

    # Draw
    fig, ax = plt.subplots(1, 1, figsize=(14, 8))
    pos = nx.spring_layout(G, k=2, seed=42)

    node_colors = [G.nodes[n].get("color", "gray") for n in G.nodes()]
    edge_colors = [G.edges[e].get("color", "gray") for e in G.edges()]

    nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=800, alpha=0.9, ax=ax)
    nx.draw_networkx_edges(G, pos, edge_color=edge_colors, arrows=True,
                           arrowsize=15, alpha=0.6, ax=ax)
    labels = {n: G.nodes[n].get("label", n) for n in G.nodes()}
    nx.draw_networkx_labels(G, pos, labels, font_size=6, ax=ax)

    # Legend
    from matplotlib.patches import Patch
    legend_elements = [
        Patch(facecolor="#4CAF50", label="UserText"),
        Patch(facecolor="#2196F3", label="AgentText"),
        Patch(facecolor="#FF9800", label="Resource"),
    ]
    ax.legend(handles=legend_elements, loc="upper left")
    ax.set_title("TraceMem Knowledge Graph")
    plt.tight_layout()
    plt.show()
else:
    print("Skipping visualization (networkx/matplotlib not installed).")

## Using the query_graph.py Skill Script

TraceMem includes a CLI tool for graph inspection outside notebooks.
These commands work from your terminal.

In [None]:
# Show the available commands (these are shell commands — run in terminal)
# The text is only printed for reference; nothing is executed from this cell.
cli_usage = """CLI usage (run from project root):

  # Graph statistics
  uv run .claude/skills/tracemem/query_graph.py --stats

  # File history for a specific file
  uv run .claude/skills/tracemem/query_graph.py --file-history src/routes/users.py

  # Raw Cypher query
  uv run .claude/skills/tracemem/query_graph.py "MATCH (n:UserText) RETURN n.text LIMIT 5"

  # JSON output for scripting
  uv run .claude/skills/tracemem/query_graph.py --json "MATCH (r:Resource) RETURN r.uri"
"""
print(cli_usage)

## Cleanup

In [None]:
await tm.__aexit__(None, None, None)
shutil.rmtree(_tmpdir, ignore_errors=True)
print("Cleaned up.")