# GraphRAG Knowledge Graph Exploration

This notebook helps you explore the knowledge graph built by GraphRAG 3.0.x.

**Knowledge Graph Stats (10 documents):**
- 147 entities
- 263 relationships
- 32 communities

**Prerequisites:**
- Run `poetry run python -m core.index` to build the knowledge graph
- Ensure the output files (`*.parquet`) exist in the `output/` directory; this notebook reads them from `../output` relative to its working directory

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from pathlib import Path

# The GraphRAG output folder sits one level above the notebook's working directory.
output_dir = Path("..").joinpath("output")

# Report the location and whether it is present on disk.
print(
    f"üìÇ Output directory: {output_dir}",
    f"   Exists: {output_dir.exists()}",
    sep="\n",
)

## 1. Load Entities

In [None]:
# Read the entity table (GraphRAG 3.x writes it as entities.parquet).
entities_path = output_dir / "entities.parquet"
entities_df = pd.read_parquet(entities_path)

# Headline count, followed by the number of entities per type.
print(f"üìä Total Entities: {len(entities_df)}")
print(f"\nüè∑Ô∏è  Entity Types:")
entity_type_counts = entities_df['type'].value_counts()
print(entity_type_counts)

# Cell output: preview of the first rows.
entities_df.head()

In [None]:
# Build a compact, display-friendly table of entity names and types:
# keep the two columns, give them readable headers, order rows by type,
# and renumber from scratch.
entity_table = (
    entities_df[['title', 'type']]
    .rename(columns={'title': 'Entity', 'type': 'Type'})
    .sort_values('Type')
    .reset_index(drop=True)
)

# Number rows from 1 instead of 0 for display.
entity_table.index = entity_table.index + 1

In [None]:
# First 20 rows of the type-sorted entity table.
entity_table.head(n=20)

In [None]:
# Last 15 rows of the type-sorted entity table.
entity_table.tail(n=15)

## 2. Load Relationships

In [None]:
# Read the relationship table (GraphRAG 3.x writes it as relationships.parquet).
relationships_path = output_dir / "relationships.parquet"
relationships_df = pd.read_parquet(relationships_path)

print(f"üîó Total Relationships: {len(relationships_df)}")
print(f"\nüìà Relationship Types:")

# Show the ten most frequent 'description' values when that column exists.
if 'description' not in relationships_df.columns:
    print("No relationship types found")
else:
    description_counts = relationships_df['description'].value_counts()
    print(description_counts.head(10))

# Cell output: preview of the first rows.
relationships_df.head()

## 3. Visualize Knowledge Graph

Create a network graph visualization of entities and relationships.

In [None]:
# Start from an empty undirected graph.
G = nx.Graph()

# One node per entity, keyed by the 'title' column (GraphRAG 3.x) and
# carrying the entity type as a node attribute.
G.add_nodes_from(
    (title, {'type': entity_type})
    for title, entity_type in zip(entities_df['title'], entities_df['type'])
)

# Only the first 50 relationship rows become edges, to keep the plot readable.
top_relationships = relationships_df.head(50)

# Edges need both endpoint columns; without them no edges are added.
if {'source', 'target'}.issubset(top_relationships.columns):
    # The edge label comes from 'description', or the literal 'related'
    # when that column is absent.
    if 'description' in top_relationships.columns:
        edge_labels = top_relationships['description']
    else:
        edge_labels = ['related'] * len(top_relationships)

    G.add_edges_from(
        (source, target, {'relationship': label})
        for source, target, label in zip(
            top_relationships['source'], top_relationships['target'], edge_labels
        )
    )

# Summary of the resulting graph; density is only reported for 2+ nodes.
node_count = G.number_of_nodes()
print(f"üìä Graph Stats:")
print(f"   Nodes: {node_count}")
print(f"   Edges: {G.number_of_edges()}")
if node_count > 1:
    print(f"   Density: {nx.density(G):.4f}")

In [None]:
# Visualize the graph: nodes coloured by entity type, followed by a text
# legend with the node count per type.

# Map every entity title to its type. Built before the empty-graph check so
# the lookup exists whichever branch runs.
entity_types = entities_df.set_index('title')['type'].to_dict()

if G.number_of_nodes() == 0:
    print("‚ö†Ô∏è  No nodes to visualize. Check if entities and relationships were loaded correctly.")
else:
    plt.figure(figsize=(16, 12))

    # Spring layout starts from random positions; a fixed seed keeps the
    # picture identical across kernel restarts.
    pos = nx.spring_layout(G, k=0.5, iterations=50, seed=42)

    # Type label for each node, in G.nodes() order. Nodes with no matching
    # entity row (e.g. added only as an edge endpoint) fall back to 'unknown'.
    node_colors = [entity_types.get(node, 'unknown') for node in G.nodes()]

    # Sort the distinct types so each type gets the same colour on every run
    # (set iteration order is not stable between kernels). key=str keeps the
    # sort safe if a type value is missing or not a string.
    unique_types = sorted(set(node_colors), key=str)

    # Set3 is a 12-colour palette. Wrap the index so types beyond the palette
    # size cycle through it instead of all clamping to the same colour.
    palette = plt.cm.Set3
    color_map = palette([i % palette.N for i in range(len(unique_types))])
    type_to_color = {t: color_map[i] for i, t in enumerate(unique_types)}
    node_colors_rgb = [type_to_color[t] for t in node_colors]

    # Draw network
    nx.draw_networkx_nodes(G, pos, node_color=node_colors_rgb, node_size=500, alpha=0.8)
    nx.draw_networkx_edges(G, pos, alpha=0.3, width=1)
    nx.draw_networkx_labels(G, pos, font_size=8, font_weight='bold')

    plt.title("Knowledge Graph Visualization (Top 50 Relationships)", fontsize=16)
    plt.axis('off')
    plt.tight_layout()
    plt.show()

    # Text legend: number of nodes per entity type.
    print("\nüé® Legend:")
    for entity_type in unique_types:
        print(f"   {entity_type}: {node_colors.count(entity_type)} nodes")

## 4. Explore Communities

In [None]:
# Read the community reports (GraphRAG 3.x writes them as community_reports.parquet).
communities_path = output_dir / "community_reports.parquet"
communities_df = pd.read_parquet(communities_path)

print(f"üèòÔ∏è  Total Communities: {len(communities_df)}")
print(f"\nüìã Community Levels:")

# Break the reports down by 'level' when that column is available.
if 'level' not in communities_df.columns:
    print("No level information found")
else:
    level_counts = communities_df['level'].value_counts()
    print(level_counts)

# Cell output: preview of the first rows.
communities_df.head()

In [None]:
# Print a short preview (id, level, title, summary) for the first three reports.
print("üìñ Sample Community Summaries:\n")

divider = "\n" + "="*60 + "\n"

for _, row in communities_df.head(3).iterrows():
    # Identifier: prefer 'community', then 'id', then a placeholder.
    fallback_id = row.get('id', 'Unknown')
    community_id = row.get('community', fallback_id)
    level = row.get('level', 'N/A')
    print(f"Community {community_id} (Level {level}):")
    print(f"Title: {row.get('title', 'N/A')}")

    # Summary text: prefer 'summary', then 'full_content', then a placeholder.
    summary = row.get('summary', row.get('full_content', 'N/A'))

    # Only string summaries longer than 200 characters are cut short.
    needs_truncation = isinstance(summary, str) and len(summary) > 200
    if not needs_truncation:
        print(f"Summary: {summary}")
    else:
        print(f"Summary: {summary[:200]}...")
    print(divider)

## 5. Entity Analysis

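Find the most connected entities (highest degree centrality). Centrality is computed on the graph built in Section 3, which contains only the first 50 relationships, so the ranking reflects that subset rather than the full knowledge graph.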

In [None]:
# Rank entities by degree centrality on graph G and list the ten highest.
# G holds only a capped subset of the relationships, so the ranking
# reflects that subset.
degree_centrality = nx.degree_centrality(G)

# Build the title -> type lookup locally so this cell does not depend on a
# variable created as a side effect of the plotting cell.
entity_types = entities_df.set_index('title')['type'].to_dict()

# Highest centrality first; keep the top ten.
top_entities = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[:10]

print("‚≠ê Top 10 Most Connected Entities:\n")
if not top_entities:
    # Empty graph: say so instead of printing a bare header.
    print("   (graph has no nodes)")
for entity, centrality in top_entities:
    # Nodes without a matching entity row are reported as 'unknown'.
    entity_type = entity_types.get(entity, 'unknown')
    print(f"   {entity} ({entity_type}): {centrality:.3f}")

In [None]:
# Plot a histogram of node degrees for graph G and print summary statistics.
degrees = [degree for _, degree in G.degree()]

if not degrees:
    # Empty graph: the mean would divide by zero and max()/min() would raise
    # on an empty list, so skip the plot and the statistics.
    print("No nodes in the graph - nothing to plot.")
else:
    plt.figure(figsize=(10, 6))
    plt.hist(degrees, bins=20, edgecolor='black', alpha=0.7)
    plt.xlabel('Degree (Number of Connections)')
    plt.ylabel('Frequency')
    plt.title('Degree Distribution of Knowledge Graph')
    plt.grid(True, alpha=0.3)
    plt.show()

    print(f"\nüìä Degree Statistics:")
    print(f"   Mean: {sum(degrees)/len(degrees):.2f}")
    print(f"   Max: {max(degrees)}")
    print(f"   Min: {min(degrees)}")

## 6. Search Examples

The cell below only prints example search commands. To actually run the queries, execute the commands in a terminal or call the Python API directly.

In [None]:
# Print ready-to-copy search examples: terminal commands first, then the
# equivalent Python API usage. Nothing is executed here.
terminal_examples = (
    "üí° Search Queries using Python API:\n",
    "# In terminal:",
    "# Local Search (specific questions):",
    'poetry run python -m core.example "Who leads Project Alpha?"',
    'poetry run python -m core.example "What technologies are used in Project Alpha?"',
    'poetry run python -m core.example "Who resolved the GraphRAG index corruption incident?"',
    "\n# Global Search (thematic questions):",
    'poetry run python -m core.example "What are the main projects?" --type global',
    'poetry run python -m core.example "Summarize the organizational structure" --type global',
    "\n# Or use Python API directly:",
)
for example_line in terminal_examples:
    print(example_line)

# Python API snippet, printed verbatim.
print("""
import asyncio
from core import load_all, local_search, global_search

data = load_all()

# Local search
response, context = asyncio.run(local_search("Who leads Project Alpha?", data))
print(response)

# Global search  
response, context = asyncio.run(global_search("What are the main projects?", data))
print(response)
""")

## Next Steps

1. **Run More Queries:** Try cross-document queries like "Who resolved the GraphRAG index corruption incident?"
2. **Explore Different Entity Types:** Filter the graph by specific entity types
3. **Analyze Specific Communities:** Deep dive into community structures
4. **Add More Documents:** Expand the knowledge graph with additional content

**Python API Reference:**
```python
from core import load_all, local_search, global_search, drift_search, basic_search
```

In Part 2, we'll explore:
- FastMCP server integration
- RESTful API for GraphRAG queries
- Production deployment patterns