# GraphRAG 项目演示

这个notebook演示了如何使用我们的GraphRAG系统进行知识图谱构建和问答。


In [4]:
# Notebook setup: make the project package importable and prepare asyncio
# for use inside the notebook.
import sys
import os
# Add the parent directory to the module search path so that the `src`
# package imported below can be resolved. This is relative to the current
# working directory — assumes the notebook is run from its own folder.
sys.path.append('..')

import asyncio
import json
# NOTE(review): the recorded output of this cell is an ImportError for
# `GraphRAGEngine` — confirm the class name actually exported by
# src/rag_engine.py before running the remaining cells.
from src.rag_engine import GraphRAGEngine
from src.utils import load_config, visualize_graph

# Set up the async environment: nest_asyncio patches asyncio so coroutines
# can be run while the notebook's own event loop is already running.
import nest_asyncio
nest_asyncio.apply()

ImportError: cannot import name 'GraphRAGEngine' from 'src.rag_engine' (/Users/jiakechu/Desktop/llm rl 学习/graphrag/notebooks/../src/rag_engine.py)

## 1. 初始化GraphRAG引擎

In [None]:
# Load the configuration from the project's YAML config file
config = load_config('../config/config.yaml')

# Create the engine from the loaded configuration
engine = GraphRAGEngine(config)

# Initialize the engine from the 'squad' dataset (top-level await: this
# only works inside a notebook / async-aware REPL, not in a plain script)
await engine.initialize_from_dataset('squad')

print("GraphRAG引擎初始化完成！")

## 2. 查看图统计信息

In [None]:
# Fetch the summary statistics of the knowledge graph from the engine
stats = engine.get_graph_summary()

# Render a header followed by one indented "name: value" line per statistic,
# emitted as a single newline-joined print
stat_lines = [f"  {name}: {count}" for name, count in stats.items()]
print("\n".join(["📊 知识图谱统计信息:", *stat_lines]))

## 3. 可视化知识图谱

In [None]:
# Visualize the engine's graph with the project helper.
# NOTE(review): max_nodes=50 presumably caps how many nodes are drawn —
# confirm against src/utils.visualize_graph.
visualize_graph(engine.graph, max_nodes=50)

In [None]:
## 4. 进行查询

In [None]:
# 示例查询
questions = [
    "What is artificial intelligence?",
    "How does machine learning work?",
    "What are neural networks?"
]

results = []

for question in questions:
    print(f"\n🔍 问题: {question}")
    
    result = await engine.query(question)
    results.append(result)
    
    print(f"💡 回答: {result['answer'][:200]}...")
    print(f"🎯 置信度: {result['confidence']:.2f}")
    print(f"🏷️  相关实体: {[e['text'] for e in result['entities'][:3]]}")
    print(f"📚 来源数量: {len(result['sources'])}")

## 5. 分析实体信息

In [None]:
# Rank the graph's nodes by degree (number of connections) and keep the
# ten highest; ties keep their original relative order because the sort
# is stable
degrees = dict(engine.graph.degree())
ranked_by_degree = sorted(degrees.items(), key=lambda pair: pair[1], reverse=True)
top_entities = ranked_by_degree[:10]

print("🏆 最重要的实体 (按连接数排序):")
for entity, degree in top_entities:
    print(f"  {entity}: {degree} 个连接")

    # Look up the engine's detail record for this entity
    entity_info = engine.get_entity_info(entity)
    frequency, community = entity_info['frequency'], entity_info['community']
    print(f"    频次: {frequency}, 社区: {community}")

    # Show at most three neighbors, then a blank separator line
    first_neighbors = entity_info['neighbors'][:3]
    print(f"    邻居: {first_neighbors}", end="\n\n")


## 6. 社区分析

In [None]:
# Community analysis: count how many entries map to each community id
from collections import Counter

community_counts = Counter(engine.communities.values())
community_total = len(community_counts)
print(f"📊 发现 {community_total} 个社区")

# Report the five largest communities, biggest first
largest_communities = community_counts.most_common(5)
for comm_id, size in largest_communities:
    print(f"\n🏘️  社区 {comm_id} (大小: {size})")

    # Detail record for this community as provided by the engine
    comm_info = engine.get_community_info(comm_id)

    sample_entities = comm_info['entities'][:5]
    print(f"  实体: {sample_entities}")

    density = comm_info['density']
    print(f"  密度: {density:.3f}")

## 7. 保存结果

In [None]:
# Make sure the output directory exists first: open(..., 'w') raises
# FileNotFoundError when the parent directory is missing (e.g. on a fresh
# checkout). exist_ok=True makes this a no-op when it is already there.
os.makedirs('../output', exist_ok=True)

# Save the collected query results as UTF-8 JSON; ensure_ascii=False keeps
# non-ASCII text human-readable in the file
with open('../output/notebook_results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

# Save the graph structure through the engine
engine.save_graph('../output/notebook_graph.json')

print("✅ 结果已保存到 output/ 目录")
