# 批量基因分析 - 简洁版

In [1]:
import asyncio
import sys
from datetime import datetime

# 添加项目路径
sys.path.append('.')

from agent_core.agents.specialists.literature_expert import LiteratureExpert
from agent_core.agents.specialists.clinical_expert import ClinicalExpert
from agent_core.agents.specialists.commercial_expert import CommercialExpert
from agent_core.config.analysis_config import ConfigManager

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
async def analyze_gene_batch(genes, mode='standard', types=('literature', 'clinical', 'commercial')):
    """Analyze a batch of genes and write one Markdown report per gene.

    Args:
        genes: Iterable of gene symbols. A single string is treated as one
            gene instead of being iterated character by character.
        mode: 'quick' or 'standard' select the matching ConfigManager preset;
            any other value falls back to the deep preset.
        types: Names of the analyses to enable ('literature', 'clinical',
            'commercial'). The default is an immutable tuple so it cannot be
            mutated across calls.

    Returns:
        List of the report file names that were written, in input order.
        Files are created in the current working directory.

    Note:
        Only the literature analysis is run and written to the report here.
        Clinical and commercial experts are still constructed when requested,
        but this function never calls them.
    """
    # Normalize the input once: a bare string would otherwise be split into
    # characters, and a generator has no len() for the progress display.
    if isinstance(genes, str):
        genes = [genes]
    genes = list(genes)
    total = len(genes)

    # Pick the configuration preset for the requested mode.
    if mode == 'quick':
        config = ConfigManager.get_quick_config()
    elif mode == 'standard':
        config = ConfigManager.get_standard_config()
    else:
        config = ConfigManager.get_deep_config()

    # Instantiate only the requested experts, all sharing the same config.
    expert_classes = {
        'literature': LiteratureExpert,
        'clinical': ClinicalExpert,
        'commercial': CommercialExpert,
    }
    experts = {
        name: expert_cls(config)
        for name, expert_cls in expert_classes.items()
        if name in types
    }

    # One timestamp for the whole batch so all reports of a run share a suffix.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    saved_files = []

    # Analyze each gene and save its report separately.
    for i, gene in enumerate(genes, 1):
        print(f"\n[{i}/{total}] 分析 {gene}")

        filename = f"{gene}_analysis_{timestamp}.md"
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(f"# {gene} 基因分析报告\n\n")
            f.write(f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"分析模式: {mode}\n\n")

            # Literature analysis
            if 'literature' in experts:
                try:
                    print("  📚 文献分析中...")
                    result = await experts['literature'].analyze(gene)
                    # Build the whole section before writing so a failure
                    # midway never leaves a half-written section in the report.
                    section = "".join([
                        "## 文献分析\n",
                        f"- 分析文献数: {result.total_literature}\n",
                        f"- 文本块数: {result.total_chunks}\n",
                        f"- 置信度: {result.confidence_score:.2f}\n\n",
                        "### 疾病机制\n", result.disease_mechanism, "\n\n",
                        "### 治疗策略\n", result.treatment_strategy, "\n\n",
                        "### 靶点分析\n", result.target_analysis, "\n\n",
                    ])
                except Exception as e:
                    # Best-effort batch: record the failure in the report and
                    # continue with the next gene instead of aborting the run.
                    section = f"## 文献分析\n❌ 错误: {str(e)}\n\n"
                    print(f"     ✗ 失败: {e}")
                else:
                    print(f"     ✓ {result.total_literature} 篇文献")
                f.write(section)

        saved_files.append(filename)
        print(f"  📄 已保存: {filename}")

    print(f"\n✅ 完成! 共保存 {len(saved_files)} 个文件:")
    for saved_name in saved_files:
        print(f"  • {saved_name}")

    return saved_files

In [4]:
# 使用示例 - 分析你的基因列表
my_genes = ['PNPLA3']

# 运行分析（每个基因会保存为独立的md文件）
saved_files = await analyze_gene_batch(
    genes=my_genes,
    mode='deep',  # 'quick'/'standard'/'deep'
    types=['literature']  # 选择需要的分析
)


[1/1] 分析 PNPLA3
  📚 文献分析中...
🏗️ 构建文献索引: PNPLA3 (gene)
📚 检索文献: PNPLA3 (gene)
   目标: 1000 篇
  🔍 搜索策略: PNPLA3[Title/Abstract]
    ✅ 新增 169 篇，累计 169 篇
  🔍 搜索策略: "PNPLA3" AND (disease OR treatment OR therapy)
    ✅ 新增 189 篇，累计 297 篇
  🔍 搜索策略: PNPLA3 AND (clinical trial[Publication Type] OR clinical study[Publication Type])
    ✅ 新增 41 篇，累计 335 篇
  🔍 搜索策略: PNPLA3 AND (mechanism OR pathway OR function)
    ✅ 新增 194 篇，累计 400 篇
  🔍 搜索策略: PNPLA3 AND (drug OR inhibitor OR target OR therapeutic)
    ✅ 新增 194 篇，累计 460 篇
📊 检索完成: 共 460 篇文献
📝 开始文本分块，块大小: 250
✅ 分块完成: 460 篇 → 3878 块
🔍 构建向量索引，模型: /public/home/chenziqing/agent_test/agent_core/agents/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/c9745ed1d9f207416be6d2e6f8de32d1f16199bf
  📊 编码 3878 个文本块...


Batches: 100%|██████████| 122/122 [00:13<00:00,  8.89it/s]


✅ 索引构建完成: 3878 块, 维度: 384
💾 缓存已保存: decad06ce8592252293a9cc0d9c95c9a
🤖 开始RAG查询...
🤖 RAG查询: PNPLA3 - disease_mechanism
  📊 检索到 300 个相关块
🤖 RAG查询: PNPLA3 - treatment_strategy
  📊 检索到 300 个相关块
🤖 RAG查询: PNPLA3 - target_analysis
  📊 检索到 300 个相关块
     ✓ 460 篇文献
  📄 已保存: PNPLA3_analysis_20250901_174839.md

✅ 完成! 共保存 1 个文件:
  • PNPLA3_analysis_20250901_174839.md
