In [11]:
# test_literature.ipynb

# %% [markdown]
# # Literature Expert 测试
# 测试文献分析模块的基本功能

# %% Cell 1: 环境设置和导入
import asyncio
import json
from dataclasses import dataclass
from typing import Optional, List

# Make the project package importable from this notebook.
# The checkout location is machine-specific, so it can be overridden through
# the AGENT_PROJECT_ROOT environment variable; the previously hard-coded path
# remains the default so existing setups keep working unchanged.
import os
import sys

PROJECT_ROOT = os.environ.get('AGENT_PROJECT_ROOT', '/public/home/chenziqing/agent_test_2')
# Re-running this cell in the same kernel must not add duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from agent_core.agents.specialists.literature_expert import LiteratureExpert
from agent_core.agents.control_agent import Entity



In [12]:
# %% Cell 2: define the test entities
# Three entities, each one filling in a different pair of fields.

# Case 1: disease + target
entity1 = Entity(disease="lung cancer", target="EGFR", drug=None, therapy=None)

# Case 2: disease + therapy modality
entity2 = Entity(disease="breast cancer", target=None, drug=None, therapy="CAR-T")

# Case 3: target + drug
entity3 = Entity(disease=None, target="PD-1", drug="pembrolizumab", therapy=None)

print("测试实体已创建")



测试实体已创建


In [13]:
# %% Cell 3: initialise the expert
# A single LiteratureExpert instance is created and bound to `expert`; the
# test coroutines in this notebook all call `expert.analyze(...)` on it.
expert = LiteratureExpert()
print("Literature Expert 已初始化")

# %% Cell 4: 测试基本功能
async def test_basic():
    """Run the EGFR / lung cancer analysis and print a short summary.

    Calls ``expert.analyze`` with ``entity1`` and a fixed list of search
    terms, then prints the paper count, evidence level and query used, the
    chunk count per analysis dimension (when ``result['analysis']`` is
    non-empty) and up to three entries of ``result['key_papers']``.

    Returns:
        The object returned by ``expert.analyze``, unchanged.
    """
    banner = "=" * 60
    print(banner)
    print("测试1: EGFR in lung cancer")
    print(banner)

    result = await expert.analyze(
        entity=entity1,
        search_terms=["EGFR", "lung cancer", "NSCLC", "erlotinib"],
        focus="EGFR mutations in lung cancer treatment",
    )

    print(f"检索到文献: {result['total_papers']} 篇")
    print(f"证据等级: {result['evidence_level']}")
    print(f"使用查询: {result['query_used']}")

    # Overview of the analysed dimensions and how many chunks each one used
    dimensions = result['analysis']
    if dimensions:
        print("\n完成的分析维度:")
        for dim_name, dim_info in dimensions.items():
            used = dim_info.get('chunks_used', 0)
            print(f"  - {dim_name}: 使用 {used} 个文献片段")

    # First three key papers; titles are cut to 80 characters
    papers = result['key_papers']
    if papers:
        print("\n关键文献 (前3篇):")
        for rank, paper in enumerate(papers[:3], start=1):
            short_title = paper['title'][:80]
            print(f"  {rank}. {short_title}...")
            print(f"     {paper['journal']} ({paper['year']})")

    return result

# Run the test. This uses a top-level `await`, so it depends on the notebook
# kernel allowing await at cell level. The result is kept in `result1`, which
# the report-display and save cells read later.
result1 = await test_basic()



No sentence-transformers model found with name sentence-transformers/all-MiniLM-L6-v2. Creating a new one with mean pooling.


OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Check your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [18]:
# Inspect the project configuration from inside the notebook
from agent_core.config.settings import config
print(f"Config attributes: {dir(config)}")
print(f"Embedding model: {config.embedding_model}")

Config attributes: ['__annotations__', '__class__', '__dataclass_fields__', '__dataclass_params__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__match_args__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'embedding_model', 'llm_max_tokens', 'llm_temperature', 'max_articles', 'max_chunks_per_query', 'pubmed_api_key', 'pubmed_email']
Embedding model: all-MiniLM-L6-v2


In [None]:
# %% Cell 5: 测试治疗方式分析
async def test_therapy():
    """Run the CAR-T / breast cancer analysis and print a short summary.

    Calls ``expert.analyze`` with ``entity2`` and a fixed list of search
    terms, prints the paper count and evidence level and, when the analysis
    contains a ``treatment_strategy`` entry, its chunk count and the first
    500 characters of its content.

    A ``None`` (or otherwise empty) ``result['analysis']`` is treated as
    "no dimensions analysed" rather than raising ``TypeError`` on the
    membership test; ``test_basic`` already tolerates a falsy analysis.

    Returns:
        The object returned by ``expert.analyze``, unchanged.
    """

    print("\n" + "=" * 60)
    print("测试2: CAR-T in breast cancer")
    print("=" * 60)

    result = await expert.analyze(
        entity=entity2,
        search_terms=["CAR-T", "chimeric antigen receptor", "breast cancer", "T cell therapy"],
        focus="CAR-T cell therapy for breast cancer"
    )

    print(f"检索到文献: {result['total_papers']} 篇")
    print(f"证据等级: {result['evidence_level']}")

    # Fall back to an empty mapping so `in` is always a valid operation.
    analysis = result['analysis'] or {}

    # Treatment-strategy dimension, if the expert produced one
    if 'treatment_strategy' in analysis:
        treatment = analysis['treatment_strategy']
        print("\n治疗策略分析:")
        print(f"  使用文献片段: {treatment.get('chunks_used', 0)}")
        if treatment.get('content'):
            # Show only the first 500 characters of the analysis text
            content_preview = treatment['content'][:500]
            print(f"  内容预览: {content_preview}...")

    return result

# Run the test (top-level await); the result is kept in `result2`, which the
# save cell reads later.
result2 = await test_therapy()

# %% Cell 6: 测试药物靶点分析
async def test_drug_target():
    """Run the PD-1 / pembrolizumab analysis and print a short summary.

    Calls ``expert.analyze`` with ``entity3`` and a fixed list of search
    terms, prints the paper count and evidence level and, when the analysis
    contains a ``target_analysis`` entry, its chunk count and the number of
    referenced PMIDs.

    A ``None`` (or otherwise empty) ``result['analysis']`` is treated as
    "no dimensions analysed", and a ``None`` ``pmids_referenced`` value is
    counted as zero, instead of raising ``TypeError``.

    Returns:
        The object returned by ``expert.analyze``, unchanged.
    """

    print("\n" + "=" * 60)
    print("测试3: PD-1 + pembrolizumab")
    print("=" * 60)

    result = await expert.analyze(
        entity=entity3,
        search_terms=["PD-1", "PDCD1", "pembrolizumab", "keytruda", "immune checkpoint"],
        focus="PD-1 inhibitor pembrolizumab mechanism and efficacy"
    )

    print(f"检索到文献: {result['total_papers']} 篇")
    print(f"证据等级: {result['evidence_level']}")

    # Fall back to an empty mapping so `in` is always a valid operation.
    analysis = result['analysis'] or {}

    # Target dimension, if the expert produced one
    if 'target_analysis' in analysis:
        target = analysis['target_analysis']
        # `or []` also covers a key that is present but holds None.
        pmids = target.get('pmids_referenced') or []
        print("\n靶点分析:")
        print(f"  使用文献片段: {target.get('chunks_used', 0)}")
        print(f"  引用PMID数: {len(pmids)}")

    return result

# Run the test (top-level await); the result is kept in `result3`, which the
# save cell reads later.
result3 = await test_drug_target()

# %% Cell 7: 查看完整报告
def display_report(result, max_lines=50):
    """Print the text stored under ``result['report']``, abbreviated if long.

    Args:
        result: Mapping that may contain a ``'report'`` string.
        max_lines: Maximum number of report lines to print. A report with
            more lines than this is shown as its first ~60% and last ~40%
            of the budget (30 + 20 lines for the default of 50), separated
            by an omission marker. Values below zero are treated as zero.

    Nothing is printed when ``result`` has no ``'report'`` key or the stored
    report is ``None``.
    """
    if 'report' not in result:
        return
    report = result['report']
    if report is None:
        return

    report_lines = report.split('\n')

    if len(report_lines) <= max_lines:
        print("报告内容:")
        print("=" * 60)
        print(report)
        return

    # Derive head/tail sizes from max_lines so the two parts can never overlap
    # or exceed the budget (ceil(0.6 * budget) keeps the default at 30 / 20).
    budget = max(max_lines, 0)
    head_count = (budget * 3 + 4) // 5
    tail_count = budget - head_count

    print("报告内容 (部分):")
    print("=" * 60)
    print('\n'.join(report_lines[:head_count]))
    print("\n... [中间内容省略] ...\n")
    # Slice from an explicit start index: `[-0:]` would return every line.
    print('\n'.join(report_lines[len(report_lines) - tail_count:]))

# Show the report produced by the first test (EGFR / lung cancer)
print("\n" + "=" * 60, "EGFR肺癌分析报告", "=" * 60, sep="\n")
display_report(result1)

# %% Cell 8: save results
# Persist a compact JSON summary of the three runs plus the full EGFR report.

import os
from datetime import datetime

# Create the output directory
output_dir = "test_results"
os.makedirs(output_dir, exist_ok=True)

# One timestamp shared by both files so they can be matched up afterwards
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Build and serialise the summary BEFORE opening the file: a missing key or a
# non-serialisable value then fails without leaving an empty/partial JSON file.
summary = {
    'test1_egfr': {
        'total_papers': result1['total_papers'],
        'evidence_level': result1['evidence_level'],
        'key_papers': result1['key_papers'][:3]
    },
    'test2_cart': {
        'total_papers': result2['total_papers'],
        'evidence_level': result2['evidence_level']
    },
    'test3_pd1': {
        'total_papers': result3['total_papers'],
        'evidence_level': result3['evidence_level']
    }
}
summary_text = json.dumps(summary, ensure_ascii=False, indent=2)

json_path = os.path.join(output_dir, f"literature_result_{timestamp}.json")
with open(json_path, 'w', encoding='utf-8') as f:
    f.write(summary_text)

# Save the report only when one exists; display_report() applies the same
# "'report' in result" guard before reading it.
report_text = result1['report'] if 'report' in result1 else None
if report_text is not None:
    report_path = os.path.join(output_dir, f"report_egfr_{timestamp}.md")
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report_text)
else:
    print("未找到报告内容，已跳过报告保存")

print(f"\n结果已保存到 {output_dir}/ 目录")

# %% Cell 9: 简单性能测试
import time

async def quick_performance_test():
    """Time ``expert.analyze`` on three small cases and print one row each.

    Each case is a ``(name, Entity)`` pair; the name doubles as the single
    search term and is embedded in the focus string. For every case one line
    with the name, the elapsed seconds and ``result['total_papers']`` is
    printed between two dashed rules. Cases run sequentially.

    Returns:
        None.
    """

    test_cases = [
        ("BRCA1", Entity(target="BRCA1", disease="breast cancer")),
        ("TP53", Entity(target="TP53", disease=None)),
        ("RNAi", Entity(therapy="RNAi", disease="liver fibrosis"))
    ]

    print("性能测试:")
    print("-" * 40)

    for name, entity in test_cases:
        # perf_counter() is monotonic and high-resolution; time.time() follows
        # the wall clock and can jump if the system time is adjusted.
        start = time.perf_counter()

        result = await expert.analyze(
            entity=entity,
            search_terms=[name],
            focus=f"Analysis of {name}"
        )

        elapsed = time.perf_counter() - start
        papers = result['total_papers']

        print(f"{name:10} | {elapsed:6.2f}s | {papers:3} papers")

    print("-" * 40)

# Run the timing loop (top-level await); it only prints and returns nothing.
await quick_performance_test()

# %% Cell 10: 测试空查询处理
async def test_edge_cases():
    """Call ``expert.analyze`` with two unusual inputs and print paper counts.

    The two scenarios, run in order, are:
      1. an ``Entity`` built with no arguments and the single search term
         ``"test"``;
      2. an ``Entity`` with only ``target="XYZ123ABC"`` and an empty list of
         search terms.

    Returns:
        None.
    """
    print("\n边缘情况测试:")

    # (label, Entity keyword arguments, search terms, focus)
    scenarios = (
        ("空实体测试", {}, ["test"], "Test empty entity"),
        ("罕见靶点测试", {"target": "XYZ123ABC"}, [], "Test rare target"),
    )

    for label, entity_kwargs, terms, focus in scenarios:
        # The entity is built just before its own call, one scenario at a time.
        outcome = await expert.analyze(
            entity=Entity(**entity_kwargs),
            search_terms=terms,
            focus=focus,
        )
        print(f"{label}: {outcome['total_papers']} 篇文献")

# Run the edge-case checks (top-level await), then print the closing message.
await test_edge_cases()

print("\n所有测试完成！")