In [2]:
# 测试Google Search API和web_research功能
import os
import sys
from dotenv import load_dotenv
from google.genai import Client
import json
from pprint import pprint

# 添加项目根目录到路径
sys.path.append(os.path.join(os.getcwd(), 'src'))

# 加载环境变量
env_path = r'C:\GitRepo\gemini-fullstack-langgraph-quickstart\backend\.env'
load_dotenv(dotenv_path=env_path)

# 检查API密钥
print("GEMINI_API_KEY:", "已设置" if os.getenv("GEMINI_API_KEY") else "未设置")
print("AZURE_OPENAI_ENDPOINT:", os.getenv('AZURE_OPENAI_ENDPOINT'))

# 初始化Google Generative AI客户端
genai_client = Client(api_key=os.getenv("GEMINI_API_KEY"))


GEMINI_API_KEY: 已设置
AZURE_OPENAI_ENDPOINT: https://jz-fdpo-swn.openai.azure.com/


In [3]:
# 导入所需的工具函数和提示模板
from agent.prompts import get_current_date, web_searcher_instructions
from agent.utils import get_citations, resolve_urls, insert_citation_markers

# 测试搜索查询
test_query = "最新的人工智能发展趋势"

# 格式化搜索提示
current_date = get_current_date()
formatted_prompt = web_searcher_instructions.format(
    current_date=current_date,
    research_topic=test_query,
)

print("=== 格式化的提示词 ===")
print(formatted_prompt)
print("\n" + "="*50 + "\n")


=== 格式化的提示词 ===
Conduct targeted Google Searches to gather the most recent, credible information on "最新的人工智能发展趋势" and synthesize it into a verifiable text artifact.

Instructions:
- Query should ensure that the most current information is gathered. The current date is June 05, 2025.
- Conduct multiple, diverse searches to gather comprehensive information.
- Consolidate key findings while meticulously tracking the source(s) for each specific piece of information.
- The output should be a well-written summary or report based on your search findings. 
- Only include the information found in the search results, don't make up any information.

Research Topic:
最新的人工智能发展趋势





In [16]:
# $ test with no grounding tool
# 调用Google Search API
try:
    response_hello = genai_client.models.generate_content(
        model="gemini-2.0-flash-exp",  # 使用默认模型
        contents="hello",
        config={
            # "tools": [{"google_search": {}}],
            "temperature": 0,
        },
    )
    
    print("=== API调用成功! ===")
    print(f"响应状态: 成功")
    print(response_hello.text)
    
except Exception as e:
    print(f"API调用失败: {e}")
    response_hello = None

=== API调用成功! ===
响应状态: 成功
Hello! How can I help you today?



In [15]:
# $ test with no grounding tool
# 调用Google Search API
try:
    response_no_grounding = genai_client.models.generate_content(
        model="gemini-2.0-flash-exp",  # 使用默认模型
        contents=formatted_prompt,
        config={
            # "tools": [{"google_search": {}}],
            "temperature": 0,
        },
    )
    
    print("=== API调用成功! ===")
    print(f"响应状态: 成功")
    print(response_no_grounding.text)
    
except Exception as e:
    print(f"API调用失败: {e}")
    response_no_grounding = None

=== API调用成功! ===
响应状态: 成功
Okay, I will conduct targeted Google Searches to gather the most recent, credible information on "最新的人工智能发展趋势" (latest AI development trends) as of June 5, 2025, and synthesize it into a verifiable text artifact. I will focus on finding sources dated in 2025 or late 2024 to ensure the information is current.

**Search Queries:**

1.  "最新的人工智能发展趋势 2025" (Latest AI development trends 2025)
2.  "人工智能发展趋势 2025" (AI development trends 2025)
3.  "未来人工智能发展方向 2025" (Future AI development direction 2025)
4.  "人工智能技术最新进展 2025" (Latest advances in AI technology 2025)
5.  "AI发展趋势报告 2025" (AI development trend report 2025)
6.  "中国人工智能发展趋势 2025" (China AI development trends 2025) - *Given the language, focusing on China is relevant.*
7.  "最新AI应用领域 2025" (Latest AI application areas 2025)

**Expected Challenges:**

*   Finding information *specifically* dated in 2025 will be difficult, as it's only June. I will prioritize late 2024 and early 2025 sources and extrapolate tren

In [4]:
# 调用Google Search API
try:
    response = genai_client.models.generate_content(
        model="gemini-2.0-flash-exp",  # 使用默认模型
        contents=formatted_prompt,
        config={
            "tools": [{"google_search": {}}],
            "temperature": 0,
        },
    )
    
    print("=== API调用成功! ===")
    print(f"响应状态: 成功")
    
except Exception as e:
    print(f"API调用失败: {e}")
    response = None


=== API调用成功! ===
响应状态: 成功


In [14]:
# 查看原始响应内容
if response:
    print("=== 原始响应对象类型 ===")
    print(f"响应类型: {type(response)}")
    # print(f"响应属性: {dir(response)}")
    print("\n" + "="*50 + "\n")
    
    # 查看响应的主要内容
    print("=== 生成的文本内容 ===")
    print(response.text)
    print("\n" + "="*50 + "\n")
    
    # 查看候选结果
    print("=== 候选结果信息 ===")
    for i, candidate in enumerate(response.candidates):
        print(f"候选结果 {i+1}:")
        print(f"  - 内容: {candidate.content}")
        print(f"  - 安全评级: {candidate.safety_ratings}")
        if hasattr(candidate, 'grounding_metadata'):
            print(f"  - 有grounding_metadata: {candidate.grounding_metadata is not None}")
        print()
    print("="*50 + "\n")


=== 原始响应对象类型 ===
响应类型: <class 'google.genai.types.GenerateContentResponse'>


=== 生成的文本内容 ===
基于最新的研究和分析，以下是2025年人工智能发展的主要趋势：

**1. 人工智能的可及性和更低的培训成本:**
*   人工智能的普及正在改变模型的训练和部署方式。模型架构和硬件效率的提高大大降低了大规模人工智能系统的训练成本，使更多用户可以使用这些系统。 (Ultralytics)
*   人工智能技术的普及促进了各行各业的创新，使初创企业和小型企业能够开发和部署曾经属于大型企业领域的人工智能解决方案。培训成本的降低也加快了迭代周期，使人工智能模型的实验和完善更加迅速。(Ultralytics)

**2. 人工智能代理和人工通用智能（AGI）:**
*   人工智能代理正变得越来越先进，为实现人工通用智能（AGI）架起了桥梁。与传统的人工智能系统专为狭窄的任务而设计不同，这些代理可以不断学习，适应动态环境，并根据实时数据做出独立决策。(Ultralytics)
*   2025年，多代理系统，即多个人工智能代理合作实现复杂目标，有望变得更加突出。这些系统可以优化工作流程，产生洞察力，并协助各行各业做出决策。(Ultralytics)

**3. 计算机视觉的进步:**
*   视觉转换器（ViTs）、边缘人工智能和3D视觉等新方法正在推进实时感知和分析。这些技术为自动化、医疗保健、可持续发展和机器人技术带来了新的可能性，使计算机视觉比以往任何时候都更加高效和强大。(Ultralytics)
*   自监督学习、视觉转换器和边缘人工智能等先进技术有望提升机器感知、分析和与世界交互的能力。这些创新将继续推动实时图像处理、物体检测和环境监测的发展，使人工智能驱动的视觉系统更高效、更易于在各行各业使用。(Ultralytics)

**4. 边缘人工智能:**
*   边缘计算的一个关键优势是它能够在云连接有限或不切实际的环境中实现实时决策。通过将边缘计算与人工智能驱动的视觉相结合，各行业可以实现更高的可扩展性、更快的响应速度和更强的安全性，使实时人工智能视觉成为2025年自动化的基石。(Ultralytics)

**5. 道德和负责任的人工智能:**
*   随着人工智能越来越

In [11]:
# 详细查看grounding metadata（Google搜索结果的核心数据）
if response and response.candidates:
    candidate = response.candidates[0]
    
    if hasattr(candidate, 'grounding_metadata') and candidate.grounding_metadata:
        print("=== Grounding Metadata (Google搜索结果) ===")
        grounding_metadata = candidate.grounding_metadata
        
        print(f"Grounding Metadata 类型: {type(grounding_metadata)}")
        # print(f"Grounding Metadata 属性: {dir(grounding_metadata)}")
        print()
        
        # 查看搜索结果块
        if hasattr(grounding_metadata, 'grounding_chunks'):
            print(f"搜索结果块数量: {len(grounding_metadata.grounding_chunks)}")
            print()
            
           
        # 尝试将grounding_metadata转换为字典查看完整结构
        try:
            # 如果有to_dict方法
            if hasattr(grounding_metadata, 'to_dict'):
                metadata_dict = grounding_metadata.to_dict()
                print("=== Grounding Metadata 完整结构 (字典格式) ===")
                pprint(metadata_dict, depth=3)
            else:
                print("=== Grounding Metadata 原始对象 ===")
                # 打印更详细的结构信息
                print("Grounding Chunks:")
                for i, chunk in enumerate(grounding_metadata.grounding_chunks):
                    print(f"\nChunk {i+1}:")
                    if hasattr(chunk, 'web') and chunk.web:
                        print(f"  Title: {chunk.web.title}")
                        print(f"  URI: {chunk.web.uri}")
                
                print("\nGrounding Supports:")
                for i, support in enumerate(grounding_metadata.grounding_supports):
                    print(f"\nSupport {i+1}:")
                    print(f"  Confidence Scores: {support.confidence_scores}")
                    print(f"  Chunk Indices: {support.grounding_chunk_indices}")
                    if hasattr(support, 'segment'):
                        print(f"  Segment Text: {support.segment.text}")
                        print(f"  Start Index: {support.segment.start_index}")
                        print(f"  End Index: {support.segment.end_index}")
        except Exception as e:
            print(f"无法转换为字典: {e}")
    else:
        print("没有找到grounding_metadata")
        
print("\n" + "="*50 + "\n")


=== Grounding Metadata (Google搜索结果) ===
Grounding Metadata 类型: <class 'google.genai.types.GroundingMetadata'>

搜索结果块数量: 9

=== Grounding Metadata 原始对象 ===
Grounding Chunks:

Chunk 1:
  Title: botpress.com
  URI: https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFuLVbyL09GuHpcgAOR-cpy5Suozvy7rbNuYEdyf-Jq-f2rAEAFOhNA1Z9Z55lACNlHvzMGppGo-De7DLvmTBW4dKJc56eQVLbAMBk06VviknYb8kXnbCQu1E7VM9AguIqzayQOyLV7UGrF6JSm-J6liGGOug_Eudonpg==

Chunk 2:
  Title: ultralytics.com
  URI: https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFjFtNsDD79h2up8Z2rJRiKnC-faj6P3ojQZVMxR95oaRQ_Gc93kYvx2IO9MXLJUhJqSqEI_vHwjWg22iCgTd-oM6Jd3Wg51IK1zERbAXQSf6UsqBjk7iFzYqwna2xQKRi_4nyyfwW7ITQilSYwdXUYzuMxCFn7e6mj_RLouK7lkmWe3EsOY-NoivA5FF2jRU6Qupe66UuV

Chunk 3:
  Title: news.cn
  URI: https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHIG1SQBGVTlUDzGv7BATCbLy0rBgg_EptKWq4jGZ1MN0cCEfMI12do6wRwTSKU8tgVOtoU0Bb11RftWtNM1PcG5j3ABh2MEwPeJTDG9VKFBhCbP9UYM-r1Msogq5SHAhwdXV0DI

In [13]:

mock_state = {"id": 0}

# resolve the urls to short urls for saving tokens and time
resolved_urls = resolve_urls(
    response.candidates[0].grounding_metadata.grounding_chunks,  mock_state["id"]

)

print("resolved_urls:\n", resolved_urls, "\n")    
# Gets the citations and adds them to the generated text
citations = get_citations(response, resolved_urls)
print("citations:\n", citations, "\n"   )
modified_text = insert_citation_markers(response.text, citations)
print("modified_text:\n", modified_text, "\n")
sources_gathered = [item for citation in citations for item in citation["segments"]]
print("sources_gathered:\n", sources_gathered, "\n" )

# return {
#     "sources_gathered": sources_gathered,
#     "search_query": [state["search_query"]],
#     "web_research_result": [modified_text],
# }

resolved_urls:
 {'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFuLVbyL09GuHpcgAOR-cpy5Suozvy7rbNuYEdyf-Jq-f2rAEAFOhNA1Z9Z55lACNlHvzMGppGo-De7DLvmTBW4dKJc56eQVLbAMBk06VviknYb8kXnbCQu1E7VM9AguIqzayQOyLV7UGrF6JSm-J6liGGOug_Eudonpg==': 'https://vertexaisearch.cloud.google.com/id/0-0', 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFjFtNsDD79h2up8Z2rJRiKnC-faj6P3ojQZVMxR95oaRQ_Gc93kYvx2IO9MXLJUhJqSqEI_vHwjWg22iCgTd-oM6Jd3Wg51IK1zERbAXQSf6UsqBjk7iFzYqwna2xQKRi_4nyyfwW7ITQilSYwdXUYzuMxCFn7e6mj_RLouK7lkmWe3EsOY-NoivA5FF2jRU6Qupe66UuV': 'https://vertexaisearch.cloud.google.com/id/0-1', 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHIG1SQBGVTlUDzGv7BATCbLy0rBgg_EptKWq4jGZ1MN0cCEfMI12do6wRwTSKU8tgVOtoU0Bb11RftWtNM1PcG5j3ABh2MEwPeJTDG9VKFBhCbP9UYM-r1Msogq5SHAhwdXV0DIFHldBccRHezY_gLwFojww173UkpyVOj3F7SXFOxkA==': 'https://vertexaisearch.cloud.google.com/id/0-2', 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9

In [7]:
# 测试web_research函数的完整流程
if response and response.candidates and hasattr(response.candidates[0], 'grounding_metadata'):
    print("=== 测试完整的web_research处理流程 ===")
    
    # 模拟state参数
    mock_state = {"id": 0}
    
    # 1. 解析URLs
    try:
        resolved_urls = resolve_urls(
            response.candidates[0].grounding_metadata.grounding_chunks, 
            mock_state["id"]
        )
        print(f"解析后的URL数量: {len(resolved_urls)}")
        print("解析后的URLs:")
        for url in resolved_urls:
            print(f"  - 短URL: {url['short_url']}")
            print(f"    原URL: {url['value']}")
            print()
    except Exception as e:
        print(f"URL解析失败: {e}")
        resolved_urls = []
    
    # 2. 获取引用
    try:
        citations = get_citations(response, resolved_urls)
        print(f"引用数量: {len(citations)}")
        print("引用信息:")
        for i, citation in enumerate(citations):
            print(f"  引用 {i+1}:")
            print(f"    - 段落数量: {len(citation['segments'])}")
            for segment in citation['segments']:
                print(f"      * 短URL: {segment['short_url']}")
                print(f"        标题: {segment.get('title', 'N/A')}")
    except Exception as e:
        print(f"引用获取失败: {e}")
        citations = []
    
    # 3. 插入引用标记
    try:
        modified_text = insert_citation_markers(response.text, citations)
        print("\n=== 插入引用标记后的文本 ===")
        print(modified_text)
        
        # 收集所有来源
        sources_gathered = [item for citation in citations for item in citation["segments"]]
        print(f"\n收集到的来源数量: {len(sources_gathered)}")
        
    except Exception as e:
        print(f"引用标记插入失败: {e}")
        
print("\n" + "="*50 + "\n")


=== 测试完整的web_research处理流程 ===
解析后的URL数量: 9
解析后的URLs:
URL解析失败: string indices must be integers, not 'str'
引用数量: 36
引用信息:
  引用 1:
    - 段落数量: 0
  引用 2:
    - 段落数量: 0
  引用 3:
    - 段落数量: 0
  引用 4:
    - 段落数量: 0
  引用 5:
    - 段落数量: 0
  引用 6:
    - 段落数量: 0
  引用 7:
    - 段落数量: 0
  引用 8:
    - 段落数量: 0
  引用 9:
    - 段落数量: 0
  引用 10:
    - 段落数量: 0
  引用 11:
    - 段落数量: 0
  引用 12:
    - 段落数量: 0
  引用 13:
    - 段落数量: 0
  引用 14:
    - 段落数量: 0
  引用 15:
    - 段落数量: 0
  引用 16:
    - 段落数量: 0
  引用 17:
    - 段落数量: 0
  引用 18:
    - 段落数量: 0
  引用 19:
    - 段落数量: 0
  引用 20:
    - 段落数量: 0
  引用 21:
    - 段落数量: 0
  引用 22:
    - 段落数量: 0
  引用 23:
    - 段落数量: 0
  引用 24:
    - 段落数量: 0
  引用 25:
    - 段落数量: 0
  引用 26:
    - 段落数量: 0
  引用 27:
    - 段落数量: 0
  引用 28:
    - 段落数量: 0
  引用 29:
    - 段落数量: 0
  引用 30:
    - 段落数量: 0
  引用 31:
    - 段落数量: 0
  引用 32:
    - 段落数量: 0
  引用 33:
    - 段落数量: 0
  引用 34:
    - 段落数量: 0
  引用 35:
    - 段落数量: 0
  引用 36:
    - 段落数量: 0

=== 插入引用标记后的文本 ===
基于最新的研究和分析，以下是2025年人工智能发展的主要趋势：

**1. 人工智

In [8]:
# 直接调用原始的web_research函数进行对比测试
print("=== 直接调用web_research函数 ===")

# 导入需要的类和配置
from agent.state import WebSearchState
from agent.configuration import Configuration
from langchain_core.runnables import RunnableConfig

# 创建模拟的state和config
mock_web_state = WebSearchState(
    search_query=test_query,
    id=0
)

# 创建配置
mock_config = RunnableConfig(
    configurable={
        "query_generator_model": "gemini-2.0-flash-exp",
        "reasoning_model": "gemini-2.0-flash-exp"
    }
)

# 导入web_research函数
from agent.graph import web_research

try:
    # 调用web_research函数
    result = web_research(mock_web_state, mock_config)
    
    print("=== web_research函数返回结果 ===")
    print(f"搜索查询: {result['search_query']}")
    print(f"来源数量: {len(result['sources_gathered'])}")
    print(f"研究结果长度: {len(result['web_research_result'][0]) if result['web_research_result'] else 0}")
    print()
    print("=== 研究结果内容 ===")
    print(result['web_research_result'][0] if result['web_research_result'] else "无结果")
    print()
    print("=== 来源信息 ===")
    for i, source in enumerate(result['sources_gathered']):
        print(f"来源 {i+1}:")
        print(f"  - 短URL: {source['short_url']}")
        print(f"  - 原URL: {source['value']}")
        print(f"  - 标题: {source.get('title', 'N/A')}")
        print()
        
except Exception as e:
    print(f"web_research函数调用失败: {e}")
    import traceback
    traceback.print_exc()


=== 直接调用web_research函数 ===
=== web_research函数返回结果 ===
搜索查询: ['最新的人工智能发展趋势']
来源数量: 545
研究结果长度: 39373

=== 研究结果内容 ===
Okay, I will conduct targeted Google Searches to gather the most recent, credible information on "最新的人工智能发展趋势" and synthesize it into a verifiable text artifact.
Based on the search results, here's a summary of the latest AI development trends expected in 2025:

**1. Increased Accessibili [microsoft](https://vertexaisearch.cloud.google.com/id/0-0) [botpress](https://vertexaisearch.cloud.google.com/id/0-1) [sina](https://vertexaisearch.cloud.google.com/id/0-2) [ultralytics](https://vertexaisearch.cloud.google.com/id/0-3) [wallstreetcn](https://vertexaisearch.cloud.google.com/id/0-4) [cac](https://vertexaisearch.cloud.google.com/id/0-5)ty and Lower Training Costs:**

*   Advancements in model architecture and hardware efficiency are significantly reducing the training costs for large-scale AI systems, making them more accessible to a wider range of users. (Source: Ultralyti

In [None]:
# 可以修改这里测试不同的搜索查询
print("=== 测试其他搜索查询 ===")

# 测试其他查询
other_queries = [
    "Python最新版本特性",
    "OpenAI GPT-4 vs Claude",
    "机器学习算法比较"
]

for query in other_queries:
    print(f"\n--- 测试查询: {query} ---")
    
    # 格式化提示
    formatted_prompt = web_searcher_instructions.format(
        current_date=get_current_date(),
        research_topic=query,
    )
    
    try:
        response = genai_client.models.generate_content(
            model="gemini-2.0-flash-exp",
            contents=formatted_prompt,
            config={
                "tools": [{"google_search": {}}],
                "temperature": 0,
            },
        )
        
        print(f"✅ 搜索成功")
        print(f"生成文本长度: {len(response.text)}")
        
        if response.candidates and hasattr(response.candidates[0], 'grounding_metadata'):
            grounding_metadata = response.candidates[0].grounding_metadata
            if hasattr(grounding_metadata, 'grounding_chunks'):
                print(f"搜索结果数量: {len(grounding_metadata.grounding_chunks)}")
        
        # 显示前200个字符
        print(f"内容预览: {response.text[:200]}...")
        
    except Exception as e:
        print(f"❌ 搜索失败: {e}")
    
    print("-" * 40)
