In [51]:
from dotenv import load_dotenv

# Load environment variables via python-dotenv before anything reads them
# (GEMINI_API_KEY is read with os.getenv further down). The return value is
# bound to `_` so the notebook cell does not echo it.
_ = load_dotenv()

In [52]:
# State
from typing import TypedDict
from langgraph.graph import add_messages
from typing_extensions import Annotated


import operator

class OverallState(TypedDict):
    """Dictionary schema for the overall agent state.

    The list-valued fields carry a combiner in their ``Annotated`` metadata
    (``add_messages`` or ``operator.add``). Presumably LangGraph uses these as
    reducers when merging node updates -- TODO confirm against the graph
    definition, which is not part of this notebook excerpt.
    """

    # Message list, annotated with langgraph's `add_messages`.
    messages: Annotated[list, add_messages]
    # Search queries; `web_research` returns its query here as a one-element list.
    search_query: Annotated[list, operator.add]
    # Research texts; `web_research` returns the model's text here as a one-element list.
    web_research_result: Annotated[list, operator.add]
    # Not written by any code visible here -- presumably collected source/citation records.
    sources_gathered: Annotated[list, operator.add]
    # The remaining fields are plain (un-annotated) values; their exact use is
    # not shown in this excerpt, so the names are the only documentation.
    initial_search_query_count: int
    max_research_loops: int
    research_loop_count: int
    reasoning_model: str

class WebSearchState(TypedDict):
    """Input state for a single ``web_research`` call."""

    # Query text; `web_research` inserts it into the prompt as the research topic.
    search_query: str
    # Identifier for this search; not read by `web_research` in this file.
    id: str



In [53]:
# Configuration
import os
from pydantic import BaseModel, Field
from typing import Any, Optional

from langchain_core.runnables import RunnableConfig


class Configuration(BaseModel):
    """The configuration for the agent.

    Attributes:
        query_generator_model: Name of the language model used for query
            generation (also used by ``web_research`` for the search call).
        max_research_loops: Maximum number of research loops to perform.
    """

    # `description=` is the supported pydantic v2 way to document a field;
    # passing arbitrary extra keywords such as `metadata=` to Field is deprecated.
    query_generator_model: str = Field(
        default="gemini-2.0-flash",
        description="The name of the language model to use for the agent's query generation.",
    )

    max_research_loops: int = Field(
        default=2,
        description="The maximum number of research loops to perform.",
    )

    @classmethod
    def from_runnable_config(
        cls, config: Optional[RunnableConfig] = None
    ) -> "Configuration":
        """Create a Configuration instance from a RunnableConfig.

        For every declared field the value is looked up in this order:

        1. the environment variable named after the upper-cased field
           (e.g. ``MAX_RESEARCH_LOOPS``),
        2. ``config["configurable"][<field name>]``,
        3. the field's default.

        Args:
            config: Optional runnable config. A missing config, a missing
                ``"configurable"`` key, or a ``None`` value under that key
                are all treated as "no overrides".

        Returns:
            A Configuration built from the resolved values. Conversion of
            environment strings to the declared field types is left to
            pydantic's field validation.
        """
        # `or {}` also covers an explicit `"configurable": None`, which would
        # otherwise fail on the `.get` calls below.
        configurable = (config or {}).get("configurable") or {}

        # Environment variables take precedence over the configurable mapping.
        raw_values: dict[str, Any] = {
            name: os.environ.get(name.upper(), configurable.get(name))
            for name in cls.model_fields
        }

        # Drop unset entries so the field defaults apply.
        values = {k: v for k, v in raw_values.items() if v is not None}

        return cls(**values)

In [54]:
# Prompts
from datetime import datetime


# Get current date in a readable format
def get_current_date():
    """Return today's local date as text, e.g. ``"January 05, 2025"``."""
    today = datetime.now()
    # Full month name, zero-padded day, four-digit year.
    return f"{today:%B %d, %Y}"
# Prompt template for the web-search step. It is filled in with str.format and
# has exactly two placeholders: {research_topic} and {current_date}
# (see web_research). Any literal braces added later must be doubled.
web_searcher_instructions = """Conduct targeted Google Searches to gather the most recent, credible information on "{research_topic}" and synthesize it into a verifiable text artifact.

Instructions:
- Query should ensure that the most current information is gathered. The current date is {current_date}.
- Conduct multiple, diverse searches to gather comprehensive information.
- Consolidate key findings while meticulously tracking the source(s) for each specific piece of information.
- The output should be a well-written summary or report based on your search findings. 
- Only include the information found in the search results, don't make up any information.
- Ensure search results respect the specified time constraints.

# Output Format

- Provide a structured response in markdown format.
- Include the following sections:
    - **Problem Statement**: Restate the problem for clarity.
    - **Research Findings**: Organize your findings by topic rather than by tool used. For each major finding:
        - Summarize the key information
        - Track the sources of information but DO NOT include inline citations in the text
        - Include relevant images if available
    - **Conclusion**: Provide a synthesized response to the problem based on the gathered information.
    - **References**: List all sources used with their complete URLs in link reference format at the end of the document. Make sure to include an empty line between each reference for better readability. Use this format for each reference:
      ```markdown
      - [Source Title](https://example.com/page1)

      - [Source Title](https://example.com/page2)
      ```
- DO NOT include inline citations in the text. Instead, track all sources and list them in the References section at the end using link reference format.
"""

In [55]:
# Graph
from google.genai import Client
# Shared google-genai client; web_research uses it to call the model with the
# google_search tool. The key comes from the GEMINI_API_KEY environment variable.
genai_client = Client(api_key=os.environ.get("GEMINI_API_KEY"))

In [59]:
def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
    """LangGraph node that researches one query with the native Google Search tool.

    Formats ``web_searcher_instructions`` with the query and today's date,
    sends it through ``genai_client`` to the model named by
    ``Configuration.query_generator_model`` with the ``google_search`` tool
    enabled, and returns the generated text.

    Args:
        state: Must contain ``search_query``; no other key is read.
        config: Runnable config, resolved via
            ``Configuration.from_runnable_config``.

    Returns:
        A partial state update with two keys:

        - ``search_query``: one-element list holding the query that was run.
        - ``web_research_result``: one-element list holding the response
          text, or ``""`` when the response carries no text.
    """
    # Configure
    configurable = Configuration.from_runnable_config(config)
    formatted_prompt = web_searcher_instructions.format(
        current_date=get_current_date(),
        research_topic=state["search_query"],
    )

    # Uses the google genai client as the langchain client doesn't return grounding metadata
    response = genai_client.models.generate_content(
        model=configurable.query_generator_model,
        contents=formatted_prompt,
        config={
            "tools": [{"google_search": {}}],
            "temperature": 0,
        },
    )

    # NOTE: no citations are inserted here; the model text is passed through
    # as-is. `response.text` can be None when the response has no text parts,
    # so normalise to an empty string to keep the result list homogeneous.
    research_text = response.text or ""

    return {
        "search_query": [state["search_query"]],
        "web_research_result": [research_text],
    }

In [60]:
# Make sure the earlier cells (imports and definitions) have been run first.
from langchain_core.runnables import RunnableConfig

# Build the input state for a single search.
state = WebSearchState(
    search_query="major AI research breakthroughs 2024",
    id="0",
)

# Build an empty config, so Configuration uses environment variables or its defaults.
config = RunnableConfig()

# Call web_research directly.
result = web_research(state=state, config=config)

# Print the result.
print(result)

{'search_query': ['major AI research breakthroughs 2024'], 'web_research_result': ['Okay, I will conduct targeted Google Searches to gather the most recent, credible information on "major AI research breakthroughs 2024" and synthesize it into a verifiable text artifact.\n\n**Problem Statement**: To identify and summarize major AI research breakthroughs in 2024, based on credible information gathered through targeted Google Searches.\n\n**Research Findings**\n\n**1. Advancements in Generative AI Chatbots**\nThe evolution of generative AI chatbots continued in 2024, with new features like memory and multimodal capabilities breaking ground. Companies like Google, Meta, and Anthropic released rival chatbots, and there were many open-source collaborations.\n\n**2. Apple\'s Entry into the AI Arena**\nApple integrated OpenAI-powered generative language and graphics functionality across its product ecosystem with Apple Intelligence. This move is considered a watershed moment in consumer adopti