# In [None]:
import json
import os
from datetime import datetime
from typing import AsyncGenerator, List, Optional

from dotenv import load_dotenv
from google.adk.agents import BaseAgent, LlmAgent, LoopAgent, SequentialAgent
from google.adk.agents.invocation_context import InvocationContext
from google.adk.events import Event, EventActions
from google.adk.tools import VertexAiSearchTool
from google.genai.types import Content, Part
from pydantic import BaseModel, Field

# Read settings from a .env file (when one exists) before anything below
# consults the environment.
load_dotenv()

# Resource name of the Vertex AI Search data store. When
# VERTEX_SEARCH_DATASTORE_ID is unset, the value falls back to a template whose
# YOUR_* segments still have to be filled in.
datastore_id = os.environ.get(
    "VERTEX_SEARCH_DATASTORE_ID",
    "projects/YOUR_PROJECT_ID/locations/YOUR_LOCATION/collections/default_collection/dataStores/YOUR_DATASTORE_ID",
)

# Search tool instance bound to that data store.
vertex_search_tool = VertexAiSearchTool(data_store_id=datastore_id)


# Application, user and session identifiers.
# NOTE(review): presumably consumed by the code that creates the runner and
# session, which is not part of this section — confirm.
APP_NAME = "deep_research_agent"
USER_ID = "research_user_01"
SESSION_ID_BASE = "research_session"
# Model used by the query-generation, search and reflection LLM steps.
GEMINI_MODEL = "gemini-2.5-flash"

# Keys under which workflow data is kept in session state.
STATE_QUERIES = "queries"
STATE_SEARCH_RESULTS = "search_results"
STATE_REFLECTION = "reflection"
STATE_FINAL_ANSWER = "final_answer"
STATE_RESEARCH_LOOP_COUNT = "research_loop_count"
STATE_QUERY_RATIONALE = "query_rationale"

# Configuration
# Upper bound on search/reflection passes (passed to the loop as max_iterations).
MAX_RESEARCH_LOOPS = 2
# Cap on the number of queries requested in the query-generation prompt.
NUMBER_OF_INITIAL_QUERIES = 3


def get_current_date():
    """Return today's local date as text, e.g. ``January 05, 2025``."""
    today = datetime.now()
    # Full month name, zero-padded day, four-digit year.
    return today.strftime("%B %d, %Y")


# Structured reply requested from the query-generation LLM; this model is
# passed to that LLM as its ``output_schema``.
class SearchQueries(BaseModel):
    # Short justification for the chosen queries.
    rationale: str = Field(
        description="Brief explanation of why these queries are relevant"
    )
    # The search queries themselves, one string per query.
    queries: List[str] = Field(description="A list of search queries")


# Structured verdict requested from the reflection LLM; this model is passed
# to that LLM as its ``output_schema``.
class ReflectionResult(BaseModel):
    # True when the gathered research is judged enough to answer the question.
    is_sufficient: bool = Field(
        description="Is the information sufficient to answer the original question?"
    )
    # Extra queries to run next. Optional: defaults to None, so consumers must
    # be prepared for a missing/null value rather than a list.
    follow_up_queries: Optional[List[str]] = Field(
        default=None, description="New search queries if more research needed"
    )


class SetupAgent(BaseAgent):
    """First workflow step: record the user's question in session state.

    The text of the first part of the incoming user content is stored under
    the ``"user_question"`` key. An empty string is stored when there is no
    user content, it has no parts, or the first part carries no text.
    """

    async def _run_async_impl(
        self, ctx: InvocationContext
    ) -> AsyncGenerator[Event, None]:
        incoming = ctx.user_content
        has_parts = bool(incoming and incoming.parts)
        # Only the first part is consulted; a missing/None text becomes "".
        question = (incoming.parts[0].text or "") if has_parts else ""

        status = Content(
            parts=[Part(text="Initializing deep research workflow...")]
        )
        # The state write travels with the status event as a state delta.
        save_question = EventActions(state_delta={"user_question": question})
        yield Event(author=self.name, content=status, actions=save_question)


class QueryGeneratorAgent(BaseAgent):
    """Generates sophisticated search queries and saves them to state.

    Reads ``"user_question"`` from session state, asks an LLM constrained to
    the ``SearchQueries`` schema for search queries, and stores the parsed
    JSON object under ``STATE_QUERIES``.

    If the reply is not valid JSON or does not contain a list of query
    strings, an error event is emitted and the user's question itself is
    stored as the only query, so the following search step neither runs with
    nothing to search for nor reuses queries left in state by an earlier run.
    """

    async def _run_async_impl(
        self, ctx: InvocationContext
    ) -> AsyncGenerator[Event, None]:
        user_question = ctx.session.state.get("user_question", "")

        # NOTE(review): ADK appears to expand `{name}` placeholders found in
        # string instructions from session state; a question that itself
        # contains braces might be affected — confirm against the installed
        # ADK version.
        query_agent = LlmAgent(
            name="QueryLLM",
            model=GEMINI_MODEL,
            instruction=f"""Your goal is to generate sophisticated and diverse web search queries for: {user_question}

**Current Date:** {get_current_date()}

**Instructions:**
- Always prefer a single search query, only add another query if the original question requests multiple aspects or elements and one query is not enough.
- Each query should focus on one specific aspect of the original question.
- Don't produce more than {NUMBER_OF_INITIAL_QUERIES} queries.
- Queries should be diverse, if the topic is broad, generate more than 1 query.
- Don't generate multiple similar queries, 1 is enough.
- Query should ensure that the most current information is gathered.

**Format:**
Output your response as a JSON object with these exact keys:
- "rationale": Brief explanation of why these queries are relevant
- "queries": A list of search queries (as strings)

**Example:**
```json
{{
    "rationale": "To answer this comparative growth question accurately, we need specific data points on Apple's stock performance and iPhone sales metrics. These queries target the precise financial information needed.",
    "queries": ["Apple total revenue growth fiscal year 2024", "iPhone unit sales growth fiscal year 2024"]
}}
```

Generate sophisticated search queries for the user's question.""",
            output_schema=SearchQueries,
        )

        async for event in query_agent.run_async(ctx):
            if event.author != "QueryLLM" or not event.content:
                continue
            # Streaming chunks carry incomplete JSON; wait for the full reply
            # instead of reporting a parse error for every chunk.
            if getattr(event, "partial", False):
                continue

            # Join every text part so a reply split across parts is not
            # truncated, and a missing/empty parts list does not raise.
            content_text = "".join(
                part.text for part in (event.content.parts or []) if part.text
            )

            try:
                queries_data = json.loads(content_text)
                queries = queries_data["queries"]
                if not isinstance(queries, list) or not all(
                    isinstance(query, str) for query in queries
                ):
                    raise TypeError("'queries' must be a list of strings")
            except (ValueError, KeyError, TypeError) as e:
                # ValueError covers json.JSONDecodeError; KeyError/TypeError
                # cover a reply of the wrong shape. Fall back to the raw
                # question (same {"queries": [...]} shape the search step
                # reads) and overwrite whatever was stored before.
                fallback_queries = [user_question] if user_question else []
                yield Event(
                    author=self.name,
                    content=Content(
                        parts=[Part(text=f"Error parsing query generation: {e}")]
                    ),
                    actions=EventActions(
                        state_delta={STATE_QUERIES: {"queries": fallback_queries}}
                    ),
                )
                continue

            yield Event(
                author=self.name,
                content=Content(
                    parts=[
                        Part(
                            text=f"Generated {len(queries)} search queries: {', '.join(queries)}"
                        )
                    ]
                ),
                actions=EventActions(state_delta={STATE_QUERIES: queries_data}),
            )


class SearchAgent(BaseAgent):
    """Executes searches and saves results to state.

    Reads the current queries from ``STATE_QUERIES`` and any earlier findings
    from ``STATE_SEARCH_RESULTS``, lets an LLM equipped with the Vertex AI
    Search tool research them, and stores each complete text reply under
    ``STATE_SEARCH_RESULTS`` (replacing the previous value; the prompt asks
    the LLM to integrate the earlier findings it is shown).

    Missing, null or malformed query data is treated as "no queries", and
    replies without any text are ignored rather than written to state.
    """

    async def _run_async_impl(
        self, ctx: InvocationContext
    ) -> AsyncGenerator[Event, None]:
        queries_data = ctx.session.state.get(STATE_QUERIES, {})
        # ``or ""`` keeps a stored None from showing up as "None" in the prompt.
        existing_results = ctx.session.state.get(STATE_SEARCH_RESULTS) or ""

        # The "queries" entry can legitimately be null (the reflection schema
        # allows it), which would make ", ".join(...) raise; keep only strings.
        raw_queries = (
            queries_data.get("queries") if isinstance(queries_data, dict) else None
        )
        queries = (
            [query for query in raw_queries if isinstance(query, str)]
            if isinstance(raw_queries, list)
            else []
        )

        # NOTE(review): ADK appears to expand `{name}` placeholders found in
        # string instructions from session state; research text containing
        # braces might be affected — confirm against the installed ADK version.
        search_agent = LlmAgent(
            name="SearchLLM",
            model=GEMINI_MODEL,
            instruction=f"""Conduct targeted searches to gather the most recent, credible information and synthesize it into a verifiable text artifact.

**Current Date:** {get_current_date()}

**Current Queries:** {", ".join(queries)}

**Existing Research (if any):** {existing_results}

**Instructions:**
- Query should ensure that the most current information is gathered.
- Conduct comprehensive searches to gather comprehensive information.
- Consolidate key findings while meticulously tracking the source(s) for each specific piece of information.
- The output should be a well-written summary or report based on your search findings.
- Only include the information found in the search results, don't make up any information.
- If building upon existing research, integrate new findings with previous results while avoiding duplication.

**Output Format:**
Organize results by:
- Key findings and insights
- Supporting evidence and data
- Source information and credibility
- Relevance to the original question

Provide a comprehensive compilation of all search results.""",
            tools=[vertex_search_tool],
        )

        async for event in search_agent.run_async(ctx):
            if event.author != "SearchLLM" or not event.content:
                continue
            # Ignore streaming chunks so an incomplete report never replaces
            # the stored results.
            if getattr(event, "partial", False):
                continue

            # Join every text part; skip replies that carry no text at all
            # (empty parts, or non-text parts only).
            search_results = "".join(
                part.text for part in (event.content.parts or []) if part.text
            )
            if not search_results:
                continue

            yield Event(
                author=self.name,
                content=Content(
                    parts=[
                        Part(text="Research completed - consolidating findings...")
                    ]
                ),
                actions=EventActions(
                    state_delta={STATE_SEARCH_RESULTS: search_results}
                ),
            )


class ReflectionAgent(BaseAgent):
    """Evaluates research sufficiency and either escalates or generates follow-up queries.

    An LLM constrained to the ``ReflectionResult`` schema judges the findings
    stored under ``STATE_SEARCH_RESULTS`` against the user's question. The
    outcome of each complete reply is one event:

    * sufficient (or ``is_sufficient`` missing) -> escalate, ending the loop;
    * not sufficient with follow-up queries -> store them under
      ``STATE_QUERIES`` as ``{"queries": [...]}`` for the next search pass;
    * not sufficient but no usable follow-up queries -> escalate, because
      another pass would have nothing new to search for;
    * unparseable reply -> escalate (the original fallback policy).
    """

    def _status_event(self, text: str, actions: EventActions) -> Event:
        """Build an event authored by this agent with one text part."""
        return Event(
            author=self.name,
            content=Content(parts=[Part(text=text)]),
            actions=actions,
        )

    async def _run_async_impl(
        self, ctx: InvocationContext
    ) -> AsyncGenerator[Event, None]:
        user_question = ctx.session.state.get("user_question", "")
        search_results = ctx.session.state.get(STATE_SEARCH_RESULTS, "")

        # NOTE(review): ADK appears to expand `{name}` placeholders found in
        # string instructions from session state; research text containing
        # braces might be affected — confirm against the installed ADK version.
        reflection_agent = LlmAgent(
            name="ReflectionLLM",
            model=GEMINI_MODEL,
            instruction=f"""You are an expert research assistant analyzing research summaries for: {user_question}

**Current Date:** {get_current_date()}

**Research Results:**
{search_results}

**Task:** Evaluate if the current research provides sufficient context to answer the user's question comprehensively.

**Evaluation Criteria:**
1. **Completeness**: Do we have enough information to fully address the question?
2. **Quality**: Are the sources credible and authoritative?
3. **Coverage**: Are all key aspects of the question addressed?
4. **Depth**: Do we have sufficient detail for a comprehensive answer?
5. **Currency**: Is the information current and up-to-date?

**Output Format:**
Respond with a JSON object:
{{
    "is_sufficient": true/false,
    "follow_up_queries": ["query1", "query2"] // only if is_sufficient is false
}}

Be decisive. If sufficient, set is_sufficient to true. If not, provide 2-3 targeted follow-up queries.""",
            output_schema=ReflectionResult,
        )

        async for event in reflection_agent.run_async(ctx):
            if event.author != "ReflectionLLM" or not event.content:
                continue
            # A streaming chunk holds incomplete JSON; parsing it would fail
            # and end the loop prematurely via the fallback below.
            if getattr(event, "partial", False):
                continue

            content_text = "".join(
                part.text for part in (event.content.parts or []) if part.text
            )

            try:
                reflection_data = json.loads(content_text)
                is_sufficient = reflection_data.get("is_sufficient", True)
                raw_follow_up = reflection_data.get("follow_up_queries")
            except (ValueError, AttributeError, TypeError):
                # Invalid JSON (ValueError) or a non-object reply
                # (AttributeError/TypeError): default to escalation.
                yield self._status_event(
                    "Reflection analysis complete. Proceeding to summary.",
                    EventActions(escalate=True),
                )
                continue

            # The schema allows null here, and .get(key, []) would pass that
            # None through; normalise to a list of non-blank strings.
            follow_up = (
                [q for q in raw_follow_up if isinstance(q, str) and q.strip()]
                if isinstance(raw_follow_up, list)
                else []
            )

            if is_sufficient:
                yield self._status_event(
                    "Information is sufficient. Proceeding to final summary.",
                    EventActions(escalate=True),
                )
            elif not follow_up:
                yield self._status_event(
                    "Information not sufficient, but no follow-up queries "
                    "were provided. Proceeding to final summary.",
                    EventActions(escalate=True),
                )
            else:
                yield self._status_event(
                    f"Information not sufficient. New queries: {follow_up}",
                    EventActions(
                        state_delta={STATE_QUERIES: {"queries": follow_up}}
                    ),
                )


# One research pass: run the searches, then judge whether the findings are
# enough. The loop is capped at MAX_RESEARCH_LOOPS passes.
_research_steps = [
    SearchAgent(name="SearchAgent"),
    ReflectionAgent(name="ReflectionAgent"),
]
research_loop = LoopAgent(
    name="ResearchLoop",
    max_iterations=MAX_RESEARCH_LOOPS,
    sub_agents=_research_steps,
)


class AnswerGeneratorAgent(BaseAgent):
    """Generates the comprehensive final answer and returns it to the user.

    Feeds the user's question and the findings stored under
    ``STATE_SEARCH_RESULTS`` to a more capable model. Each complete text
    reply is emitted as an event authored by this agent, marked
    ``turn_complete``, and also stored under ``STATE_FINAL_ANSWER``.
    Streaming chunks and replies without text are skipped.
    """

    async def _run_async_impl(
        self, ctx: InvocationContext
    ) -> AsyncGenerator[Event, None]:
        user_question = ctx.session.state.get("user_question", "")
        search_results = ctx.session.state.get(STATE_SEARCH_RESULTS, "")

        # NOTE(review): ADK appears to expand `{name}` placeholders found in
        # string instructions from session state; research text containing
        # braces might be affected — confirm against the installed ADK version.
        answer_agent = LlmAgent(
            name="AnswerLLM",
            model="gemini-2.5-pro",  # Use more powerful model for final answer
            instruction=f"""Generate a high-quality answer to the user's question: {user_question}

**Current Date:** {get_current_date()}

**Research Summaries:**
{search_results}

**Instructions:**
- You are the final step of a multi-step research process
- You have access to all information gathered from previous research steps
- Generate a high-quality, comprehensive answer based on the research summaries
- Structure your response professionally with clear sections and flow
- Include sources and citations where applicable
- Be thorough but concise

**Answer Structure:**
1. **Direct Response**: Lead with a clear, concise answer to the question
2. **Comprehensive Analysis**: Provide detailed information with supporting evidence
3. **Key Insights**: Highlight the most important findings
4. **Supporting Evidence**: Reference specific data and sources from research
5. **Context & Implications**: Provide broader context and practical implications
6. **Conclusion**: Summarize key takeaways

**Format Requirements:**
- Use clear markdown formatting with headers and sections
- Include bullet points and lists for clarity
- Bold key findings and important information
- Professional, authoritative tone
- Comprehensive yet accessible language

**Quality Standards:**
- Ensure accuracy by only including information from research summaries
- Provide balanced perspective when multiple viewpoints exist
- Address all aspects of the original question
- Maintain logical flow and organization

Generate a comprehensive, well-structured answer that fully addresses the user's question.""",
        )

        async for event in answer_agent.run_async(ctx):
            if event.author != "AnswerLLM" or not event.content:
                continue
            # A streaming chunk is not a finished answer and must not be
            # flagged as completing the turn.
            if getattr(event, "partial", False):
                continue

            # Join every text part so an answer split across parts is kept
            # whole; skip replies that carry no text.
            final_answer = "".join(
                part.text for part in (event.content.parts or []) if part.text
            )
            if not final_answer:
                continue

            yield Event(
                # Attribute the answer to this agent, like every other step,
                # instead of an empty author name.
                author=self.name,
                content=Content(parts=[Part(text=final_answer)]),
                actions=EventActions(
                    state_delta={STATE_FINAL_ANSWER: final_answer}
                ),
                turn_complete=True,
            )


# Top-level pipeline, run in order: store the question, generate queries,
# iterate search + reflection, then write the final answer.
_pipeline_steps = [
    SetupAgent(name="SetupAgent"),
    QueryGeneratorAgent(name="QueryGeneratorAgent"),
    research_loop,
    AnswerGeneratorAgent(name="AnswerGeneratorAgent"),
]
root_agent = SequentialAgent(
    name="EnhancedDeepResearchAgent",
    description="Enhanced deep research agent with sophisticated prompts, knowledge gap analysis, and structured outputs",
    sub_agents=_pipeline_steps,
)
