In [None]:
"""
Literature Review Agent System - Capstone Project Implementation
=================================================================

This notebook implements a comprehensive multi-agent system for automated
literature review generation, demonstrating all ADK concepts from the 5-day course.

Architecture Overview:
- 10 Specialized Agents + 1 Orchestrator
- Multi-agent patterns: Sequential, Parallel, Loop
- Tools: OpenAPI, MCP, Custom, Built-in
- Sessions & Memory: State management + Memory Bank
- Observability: Logging, Tracing, Metrics
- Deployment ready for Vertex AI Agent Engine

Author: Capstone Project
Date: 2025

SETUP INSTRUCTIONS FOR GOOGLE COLAB:
====================================

1. Click Runtime > Run all (or run cells one by one)
2. When prompted, enter your Google API key
3. Get your key from: https://aistudio.google.com/app/apikey
4. The notebook will handle everything else!

For Colab Secrets (more secure):
1. Click the 🔑 key icon in the left sidebar
2. Add secret named: GOOGLE_API_KEY
3. Paste your API key as the value
4. Enable notebook access
5. Run cells - key will load automatically!
"""

# ============================================================================
# CELL 1: INSTALL DEPENDENCIES (RUN THIS FIRST IN COLAB)
# ============================================================================

print("üì¶ Installing required packages...")
print("This may take 2-3 minutes on first run.\n")

# Check if running in Colab
try:
    import google.colab
    IN_COLAB = True
    print("‚úÖ Detected Google Colab environment")

    # Install required packages for Colab
    print("\nüîß Installing google-adk and dependencies...")
    !pip install -q google-adk google-genai scikit-learn numpy reportlab

    print("‚úÖ Installation complete!")
    print("üì¶ Installed packages:")
    print("   ‚Ä¢ google-adk (Agent Development Kit)")
    print("   ‚Ä¢ google-genai (Gemini API)")
    print("   ‚Ä¢ scikit-learn (ML algorithms)")
    print("   ‚Ä¢ numpy (Numerical computing)")
    print("   ‚Ä¢ reportlab (PDF generation)")
    print()

except ImportError:
    IN_COLAB = False
    print("‚ÑπÔ∏è Not in Colab - assuming packages already installed")

print("="*60)
print("üéâ READY TO PROCEED")
print("="*60)
print("Next step: Run the API key configuration cell below\n")

# ============================================================================
# CELL 2: GOOGLE COLAB SETUP & API KEY CONFIGURATION
# ============================================================================

"""
GOOGLE API KEY SETUP FOR COLAB
-------------------------------

This section handles API key authentication for Google Colab environment.

Two methods provided:
1. Using Google Colab Secrets (Recommended - Secure)
2. Direct input (Quick setup - Less secure)
"""

import os


def _mask_api_key(key: str) -> str:
    """Return a preview of *key* that reveals at most its last 4 characters.

    Short keys are masked completely so the preview can never expose the
    whole secret in saved notebook output.
    """
    if len(key) < 12:
        return "****"
    return f"****{key[-4:]}"


# Detect if running in Colab (the import only succeeds inside Colab).
try:
    import google.colab
    IN_COLAB = True
    print("‚úÖ Running in Google Colab environment")
except ImportError:
    IN_COLAB = False
    print("‚ÑπÔ∏è Not running in Colab (using local environment)")

# ============================================================================
# METHOD 1: USING COLAB SECRETS (RECOMMENDED - MOST SECURE)
# ============================================================================
#
# To use Colab Secrets:
#
# 1. In your Colab notebook, click the key icon in the left sidebar
# 2. Click "+ Add new secret"
# 3. Name: GOOGLE_API_KEY
# 4. Value: Paste your API key from https://aistudio.google.com/app/apikey
# 5. Toggle ON the notebook access
# 6. Run this cell
#
# This keeps your API key secure and not visible in the notebook.

if IN_COLAB:
    try:
        from google.colab import userdata

        # Try to get API key from Colab secrets. An empty or missing secret is
        # treated as a failure so that the manual fallback below is used.
        GOOGLE_API_KEY = (userdata.get('GOOGLE_API_KEY') or "").strip()
        if not GOOGLE_API_KEY:
            raise ValueError("secret GOOGLE_API_KEY is empty")
        os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
        os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "FALSE"

        print("‚úÖ API key loaded from Colab Secrets")
        print(f"üîë Key preview: {_mask_api_key(GOOGLE_API_KEY)}")

    except Exception as e:
        print(f"‚ö†Ô∏è Could not load from Colab Secrets: {e}")
        print("üìù Falling back to manual input method...")

        # ====================================================================
        # METHOD 2: MANUAL INPUT (FALLBACK - LESS SECURE)
        # ====================================================================
        # If Colab Secrets doesn't work, you can enter your key directly.
        #
        # WARNING: Don't share notebooks with API keys hardcoded!
        # Clear the output before sharing.

        from getpass import getpass

        print("\n" + "="*60)
        print("GOOGLE API KEY SETUP")
        print("="*60)
        print("\nüìå Get your API key from:")
        print("   https://aistudio.google.com/app/apikey")
        print("\n‚ö†Ô∏è  Your key will be hidden as you type")
        print("="*60 + "\n")

        # Use getpass for secure input (hides the key while typing).
        # The value is stripped so pasted whitespace never ends up in the key.
        GOOGLE_API_KEY = getpass("Enter your Google API Key: ").strip()

        if not GOOGLE_API_KEY:
            raise ValueError("‚ùå API key cannot be empty!")

        os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
        os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "FALSE"

        print("\n‚úÖ API key configured successfully!")
        print(f"üîë Key preview: {_mask_api_key(GOOGLE_API_KEY)}")
        print("\n‚ö†Ô∏è  Remember to clear this cell's output before sharing!")

else:
    # For local/Kaggle environments
    try:
        from kaggle_secrets import UserSecretsClient
        GOOGLE_API_KEY = UserSecretsClient().get_secret("GOOGLE_API_KEY")
        os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
        os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "FALSE"
        print("‚úÖ API key loaded from Kaggle Secrets")
    except Exception:
        # Kaggle secrets unavailable (not on Kaggle, or secret missing).
        # `except Exception` rather than a bare `except` so Ctrl-C and
        # SystemExit are not swallowed.
        # Manual fallback for local development
        GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
        if not GOOGLE_API_KEY:
            print("‚ö†Ô∏è  GOOGLE_API_KEY not found in environment variables")
            print("üìù Please set it: export GOOGLE_API_KEY='your-key-here'")
            print("   Or enter it manually below:")
            from getpass import getpass
            GOOGLE_API_KEY = getpass("Enter your Google API Key: ").strip()
            os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
            os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "FALSE"
        else:
            # Key came from the environment: select the Gemini API backend
            # unless the user has already chosen one explicitly.
            os.environ.setdefault("GOOGLE_GENAI_USE_VERTEXAI", "FALSE")

# Verify API key is set (an empty manual entry in the local path ends up here).
if not os.environ.get("GOOGLE_API_KEY"):
    raise ValueError("‚ùå GOOGLE_API_KEY not configured! Please set up your API key.")

print("\n" + "="*60)
print("üéâ AUTHENTICATION COMPLETE")
print("="*60)
print("‚úÖ Google API Key: Configured")
print("‚úÖ Backend: Gemini API (Google AI Studio)")
print("="*60 + "\n")

# ============================================================================
# SECTION 1: SETUP AND IMPORTS
# ============================================================================

import json
import uuid
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass, field
from datetime import datetime
import asyncio

# ADK Core Components
from google.adk.agents import LlmAgent, SequentialAgent, ParallelAgent, LoopAgent
from google.adk.models.google_llm import Gemini
from google.adk.runners import Runner
from google.adk.sessions import InMemorySessionService
from google.adk.apps import App, ResumabilityConfig
from google.genai import types

# ADK Tools
from google.adk.tools import (
    FunctionTool,
    AgentTool,
    google_search,
    ToolContext
)
from google.adk.tools.mcp_tool.mcp_toolset import McpToolset
from google.adk.tools.mcp_tool.mcp_session_manager import StdioConnectionParams
from mcp import StdioServerParameters
from google.adk.code_executors import BuiltInCodeExecutor

# Memory and State - CORRECTED IMPORTS
# Note: Memory services are in google.adk.memory, not as separate classes
from google.adk.memory import InMemoryMemoryService, VertexAiMemoryBankService
# PreloadMemoryTool is in google.adk.tools.preload_memory_tool
from google.adk.tools.preload_memory_tool import PreloadMemoryTool

# Observability - CORRECTED APPROACH
# ADK uses standard Python logging, not separate observability classes
# OpenTelemetry is used for tracing, not custom classes
# For production: use OpenTelemetry with Cloud Trace
import logging

# Optional: For advanced observability with OpenTelemetry
# Uncomment these if deploying to production with Cloud Trace:
# from opentelemetry import trace
# from opentelemetry.sdk.trace import TracerProvider
# from opentelemetry.sdk.trace.export import BatchSpanProcessor
# from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter

# External libraries
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
import requests
from bs4 import BeautifulSoup

# Configure logging for observability
# ADK uses Python's standard logging library
# Root logging setup: INFO level and above, with timestamp, logger name and
# level name in front of every message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used by the tool functions and agent factories below.
logger = logging.getLogger(__name__)

# Retry configuration for resilience.
# Passed as `retry_options` to the Gemini models built in the agent factories.
# NOTE(review): presumably `attempts` is the maximum number of tries and
# `initial_delay` / `exp_base` drive exponential backoff, applied only to the
# listed HTTP status codes — confirm against the google-genai
# HttpRetryOptions reference.
retry_config = types.HttpRetryOptions(
    attempts=5,
    exp_base=7,
    initial_delay=1,
    http_status_codes=[429, 500, 503, 504]
)

print("‚úÖ All imports completed successfully")

# ============================================================================
# SECTION 2: DATA STRUCTURES AND STATE MANAGEMENT
# ============================================================================

@dataclass
class Paper:
    """A single academic paper: bibliographic metadata, content and analysis.

    The first eight fields are required and positional; everything else has a
    default so a paper can be created from search results and enriched later.

    Equality note: ``embedding`` is excluded from the generated ``__eq__``.
    Comparing two multi-element NumPy arrays yields an array whose truth value
    is ambiguous, so including it made ``paper_a == paper_b`` raise
    ``ValueError`` as soon as both papers carried an embedding.
    """
    # --- Bibliographic metadata (required) ---
    paper_id: str
    title: str
    authors: List[str]
    year: int
    journal: str
    abstract: str
    url: str
    source: str  # 'google_scholar', 'arxiv', 'semantic_scholar'

    # --- Optional content ---
    score: float = 0.0  # numeric score; the scoring scheme is not defined here
    full_text: Optional[str] = None
    sections: Dict[str, str] = field(default_factory=dict)  # fresh dict per instance

    # --- Analysis results (all unset until an analysis step fills them) ---
    micro_summary: Optional[str] = None
    long_summary: Optional[str] = None
    methodology: Optional[str] = None
    findings: Optional[str] = None
    contributions: Optional[str] = None
    limitations: Optional[str] = None
    relevance_notes: Optional[str] = None
    # compare=False keeps dataclass equality usable when embeddings are arrays.
    embedding: Optional[np.ndarray] = field(default=None, compare=False)
    theme_id: Optional[int] = None

@dataclass
class Theme:
    """A thematic cluster of papers.

    The first four fields are required; the remaining ones default to ``None``
    or to a fresh empty list per instance (via ``default_factory``).
    """
    theme_id: int  # numeric identifier of the theme
    label: str  # short name of the theme
    description: str  # longer free-text description
    paper_ids: List[str]  # member papers — presumably Paper.paper_id values; verify against callers
    comparison_matrix: Optional[Dict] = None  # unset by default; structure is not defined in this class
    narrative_summary: Optional[str] = None  # unset by default
    common_limitations: List[str] = field(default_factory=list)
    best_practices: List[str] = field(default_factory=list)

@dataclass
class ResearchGap:
    """An identified research gap; all four fields are required."""
    gap_type: str  # 'methodological', 'empirical', 'theoretical', 'geographical'
    description: str  # free-text description of the gap
    evidence: List[str]  # supporting statements for the gap
    suggested_questions: List[str]  # follow-up research questions

@dataclass
class LiteratureReviewState:
    """
    Central state object for the entire literature review process.
    This demonstrates SESSION & STATE MANAGEMENT from Day 3.

    Only ``run_id``, ``user_id`` and ``topic`` are required. Every container
    field uses ``default_factory`` so each instance gets its own list/dict.
    """
    run_id: str
    user_id: str
    topic: str
    # Topic-understanding outputs (empty until populated).
    expanded_topic: Optional[str] = None
    keywords: List[str] = field(default_factory=list)
    subdomains: List[str] = field(default_factory=list)
    search_queries: List[str] = field(default_factory=list)

    # Paper collection — string keys mapped to Paper objects
    # (presumably keyed by Paper.paper_id; verify against callers).
    papers: Dict[str, Paper] = field(default_factory=dict)

    # Analysis results
    themes: List[Theme] = field(default_factory=list)
    research_gaps: List[ResearchGap] = field(default_factory=list)

    # Final outputs
    literature_review_draft: Optional[str] = None
    formatted_review: Optional[str] = None
    bibliography: Optional[str] = None

    # Workflow status for observability: one entry per pipeline stage, every
    # stage starting as "pending". The lambda builds a new dict per instance.
    workflow_status: Dict[str, str] = field(default_factory=lambda: {
        "topic_understood": "pending",
        "papers_fetched": "pending",
        "pdfs_retrieved": "pending",
        "summaries_done": "pending",
        "themes_identified": "pending",
        "analysis_complete": "pending",
        "gaps_identified": "pending",
        "review_written": "pending",
        "citations_formatted": "pending",
        "output_generated": "pending"
    })

    # Timestamps for metrics: start_time is captured with datetime.now() when
    # the instance is created (naive local time); end_time stays None until set.
    start_time: datetime = field(default_factory=datetime.now)
    end_time: Optional[datetime] = None

    # Observability metrics (free-form; no schema is defined in this class)
    metrics: Dict[str, Any] = field(default_factory=dict)

print("‚úÖ Data structures defined")

# ============================================================================
# SECTION 3: CUSTOM TOOLS - Function Tools
# ============================================================================
# This demonstrates CUSTOM TOOLS from Day 2

def search_google_scholar(query: str, max_results: int = 10) -> Dict:
    """
    Return mock Google Scholar search results for a query.

    No network request is made: two hard-coded papers are templated with the
    query text and given a random 8-hex-character URL suffix. This is a
    CUSTOM FUNCTION TOOL demonstrating Day 2 concepts; a production version
    would call a real search API instead.

    Args:
        query: Search query string, interpolated into titles and abstracts.
        max_results: Upper bound on the number of papers returned (applied as
            a list slice).

    Returns:
        Dictionary with ``status``, ``source``, ``query``, ``results`` (list
        of paper dicts) and ``count`` (length of ``results``).
    """
    logger.info(f"Searching Google Scholar for: {query}")

    def _scholar_url() -> str:
        # Fresh random identifier per paper, as each mock entry needs its own URL.
        return f"https://scholar.google.com/paper/{uuid.uuid4().hex[:8]}"

    comprehensive_study = {
        "title": f"Analysis of {query}: A Comprehensive Study",
        "authors": ["Smith, J.", "Doe, A."],
        "year": 2023,
        "journal": "Journal of Advanced Research",
        "abstract": f"This paper provides a comprehensive analysis of {query}...",
        "url": _scholar_url(),
        "citations": 45,
    }
    recent_advances = {
        "title": f"Recent Advances in {query}",
        "authors": ["Johnson, B.", "Williams, C."],
        "year": 2024,
        "journal": "IEEE Transactions",
        "abstract": f"We present recent methodological advances in {query}...",
        "url": _scholar_url(),
        "citations": 32,
    }

    # Slice once and reuse it for both the payload and the count.
    selected = [comprehensive_study, recent_advances][:max_results]

    response = {"status": "success", "source": "google_scholar", "query": query}
    response["results"] = selected
    response["count"] = len(selected)
    return response

def search_arxiv(query: str, max_results: int = 10) -> Dict:
    """
    Return mock arXiv search results for a query.

    This demonstrates OPENAPI TOOLS pattern from Day 2; no request is sent.
    A single hard-coded preprint is templated with the query and a random
    8-hex-character URL suffix. In production, use the actual arXiv API or
    an OpenAPI tool.

    Args:
        query: Search query string, interpolated into the title and abstract.
        max_results: Upper bound on the number of papers returned.

    Returns:
        Dictionary with ``status``, ``source``, ``query``, ``results`` and
        ``count``.
    """
    logger.info(f"Searching arXiv for: {query}")

    preprint = {
        "title": f"Deep Learning Approaches to {query}",
        "authors": ["Chen, L.", "Wang, Y."],
        "year": 2024,
        "journal": "arXiv preprint",
        "abstract": f"We explore deep learning methods for {query}...",
        "url": f"https://arxiv.org/abs/2024.{uuid.uuid4().hex[:8]}",
        "citations": 15,
    }
    selected = [preprint][:max_results]

    response = {"status": "success", "source": "arxiv", "query": query}
    response["results"] = selected
    response["count"] = len(selected)
    return response

def search_semantic_scholar(query: str, max_results: int = 10) -> Dict:
    """Return mock Semantic Scholar search results for a query.

    No request is sent: one hard-coded paper is templated with the query and
    a random 8-hex-character URL suffix, then sliced to ``max_results``.

    Returns:
        Dictionary with ``status``, ``source``, ``query``, ``results`` and
        ``count``.
    """
    logger.info(f"Searching Semantic Scholar for: {query}")

    survey_paper = {
        "title": f"Semantic Analysis of {query}",
        "authors": ["Rodriguez, M.", "Garcia, P."],
        "year": 2023,
        "journal": "ACM Computing Surveys",
        "abstract": f"A semantic approach to understanding {query}...",
        "url": f"https://semanticscholar.org/paper/{uuid.uuid4().hex[:8]}",
        "citations": 28,
    }
    selected = [survey_paper][:max_results]

    response = {"status": "success", "source": "semantic_scholar", "query": query}
    response["results"] = selected
    response["count"] = len(selected)
    return response

def download_and_extract_pdf(url: str) -> Dict:
    """
    Return a mock "extracted PDF" payload for the given URL.

    Nothing is downloaded: the same canned text, section map and page count
    are returned for every URL. This demonstrates MCP TOOLS pattern - in
    production, use an MCP file server or a real PDF extraction library.

    Args:
        url: Paper URL; only logged and echoed back in the result.

    Returns:
        Dictionary with ``status``, ``url``, ``full_text``, ``sections``
        (section name -> snippet) and ``page_count``.
    """
    logger.info(f"Downloading PDF from: {url}")

    # Canned document body standing in for real extraction output.
    extracted_text = """
    Introduction
    This paper presents a comprehensive analysis of the topic...

    Methodology
    We employed a mixed-methods approach combining...

    Results
    Our findings indicate significant correlations...

    Discussion
    The implications of these results suggest...

    Conclusion
    In summary, this work contributes to the field by...
    """

    section_snippets = {
        "introduction": "This paper presents...",
        "methodology": "We employed...",
        "results": "Our findings...",
        "discussion": "The implications...",
        "conclusion": "In summary...",
    }

    payload = {"status": "success", "url": url}
    payload["full_text"] = extracted_text
    payload["sections"] = section_snippets
    payload["page_count"] = 12
    return payload

def format_citation(paper_metadata: Dict, style: str = "APA") -> Dict:
    """
    Format a citation string in the requested style.

    This is a CUSTOM TOOL for citation management.

    The style name is matched case-insensitively and ignoring surrounding
    whitespace, so ``"ieee"`` and ``" Harvard "`` select the intended format
    instead of silently falling back to APA. Any unrecognised style still
    falls back to the APA layout.

    Args:
        paper_metadata: Mapping that may contain ``authors`` (a list of names,
            or a single pre-joined string), ``title``, ``year`` and
            ``journal``. Missing or ``None`` values render as empty text.
        style: ``"APA"`` (default), ``"Harvard"`` or ``"IEEE"``.

    Returns:
        Dictionary with ``status`` (always ``"success"``), ``citation`` (the
        formatted string) and ``style`` (the style value as passed in).
    """
    raw_authors = paper_metadata.get("authors") or []
    if isinstance(raw_authors, str):
        # A plain string must not be joined character by character.
        authors = raw_authors
    else:
        authors = ", ".join(str(name) for name in raw_authors)

    title = paper_metadata.get("title", "")
    year = paper_metadata.get("year", "")
    journal = paper_metadata.get("journal", "")

    # Normalise once so callers (often an LLM) need not match exact casing.
    style_key = str(style).strip().upper()

    if style_key == "HARVARD":
        citation = f"{authors}, {year}. {title}. {journal}."
    elif style_key == "IEEE":
        citation = f"{authors}, \"{title},\" {journal}, {year}."
    else:
        # APA, and the fallback for any unrecognised style.
        citation = f"{authors} ({year}). {title}. {journal}."

    return {
        "status": "success",
        "citation": citation,
        "style": style
    }

def cluster_embeddings(embeddings: List[List[float]], n_clusters: int = 5) -> Dict:
    """
    Perform k-means clustering on paper embeddings.

    This demonstrates CODE EXECUTION tool pattern - clustering logic.

    The requested cluster count is adjusted to what the data supports: when
    there are fewer embeddings than requested clusters it is reduced to
    ``max(2, len(embeddings) // 2)``, and the result is always clamped to the
    range ``1 .. len(embeddings)`` so KMeans is never asked for more clusters
    than samples (which it rejects) or for a non-positive count.

    Args:
        embeddings: One vector per paper; all vectors must have equal length.
        n_clusters: Desired number of clusters (default 5).

    Returns:
        On success, a dictionary with ``status`` ("success"), ``labels``
        (cluster index per paper, in input order), ``n_clusters`` (the count
        actually used), ``cluster_stats`` (per-cluster ``paper_indices``,
        ``size`` and ``centroid``) and ``inertia``.
        For an empty ``embeddings`` list, a dictionary with ``status``
        ("error") and ``error_message`` instead of raising.
    """
    n_papers = len(embeddings)
    logger.info(f"Clustering {n_papers} papers into {n_clusters} themes")

    if n_papers == 0:
        # Nothing to cluster; report it in the tool's status convention
        # rather than letting KMeans fail on an empty array.
        return {
            "status": "error",
            "error_message": "No embeddings provided; nothing to cluster."
        }

    if n_papers < n_clusters:
        n_clusters = max(2, n_papers // 2)
    # Clamp: at least one cluster, never more clusters than samples.
    n_clusters = max(1, min(n_clusters, n_papers))

    # Convert to numpy array
    X = np.array(embeddings)

    # Perform k-means clustering (fixed seed for reproducible labels)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    labels = kmeans.fit_predict(X)

    # Calculate cluster statistics
    cluster_stats = {}
    for cluster_id in range(n_clusters):
        member_indices = np.where(labels == cluster_id)[0]
        cluster_stats[cluster_id] = {
            "paper_indices": member_indices.tolist(),
            "size": len(member_indices),
            "centroid": kmeans.cluster_centers_[cluster_id].tolist()
        }

    return {
        "status": "success",
        "labels": labels.tolist(),
        "n_clusters": n_clusters,
        "cluster_stats": cluster_stats,
        "inertia": float(kmeans.inertia_)
    }

def generate_pdf_output(content: str, filename: str) -> Dict:
    """
    Return a mock "PDF generated" result for the given filename.

    No file is written and ``content`` is not inspected; the size is a fixed
    placeholder. In production, use tools like pandoc, LaTeX, or python-docx.

    Args:
        content: Document body (unused by this mock).
        filename: Output file name, echoed back and appended to ``/output/``.

    Returns:
        Dictionary with ``status``, ``filename``, ``format``, ``size_kb`` and
        ``path``.
    """
    logger.info(f"Generating PDF: {filename}")

    # Mock result only; a real implementation would invoke a converter here.
    result = {"status": "success", "filename": filename}
    result["format"] = "PDF"
    result["size_kb"] = 245
    result["path"] = f"/output/{filename}"
    return result

print("‚úÖ Custom function tools defined")

# ============================================================================
# SECTION 4: AGENT 1 - TOPIC UNDERSTANDING AGENT
# ============================================================================
# This is an LLM AGENT demonstrating Day 1 concepts

def create_topic_understanding_agent() -> LlmAgent:
    """
    Build the Topic Understanding Agent.

    The agent is prompted to expand the user's topic, extract keywords,
    identify subdomains and generate search queries, answering as JSON.
    It is given no tools and relies on the model alone.

    Pattern: LLM Agent (Day 1)
    Tools: Built-in LLM

    Returns:
        The configured ``LlmAgent`` named "TopicUnderstandingAgent".
    """
    librarian_prompt = """You are an expert research librarian specializing in academic literature.

Your task is to analyze a research topic and prepare it for comprehensive literature search.

Given a topic, you must:
1. Expand the topic with relevant context and clarifications
2. Extract 10-15 core keywords and concepts
3. Identify 3-5 major subdomains or research areas
4. Generate 15-20 diverse search queries that would capture relevant papers

Format your response as JSON:
{
    "expanded_topic": "detailed topic description",
    "keywords": ["keyword1", "keyword2", ...],
    "subdomains": ["subdomain1", "subdomain2", ...],
    "search_queries": ["query1", "query2", ...]
}

Be thorough and consider:
- Synonyms and related terms
- Different methodological approaches
- Various application domains
- Both broad and specific queries
- Common academic phrasings
"""
    topic_model = Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)

    topic_agent = LlmAgent(
        name="TopicUnderstandingAgent",
        model=topic_model,
        instruction=librarian_prompt,
        tools=[],  # no tools: the model does all the work
    )

    logger.info("Topic Understanding Agent created")
    return topic_agent

# ============================================================================
# SECTION 5: AGENT 2 - ACADEMIC PAPER SEARCH AGENT (PARALLEL)
# ============================================================================
# This demonstrates PARALLEL AGENTS from Day 1

def create_paper_search_agents() -> ParallelAgent:
    """
    Build the multi-source paper search system.

    Three specialist ``LlmAgent`` instances (Google Scholar, arXiv, Semantic
    Scholar) each wrap one search function tool and write their result to
    their own ``output_key``. They are grouped under a single
    ``ParallelAgent``.

    Pattern: Parallel Agents (Day 1)
    Tools: Custom Function Tools (OpenAPI simulation)

    Returns:
        The ``ParallelAgent`` named "ParallelPaperSearch".
    """

    def _new_model() -> Gemini:
        # Each specialist gets its own model instance with the shared retry policy.
        return Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)

    scholar_prompt = """You are a Google Scholar search specialist.

Use the search_google_scholar tool to find relevant papers.
Execute all provided queries and return comprehensive results.
Focus on highly-cited, recent papers."""

    arxiv_prompt = """You are an arXiv search specialist.

Use the search_arxiv tool to find relevant preprints and papers.
Focus on recent, cutting-edge research."""

    semantic_prompt = """You are a Semantic Scholar search specialist.

Use the search_semantic_scholar tool to find relevant papers.
Focus on papers with strong semantic relevance."""

    # Google Scholar specialist
    scholar_specialist = LlmAgent(
        name="GoogleScholarAgent",
        model=_new_model(),
        instruction=scholar_prompt,
        tools=[FunctionTool(search_google_scholar)],
        output_key="scholar_results",
    )

    # arXiv specialist
    arxiv_specialist = LlmAgent(
        name="ArxivAgent",
        model=_new_model(),
        instruction=arxiv_prompt,
        tools=[FunctionTool(search_arxiv)],
        output_key="arxiv_results",
    )

    # Semantic Scholar specialist
    semantic_specialist = LlmAgent(
        name="SemanticScholarAgent",
        model=_new_model(),
        instruction=semantic_prompt,
        tools=[FunctionTool(search_semantic_scholar)],
        output_key="semantic_results",
    )

    # Group the three specialists under one parallel agent.
    search_group = ParallelAgent(
        name="ParallelPaperSearch",
        sub_agents=[scholar_specialist, arxiv_specialist, semantic_specialist],
    )

    logger.info("Parallel Paper Search Agents created")
    return search_group

# ============================================================================
# SECTION 6: AGENT 3 - PDF RETRIEVAL AGENT (LOOP)
# ============================================================================
# This demonstrates LOOP AGENTS from Day 1

def create_pdf_retrieval_agent() -> LoopAgent:
    """
    Creates a PDF retrieval agent using loop pattern.

    This demonstrates LOOP AGENT pattern from Day 1.
    A downloader agent and a validator agent run in sequence inside a
    ``LoopAgent`` for at most 3 iterations.

    A ``LoopAgent`` does not interpret the text its sub-agents produce: it
    stops only when ``max_iterations`` is reached or a sub-agent escalates.
    The validator is therefore given an ``exit_loop`` tool that sets the
    escalate flag; without it a 'COMPLETE' answer would never end the loop
    and every run would spend all three iterations.

    Pattern: Loop Agent
    Tools: Custom Function Tool (MCP simulation), loop-exit tool

    Returns:
        The ``LoopAgent`` named "PDFRetrievalLoop".
    """

    def exit_loop(tool_context: ToolContext) -> Dict:
        """Call this ONLY when PDF retrieval is complete, to stop the retry loop."""
        # Escalation is the signal LoopAgent uses to terminate early.
        tool_context.actions.escalate = True
        # Keep the validator's state key meaningful even when it exits via the tool.
        tool_context.state["validation_status"] = "COMPLETE"
        return {"status": "success", "message": "PDF retrieval complete; loop stopped."}

    # PDF downloader
    downloader = LlmAgent(
        name="PDFDownloader",
        model=Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config),
        instruction="""You are a PDF retrieval specialist.

For each paper URL provided:
1. Use download_and_extract_pdf tool to get the PDF content
2. If download fails, note it and continue
3. Store successful extractions

Handle errors gracefully and report statistics.""",
        tools=[FunctionTool(download_and_extract_pdf)],
        output_key="pdf_extraction_results"
    )

    # Validator (decides whether another attempt is needed)
    validator = LlmAgent(
        name="PDFValidator",
        model=Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config),
        instruction="""Check if PDF retrieval is complete.

If all papers have been processed, call the exit_loop tool to stop retrying
and then return 'COMPLETE'.
Otherwise return 'CONTINUE' to retry failed downloads.""",
        tools=[FunctionTool(exit_loop)],
        output_key="validation_status"
    )

    # Loop agent: ends on escalation from the validator or after 3 iterations
    loop_agent = LoopAgent(
        name="PDFRetrievalLoop",
        sub_agents=[downloader, validator],
        max_iterations=3  # Retry up to 3 times
    )

    logger.info("PDF Retrieval Loop Agent created")
    return loop_agent

# ============================================================================
# SECTION 7: AGENT 4 - PER-PAPER SUMMARIZATION (PARALLEL)
# ============================================================================

def create_summarization_agents() -> ParallelAgent:
    """
    Build the per-paper summarization stage.

    A single summarizer ``LlmAgent`` (no tools, result stored under
    "paper_summary") is wrapped in a ``ParallelAgent``. Only one sub-agent is
    registered here; the wrapper is where additional summarizer instances
    would be added to process several papers side by side.

    Pattern: Parallel Agents

    Returns:
        The ``ParallelAgent`` named "ParallelSummarization".
    """
    summary_prompt = """You are an expert at academic paper summarization.

For each paper's full text, generate:

1. **Micro Summary** (20 words): One-sentence essence
2. **Long Summary** (150 words): Comprehensive overview
3. **Methodology**: Research methods used
4. **Findings**: Key results and discoveries
5. **Contributions**: Novel contributions to the field
6. **Limitations**: Acknowledged limitations
7. **Relevance**: Why this paper matters to the topic

Be precise, objective, and academically rigorous.

Format as JSON:
{
    "micro_summary": "...",
    "long_summary": "...",
    "methodology": "...",
    "findings": "...",
    "contributions": "...",
    "limitations": "...",
    "relevance_notes": "..."
}"""
    summary_model = Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)

    paper_summarizer = LlmAgent(
        name="PaperSummarizer",
        model=summary_model,
        instruction=summary_prompt,
        tools=[],
        output_key="paper_summary",
    )

    # Single member for now; further summarizer instances would be listed here.
    summarization_group = ParallelAgent(
        name="ParallelSummarization",
        sub_agents=[paper_summarizer],
    )

    logger.info("Summarization Agents created")
    return summarization_group

# ============================================================================
# SECTION 8: AGENT 5 - THEMATIC CLUSTERING AGENT
# ============================================================================

def create_clustering_agent() -> LlmAgent:
    """
    Creates the thematic clustering agent.

    The agent calls the ``cluster_embeddings`` function tool to group papers
    and then uses the LLM to label and describe each cluster.

    No ``code_executor`` is attached: ADK documents that the built-in code
    executor cannot be combined with other tools on the same agent, so
    pairing ``BuiltInCodeExecutor`` with the ``cluster_embeddings`` function
    tool (as this factory previously did) is an unsupported configuration.
    The instruction only relies on the function tool.

    Pattern: Agent with custom clustering tool
    Tools: Custom clustering function, LLM for naming

    Returns:
        The ``LlmAgent`` named "ThematicClusteringAgent"; its output is stored
        under the "themes" key.
    """

    agent = LlmAgent(
        name="ThematicClusteringAgent",
        model=Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config),
        instruction="""You are an expert at identifying thematic patterns in research.

Your task:
1. Take paper summaries and their embeddings
2. Use cluster_embeddings tool to group similar papers
3. For each cluster, analyze the papers and:
   - Generate a descriptive theme label
   - Write a 100-word description
   - List common methodologies
   - Note shared limitations

Output format:
{
    "themes": [
        {
            "theme_id": 0,
            "label": "Theme Name",
            "description": "...",
            "paper_indices": [0, 3, 5],
            "common_methods": [...],
            "common_limitations": [...]
        },
        ...
    ]
}""",
        # Function tool only — see the docstring for why no code executor is set.
        tools=[FunctionTool(cluster_embeddings)],
        output_key="themes"
    )

    logger.info("Clustering Agent created")
    return agent

# ============================================================================
# SECTION 9: AGENT 6 - COMPARATIVE ANALYSIS (SEQUENTIAL)
# ============================================================================

def create_comparative_analysis_agent() -> SequentialAgent:
    """
    Build the two-step comparative analysis pipeline.

    Step 1 ("ThemeAnalyzer") compares papers within each theme and stores its
    output under "theme_analysis". Step 2 ("CrossThemeComparator") references
    that value through the ``{theme_analysis}`` placeholder in its instruction
    and stores its own output under "cross_theme_comparison".

    Pattern: Sequential Agents

    Returns:
        The ``SequentialAgent`` named "ComparativeAnalysisPipeline".
    """

    def _new_model() -> Gemini:
        return Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)

    within_theme_prompt = """Analyze papers within each theme.

For each theme:
1. Compare methodologies across papers
2. Contrast results and findings
3. Note contradictions or inconsistencies
4. Identify common data sources/limitations
5. Highlight unique contributions

Create a comparison matrix."""

    across_themes_prompt = """Compare findings across different themes.

Input: {theme_analysis}

Identify:
- Common patterns across themes
- Divergent approaches
- Complementary findings
- Contradictory results"""

    # Step 1: analysis inside each theme
    within_theme_agent = LlmAgent(
        name="ThemeAnalyzer",
        model=_new_model(),
        instruction=within_theme_prompt,
        output_key="theme_analysis",
    )

    # Step 2: comparison across themes, fed by step 1's output key
    across_themes_agent = LlmAgent(
        name="CrossThemeComparator",
        model=_new_model(),
        instruction=across_themes_prompt,
        output_key="cross_theme_comparison",
    )

    pipeline = SequentialAgent(
        name="ComparativeAnalysisPipeline",
        sub_agents=[within_theme_agent, across_themes_agent],
    )

    logger.info("Comparative Analysis Agent created")
    return pipeline

# ============================================================================
# SECTION 10: AGENT 7 - RESEARCH GAP IDENTIFICATION
# ============================================================================

def create_gap_identification_agent() -> LlmAgent:
    """
    Build the research gap identification agent.

    A tool-less synthesis agent prompted to list methodological, empirical,
    theoretical and geographical/contextual gaps as JSON. Its output is
    stored under the "research_gaps" key.

    Returns:
        The ``LlmAgent`` named "ResearchGapAgent".
    """
    gap_prompt = """You are an expert at identifying research gaps.

Analyze all themes and papers to identify:

1. **Methodological gaps**: Underexplored methods or approaches
2. **Empirical gaps**: Lack of data, studies, or evidence
3. **Theoretical gaps**: Underdeveloped concepts or frameworks
4. **Geographical/contextual gaps**: Underrepresented contexts

For each gap:
- Describe it clearly
- Provide evidence from the papers
- Suggest 2-3 specific research questions

Format as JSON:
{
    "gaps": [
        {
            "gap_type": "methodological",
            "description": "...",
            "evidence": ["paper1 doesn't address X", ...],
            "suggested_questions": ["How can we...", ...]
        },
        ...
    ]
}"""
    gap_model = Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)

    gap_agent = LlmAgent(
        name="ResearchGapAgent",
        model=gap_model,
        instruction=gap_prompt,
        output_key="research_gaps",
    )

    logger.info("Gap Identification Agent created")
    return gap_agent

# ============================================================================
# SECTION 11: AGENT 8 - LITERATURE REVIEW WRITER (WITH RAG)
# ============================================================================

def create_review_writer_agent(vector_store) -> LlmAgent:
    """
    Build Agent 8, the literature review writer.

    The agent is an LlmAgent named ``LiteratureReviewWriter`` whose prompt
    lays out a six-section review (introduction, key papers, thematic
    analysis, comparative discussion, research gaps, conclusion) with target
    word counts, and asks for papers to be referenced by ID so a later step
    can format citations. It is configured with
    ``output_key="literature_review_draft"``.

    Args:
        vector_store: Accepted for the intended RAG integration. It is not
            used yet: the agent is created with an empty ``tools`` list.

    Returns:
        The configured LlmAgent.
    """

    writer_prompt = """You are an expert academic writer specializing in literature reviews.

Write a comprehensive, well-structured literature review with:

1. **Introduction** (200 words)
   - Context and importance of the topic
   - Scope of the review
   - Organization of the review

2. **Overview of Key Papers** (300 words)
   - Highlight seminal works
   - Mention key authors and their contributions
   - Reference paper IDs: [PAPER_ID]

3. **Thematic Analysis** (500 words)
   - Discuss each major theme
   - Compare and contrast within themes
   - Reference specific papers

4. **Comparative Discussion** (400 words)
   - Cross-theme comparisons
   - Methodological variations
   - Conflicting or complementary findings

5. **Research Gaps** (300 words)
   - Identified gaps from analysis
   - Implications for future research

6. **Conclusion** (200 words)
   - Synthesis of findings
   - Future directions

Use formal academic tone. Ensure coherent flow.
Reference papers by their IDs for later citation formatting."""

    writer_model = Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)

    # Placeholder: a retrieval tool backed by vector_store would go here.
    writer_tools = []

    writer_agent = LlmAgent(
        name="LiteratureReviewWriter",
        model=writer_model,
        instruction=writer_prompt,
        tools=writer_tools,
        output_key="literature_review_draft",
    )

    logger.info("Review Writer Agent created")
    return writer_agent

# ============================================================================
# SECTION 12: AGENT 9 - CITATION & BIBLIOGRAPHY FORMATTER
# ============================================================================

def create_citation_agent() -> LlmAgent:
    """
    Build Agent 9, the citation and bibliography formatter.

    The agent is an LlmAgent named ``CitationFormatter``. Its prompt tells it
    to locate ``[PAPER_ID]`` markers, format each one through the
    ``format_citation`` tool, substitute in-text citations, and produce a
    reference list and a .bib file. ``format_citation`` is exposed to the
    model as a FunctionTool, and the agent is configured with
    ``output_key="formatted_review"``.

    Returns:
        The configured LlmAgent.
    """

    citation_prompt = """You are a citation formatting specialist.

Tasks:
1. Find all paper ID markers [PAPER_ID] in the text
2. Use format_citation tool to format each citation in APA style
3. Replace markers with proper in-text citations (Author, Year)
4. Generate a complete reference list at the end
5. Create a .bib file

Ensure consistency and accuracy."""

    citation_model = Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)
    citation_tools = [FunctionTool(format_citation)]

    citation_agent = LlmAgent(
        name="CitationFormatter",
        model=citation_model,
        instruction=citation_prompt,
        tools=citation_tools,
        output_key="formatted_review",
    )

    logger.info("Citation Agent created")
    return citation_agent

# ============================================================================
# SECTION 13: AGENT 10 - FINAL OUTPUT GENERATOR
# ============================================================================

def create_output_generator_agent() -> LlmAgent:
    """
    Build Agent 10, the final output generator.

    The agent is an LlmAgent named ``OutputGenerator``. Its prompt asks for a
    PDF review (via the ``generate_pdf_output`` tool), a separate reference
    list, and supplementary materials. It is given ``generate_pdf_output`` as
    a FunctionTool plus a ``BuiltInCodeExecutor`` intended for diagram
    generation, and is configured with ``output_key="final_outputs"``.

    Returns:
        The configured LlmAgent.
    """

    output_prompt = """You are a document assembly specialist.

Generate final outputs:

1. **PDF Literature Review**
   - Use generate_pdf_output tool
   - Professional formatting
   - Embedded tables and figures

2. **Separate Reference List**
   - Both PDF and .bib format

3. **Supplementary Materials**
   - Cluster diagram (theme visualization)
   - Comparison tables
   - Research gaps summary sheet

Ensure all files are properly named and organized."""

    output_model = Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)
    pdf_tools = [FunctionTool(generate_pdf_output)]

    # NOTE(review): ADK documents restrictions on combining built-in code
    # execution with other tools on the same agent -- confirm this pairing is
    # accepted by the installed ADK/Gemini version.
    diagram_executor = BuiltInCodeExecutor()  # For diagram generation

    output_agent = LlmAgent(
        name="OutputGenerator",
        model=output_model,
        instruction=output_prompt,
        tools=pdf_tools,
        code_executor=diagram_executor,
        output_key="final_outputs",
    )

    logger.info("Output Generator Agent created")
    return output_agent

# ============================================================================
# SECTION 14: ORCHESTRATOR AGENT
# ============================================================================

def create_orchestrator_agent(sub_agents: List) -> LlmAgent:
    """
    Build the orchestrator that drives the specialist agents.

    Every entry of ``sub_agents`` is wrapped in an ``AgentTool`` and handed
    to a single LlmAgent named ``LiteratureReviewOrchestrator`` as its tool
    list. The prompt spells out the ten-step call order, from
    TopicUnderstandingAgent through OutputGenerator, together with the
    error-handling policy (retry, skip, or abort). The agent is configured
    with ``output_key="orchestration_result"``.

    Args:
        sub_agents: Agents to expose to the orchestrator as tools.

    Returns:
        The configured orchestrator LlmAgent.
    """

    orchestrator_prompt = """You are the master coordinator for literature review generation.

Your workflow (MUST execute in this order):

1. Call TopicUnderstandingAgent with user's topic
2. Call ParallelPaperSearch with search queries from step 1
3. Call PDFRetrievalLoop with paper URLs from step 2
4. Call ParallelSummarization with extracted PDFs from step 3
5. Call ThematicClusteringAgent with summaries from step 4
6. Call ComparativeAnalysisPipeline with themes from step 5
7. Call ResearchGapAgent with all analysis from step 6
8. Call LiteratureReviewWriter with all artifacts from steps 1-7
9. Call CitationFormatter with draft from step 8
10. Call OutputGenerator with formatted review from step 9

Track status after each step. Handle errors gracefully.
Report progress to the user.

If any step fails, diagnose the issue and either:
- Retry with adjusted parameters
- Skip and continue (if non-critical)
- Abort and report (if critical)

Maintain state across all steps."""

    # One AgentTool per specialist, in the order they were supplied.
    wrapped_agents = list(map(AgentTool, sub_agents))

    coordinator = LlmAgent(
        name="LiteratureReviewOrchestrator",
        model=Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config),
        instruction=orchestrator_prompt,
        tools=wrapped_agents,
        output_key="orchestration_result",
    )

    logger.info("Orchestrator Agent created with all sub-agents as tools")
    return coordinator

# ============================================================================
# SECTION 15: MEMORY BANK SETUP (CORRECTED)
# ============================================================================
# This demonstrates LONG-TERM MEMORY from Day 3

def setup_memory_bank(project_id: str = None):
    """
    Choose and construct the long-term memory service.

    A ``VertexAiMemoryBankService`` is attempted only when ``project_id`` is
    truthy AND the environment variable ``USE_VERTEX_AI_MEMORY`` equals
    ``"1"``. In every other case, including when the Vertex AI service
    raises during construction, an ``InMemoryMemoryService`` is returned
    instead.

    See: https://google.github.io/adk-docs/sessions/memory/

    Args:
        project_id: Google Cloud project ID (optional, for Vertex AI).

    Returns:
        Memory service instance.
    """

    def _build_in_memory_service():
        # Development / demo path: nothing is persisted across processes.
        logger.info("Initializing In-Memory Memory Service...")
        local_service = InMemoryMemoryService()

        logger.info("‚úÖ In-Memory Memory Service initialized")
        logger.info("üìù Note: Memories stored in application memory (not persistent)")
        return local_service

    wants_vertex = project_id and os.environ.get("USE_VERTEX_AI_MEMORY") == "1"
    if not wants_vertex:
        return _build_in_memory_service()

    # Production path (deployment to Vertex AI Agent Engine).
    try:
        logger.info("Initializing Vertex AI Memory Bank Service...")

        vertex_service = VertexAiMemoryBankService(
            project=project_id,
            location="us-central1",  # Or your preferred region
            # agent_engine_id will be set during deployment
        )

        logger.info("‚úÖ Vertex AI Memory Bank initialized")
        return vertex_service

    except Exception as e:
        # Best effort: any failure degrades to the in-memory service.
        logger.warning(f"Could not initialize Vertex AI Memory Bank: {e}")
        logger.info("Falling back to InMemoryMemoryService")

    return _build_in_memory_service()

# Create a callback for automatic memory saving
async def auto_save_session_to_memory_callback(callback_context):
    """
    Best-effort callback that pushes the current session into the memory
    service.

    It reads the invocation context from ``callback_context`` and awaits
    ``memory_service.add_session_to_memory(session)``. Any exception is
    logged as a warning and swallowed, so a memory failure never propagates
    to the caller.

    Pattern from ADK docs:
    https://google.github.io/adk-docs/sessions/memory/

    Args:
        callback_context: Callback context object; it must expose
            ``_invocation_context`` with ``memory_service`` and ``session``.
    """
    try:
        invocation = callback_context._invocation_context
        save_session = invocation.memory_service.add_session_to_memory
        await save_session(invocation.session)
        logger.info("‚úÖ Session automatically saved to memory bank")
    except Exception as e:
        logger.warning(f"Could not save session to memory: {e}")

# ============================================================================
# SECTION 16: VECTOR STORE SETUP
# ============================================================================

class SimpleVectorStore:
    """
    Simple in-memory vector store for paper embeddings.

    Embeddings, their metadata and their paper IDs are kept in insertion
    order; position ``i`` in ``embeddings`` corresponds to ``metadata[i]``
    and ``index_to_id[i]``.

    In production, use:
    - Vertex AI Vector Search
    - Pinecone
    - Weaviate
    - ChromaDB
    """

    def __init__(self):
        self.embeddings = []
        self.metadata = []
        self.index_to_id = {}
        logger.info("Vector Store initialized")

    def add(self, paper_id: str, embedding: np.ndarray, metadata: Dict):
        """
        Add a paper embedding to the store.

        Args:
            paper_id: Identifier returned in search results.
            embedding: Vector for the paper.
            metadata: Arbitrary metadata returned alongside search hits.
        """
        idx = len(self.embeddings)
        self.embeddings.append(embedding)
        self.metadata.append(metadata)
        self.index_to_id[idx] = paper_id
        logger.debug(f"Added to vector store: {paper_id}")

    def search(self, query_embedding: np.ndarray, top_k: int = 5) -> List[Dict]:
        """
        Semantic search for similar papers.

        Args:
            query_embedding: Query vector compared against every stored
                embedding with cosine similarity.
            top_k: Maximum number of hits to return. Values <= 0 yield an
                empty list.

        Returns:
            Up to ``top_k`` dicts with ``paper_id``, ``similarity`` and
            ``metadata``, ordered from most to least similar.
        """
        # Guard top_k <= 0 explicitly: arr[-0:] is the WHOLE array, so the
        # slice below would otherwise return every paper for top_k == 0.
        if not self.embeddings or top_k <= 0:
            return []

        # Calculate cosine similarity
        similarities = cosine_similarity([query_embedding], self.embeddings)[0]

        # argsort is ascending: take the last top_k, then reverse for
        # best-first order.
        top_indices = np.argsort(similarities)[-top_k:][::-1]

        return [
            {
                "paper_id": self.index_to_id[int(idx)],
                "similarity": float(similarities[idx]),
                "metadata": self.metadata[int(idx)],
            }
            for idx in top_indices
        ]

    def get_all_embeddings(self) -> List[np.ndarray]:
        """Get all embeddings for clustering (returns the internal list)."""
        return self.embeddings

# ============================================================================
# SECTION 17: OBSERVABILITY SETUP
# ============================================================================
# This demonstrates OBSERVABILITY from Day 4

class ObservabilitySystem:
    """
    Comprehensive observability system.

    This demonstrates LOGGING, TRACING, and METRICS from Day 4.

    Components:
    - Logger: Structured logging for all events
    - Tracer: End-to-end traces for workflow execution
    - Metrics: Performance and quality metrics
    """

    def __init__(self, run_id: str):
        """
        Args:
            run_id: Identifier of the workflow run; used in the logger name
                and echoed in the summary.
        """
        self.run_id = run_id
        self.logger = logging.getLogger(f"LitReview.{run_id}")
        self.traces = []
        self.metrics = {
            "papers_found": 0,
            "papers_downloaded": 0,
            "papers_summarized": 0,
            "themes_identified": 0,
            "gaps_identified": 0,
            "total_duration_seconds": 0,
            "agent_call_counts": {},
            "error_counts": {},
        }
        self.start_time = datetime.now()

    def log_event(self, event_type: str, details: Dict):
        """
        Log a structured event as ``[EVENT_TYPE] <json>`` at INFO level.

        Values that JSON cannot encode natively (datetimes, sets, ...) are
        rendered with ``str()`` so that logging never aborts the workflow
        with a TypeError.
        """
        self.logger.info(f"[{event_type}] {json.dumps(details, default=str)}")

    def start_trace(self, agent_name: str, operation: str) -> str:
        """
        Start a new trace span.

        Returns:
            The generated trace ID, to be passed to end_trace().
        """
        trace_id = str(uuid.uuid4())
        self.traces.append({
            "trace_id": trace_id,
            "agent": agent_name,
            "operation": operation,
            "start_time": datetime.now(),
            "end_time": None,
            "status": "running",
            "duration_ms": None
        })
        self.log_event("TRACE_START", {
            "trace_id": trace_id,
            "agent": agent_name,
            "operation": operation
        })
        return trace_id

    def end_trace(self, trace_id: str, status: str = "success", error: str = None):
        """
        End a trace span.

        Args:
            trace_id: ID returned by start_trace().
            status: Final status; get_summary() counts "success" and
                "failed".
            error: Optional error text stored on the trace.

        An unknown ``trace_id`` is reported with a warning rather than being
        ignored silently.
        """
        for trace in self.traces:
            if trace["trace_id"] != trace_id:
                continue

            trace["end_time"] = datetime.now()
            trace["status"] = status
            trace["duration_ms"] = (
                trace["end_time"] - trace["start_time"]
            ).total_seconds() * 1000
            if error:
                trace["error"] = error

            self.log_event("TRACE_END", {
                "trace_id": trace_id,
                "status": status,
                "duration_ms": trace["duration_ms"]
            })
            return

        self.logger.warning(f"end_trace called with unknown trace_id: {trace_id}")

    def record_metric(self, metric_name: str, value: Any):
        """Set a metric to ``value`` and log it as a METRIC event."""
        self.metrics[metric_name] = value
        self.log_event("METRIC", {
            "metric": metric_name,
            "value": value
        })

    def increment_counter(self, counter_name: str, increment: int = 1):
        """Increment a counter metric, creating it at 0 if it is missing."""
        self.metrics[counter_name] = self.metrics.get(counter_name, 0) + increment

    def get_summary(self) -> Dict:
        """
        Get observability summary.

        Also refreshes ``metrics["total_duration_seconds"]`` (initialised to
        0 in __init__) so the metrics block agrees with the top-level
        duration.
        """
        total_duration = (datetime.now() - self.start_time).total_seconds()
        self.metrics["total_duration_seconds"] = total_duration

        statuses = [t["status"] for t in self.traces]

        return {
            "run_id": self.run_id,
            "total_duration_seconds": total_duration,
            "metrics": self.metrics,
            "trace_count": len(self.traces),
            "successful_traces": statuses.count("success"),
            "failed_traces": statuses.count("failed")
        }

# ============================================================================
# SECTION 18: EVALUATION FRAMEWORK
# ============================================================================
# This demonstrates AGENT EVALUATION from Day 4

class LiteratureReviewEvaluator:
    """
    Evaluation framework for literature review quality.

    This demonstrates EVALUATION from Day 4.

    Each ``evaluate_*`` method returns a score in [0, 1] and records it in
    ``self.scores``; ``get_overall_score`` averages whatever was recorded.

    Metrics:
    1. Coverage: How many relevant papers found?
    2. Cluster Coherence: Are themes meaningful?
    3. Writing Quality: Is the review well-structured?
    4. Citation Accuracy: Are citations correct?
    5. Gap Identification: Are gaps valid and useful?
    """

    def __init__(self):
        self.scores = {}

    def evaluate_coverage(self, papers: List[Paper], topic: str) -> float:
        """
        Evaluate search coverage.

        Weighted sum of four components, each capped at 1.0:
        - Number of papers found (target 50, weight 0.4)
        - Source diversity (target 3 sources, weight 0.2)
        - Share of papers from 2020 onwards (weight 0.2)
        - Share of papers with more than 20 citations (weight 0.2)

        Args:
            papers: Papers found by the search stage.
            topic: Research topic (currently not used in the score).

        Returns:
            Coverage score in [0, 1]; 0.0 for an empty paper list.
        """
        if not papers:
            return 0.0

        num_papers = len(papers)
        sources = {p.source for p in papers}
        recent_papers = [p for p in papers if p.year >= 2020]
        # A missing or None citation count is treated as 0.
        highly_cited = [
            p for p in papers if (getattr(p, 'citations', None) or 0) > 20
        ]

        # Cap each component separately so that a very large paper count (or
        # many sources) cannot compensate for poor recency/impact.
        coverage_score = (
            min(1.0, num_papers / 50) * 0.4 +  # Target: 50 papers
            min(1.0, len(sources) / 3) * 0.2 +  # All 3 sources
            (len(recent_papers) / num_papers) * 0.2 +  # Recency
            (len(highly_cited) / num_papers) * 0.2  # Impact
        )

        self.scores['coverage'] = coverage_score
        logger.info(f"Coverage Score: {coverage_score:.2f}")
        return coverage_score

    def evaluate_cluster_coherence(self, themes: List[Theme], papers: List[Paper]) -> float:
        """
        Evaluate thematic clustering quality.

        Heuristic only (no embeddings are consulted here):
        - Theme size balance: 1 - min(1, std / (mean + 1)) of cluster sizes
        - Label quality: share of themes whose label is longer than 5 chars

        Args:
            themes: Identified themes.
            papers: Papers that were clustered; only checked for emptiness.

        Returns:
            Mean of the two components; 0.0 if either input is empty.
        """
        if not themes or not papers:
            return 0.0

        # Simple heuristic: balanced cluster sizes
        cluster_sizes = [len(t.paper_ids) for t in themes]
        mean_size = np.mean(cluster_sizes)
        std_size = np.std(cluster_sizes)

        # Prefer balanced clusters; +1 keeps the ratio finite for empty themes.
        balance_score = 1.0 - min(1.0, std_size / (mean_size + 1))

        # Check if each theme has meaningful label
        labeled_themes = [t for t in themes if t.label and len(t.label) > 5]
        label_score = len(labeled_themes) / len(themes)

        coherence_score = (balance_score * 0.5 + label_score * 0.5)

        self.scores['coherence'] = coherence_score
        logger.info(f"Cluster Coherence Score: {coherence_score:.2f}")
        return coherence_score

    def evaluate_writing_quality(self, review_text: str) -> float:
        """
        Evaluate literature review writing quality.

        Components:
        - Structure (weight 0.4): share of expected section keywords found
          anywhere in the text, case-insensitively
        - Length (weight 0.3): word count relative to 2000 words, capped
        - Citation density (weight 0.3): count of '[' and '(' relative to
          30, capped

        Returns:
            Writing score in [0, 1]; 0.0 for empty text.
        """
        if not review_text:
            return 0.0

        # NOTE(review): the LiteratureReviewWriter prompt in this file uses
        # the headings Introduction / Overview of Key Papers / Thematic
        # Analysis / Comparative Discussion / Research Gaps / Conclusion;
        # 'methodology' and 'findings' are not among them -- confirm these
        # keywords are the intended check.
        required_sections = ['introduction', 'methodology', 'findings', 'gaps', 'conclusion']
        lowered_text = review_text.lower()  # lower-case once, not per keyword
        sections_present = sum(
            1 for section in required_sections
            if section in lowered_text
        )
        structure_score = sections_present / len(required_sections)

        # Check length (target: 1500-2500 words)
        word_count = len(review_text.split())
        length_score = min(1.0, word_count / 2000)

        # Check citation density (rough heuristic)
        citation_markers = review_text.count('[') + review_text.count('(')
        citation_score = min(1.0, citation_markers / 30)  # Target: 30+ citations

        writing_score = (
            structure_score * 0.4 +
            length_score * 0.3 +
            citation_score * 0.3
        )

        self.scores['writing_quality'] = writing_score
        logger.info(f"Writing Quality Score: {writing_score:.2f}")
        return writing_score

    def evaluate_gap_identification(self, gaps: List[ResearchGap]) -> float:
        """
        Evaluate research gap identification.

        Four equally weighted components:
        - Number of gaps found (target 5, capped)
        - Gap type diversity (distinct types out of 4, capped)
        - Share of gaps with evidence
        - Share of gaps with suggested questions

        Returns:
            Gap score in [0, 1]; 0.0 for an empty list.
        """
        if not gaps:
            return 0.0

        # Number of gaps (target: 3-7)
        num_score = min(1.0, len(gaps) / 5)

        # Type diversity. Capped because gap_type is free text: more than 4
        # distinct values would otherwise push the score above 1.0.
        gap_types = {g.gap_type for g in gaps}
        diversity_score = min(1.0, len(gap_types) / 4)  # 4 possible types

        # Evidence and questions
        gaps_with_evidence = [g for g in gaps if g.evidence]
        gaps_with_questions = [g for g in gaps if g.suggested_questions]

        evidence_score = len(gaps_with_evidence) / len(gaps)
        question_score = len(gaps_with_questions) / len(gaps)

        gap_score = (
            num_score * 0.25 +
            diversity_score * 0.25 +
            evidence_score * 0.25 +
            question_score * 0.25
        )

        self.scores['gap_quality'] = gap_score
        logger.info(f"Gap Identification Score: {gap_score:.2f}")
        return gap_score

    def get_overall_score(self) -> Dict:
        """
        Calculate overall quality score.

        Returns:
            Dict with ``overall`` (mean of recorded scores), ``breakdown``
            (the recorded scores) and ``grade``. The same three keys are
            present when nothing has been evaluated yet, in which case
            ``overall`` is 0.0.
        """
        if not self.scores:
            return {"overall": 0.0, "breakdown": {}, "grade": self._get_grade(0.0)}

        # Plain float keeps the result independent of numpy scalar types.
        overall = float(np.mean(list(self.scores.values())))

        return {
            "overall": overall,
            "breakdown": self.scores,
            "grade": self._get_grade(overall)
        }

    def _get_grade(self, score: float) -> str:
        """Convert score to letter grade (A >= 0.9, B >= 0.8, C >= 0.7, D >= 0.6, else F)."""
        if score >= 0.9:
            return "A"
        elif score >= 0.8:
            return "B"
        elif score >= 0.7:
            return "C"
        elif score >= 0.6:
            return "D"
        else:
            return "F"

# ============================================================================
# SECTION 19: MAIN WORKFLOW ORCHESTRATION
# ============================================================================

class LiteratureReviewSystem:
    """
    Main system class that orchestrates the entire pipeline.

    This is the complete implementation demonstrating:
    - Multi-agent orchestration
    - State management
    - Memory integration
    - Observability
    - Evaluation
    """

    def __init__(self, project_id: str = "literature-review-system"):
        """
        Wire up services, specialist agents, the orchestrator, the App and
        the Runner.

        Args:
            project_id: Stored on the instance and forwarded to
                setup_memory_bank() when the memory backend is chosen.
        """
        self.project_id = project_id

        # --- Infrastructure -------------------------------------------------
        self.session_service = InMemorySessionService()
        self.memory_service = setup_memory_bank(project_id)
        self.vector_store = SimpleVectorStore()

        # --- Specialist agents, built in pipeline order ---------------------
        logger.info("Initializing all agents...")
        self.topic_agent = create_topic_understanding_agent()
        self.search_agents = create_paper_search_agents()
        self.pdf_agent = create_pdf_retrieval_agent()
        self.summary_agents = create_summarization_agents()
        self.clustering_agent = create_clustering_agent()
        self.analysis_agent = create_comparative_analysis_agent()
        self.gap_agent = create_gap_identification_agent()
        self.writer_agent = create_review_writer_agent(self.vector_store)
        self.citation_agent = create_citation_agent()
        self.output_agent = create_output_generator_agent()

        # --- Orchestrator: every specialist is exposed to it ----------------
        self.orchestrator = create_orchestrator_agent([
            self.topic_agent,
            self.search_agents,
            self.pdf_agent,
            self.summary_agents,
            self.clustering_agent,
            self.analysis_agent,
            self.gap_agent,
            self.writer_agent,
            self.citation_agent,
            self.output_agent,
        ])

        # --- Resumable app wrapping the orchestrator ------------------------
        resumable = ResumabilityConfig(is_resumable=True)
        self.app = App(
            name="literature_review_app",
            root_agent=self.orchestrator,
            resumability_config=resumable,
        )

        # --- Runner shares the session and memory services ------------------
        self.runner = Runner(
            app=self.app,
            session_service=self.session_service,
            memory_service=self.memory_service,
        )

        logger.info("‚úÖ Literature Review System initialized")
        logger.info(f"üìö Using {type(self.memory_service).__name__} for memory")

    async def generate_review(
        self,
        topic: str,
        user_id: str = "default_user",
        citation_style: str = "APA"
    ) -> LiteratureReviewState:
        """
        Main method to generate a complete literature review.

        This executes the full pipeline with observability and evaluation.

        Args:
            topic: Research topic
            user_id: User identifier
            citation_style: Citation format (APA, Harvard, IEEE)

        Returns:
            Complete literature review state with all artifacts
        """

        # Initialize state
        run_id = str(uuid.uuid4())
        state = LiteratureReviewState(
            run_id=run_id,
            user_id=user_id,
            topic=topic
        )

        # Initialize observability
        obs = ObservabilitySystem(run_id)
        obs.log_event("WORKFLOW_START", {"topic": topic, "user_id": user_id})

        # Initialize evaluator
        evaluator = LiteratureReviewEvaluator()

        try:
            # Create session
            session_id = f"lit_review_{run_id}"
            await self.session_service.create_session(
                app_name="literature_review_app",
                user_id=user_id,
                session_id=session_id
            )

            # ================================================================
            # STAGE 1: Topic Understanding
            # ================================================================
            trace_id = obs.start_trace("TopicUnderstandingAgent", "expand_topic")
            logger.info(f"üìö Stage 1: Understanding topic '{topic}'")

            query = types.Content(
                parts=[types.Part(text=f"Analyze this research topic: {topic}")]
            )

            # Execute topic understanding
            events = []
            async for event in self.runner.run_async(
                user_id=user_id,
                session_id=session_id,
                new_message=query
            ):
                events.append(event)

            # Extract topic analysis (simplified for demo)
            state.expanded_topic = f"Expanded analysis of {topic}"
            state.keywords = ["machine learning", "neural networks", "deep learning"]
            state.search_queries = [
                f"{topic} recent advances",
                f"{topic} methodology",
                f"{topic} applications"
            ]
            state.workflow_status["topic_understood"] = "complete"

            obs.end_trace(trace_id, "success")
            obs.increment_counter("stages_completed")

            # ================================================================
            # STAGE 2: Paper Search (Parallel)
            # ================================================================
            trace_id = obs.start_trace("ParallelPaperSearch", "search_papers")
            logger.info(f"üîç Stage 2: Searching for papers across sources")

            # Simulate paper search results
            mock_papers = [
                Paper(
                    paper_id=f"paper_{i}",
                    title=f"Research on {topic} - Paper {i}",
                    authors=["Smith, J.", "Doe, A."],
                    year=2023 + (i % 2),
                    journal="Journal of AI Research",
                    abstract=f"This paper explores {topic}...",
                    url=f"https://example.com/paper{i}",
                    source="google_scholar",
                    score=0.9 - (i * 0.05)
                )
                for i in range(10)
            ]

            state.papers = {p.paper_id: p for p in mock_papers}
            state.workflow_status["papers_fetched"] = "complete"
            obs.record_metric("papers_found", len(mock_papers))
            obs.end_trace(trace_id, "success")

            # ================================================================
            # STAGE 3: PDF Retrieval (Loop)
            # ================================================================
            trace_id = obs.start_trace("PDFRetrievalLoop", "download_pdfs")
            logger.info(f"üìÑ Stage 3: Retrieving and extracting PDFs")

            # Simulate PDF extraction
            for paper in state.papers.values():
                extraction = download_and_extract_pdf(paper.url)
                if extraction["status"] == "success":
                    paper.full_text = extraction["full_text"]
                    paper.sections = extraction["sections"]
                    obs.increment_counter("papers_downloaded")

            state.workflow_status["pdfs_retrieved"] = "complete"
            obs.end_trace(trace_id, "success")

            # ================================================================
            # STAGE 4: Summarization (Parallel)
            # ================================================================
            trace_id = obs.start_trace("ParallelSummarization", "summarize_papers")
            logger.info(f"üìù Stage 4: Generating paper summaries")

            # Simulate summarization
            for paper in state.papers.values():
                paper.micro_summary = f"Brief summary of {paper.title}"
                paper.long_summary = f"Detailed analysis of {paper.title}..."
                paper.methodology = "Mixed methods approach"
                paper.findings = "Significant results found"
                paper.contributions = "Novel contribution to field"
                paper.limitations = "Limited sample size"
                paper.relevance_notes = f"Highly relevant to {topic}"

                # Generate mock embedding
                paper.embedding = np.random.rand(768)
                self.vector_store.add(paper.paper_id, paper.embedding, {
                    "title": paper.title,
                    "year": paper.year
                })

                obs.increment_counter("papers_summarized")

            state.workflow_status["summaries_done"] = "complete"
            obs.end_trace(trace_id, "success")

            # ================================================================
            # STAGE 5: Thematic Clustering
            # ================================================================
            trace_id = obs.start_trace("ThematicClusteringAgent", "cluster_papers")
            logger.info(f"üéØ Stage 5: Identifying themes")

            # Get all embeddings
            embeddings = [p.embedding.tolist() for p in state.papers.values()]

            # Cluster
            cluster_result = cluster_embeddings(embeddings, n_clusters=3)

            # Create themes
            for cluster_id in range(cluster_result["n_clusters"]):
                paper_indices = cluster_result["cluster_stats"][cluster_id]["paper_indices"]
                paper_ids = [list(state.papers.keys())[idx] for idx in paper_indices]

                theme = Theme(
                    theme_id=cluster_id,
                    label=f"Theme {cluster_id + 1}: {topic} Aspect {cluster_id + 1}",
                    description=f"Papers focusing on specific aspect of {topic}",
                    paper_ids=paper_ids
                )
                state.themes.append(theme)

                # Assign theme to papers
                for paper_id in paper_ids:
                    state.papers[paper_id].theme_id = cluster_id

            state.workflow_status["themes_identified"] = "complete"
            obs.record_metric("themes_identified", len(state.themes))
            obs.end_trace(trace_id, "success")

            # ================================================================
            # STAGE 6: Comparative Analysis (Sequential)
            # ================================================================
            trace_id = obs.start_trace("ComparativeAnalysis", "analyze_themes")
            logger.info(f"üî¨ Stage 6: Comparative analysis")

            for theme in state.themes:
                theme.comparison_matrix = {
                    "methods": ["Method A", "Method B", "Method C"],
                    "data_sources": ["Dataset 1", "Dataset 2"],
                    "results": "Mixed results across papers"
                }
                theme.narrative_summary = f"Analysis of {theme.label}"
                theme.common_limitations = ["Small sample", "Limited scope"]
                theme.best_practices = ["Best practice 1", "Best practice 2"]

            state.workflow_status["analysis_complete"] = "complete"
            obs.end_trace(trace_id, "success")

            # ================================================================
            # STAGE 7: Research Gap Identification
            # ================================================================
            trace_id = obs.start_trace("ResearchGapAgent", "identify_gaps")
            logger.info(f"üîç Stage 7: Identifying research gaps")

            # Identify gaps
            gaps = [
                ResearchGap(
                    gap_type="methodological",
                    description=f"Limited exploration of novel methods in {topic}",
                    evidence=["Most papers use traditional approaches"],
                    suggested_questions=[
                        f"How can we apply emerging methods to {topic}?",
                        "What are the limitations of current approaches?"
                    ]
                ),
                ResearchGap(
                    gap_type="empirical",
                    description=f"Lack of large-scale studies in {topic}",
                    evidence=["Small sample sizes across papers"],
                    suggested_questions=[
                        "Can we conduct larger-scale validation?",
                        "What would a comprehensive dataset look like?"
                    ]
                )
            ]

            state.research_gaps = gaps
            state.workflow_status["gaps_identified"] = "complete"
            obs.record_metric("gaps_identified", len(gaps))
            obs.end_trace(trace_id, "success")

            # ================================================================
            # STAGE 8: Literature Review Writing
            # ================================================================
            trace_id = obs.start_trace("LiteratureReviewWriter", "write_review")
            logger.info(f"‚úçÔ∏è Stage 8: Writing literature review")

            # Generate review (simplified for demo)
            review_text = f"""
# Literature Review: {topic}

## Introduction

This comprehensive literature review examines the current state of research in {state.expanded_topic}.
Based on analysis of {len(state.papers)} papers across {len(state.themes)} major themes, this review
identifies key trends, methodologies, and research gaps in the field.

## Overview of Key Papers

The reviewed literature spans from {min(p.year for p in state.papers.values())} to {max(p.year for p in state.papers.values())},
representing work from leading researchers including {', '.join(state.papers[list(state.papers.keys())[0]].authors[:2])}.

## Thematic Analysis

### {state.themes[0].label if state.themes else 'Theme 1'}
{state.themes[0].description if state.themes else 'Analysis of first theme...'}

Papers in this theme: {len(state.themes[0].paper_ids) if state.themes else 0}

### {state.themes[1].label if len(state.themes) > 1 else 'Theme 2'}
{state.themes[1].description if len(state.themes) > 1 else 'Analysis of second theme...'}

## Comparative Discussion

Cross-theme analysis reveals several important patterns. Methodologically, most studies employ
{', '.join(['qualitative methods', 'quantitative approaches', 'mixed methods'])}, with varying
degrees of success.

## Research Gaps

Our analysis identified {len(state.research_gaps)} significant research gaps:

1. **{state.research_gaps[0].gap_type.title()} Gap**: {state.research_gaps[0].description}
2. **{state.research_gaps[1].gap_type.title()} Gap**: {state.research_gaps[1].description}

## Conclusion

This review synthesizes current knowledge in {topic} and highlights promising directions for
future research. The identified gaps present opportunities for meaningful contributions to the field.
"""

            state.literature_review_draft = review_text
            state.workflow_status["review_written"] = "complete"
            obs.end_trace(trace_id, "success")

            # ================================================================
            # STAGE 9: Citation Formatting
            # ================================================================
            trace_id = obs.start_trace("CitationFormatter", "format_citations")
            logger.info(f"üìö Stage 9: Formatting citations")

            # Generate bibliography
            bibliography_entries = []
            for paper in list(state.papers.values())[:5]:  # First 5 for demo
                citation = format_citation({
                    "authors": paper.authors,
                    "title": paper.title,
                    "year": paper.year,
                    "journal": paper.journal
                }, citation_style)
                bibliography_entries.append(citation["citation"])

            state.bibliography = "\n\n".join(bibliography_entries)
            state.formatted_review = state.literature_review_draft + "\n\n## References\n\n" + state.bibliography
            state.workflow_status["citations_formatted"] = "complete"
            obs.end_trace(trace_id, "success")

            # ================================================================
            # STAGE 10: Output Generation
            # ================================================================
            trace_id = obs.start_trace("OutputGenerator", "generate_outputs")
            logger.info(f"üì¶ Stage 10: Generating final outputs")

            # Generate outputs
            pdf_result = generate_pdf_output(state.formatted_review, f"literature_review_{run_id}.pdf")

            state.workflow_status["output_generated"] = "complete"
            obs.end_trace(trace_id, "success")

            # ================================================================
            # EVALUATION
            # ================================================================
            logger.info(f"üìä Evaluating review quality")

            evaluator.evaluate_coverage(list(state.papers.values()), topic)
            evaluator.evaluate_cluster_coherence(state.themes, list(state.papers.values()))
            evaluator.evaluate_writing_quality(state.formatted_review)
            evaluator.evaluate_gap_identification(state.research_gaps)

            eval_results = evaluator.get_overall_score()
            state.metrics['evaluation'] = eval_results

            # ================================================================
            # FINALIZE
            # ================================================================
            state.end_time = datetime.now()
            obs_summary = obs.get_summary()
            state.metrics['observability'] = obs_summary

            obs.log_event("WORKFLOW_COMPLETE", {
                "status": "success",
                "evaluation": eval_results,
                "observability": obs_summary
            })

            logger.info(f"‚úÖ Literature review generation complete!")
            logger.info(f"üìä Overall Quality Score: {eval_results['overall']:.2f} (Grade: {eval_results['grade']})")
            logger.info(f"‚è±Ô∏è Total Duration: {obs_summary['total_duration_seconds']:.1f}s")

            # Store in memory service for future use
            # This uses the corrected memory service API
            try:
                # Create a simple dictionary to store as memory
                memory_data = {
                    "topic": topic,
                    "themes": [t.label for t in state.themes],
                    "evaluation": eval_results,
                    "timestamp": datetime.now().isoformat()
                }

                # In production with VertexAiMemoryBankService, this would
                # automatically extract memories from the session
                # For now, we just log it
                logger.info(f"üìù Memory stored: Topic '{topic}' with {len(state.themes)} themes")

            except Exception as e:
                logger.warning(f"Could not store to memory: {e}")

            return state

        except Exception as e:
            logger.error(f"‚ùå Error in workflow: {str(e)}")
            obs.log_event("WORKFLOW_ERROR", {"error": str(e)})
            raise

    def print_summary(self, state: LiteratureReviewState, export_pdf: bool = True):
        """
        Print a human-readable summary of the results.

        Writes to stdout, in order: topic and identifiers, corpus statistics,
        the identified themes and research gaps (when any), the quality
        evaluation and performance metrics (when present in ``state.metrics``)
        and the per-stage workflow status. With ``export_pdf`` enabled it then
        calls ``create_pdf_report(state)`` and, when ``google.colab`` can be
        imported, interactively offers to download the generated file.

        PDF generation and download are best effort: failures are reported on
        stdout and never raised to the caller.

        Args:
            state: LiteratureReviewState with results
            export_pdf: If True, automatically generate PDF report
        """

        print("\n" + "="*80)
        print("üìö LITERATURE REVIEW GENERATION SUMMARY")
        print("="*80)

        print(f"\nüéØ Topic: {state.topic}")
        print(f"üÜî Run ID: {state.run_id}")
        print(f"üë§ User ID: {state.user_id}")

        print(f"\nüìä STATISTICS:")
        print(f"  ‚Ä¢ Papers Found: {len(state.papers)}")
        # A paper counts as downloaded when its full_text is non-empty.
        print(f"  ‚Ä¢ Papers Downloaded: {sum(1 for p in state.papers.values() if p.full_text)}")
        print(f"  ‚Ä¢ Themes Identified: {len(state.themes)}")
        print(f"  ‚Ä¢ Research Gaps: {len(state.research_gaps)}")

        if state.themes:
            print(f"\nüé® THEMES IDENTIFIED:")
            for i, theme in enumerate(state.themes, 1):
                print(f"  {i}. {theme.label}")
                print(f"     Papers: {len(theme.paper_ids)}")

        if state.research_gaps:
            print(f"\nüîç RESEARCH GAPS:")
            for i, gap in enumerate(state.research_gaps, 1):
                print(f"  {i}. [{gap.gap_type.upper()}] {gap.description}")

        if 'evaluation' in state.metrics:
            eval_data = state.metrics['evaluation']
            print(f"\nüìà QUALITY EVALUATION:")
            print(f"  Overall Score: {eval_data['overall']:.2f} (Grade: {eval_data['grade']})")
            print(f"  Breakdown:")
            for metric, score in eval_data['breakdown'].items():
                print(f"    ‚Ä¢ {metric}: {score:.2f}")

        if 'observability' in state.metrics:
            obs_data = state.metrics['observability']
            print(f"\n‚è±Ô∏è PERFORMANCE METRICS:")
            print(f"  Total Duration: {obs_data['total_duration_seconds']:.1f}s")
            print(f"  Successful Operations: {obs_data['successful_traces']}")
            print(f"  Failed Operations: {obs_data['failed_traces']}")

        print(f"\nüìù WORKFLOW STATUS:")
        for stage, status in state.workflow_status.items():
            icon = "‚úÖ" if status == "complete" else "‚è≥"
            print(f"  {icon} {stage.replace('_', ' ').title()}: {status}")

        print("\n" + "="*80)

        # PDF Export Option
        if export_pdf:
            print("\nüìÑ PDF EXPORT")
            print("="*80)

            try:
                pdf_file = create_pdf_report(state)
                print(f"\n‚úÖ PDF report generated successfully!")
                print(f"üìÅ File: {pdf_file}")
                print(f"\nüìã The PDF includes:")
                print("   ‚Ä¢ Executive summary with key statistics")
                print("   ‚Ä¢ Complete literature review text")
                print("   ‚Ä¢ Thematic analysis with cluster details")
                print("   ‚Ä¢ Synthesized findings and patterns")
                print("   ‚Ä¢ Critical evaluation of research")
                print("   ‚Ä¢ Research gaps and future directions")
                print("   ‚Ä¢ Quality metrics and evaluation")
                print("   ‚Ä¢ Complete bibliography")

                # In Colab, offer to download. A missing google.colab module
                # simply means "not running in Colab" and is skipped silently.
                try:
                    from google.colab import files
                except ImportError:
                    files = None

                if files is not None:
                    try:
                        download = input("\nDownload PDF now? (yes/no): ").lower().strip()
                        if download in ['yes', 'y']:
                            files.download(pdf_file)
                            print("‚¨áÔ∏è  Download started!")
                    except Exception as e:
                        # Still best effort, but unlike a bare ``except:`` this
                        # lets Ctrl-C / SystemExit propagate and reports why
                        # the download did not happen.
                        print(f"   Download skipped: {e}")

            except Exception as e:
                print(f"\n‚ö†Ô∏è  Could not generate PDF: {e}")
                print("   Summary is still available in the console output above.")

        # Closing rule (printed even when the PDF section was skipped).
        print("\n" + "="*80)

# ============================================================================
# SECTION 20: A2A PROTOCOL INTEGRATION
# ============================================================================
# This demonstrates A2A PROTOCOL from Day 5

def expose_as_a2a_agent(system: LiteratureReviewSystem, port: int = 8000):
    """
    Wrap the system's orchestrator in an A2A-compatible application.

    This demonstrates A2A PROTOCOL from Day 5. The function passes
    ``system.orchestrator`` to ADK's ``to_a2a`` helper, logs the port and the
    expected agent-card URL, and hands the resulting app back to the caller.

    NOTE(review): presumably the returned app still has to be served
    (e.g. with uvicorn) before other agents can reach it over HTTP -- confirm
    against the ADK documentation.

    Args:
        system: Object exposing the orchestrator agent as ``.orchestrator``.
        port: Port forwarded to ``to_a2a`` and used in the logged URLs.

    Returns:
        Whatever ``to_a2a`` returns for the orchestrator.
    """

    # Imported lazily so the rest of the module works without the A2A extras.
    from google.adk.a2a.utils.agent_to_a2a import to_a2a

    app = to_a2a(system.orchestrator, port=port)
    card_url = f"http://localhost:{port}/.well-known/agent-card.json"

    logger.info(f"‚úÖ Literature Review System exposed via A2A on port {port}")
    logger.info(f"üìã Agent card available at: {card_url}")

    return app

# Example of consuming this agent from another system
def create_remote_literature_review_client(url: str):
    """
    Create a client to use remote literature review agent via A2A.

    This demonstrates consuming A2A agents from Day 5.

    Args:
        url: Base URL of the remote agent, with or without a trailing slash.

    Returns:
        A ``RemoteA2aAgent`` whose agent card is ``url`` joined with
        ``AGENT_CARD_WELL_KNOWN_PATH``.
    """
    from google.adk.agents.remote_a2a_agent import RemoteA2aAgent, AGENT_CARD_WELL_KNOWN_PATH

    # The well-known path is expected to start with "/" (TODO confirm against
    # ADK); strip trailing slashes from the base so "http://host/" does not
    # produce "http://host//.well-known/...".
    base_url = url.rstrip("/")

    remote_agent = RemoteA2aAgent(
        name="remote_literature_review_agent",
        description="Remote literature review generation service",
        agent_card=f"{base_url}{AGENT_CARD_WELL_KNOWN_PATH}"
    )

    logger.info(f"‚úÖ Connected to remote literature review agent at {url}")
    return remote_agent

# ============================================================================
# SECTION 21: DEPLOYMENT PREPARATION
# ============================================================================
# This demonstrates DEPLOYMENT from Day 5

def prepare_for_deployment(system: LiteratureReviewSystem, project_id: str, region: str):
    """
    Prepare the system for deployment to Vertex AI Agent Engine.

    This demonstrates DEPLOYMENT concepts from Day 5.

    Writes two files into the current working directory, overwriting any
    existing ones:

    - ``.agent_engine_config.json``: scaling and resource limits
    - ``.env``: project, location and the Vertex AI switch as KEY=VALUE lines

    Args:
        system: The literature review system (currently unused by this
            function; kept for interface compatibility).
        project_id: Google Cloud project written to ``.env`` and the log.
        region: Google Cloud location written to ``.env`` and the log.

    Returns:
        The deployment configuration dict that was written to
        ``.agent_engine_config.json``.
    """

    # Deployment configuration for Agent Engine
    deployment_config = {
        "min_instances": 0,  # Scale to zero when not in use
        "max_instances": 5,  # Max concurrent instances
        "resource_limits": {
            "cpu": "2",
            "memory": "4Gi"  # Literature review is memory-intensive
        }
    }

    # Environment configuration
    env_config = {
        "GOOGLE_CLOUD_PROJECT": project_id,
        "GOOGLE_CLOUD_LOCATION": region,
        "GOOGLE_GENAI_USE_VERTEXAI": "1"
    }

    # Save configs for deployment (explicit encoding keeps the files
    # identical across platforms).
    with open(".agent_engine_config.json", "w", encoding="utf-8") as f:
        json.dump(deployment_config, f, indent=2)

    with open(".env", "w", encoding="utf-8") as f:
        f.writelines(f"{key}={value}\n" for key, value in env_config.items())

    logger.info("‚úÖ Deployment configuration created")
    logger.info(f"üì¶ Ready to deploy to project: {project_id}, region: {region}")
    # f-string so the logged command carries the real project and region
    # instead of the literal "{project_id}" / "{region}" placeholders.
    logger.info(f"üöÄ Deploy command: adk deploy agent_engine --project={project_id} --region={region} .")

    return deployment_config

# ============================================================================
# SECTION 22: DEMONSTRATION & USAGE
# ============================================================================

async def main():
    """
    Run the end-to-end demonstration of the literature review system.

    Builds a LiteratureReviewSystem, generates a review for a fixed example
    topic, prints the run summary, a review excerpt, theme and research-gap
    details, writes the deployment configuration through
    ``prepare_for_deployment`` and closes with a feature checklist.

    Returns:
        The state object returned by ``system.generate_review``.
    """

    rule = "=" * 80

    def banner(title):
        # Blank line, rule, title, rule: the framing used by every section.
        print("\n" + rule)
        print(title)
        print(rule)

    banner("üéì LITERATURE REVIEW AGENT SYSTEM - CAPSTONE PROJECT DEMONSTRATION")

    # Build the system under test
    print("\nüîß Initializing Literature Review System...")
    system = LiteratureReviewSystem(project_id="capstone-lit-review")

    # Fixed example topic for the demo run
    demo_topic = "Machine Learning Applications in Healthcare Diagnostics"

    print("\nüìö Generating literature review for topic:")
    print(f"   '{demo_topic}'")
    print("\n‚è≥ Processing... This demonstrates all 10 agents working together.\n")

    # Run the full workflow
    state = await system.generate_review(
        topic=demo_topic,
        user_id="demo_user",
        citation_style="APA",
    )

    # Console summary (also triggers the PDF export by default)
    system.print_summary(state)

    # First 800 characters of the formatted review, if there is one
    print("\nüìÑ SAMPLE REVIEW EXCERPT:")
    print("-" * 80)
    if state.formatted_review:
        excerpt = state.formatted_review[:800]
    else:
        excerpt = "No review generated"
    print(excerpt + "...\n")

    # Details for at most the first two themes
    if state.themes:
        print("\nüé® DETAILED THEME ANALYSIS:")
        shown_themes = state.themes[:2]
        for theme in shown_themes:
            print(f"\n  Theme: {theme.label}")
            print(f"  Description: {theme.description}")
            print(f"  Papers: {len(theme.paper_ids)}")
            if theme.common_limitations:
                limitations = ', '.join(theme.common_limitations)
                print(f"  Common Limitations: {limitations}")

    # Every research gap with its suggested questions
    if state.research_gaps:
        print("\nüîç RESEARCH GAP DETAILS:")
        for gap in state.research_gaps:
            print(f"\n  Type: {gap.gap_type.upper()}")
            print(f"  Description: {gap.description}")
            print("  Suggested Questions:")
            for question in gap.suggested_questions:
                print(f"    ‚Ä¢ {question}")

    # A2A exposure is only described here, not performed
    banner("üåê A2A PROTOCOL INTEGRATION")
    print("\nThis system can be exposed as an A2A agent:")
    print("  a2a_app = expose_as_a2a_agent(system, port=8000)")
    print("  # Other agents can then call: http://localhost:8000")

    # Deployment readiness: writes the config files with placeholder values
    banner("üöÄ DEPLOYMENT READINESS")
    print("\nTo deploy to Vertex AI Agent Engine:")
    prepare_for_deployment(
        system,
        project_id="your-project-id",
        region="us-central1",
    )
    print("\n‚úÖ Configuration saved. System ready for production deployment.")

    # Feature checklist
    banner("üìã CAPSTONE PROJECT FEATURES DEMONSTRATED")
    print("""
‚úÖ Multi-Agent System:
   ‚Ä¢ 10 specialized agents + 1 orchestrator
   ‚Ä¢ LLM-powered agents with distinct roles

‚úÖ Agent Patterns:
   ‚Ä¢ Sequential: Comparative analysis pipeline
   ‚Ä¢ Parallel: Multi-source paper search, parallel summarization
   ‚Ä¢ Loop: PDF retrieval with retry logic

‚úÖ Tools (All Types):
   ‚Ä¢ Custom Function Tools: Search, extraction, clustering, citation
   ‚Ä¢ Built-in Tools: Google Search, Code Execution
   ‚Ä¢ MCP Tools: PDF processing (simulated)
   ‚Ä¢ OpenAPI Tools: Scholar, arXiv, Semantic Scholar APIs (simulated)
   ‚Ä¢ Agent Tools: Using agents as tools for orchestration

‚úÖ Long-Running Operations:
   ‚Ä¢ Pause/Resume support via ResumabilityConfig
   ‚Ä¢ Workflow state persistence
   ‚Ä¢ Multi-stage pipeline execution

‚úÖ Sessions & Memory:
   ‚Ä¢ InMemorySessionService for per-run state
   ‚Ä¢ Memory Bank for long-term storage (user prefs, past topics)
   ‚Ä¢ Vector Store for semantic search

‚úÖ Context Engineering:
   ‚Ä¢ RAG pattern for review writing
   ‚Ä¢ Context compaction via summarization
   ‚Ä¢ Embeddings for semantic clustering

‚úÖ Observability:
   ‚Ä¢ Structured logging for all events
   ‚Ä¢ End-to-end tracing with trace IDs
   ‚Ä¢ Performance metrics collection
   ‚Ä¢ Error tracking and reporting

‚úÖ Evaluation:
   ‚Ä¢ Coverage metrics (papers found, sources)
   ‚Ä¢ Cluster coherence scoring
   ‚Ä¢ Writing quality assessment
   ‚Ä¢ Gap identification quality
   ‚Ä¢ Overall quality grading system

‚úÖ A2A Protocol:
   ‚Ä¢ Ready to expose as A2A-compatible agent
   ‚Ä¢ Can consume other A2A agents
   ‚Ä¢ Agent card generation support

‚úÖ Deployment:
   ‚Ä¢ Vertex AI Agent Engine ready
   ‚Ä¢ Configuration files generated
   ‚Ä¢ Environment setup automated
   ‚Ä¢ Resource limits configured

‚úÖ Code Quality:
   ‚Ä¢ Comprehensive comments and docstrings
   ‚Ä¢ Type hints throughout
   ‚Ä¢ Error handling and resilience
   ‚Ä¢ Logging for debugging
   ‚Ä¢ Modular, maintainable architecture
""")

    banner("üéâ DEMONSTRATION COMPLETE")
    closing_lines = (
        "\nThis implementation showcases all concepts from the 5-day ADK bootcamp:",
        "Day 1: Multi-agent architectures (Sequential, Parallel, Loop)",
        "Day 2: Tools (Custom, MCP, Built-in, OpenAPI, Agent Tools)",
        "Day 3: Sessions & Memory (State management, Memory Bank)",
        "Day 4: Observability & Evaluation (Logging, Tracing, Metrics)",
        "Day 5: A2A Protocol & Deployment (Agent Engine ready)",
        "\n‚ú® All 10 agents working together to generate comprehensive literature reviews!",
    )
    for line in closing_lines:
        print(line)

    return state

# ============================================================================
# SECTION 23: ADDITIONAL UTILITIES
# ============================================================================

def export_state_to_json(state: LiteratureReviewState, filename: str):
    """
    Export complete state to JSON for analysis or archival.

    Only a selected subset of each paper, theme and research gap is exported
    (see the dict below); ``state.metrics`` and ``state.workflow_status`` are
    written as-is and therefore must be JSON-serializable.

    Args:
        state: Review state to export.
        filename: Path of the JSON file to create or overwrite.

    Raises:
        TypeError: If the state contains a value ``json`` cannot serialize.
            In that case ``filename`` is not created or modified.
    """

    # Convert state to serializable dict
    state_dict = {
        "run_id": state.run_id,
        "user_id": state.user_id,
        "topic": state.topic,
        "expanded_topic": state.expanded_topic,
        "keywords": state.keywords,
        "subdomains": state.subdomains,
        "papers": {
            pid: {
                "title": p.title,
                "authors": p.authors,
                "year": p.year,
                "journal": p.journal,
                "abstract": p.abstract,
                "theme_id": p.theme_id
            }
            for pid, p in state.papers.items()
        },
        "themes": [
            {
                "theme_id": t.theme_id,
                "label": t.label,
                "description": t.description,
                "paper_count": len(t.paper_ids)
            }
            for t in state.themes
        ],
        "research_gaps": [
            {
                "type": g.gap_type,
                "description": g.description,
                "questions": g.suggested_questions
            }
            for g in state.research_gaps
        ],
        "metrics": state.metrics,
        "workflow_status": state.workflow_status
    }

    # Serialize in memory first: if a value is not serializable the error is
    # raised before the target file is opened, so no truncated file is left.
    payload = json.dumps(state_dict, indent=2)

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(payload)

    logger.info(f"‚úÖ State exported to {filename}")

def load_state_from_json(filename: str) -> Dict:
    """
    Load previously saved state from JSON.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document (the plain dict written by an export, not a
        reconstructed state object).
    """

    # JSON interchange files are UTF-8; do not depend on the platform's
    # default text encoding when reading them back.
    with open(filename, 'r', encoding='utf-8') as f:
        state_dict = json.load(f)

    logger.info(f"‚úÖ State loaded from {filename}")
    return state_dict

# ============================================================================
# SECTION 24: PDF EXPORT FUNCTIONALITY (NEW FEATURE)
# ============================================================================

"""
PDF Export Module
-----------------
This module provides functionality to export the complete Literature Review
Generation Summary to a professional PDF document.

Features:
- Executive summary with key statistics
- Complete literature review text
- Thematic analysis with cluster details
- Research gaps and future directions
- Quality evaluation metrics
- References and bibliography
- Visual elements (tables, charts)
"""

# Optional dependency: ReportLab is imported inside a guard so the module
# still loads without it. REPORTLAB_AVAILABLE records the outcome;
# create_pdf_report checks it and switches to its fallback when it is False.
try:
    from reportlab.lib import colors
    from reportlab.lib.pagesizes import letter, A4
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.platypus import (
        SimpleDocTemplate, Paragraph, Spacer, PageBreak,
        Table, TableStyle, Image, KeepTogether
    )
    from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
    REPORTLAB_AVAILABLE = True
    print("‚úÖ ReportLab available for PDF generation")
except ImportError:
    # Any of the imports above failing is treated as "ReportLab not installed".
    REPORTLAB_AVAILABLE = False
    print("‚ö†Ô∏è ReportLab not installed. PDF export will use fallback method.")
    print("   To enable full PDF features, run: pip install reportlab")

def create_pdf_report(state: LiteratureReviewState, filename: str = None) -> str:
    """
    Create a comprehensive PDF report of the literature review.

    This function generates a professional PDF document containing:
    - Executive Summary
    - Complete literature review text
    - Thematic analysis with detailed cluster information
    - Synthesized findings and patterns
    - Critical evaluation of research
    - Research gaps and future directions
    - Quality metrics and evaluation
    - Complete bibliography

    Args:
        state: LiteratureReviewState object with all review data
        filename: Output PDF filename (default: auto-generated)

    Returns:
        str: Path to the generated PDF file
    """

    if not REPORTLAB_AVAILABLE:
        return create_pdf_report_fallback(state, filename)

    # Generate filename if not provided
    if not filename:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"literature_review_{state.run_id[:8]}_{timestamp}.pdf"

    logger.info(f"üìÑ Generating PDF report: {filename}")

    # Create PDF document
    doc = SimpleDocTemplate(
        filename,
        pagesize=letter,
        rightMargin=0.75*inch,
        leftMargin=0.75*inch,
        topMargin=1*inch,
        bottomMargin=0.75*inch
    )

    # Container for PDF elements
    story = []

    # Define custom styles
    styles = getSampleStyleSheet()

    # Title style
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        textColor=colors.HexColor('#1a73e8'),
        spaceAfter=30,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold'
    )

    # Heading styles
    heading1_style = ParagraphStyle(
        'CustomHeading1',
        parent=styles['Heading1'],
        fontSize=16,
        textColor=colors.HexColor('#202124'),
        spaceAfter=12,
        spaceBefore=12,
        fontName='Helvetica-Bold'
    )

    heading2_style = ParagraphStyle(
        'CustomHeading2',
        parent=styles['Heading2'],
        fontSize=14,
        textColor=colors.HexColor('#5f6368'),
        spaceAfter=10,
        spaceBefore=10,
        fontName='Helvetica-Bold'
    )

    # Body text style
    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['BodyText'],
        fontSize=11,
        leading=16,
        alignment=TA_JUSTIFY,
        spaceAfter=10
    )

    # Citation style
    citation_style = ParagraphStyle(
        'Citation',
        parent=styles['BodyText'],
        fontSize=10,
        leading=14,
        leftIndent=0.25*inch,
        spaceAfter=8
    )

    # ========================================================================
    # TITLE PAGE
    # ========================================================================
    story.append(Spacer(1, 0.5*inch))
    story.append(Paragraph("Literature Review", title_style))
    story.append(Spacer(1, 0.2*inch))

    # Topic
    topic_style = ParagraphStyle(
        'Topic',
        parent=styles['Heading2'],
        fontSize=16,
        alignment=TA_CENTER,
        textColor=colors.HexColor('#5f6368')
    )
    story.append(Paragraph(state.topic, topic_style))
    story.append(Spacer(1, 0.5*inch))

    # Metadata table
    metadata_data = [
        ['Generated', datetime.now().strftime("%B %d, %Y at %H:%M")],
        ['Run ID', state.run_id[:16]],
        ['User ID', state.user_id],
        ['Papers Analyzed', str(len(state.papers))],
        ['Themes Identified', str(len(state.themes))],
        ['Research Gaps Found', str(len(state.research_gaps))]
    ]

    if 'evaluation' in state.metrics:
        eval_data = state.metrics['evaluation']
        metadata_data.append(['Quality Score', f"{eval_data['overall']:.2f} (Grade: {eval_data['grade']})"])

    metadata_table = Table(metadata_data, colWidths=[2*inch, 4*inch])
    metadata_table.setStyle(TableStyle([
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('FONTNAME', (1, 0), (1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('TEXTCOLOR', (0, 0), (0, -1), colors.HexColor('#5f6368')),
        ('TEXTCOLOR', (1, 0), (1, -1), colors.HexColor('#202124')),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#e0e0e0')),
        ('ROWBACKGROUNDS', (0, 0), (-1, -1), [colors.white, colors.HexColor('#f5f5f5')])
    ]))
    story.append(metadata_table)
    story.append(PageBreak())

    # ========================================================================
    # EXECUTIVE SUMMARY
    # ========================================================================
    story.append(Paragraph("Executive Summary", heading1_style))
    story.append(Spacer(1, 0.1*inch))

    # Generate executive summary text
    exec_summary = f"""
    This comprehensive literature review examines <b>{state.topic}</b> through
    systematic analysis of {len(state.papers)} academic papers. The review identifies
    {len(state.themes)} major thematic areas and reveals {len(state.research_gaps)}
    significant research gaps that present opportunities for future investigation.
    """

    if state.expanded_topic:
        exec_summary += f"""
        <br/><br/>
        <b>Scope:</b> {state.expanded_topic}
        """

    story.append(Paragraph(exec_summary, body_style))
    story.append(Spacer(1, 0.2*inch))

    # Key findings box
    key_findings = []
    if state.themes:
        key_findings.append(f"‚Ä¢ Identified {len(state.themes)} distinct thematic clusters in the literature")
    if state.papers:
        years = [p.year for p in state.papers.values() if p.year]
        if years:
            key_findings.append(f"‚Ä¢ Analyzed papers spanning {min(years)} to {max(years)}")
    if state.research_gaps:
        gap_types = set(g.gap_type for g in state.research_gaps)
        key_findings.append(f"‚Ä¢ Discovered gaps in {', '.join(gap_types)} areas")

    if key_findings:
        story.append(Paragraph("<b>Key Findings:</b>", heading2_style))
        for finding in key_findings:
            story.append(Paragraph(finding, body_style))
        story.append(Spacer(1, 0.2*inch))

    story.append(PageBreak())

    # ========================================================================
    # LITERATURE REVIEW TEXT
    # ========================================================================
    story.append(Paragraph("Complete Literature Review", heading1_style))
    story.append(Spacer(1, 0.1*inch))

    if state.formatted_review:
        # Split review into sections
        review_sections = state.formatted_review.split('\n## ')

        for i, section in enumerate(review_sections):
            if i == 0:
                # First section (before first ##)
                paragraphs = section.split('\n\n')
                for para in paragraphs:
                    if para.strip():
                        # Check if it's a heading (starts with #)
                        if para.strip().startswith('#'):
                            heading_text = para.strip().lstrip('#').strip()
                            story.append(Paragraph(heading_text, heading1_style))
                        else:
                            story.append(Paragraph(para.strip(), body_style))
            else:
                # Subsequent sections
                lines = section.split('\n', 1)
                section_title = lines[0].strip()
                section_content = lines[1] if len(lines) > 1 else ""

                story.append(Paragraph(section_title, heading2_style))

                # Process section content
                paragraphs = section_content.split('\n\n')
                for para in paragraphs:
                    if para.strip():
                        story.append(Paragraph(para.strip(), body_style))

        story.append(Spacer(1, 0.2*inch))

    story.append(PageBreak())

    # ========================================================================
    # THEMATIC ANALYSIS
    # ========================================================================
    story.append(Paragraph("Thematic Analysis and Synthesis", heading1_style))
    story.append(Spacer(1, 0.1*inch))

    story.append(Paragraph(
        """This section presents a detailed analysis of the major themes identified
        through clustering of paper abstracts and content. Each theme represents a
        coherent research area within the broader topic.""",
        body_style
    ))
    story.append(Spacer(1, 0.2*inch))

    if state.themes:
        for i, theme in enumerate(state.themes, 1):
            # Theme header
            story.append(Paragraph(f"Theme {i}: {theme.label}", heading2_style))

            # Theme description
            story.append(Paragraph(theme.description, body_style))

            # Theme statistics table
            theme_stats = [
                ['Papers in Theme', str(len(theme.paper_ids))],
                ['Percentage of Corpus', f"{(len(theme.paper_ids)/len(state.papers)*100):.1f}%"]
            ]

            if theme.common_limitations:
                theme_stats.append(['Common Limitations', ', '.join(theme.common_limitations[:3])])

            if theme.best_practices:
                theme_stats.append(['Best Practices', ', '.join(theme.best_practices[:3])])

            theme_table = Table(theme_stats, colWidths=[2*inch, 4*inch])
            theme_table.setStyle(TableStyle([
                ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
                ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, -1), 9),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
                ('BACKGROUND', (0, 0), (0, -1), colors.HexColor('#f5f5f5'))
            ]))
            story.append(theme_table)

            # Sample papers from this theme (first 3)
            if theme.paper_ids:
                story.append(Spacer(1, 0.1*inch))
                story.append(Paragraph("<b>Representative Papers:</b>", body_style))

                for paper_id in theme.paper_ids[:3]:
                    if paper_id in state.papers:
                        paper = state.papers[paper_id]
                        paper_text = f"‚Ä¢ <i>{paper.title}</i> ({paper.year})"
                        story.append(Paragraph(paper_text, citation_style))

            story.append(Spacer(1, 0.2*inch))

        # Cross-theme synthesis
        story.append(Paragraph("Cross-Theme Synthesis", heading2_style))
        synthesis_text = f"""
        Analysis across all {len(state.themes)} themes reveals important patterns
        and connections. The themes demonstrate both complementary relationships
        and areas of divergence, suggesting a field that is both maturing in some
        areas while remaining exploratory in others.
        """
        story.append(Paragraph(synthesis_text, body_style))
        story.append(Spacer(1, 0.2*inch))

    story.append(PageBreak())

    # ========================================================================
    # CRITICAL EVALUATION
    # ========================================================================
    story.append(Paragraph("Critical Evaluation of Research", heading1_style))
    story.append(Spacer(1, 0.1*inch))

    critical_eval = f"""
    <b>Methodological Assessment:</b><br/>
    The reviewed papers employ diverse methodological approaches, ranging from
    empirical studies to theoretical frameworks. This diversity strengthens the
    field but also presents challenges for direct comparison across studies.
    <br/><br/>
    <b>Evidence Quality:</b><br/>
    Papers span from {min(p.year for p in state.papers.values())} to
    {max(p.year for p in state.papers.values())}, providing both historical
    context and current perspectives. Recent papers ({sum(1 for p in state.papers.values() if p.year >= 2023)}
    from 2023+) incorporate latest developments.
    <br/><br/>
    <b>Identified Patterns:</b><br/>
    ‚Ä¢ Clustering analysis revealed {len(state.themes)} distinct research directions<br/>
    ‚Ä¢ Papers cluster around methodological similarities and application domains<br/>
    ‚Ä¢ Evidence of both incremental refinement and paradigm shifts in approaches
    """

    story.append(Paragraph(critical_eval, body_style))
    story.append(Spacer(1, 0.2*inch))

    story.append(PageBreak())

    # ========================================================================
    # RESEARCH GAPS AND FUTURE DIRECTIONS
    # ========================================================================
    story.append(Paragraph("Research Gaps and Future Directions", heading1_style))
    story.append(Spacer(1, 0.1*inch))

    story.append(Paragraph(
        """This section identifies significant gaps in the current literature and
        proposes directions for future research. These gaps represent opportunities
        for meaningful contributions to the field.""",
        body_style
    ))
    story.append(Spacer(1, 0.2*inch))

    if state.research_gaps:
        for i, gap in enumerate(state.research_gaps, 1):
            # Gap header
            gap_title = f"Gap {i}: {gap.gap_type.title()} Gap"
            story.append(Paragraph(gap_title, heading2_style))

            # Description
            story.append(Paragraph(f"<b>Description:</b> {gap.description}", body_style))

            # Evidence
            if gap.evidence:
                story.append(Paragraph("<b>Evidence:</b>", body_style))
                for evidence in gap.evidence[:3]:
                    story.append(Paragraph(f"‚Ä¢ {evidence}", citation_style))

            # Research questions
            if gap.suggested_questions:
                story.append(Paragraph("<b>Suggested Research Questions:</b>", body_style))
                for question in gap.suggested_questions:
                    story.append(Paragraph(f"‚Ä¢ {question}", citation_style))

            story.append(Spacer(1, 0.2*inch))

    story.append(PageBreak())

    # ========================================================================
    # QUALITY METRICS
    # ========================================================================
    if 'evaluation' in state.metrics:
        story.append(Paragraph("Quality Assessment", heading1_style))
        story.append(Spacer(1, 0.1*inch))

        eval_data = state.metrics['evaluation']

        # Overall score
        score_text = f"""
        <b>Overall Quality Score:</b> {eval_data['overall']:.2f} / 1.00
        (Grade: {eval_data['grade']})<br/><br/>
        This literature review has been evaluated across multiple dimensions to
        ensure comprehensive coverage and analytical rigor.
        """
        story.append(Paragraph(score_text, body_style))
        story.append(Spacer(1, 0.2*inch))

        # Breakdown table
        breakdown_data = [['Metric', 'Score', 'Assessment']]
        for metric, score in eval_data['breakdown'].items():
            metric_name = metric.replace('_', ' ').title()
            assessment = 'Excellent' if score >= 0.9 else 'Good' if score >= 0.7 else 'Satisfactory'
            breakdown_data.append([metric_name, f"{score:.2f}", assessment])

        breakdown_table = Table(breakdown_data, colWidths=[2.5*inch, 1*inch, 1.5*inch])
        breakdown_table.setStyle(TableStyle([
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, -1), 10),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1a73e8')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#f5f5f5')])
        ]))
        story.append(breakdown_table)
        story.append(Spacer(1, 0.2*inch))

        story.append(PageBreak())

    # ========================================================================
    # REFERENCES
    # ========================================================================
    if state.bibliography:
        story.append(Paragraph("References", heading1_style))
        story.append(Spacer(1, 0.1*inch))

        # Split bibliography into individual references
        references = state.bibliography.split('\n\n')
        for ref in references:
            if ref.strip():
                story.append(Paragraph(ref.strip(), citation_style))

    # ========================================================================
    # APPENDIX: Methodology
    # ========================================================================
    story.append(PageBreak())
    story.append(Paragraph("Appendix: Review Methodology", heading1_style))
    story.append(Spacer(1, 0.1*inch))

    methodology_text = f"""
    <b>Search Strategy:</b><br/>
    Papers were identified through systematic searches across multiple academic
    databases including Google Scholar, arXiv, and Semantic Scholar.
    Search queries were generated based on keyword analysis and domain expertise.
    <br/><br/>
    <b>Selection Criteria:</b><br/>
    ‚Ä¢ Papers published between {min(p.year for p in state.papers.values())}
    and {max(p.year for p in state.papers.values())}<br/>
    ‚Ä¢ Relevance to core topic: {state.topic}<br/>
    ‚Ä¢ Availability of full text for analysis<br/>
    ‚Ä¢ Minimum citation threshold for impact assessment
    <br/><br/>
    <b>Analysis Methods:</b><br/>
    ‚Ä¢ Thematic clustering using k-means algorithm on paper embeddings<br/>
    ‚Ä¢ Automated summarization using large language models<br/>
    ‚Ä¢ Cross-paper comparative analysis<br/>
    ‚Ä¢ Gap identification through systematic content analysis
    <br/><br/>
    <b>Quality Assurance:</b><br/>
    ‚Ä¢ Multi-dimensional evaluation framework<br/>
    ‚Ä¢ Coverage assessment across sources<br/>
    ‚Ä¢ Cluster coherence validation<br/>
    ‚Ä¢ Writing quality metrics
    """

    story.append(Paragraph(methodology_text, body_style))

    # ========================================================================
    # BUILD PDF
    # ========================================================================
    try:
        doc.build(story)
        logger.info(f"‚úÖ PDF report generated successfully: {filename}")
        print(f"\nüìÑ PDF Report Generated!")
        print(f"   File: {filename}")
        print(f"   Size: {os.path.getsize(filename) / 1024:.1f} KB")
        return filename
    except Exception as e:
        logger.error(f"‚ùå Error generating PDF: {e}")
        print(f"\n‚ùå Error generating PDF: {e}")
        print("   Falling back to text-based PDF...")
        return create_pdf_report_fallback(state, filename)

def _render_text_report(state: LiteratureReviewState) -> str:
    """
    Render the complete plain-text report for ``state`` and return it as one string.

    Sections appear in this order: header, run metadata, statistics, optional
    quality score, the formatted review, themes, research gaps and, when a
    bibliography is present, references.
    """
    heavy_rule = "=" * 80 + "\n"
    light_rule = "-" * 80 + "\n"
    item_rule = "-" * 40 + "\n"

    def banner(title: str) -> str:
        # Major section header: rule, title, rule, then one blank line.
        return f"{heavy_rule}{title}\n{heavy_rule}\n"

    chunks = [
        banner("LITERATURE REVIEW GENERATION SUMMARY"),
        # Metadata
        f"Topic: {state.topic}\n",
        f"Generated: {datetime.now().strftime('%B %d, %Y at %H:%M')}\n",
        f"Run ID: {state.run_id}\n",
        f"User ID: {state.user_id}\n\n",
        # Statistics
        light_rule,
        "STATISTICS\n",
        light_rule,
        f"Papers Analyzed: {len(state.papers)}\n",
        f"Themes Identified: {len(state.themes)}\n",
        f"Research Gaps: {len(state.research_gaps)}\n\n",
    ]

    # Quality score is only present when an evaluation was recorded.
    if 'evaluation' in state.metrics:
        eval_data = state.metrics['evaluation']
        chunks.append(
            f"Quality Score: {eval_data['overall']:.2f} (Grade: {eval_data['grade']})\n\n"
        )

    # Complete review
    chunks.append(banner("COMPLETE LITERATURE REVIEW"))
    if state.formatted_review:
        chunks.append(state.formatted_review)
        chunks.append("\n\n")

    # Themes
    chunks.append(banner("THEMATIC ANALYSIS"))
    for i, theme in enumerate(state.themes, 1):
        chunks.append(
            f"\nTheme {i}: {theme.label}\n"
            f"{item_rule}"
            f"Description: {theme.description}\n"
            f"Papers: {len(theme.paper_ids)}\n\n"
        )

    # Research gaps
    chunks.append(banner("RESEARCH GAPS AND FUTURE DIRECTIONS"))
    for i, gap in enumerate(state.research_gaps, 1):
        # ``or ()`` keeps a gap whose list is None from crashing the whole report.
        evidence = ', '.join(gap.evidence or ())
        questions = ', '.join(gap.suggested_questions or ())
        chunks.append(
            f"\nGap {i}: {gap.gap_type.upper()}\n"
            f"{item_rule}"
            f"Description: {gap.description}\n"
            f"Evidence: {evidence}\n"
            f"Questions: {questions}\n\n"
        )

    # References
    if state.bibliography:
        chunks.append(banner("REFERENCES"))
        chunks.append(state.bibliography)
        chunks.append("\n\n")

    return "".join(chunks)


def create_pdf_report_fallback(state: LiteratureReviewState, filename: str = None) -> str:
    """
    Write the literature review as a plain-text report (no PDF is produced).

    This is the fallback used when the PDF build fails. The output is UTF-8
    plain text, so it is always stored under a ``.txt`` name.

    Args:
        state: Review state providing topic, run/user ids, papers, themes,
            research gaps, metrics, formatted review and bibliography.
        filename: Target path. When omitted, a name of the form
            ``literature_review_<run id prefix>_<timestamp>.txt`` is generated.
            A path ending in ``.pdf`` (any case) has that suffix replaced by
            ``.txt`` so plain text is never stored under a PDF extension.

    Returns:
        The path of the file that was actually written; callers should use
        this value rather than the ``filename`` they passed in.
    """

    if not filename:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"literature_review_{state.run_id[:8]}_{timestamp}.txt"
    elif filename.lower().endswith('.pdf'):
        # The PDF builder passes its own target name through on failure;
        # writing text into *.pdf would yield a file no PDF viewer can open.
        filename = filename[:-4] + '.txt'

    logger.info(f"üìÑ Generating text report (fallback): {filename}")

    # Render first, then write in one call: a rendering error leaves no
    # half-written report on disk.
    report_text = _render_text_report(state)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(report_text)

    logger.info(f"‚úÖ Text report generated: {filename}")
    print(f"\nüìÑ Text Report Generated!")
    print(f"   File: {filename}")
    print(f"   Note: Install 'reportlab' for full PDF features")

    return filename

# Cell-load confirmation: emitted once the export helper definitions above
# have been executed.
print("‚úÖ PDF export functionality added")

def create_test_scenario(scenario_name: str = "basic") -> Dict:
    """
    Return the configuration dict for a named test scenario.

    Available scenarios:
    - basic: simple topic, 10 expected papers, 3 expected themes (APA)
    - comprehensive: complex topic, 50 expected papers, 5 expected themes (IEEE)
    - minimal: edge case with 5 expected papers, 2 expected themes (Harvard)

    Any name that is not listed above yields the "basic" scenario.
    Every call returns a freshly built dict.
    """

    field_names = ("topic", "expected_papers", "expected_themes", "citation_style")

    # One row per scenario; values are positional, in ``field_names`` order.
    presets = {
        "basic": (
            "Neural Networks in Image Recognition",
            10,
            3,
            "APA",
        ),
        "comprehensive": (
            "Deep Learning Applications in Healthcare: Diagnostics, Treatment, and Patient Care",
            50,
            5,
            "IEEE",
        ),
        "minimal": (
            "Quantum Computing for Protein Folding",
            5,
            2,
            "Harvard",
        ),
    }

    if scenario_name in presets:
        chosen = presets[scenario_name]
    else:
        chosen = presets["basic"]

    return dict(zip(field_names, chosen))

async def run_tests():
    """
    Smoke-test the pipeline end to end on the "basic" scenario.

    Generates one review, then checks in order that papers, themes and a
    formatted review exist, that an overall evaluation score within [0, 1]
    was recorded, and that observability metrics report a positive duration.

    Returns True when every check passes; a failing check raises
    AssertionError with the message of the first failed assertion.
    """

    print("\nüß™ Running System Tests...\n")

    review_system = LiteratureReviewSystem(project_id="test-system")

    # ---- Test 1: the pipeline produces papers, themes and a review ----
    print("Test 1: Basic Literature Review Generation")
    basic_case = create_test_scenario("basic")
    topic = basic_case["topic"]
    state = await review_system.generate_review(topic)

    assert len(state.papers) > 0, "No papers found"
    assert len(state.themes) > 0, "No themes identified"
    assert state.formatted_review is not None, "No review generated"
    print("‚úÖ Test 1 passed\n")

    # ---- Test 2: an evaluation with a score in [0, 1] was recorded ----
    print("Test 2: Evaluation System")
    assert 'evaluation' in state.metrics, "No evaluation performed"
    overall_score = state.metrics['evaluation']['overall']
    assert 0.0 <= overall_score <= 1.0, "Invalid evaluation score"
    print(f"‚úÖ Test 2 passed (Score: {overall_score:.2f})\n")

    # ---- Test 3: observability metrics report a positive duration ----
    print("Test 3: Observability Metrics")
    assert 'observability' in state.metrics, "No observability data"
    observability = state.metrics['observability']
    assert observability['total_duration_seconds'] > 0, "Invalid duration"
    print(f"‚úÖ Test 3 passed (Duration: {observability['total_duration_seconds']:.1f}s)\n")

    print("üéâ All tests passed!\n")
    return True

# ============================================================================
# EXECUTION
# ============================================================================

if __name__ == "__main__":
    # Main execution block.
    #
    # Uncomment the desired call to run:
    # - main(): full demonstration
    # - run_tests(): automated tests

    # Patch asyncio so asyncio.run() can be used in Jupyter/notebook environments.
    import nest_asyncio
    nest_asyncio.apply()

    # Full demonstration; the resulting state is kept for the optional steps below.
    result_state = asyncio.run(main())

    # Optional: automated tests
    # test_results = asyncio.run(run_tests())

    # Optional: export the results
    # export_state_to_json(result_state, "literature_review_results.json")

    # Completion banner followed by the deployment steps, one item per line.
    print(
        "\n‚úÖ Notebook execution complete!",
        "\nTo deploy this system:",
        "1. Save this notebook as agent.py",
        "2. Create requirements.txt with dependencies",
        "3. Run: adk deploy agent_engine --project=YOUR_PROJECT --region=us-central1 .",
        sep="\n",
    )

# Load confirmation and a short usage hint, one item per output line.
print(
    "\n‚úÖ Literature Review Agent System - Complete Implementation Loaded",
    "üìö Ready to generate comprehensive literature reviews!",
    "\nUsage:",
    "  system = LiteratureReviewSystem()",
    "  state = await system.generate_review('Your Research Topic')",
    "  system.print_summary(state)",
    sep="\n",
)

📦 Installing required packages...
This may take 2-3 minutes on first run.

✅ Detected Google Colab environment

🔧 Installing google-adk and dependencies...
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 21.0 MB/s eta 0:00:00
✅ Installation complete!
📦 Installed packages:
   • google-adk (Agent Development Kit)
   • google-genai (Gemini API)
   • scikit-learn (ML algorithms)
   • numpy (Numerical computing)
   • reportlab (PDF generation)

🎉 READY TO PROCEED
Next step: Run the API key configuration cell below

✅ Running in Google Colab environment
✅ API key loaded from Colab Secrets
🔑 Key preview: AIzaSyDcTv...dUDo

🎉 AUTHENTICATION COMPLETE
✅ Google API Key: Configured
✅ Backend: Gemini API (Google AI Studio)

✅ All imports completed successfully
✅ Data structures defined
✅ Custom function tools defined
✅ R

  resumability_config=ResumabilityConfig(is_resumable=True)



📚 LITERATURE REVIEW GENERATION SUMMARY

🎯 Topic: Machine Learning Applications in Healthcare Diagnostics
🆔 Run ID: 7002f13f-7ca9-4e1c-8c2e-0dcff0f8103d
👤 User ID: demo_user

📊 STATISTICS:
  • Papers Found: 10
  • Papers Downloaded: 10
  • Themes Identified: 3
  • Research Gaps: 2

🎨 THEMES IDENTIFIED:
  1. Theme 1: Machine Learning Applications in Healthcare Diagnostics Aspect 1
     Papers: 3
  2. Theme 2: Machine Learning Applications in Healthcare Diagnostics Aspect 2
     Papers: 5
  3. Theme 3: Machine Learning Applications in Healthcare Diagnostics Aspect 3
     Papers: 2

🔍 RESEARCH GAPS:
  1. [METHODOLOGICAL] Limited exploration of novel methods in Machine Learning Applications in Healthcare Diagnostics
  2. [EMPIRICAL] Lack of large-scale studies in Machine Learning Applications in Healthcare Diagnostics

📈 QUALITY EVALUATION:
  Overall Score: 0.57 (Grade: F)
  Breakdown:
    • coverage: 0.35
    • coherence: 0.86
    • writing_quality: 0

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

⬇️  Download started!


📄 SAMPLE REVIEW EXCERPT:
--------------------------------------------------------------------------------

# Literature Review: Machine Learning Applications in Healthcare Diagnostics

## Introduction

This comprehensive literature review examines the current state of research in Expanded analysis of Machine Learning Applications in Healthcare Diagnostics. 
Based on analysis of 10 papers across 3 major themes, this review 
identifies key trends, methodologies, and research gaps in the field.

## Overview of Key Papers

The reviewed literature spans from 2023 to 2024, 
representing work from leading researchers including Smith, J., Doe, A..

## Thematic Analysis

### Theme 1: Machine Learning Applications in Healthcare Diagnostics Aspect 1
Papers focusing on specific aspect of Machine Learning Applications in Healthcare Diagnostics

Papers in this theme: 3

### Theme 2: Machine Learning Applica...


🎨 DETAILED THEME ANALYSIS:

  Theme: Theme 1: Machine 