In [1]:
from google.adk.agents import LlmAgent
from google.adk.models.google_llm import Gemini
from google.adk.runners import InMemoryRunner
from google.genai import types
from google.adk.tools.tool_context import ToolContext
from pathlib import Path

In [2]:
# Retry policy for HTTP calls; passed to the Gemini model as `retry_options`.
retry_config = types.HttpRetryOptions(
    http_status_codes=[429, 500, 503, 504],  # Only these HTTP errors are retried
    initial_delay=1,  # Starting delay (presumably seconds -- confirm in google-genai docs)
    exp_base=7,  # Delay multiplier
    attempts=5,  # Maximum retry attempts
)

In [3]:
def read_md_file(tool_context: ToolContext, filepath: str) -> dict:
    """
    Reads a UTF-8 text file (for example a Markdown post) and returns its content.

    Errors are never raised to the caller; they are reported in the returned
    dictionary so the agent can relay the reason.

    Args:
        tool_context: The ADK tool context. Not used by this function; kept so
            the tool signature stays unchanged.
        filepath: Path of the file to read. Relative paths are resolved against
            the current working directory.

    Returns:
        A dictionary describing the outcome:
            - on success: {"status": "success", "content": <file text>}
            - on failure: {"status": "error", "message": <reason>}
    """
    # An empty path would turn into Path("."), i.e. the current directory, and
    # produce a confusing "Is a directory" error; None would raise TypeError.
    if not filepath:
        return {"status": "error", "message": "No file path was provided."}

    try:
        content = Path(filepath).read_text(encoding="utf-8")
    except FileNotFoundError:
        return {"status": "error", "message": f"File not found: {filepath}"}
    except IsADirectoryError:
        return {"status": "error", "message": f"Path is a directory, not a file: {filepath}"}
    except UnicodeDecodeError:
        return {"status": "error", "message": f"File is not valid UTF-8 text: {filepath}"}
    except Exception as e:
        # Deliberately broad: any other failure (permissions, invalid path
        # value, ...) is reported back as an error dictionary, not raised.
        return {"status": "error", "message": str(e)}
    return {"status": "success", "content": content}

In [4]:
# Agent that reads a file through the `read_md_file` tool and summarizes it.
summarizer = LlmAgent(
    name="summarizer",
    instruction="Your primary function is to read files using the provided tool and summarize their content.",
    tools=[read_md_file],
    model=Gemini(
        retry_options=retry_config,
        model="gemini-2.5-flash-lite",
    ),
)

In [5]:
# Wrap the summarizer agent in an in-memory runner and send it one prompt.
runner = InMemoryRunner(agent=summarizer)
# Top-level `await` works in the notebook kernel but not in a plain Python script.
# The prompt names a relative file path; presumably the agent passes it unchanged
# to `read_md_file`, so it resolves against the kernel's working directory -- confirm.
response = await runner.run_debug(
    "Read the file from filepath post_content/Ahead of AI/beyond-standard-llms.md and summarize its content"
)


 ### Created new session: debug_session_id

User > Read the file from filepath post_content/Ahead of AI/beyond-standard-llms.md and summarize its content




summarizer > The file discusses various alternative architectures to standard autoregressive transformer Large Language Models (LLMs).

Here's a summary of the key alternatives:

1.  **Linear Attention Hybrids:** These models aim to improve efficiency by using linear attention mechanisms instead of the standard quadratic attention, which can be computationally expensive for long sequences. Examples include Qwen3-Next and Kimi Linear, which combine linear attention (like Gated DeltaNet) with standard attention layers in a hybrid approach. While they offer efficiency gains (reduced FLOPs and KV memory), they may involve added complexity and a slight trade-off in accuracy.

2.  **Text Diffusion Models:** Inspired by diffusion models used in image generation, these models offer a different approach to text generation. Their main advantage is the potential for parallel token generation, which could be more efficient than the sequential generation of autoregressive LLMs. However, they face c