# Example DeepResearchAgent

In [32]:
import os
import time
from os.path import join, exists
from os import listdir, makedirs
from datetime import datetime
from google import genai
from google.genai import types
import requests
import json
from pydantic import BaseModel, Field
import asyncio
import nest_asyncio 
from crawl4ai import *

# Patch asyncio so that nested event loops are allowed. The notebook calls
# asyncio.run() from inside cells (see crawl_website), which would otherwise
# fail when an event loop is already running.
nest_asyncio.apply()

### Define Google Search API Function

The API key is stored in the environment variable `SERPER_API_KEY`.  
An account can be created [here](https://serper.dev/).


In [None]:
def get_google_search_results(query, num_results=10, timeout=30):
    """Query the Serper.dev Google Search endpoint.

    Args:
        query: Search string, sent as the ``q`` field of the JSON payload.
        num_results: Number of results requested (``num`` field).
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without it a stalled connection would block the notebook forever.

    Returns:
        The raw ``requests`` response object (call ``.json()`` on it), or
        ``None`` when ``SERPER_API_KEY`` is missing or empty in the environment.
    """
    api_key = os.environ.get("SERPER_API_KEY")

    if not api_key:
        print("SERPER_API_KEY not found in environment variables.")
        return None

    url = "https://google.serper.dev/search"
    payload = json.dumps({
        "q": query,
        "num": num_results,
    })
    headers = {
        'X-API-KEY': api_key,
        'Content-Type': 'application/json',
    }

    # An explicit timeout turns a hung request into an exception.
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)

    return response


### Search Webpages

In [None]:
topic = "Test-time compute and test-time training for Large Language Models."
response = get_google_search_results(topic)
# get_google_search_results returns None when SERPER_API_KEY is missing; stop
# here with a clear message instead of an AttributeError on `.json()`.
if response is None:
    raise RuntimeError("Search request was not sent: SERPER_API_KEY is not set.")
# Surface HTTP errors before trying to parse the body as search results.
response.raise_for_status()
# Convert to JSON
json_response = response.json()
print(json_response)

### Crawl Webpages

In [3]:
async def crawl_website_async(url_webpage):
    """Crawl one page with ``AsyncWebCrawler`` and return its markdown.

    Args:
        url_webpage: URL passed to the crawler's ``arun`` call.

    Returns:
        The ``markdown`` attribute of the crawl result.
    """
    async with AsyncWebCrawler() as web_crawler:
        crawl_result = await web_crawler.arun(url=url_webpage)
        page_markdown = crawl_result.markdown
        return page_markdown

def crawl_website(url_webpage):
    """Blocking wrapper: run ``crawl_website_async`` to completion.

    Args:
        url_webpage: URL of the page to crawl.

    Returns:
        Whatever ``crawl_website_async`` returns for that URL.
    """
    crawl_coroutine = crawl_website_async(url_webpage)
    return asyncio.run(crawl_coroutine)

In [None]:
query_folder = "test-time-compute"
# Create the output folder; exist_ok makes the cell safe to re-run.
makedirs(query_folder, exist_ok=True)

# Maps each result title to its search metadata plus the crawled markdown.
document_data = {}

for result in json_response['organic']:
    title = result['title']
    # NOTE(review): crawl_website presumably can return None/empty when a crawl
    # fails -- confirm. Fall back to "" so the file write below cannot raise
    # a TypeError.
    markdown = crawl_website(result['link']) or ""
    # Titles may contain path separators, which would make open() point into
    # a sub-directory that does not exist.
    filename = title.replace("/", "_").replace("\\", "_") + ".md"

    document_data[title] = {
        'topic': topic,
        'link': result['link'],
        # Optional fields: use .get so a result without them does not abort
        # the whole loop with a KeyError.
        'snippet': result.get('snippet'),
        'date': result.get('date'),
        'position': result.get('position'),
        'markdown': markdown,
        'filename': filename
    }

    # Explicit UTF-8: crawled pages can contain characters the platform
    # default encoding cannot represent.
    with open(join(query_folder, filename), "w", encoding="utf-8") as f:
        f.write(markdown)

### Create an Agent that rates the quality of generated content

In [25]:
# Model identifiers used by the generate_content calls in this notebook.
model_name = "gemini-2.0-flash-exp"
model_name_thinking = "gemini-2.0-flash-thinking-exp-01-21"

# Only run this cell when using the Gemini Developer API.
# NOTE(review): no credentials are passed, so the SDK presumably reads the API
# key from the environment -- confirm.
client = genai.Client()

In [28]:
# Structured-output schema passed as `response_schema` in rate_document.
# The Field descriptions are sent to the model, so they act as prompt text.
# NOTE(review): documented with `#` comments on purpose -- pydantic is expected
# to copy a class docstring into the generated schema; confirm before adding one.
class DocumentQuality(BaseModel):
    filename: str = Field(description="The name of the file")
    # All scores are integers constrained to the range 0..10 (ge=0, le=10).
    document_length: int = Field(description="The length of the document (0 to 10). Is the document short or long.", ge=0, le=10)
    relevance: int = Field(description="How relevant is the document with the main topic (0 to 10).", ge=0, le=10)
    document_quality: int = Field(description="Guess the quality of the document (0 to 10).", ge=0, le=10)
    # Description fixed: "current data" -> "current date" (the prompt supplies a Current Date).
    document_age: int = Field(description="The age of the document relative to the current date (0 to 10).", ge=0, le=10)
    additional_observations: str = Field(description="If you noticed something strange about the document. Write it in form of an instruction for another LLM agents.")

def rate_document(document):
    """Ask the model to rate one crawled document against its topic.

    Args:
        document: Dict with at least the keys ``topic``, ``date``, ``link``
            and ``markdown``.

    Returns:
        The same dict object, mutated in place: the raw ``generate_content``
        response is stored under ``document['response']``.
    """

    system_instruction = f"""
    You are an professional scientific journalist. 

    You will receive research related documents (markdown format). 
    Your goal is to estimate the relevance and quality of this document (based on a given topic).
    The document will later be used for writing a reasearch report/document.
    If the quality of the text isn't good, this will lead to an overall bad outcome of the report. 

    Topic: {document['topic']}    
    Current Date: {datetime.now().date()}
    Document Date: {document['date']}
    Link: {document['link']}
    """

    # Rate this document's own text. Reading the notebook-global `markdown`
    # would send whichever page the crawl loop processed last for every call.
    markdown_content = document['markdown']

    response = client.models.generate_content(
        model=model_name,
        contents=markdown_content,
        config=types.GenerateContentConfig(
            # Request JSON constrained to the DocumentQuality schema.
            response_mime_type="application/json",
            response_schema=DocumentQuality,
            system_instruction=system_instruction,
            temperature=0.3,
        ),
    )
    document['response'] = response
    return document

### Process Files (Quality Assessment)

In [31]:
# List the files already stored in the query folder, if it exists.
if exists(query_folder):
    # Get all files in folder
    files = listdir(query_folder)

for title, document in document_data.items():
    # `document` is a plain dict: attribute access (`document.filename`)
    # raises AttributeError, so use key access.
    print(f"File: {document['filename']}")
    document = rate_document(document)
    # Print the model response stored on this document, not the notebook-global
    # `response` left over from another cell.
    print(f"Reponse Text: {document['response'].text}")
    document_data[title] = document
    time.sleep(5) # sleep for 5 seconds (rate limit is 10 RPM)

File: Optimizing LLM Test-Time Compute Involves Solving a Meta-RL ....md


INFO:root:AFC is enabled with max remote calls: 10.


Reponse Text: {
"additional_observations": "The document is a blog post from CMU's Machine Learning blog, discussing a novel approach to optimizing LLM test-time compute using meta-RL. It provides a detailed explanation of the problem, the proposed solution, and the challenges involved. The post is well-structured, includes figures, and provides links to relevant resources. It also includes a citation section and social media sharing options.",
  "document_length": 8,
  "document_quality": 9,
  "filename": "test-time-compute/Optimizing LLM Test-Time Compute Involves Solving a Meta-RL ....md",
  "relevance": 10
}
File: Understanding Test-Time Compute: A New Mechanism Allowing AI ....md


INFO:root:AFC is enabled with max remote calls: 10.


Reponse Text: {
  "additional_observations": "The document provides a good overview of test-time compute, explaining its importance and how it works. However, it lacks specific technical details and could benefit from more concrete examples or research citations. The writing style is accessible to a general audience, but might need more depth for a scientific report.",
  "document_length": 7,
  "document_quality": 7,
  "filename": "test-time-compute/Understanding Test-Time Compute: A New Mechanism Allowing AI ....md",
    "relevance": 9
}
File: Train Less, Think More: Advancing LLMs Through Test-Time Compute.md


INFO:root:AFC is enabled with max remote calls: 10.


Reponse Text: {
"additional_observations": "The document is a well-written article explaining test-time compute for LLMs. It provides a good overview of the topic, including relevant research and examples. The use of analogies and clear explanations makes it accessible to a broad audience. The article also includes links to external resources and references, which is good for further exploration. The structure is logical, starting with an introduction to LLMs, moving to the problem of scaling, and then introducing test-time compute as a solution. The author also provides a good summary of the main ideas. The article is well-formatted and easy to read.",
"document_length": 8,
"document_quality": 9,
"filename": "test-time-compute/Train Less, Think More: Advancing LLMs Through Test-Time Compute.md",
"relevance": 9
}
File: Scaling LLM Test Time Compute.md


INFO:root:AFC is enabled with max remote calls: 10.


Reponse Text: {
"additional_observations": "The document is a well-structured blog post that provides a comprehensive overview of test-time compute for Large Language Models (LLMs). It includes clear explanations, relevant examples, and references to key research papers. The author also provides a good summary of the current state of the field and future directions. The document is well-written and easy to understand, making it a valuable resource for anyone interested in this topic. The document is very well structured and easy to follow. The author also provides links to the original papers, which is very helpful for further reading. The document is also very well formatted and easy to read. The author also provides links to the original papers, which is very helpful for further reading. The document is also very well formatted and easy to read.",
  "document_length": 9,
  "document_quality": 9,
  "filename": "test-time-compute/Scaling LLM Test Time Compute.md",
  "relevance": 10
}
File: Test Time Training Will Take LLM AI to the Next Level.md

INFO:root:AFC is enabled with max remote calls: 10.


Reponse Text: {
  "additional_observations": "The document is a blog post discussing test-time training for large language models. It includes technical details, performance metrics, and links to external sources. The writing style is journalistic and aims to inform a general audience about a complex topic.",
  "document_length": 7,
  "document_quality": 7,
  "filename": "test-time-compute/Test Time Training Will Take LLM AI to the Next Level.md",
  "relevance": 9
}
File: Test-Time Compute: The Next Frontier in AI Scaling - IKANGAI.md


INFO:root:AFC is enabled with max remote calls: 10.


Reponse Text: {
  "additional_observations": "The document is a well-structured article discussing test-time compute in AI, including its mechanisms, benefits, and implications. It includes an FAQ section and references, making it a comprehensive resource. The writing is clear and concise, suitable for a scientific audience.",
  "document_length": 8,
  "document_quality": 9,
  "filename": "test-time-compute/Test-Time Compute: The Next Frontier in AI Scaling - IKANGAI.md",
  "relevance": 10
}
File: Scaling LLM Test-Time Compute Optimally can be More Effective ....md


INFO:root:AFC is enabled with max remote calls: 10.


Reponse Text: {
"additional_observations": "The document is a research paper on scaling test-time compute for Large Language Models (LLMs), focusing on improving performance on math reasoning tasks. It introduces a 'compute-optimal' scaling strategy based on question difficulty, comparing different approaches for scaling test-time compute, and analyzing the trade-offs between test-time compute and pre-training compute. The paper is well-structured, with clear sections, figures, and tables. It includes detailed experimental setups, analysis, and results. The authors also discuss limitations and future work.",
"document_length": 8,
"document_quality": 9,
"filename": "test-time-compute/Scaling LLM Test-Time Compute Optimally can be More Effective ....md",
"relevance": 10
}
File: Scaling test-time compute - a Hugging Face Space by ....md


INFO:root:AFC is enabled with max remote calls: 10.


Reponse Text: {
"additional_observations": "The document appears to be a Hugging Face Space page, not a research document. It contains metadata and interactions, not research content. It is not suitable for a research report.",
"document_length": 1,
"document_quality": 1,
"filename": "test-time-compute/Scaling test-time compute - a Hugging Face Space by ....md",
"relevance": 1
}
File: What is Test Time Compute? | CSA.md


INFO:root:AFC is enabled with max remote calls: 10.


Reponse Text: {
  "additional_observations": "The document is a blog post from the Cloud Security Alliance (CSA) website. It provides a detailed explanation of Test Time Compute (TTC) for Large Language Models (LLMs). The content is well-structured, covering key aspects, advanced strategies, implications, and future directions of TTC. It also includes a code example and references. The document is informative and relevant to the topic of test-time compute and test-time training for Large Language Models.",
  "document_length": 7,
  "document_quality": 9,
  "filename": "test-time-compute/What is Test Time Compute? | CSA.md",
  "relevance": 10
}


In [None]:
# Send a free-form question to the thinking model and print the answer text.
response = client.models.generate_content(
    contents="Is it difficult to find large prime numbers? If yes, why?",
    model=model_name_thinking,
)
print(response.text)

In [None]:
# System prompt that asks the model to mark up its answer with `rich` tags.
system_instruction = """You are an AI assistant designed to produce output that is visually appealing and easily readable in a terminal. When formatting your responses, utilize the syntax of the Python `rich` library. This involves using square brackets to enclose formatting tags.
        Here are some examples of how to apply formatting:

        * **Emphasis:** Instead of "This is important", output "[bold]This is important[/]".
        * **Headers/Titles:** Instead of "Section Title:", output "[bold blue]Section Title:[/]".
        * **Warnings:** Instead of "Warning!", output "[bold red]Warning![/]".
        * **Success Messages:** Instead of "Operation successful.", output "[green]Operation successful.[/]".
        * **Lists:** You can use colors for list items like "[cyan]*[/] Item 1".

        Always use the `rich` library's syntax for formatting terminal output to enhance readability."""

# Query the thinking model with the system prompt and a low temperature.
response = client.models.generate_content(
    contents="Show me the proof for the euler identity?",
    model=model_name_thinking,
    config=types.GenerateContentConfig(
        temperature=0.3,
        system_instruction=system_instruction,
    ),
)
print(response.text)

In [None]:
# Upload a local file, then ask the model to summarize it.
# NOTE(review): "a11.text" is resolved relative to the working directory --
# confirm the file name/extension is intended.
file = client.files.upload(path="a11.text")
response = client.models.generate_content(
    contents=["Summarize this file", file],
    model=model_name,
)
print(response.text)

In [26]:
# Stub tool for the function-calling demo: it always answers "sunny".
# NOTE(review): this function is handed to the model via `tools=[...]`; the SDK
# presumably derives the tool declaration from the signature and docstring, so
# the docstring text is model-facing -- confirm before rewording it.
def get_current_weather(location: str) -> str:
    """Returns the current weather.

    Args:
      location: The city and state, e.g. San Francisco, CA
    """
    # `location` is ignored; the result is hard-coded.
    return "sunny"


# Ask a question the model can answer by calling the registered Python tool.
response = client.models.generate_content(
    contents="What is the weather like in Boston?",
    model="gemini-2.0-flash-exp",
    config=types.GenerateContentConfig(
        tools=[get_current_weather],
    ),
)

print(response.text)

  Expected `enum` but got `str` with value `'STRING'` - serialized value may not be as expected
  return self.__pydantic_serializer__.to_python(


The weather in Boston is sunny.



In [34]:
# Response schema for the structured-output demo: the model is asked to return
# JSON with exactly these fields.
# NOTE(review): documented with `#` comments on purpose -- pydantic is expected
# to copy a class docstring into the generated schema; confirm before adding one.
class CountryInfo(BaseModel):
    name: str
    population: int
    capital: str
    continent: str
    # NOTE(review): no unit or currency is specified for gdp -- confirm what is expected.
    gdp: int
    official_language: str
    # Total area in square miles, per the field name.
    total_area_sq_mi: int


# Request a JSON answer constrained to the CountryInfo schema.
response = client.models.generate_content(
    contents="Give me information for the United States.",
    model="gemini-2.0-flash-exp",
    config=types.GenerateContentConfig(
        response_schema=CountryInfo,
        response_mime_type="application/json",
    ),
)
print(response.text)

{
"capital": "Washington, D.C.",
"continent": "North America",
"gdp": 25460000000000,
"name": "United States",
"official_language": "English",
"population": 331002651,
"total_area_sq_mi": 3796742
}
