In [11]:
from agent_utils import *
from agent_tools import *

# Project configuration object; only FLASH2_MODEL is read from it in this cell.
config = Config()

# Log to a file with custom timestamp format
# NOTE(review): re-running this cell calls logger.add() again; if `logger` is a
# loguru logger that registers a second sink and duplicates every line - confirm.
logger.add("logs/chain_of_thougth_agent_system.log", format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}")
# Model handle built from the configured model name. It is not referenced again in
# the visible cells - presumably consumed elsewhere; verify before removing.
model = GeminiModel(config.FLASH2_MODEL)

# Configure Logfire with the project's scrubbing callback, then instrument httpx
# so outgoing HTTP requests show up in the Logfire output.
logfire.configure(scrubbing=logfire.ScrubbingOptions(callback=scrubbing_callback))
logfire.instrument_httpx()



Logfire project URL: https://logfire.pydantic.dev/dplaia/deepresearchagent


In [17]:
# Shared agent instances used by the helper coroutines defined below.
# ChatHandler() is constructed without arguments, so credentials and model choice
# must come from its own defaults (NOTE(review): confirm in agent_utils).
reasoningAgentChat = ChatHandler()
# Awaited once per query by search_queries() to run a single search.
basicSearchAgent = BasicSearchAgent()

# Result type handed to `searchQueryAgent` (see the BaseAgent(...) call below).
# Every field accepts None, but none declares a default value.
# NOTE(review): documented with `#` comments on purpose - a class docstring may end
# up in the schema shown to the model; confirm before converting.
class SearchQueryAgentResponse(BaseModel):
    # Queries intended for a regular Google search.
    google_search_queries: list[str] | None = Field(description="The extracted google search queries (if available).")
    # Queries intended for Google Scholar.
    google_scholar_queries: list[str] | None = Field(description="The extracted google scholar queries (if available).")
    # Free-text summary; read as the document summary in the "Research PUFs" section.
    text_summary: str  | None = Field(description="Extract the text summary here (if available).")

# Extraction agent: it is awaited with the reasoning chat's free-form answer and its
# `.data` attribute is returned by get_search_queries_for_document() and
# get_search_query_help(). Presumably `.data` is a SearchQueryAgentResponse - verify
# against BaseAgent in agent_utils.
searchQueryAgent = BaseAgent(SearchQueryAgentResponse, 
        system_prompt="""Your goal is to extract search queries (e.g., google search, google scholar, etc.) 
        that are mention in a text.""")

# One (URL number, rating) pair.
class URLRating(BaseModel):
    # Non-negative number identifying a URL (validated with ge=0).
    url_number: int = Field(description="The URL number", ge=0)
    # Rating constrained to the inclusive range 0..100 (ge=0, le=100).
    rating: int = Field(description="The rating (value between 0 and 100) for the URL", ge=0, le=100)

# Result type handed to `urlRatingAgent`: the full list of URL ratings.
class URLRatingAgentResponse(BaseModel):
    # One URLRating per rated URL; the field is required (no default).
    url_info: list[URLRating] = Field(description="A list with URLs with corresponding ratings.")

# Extraction agent that turns the rating text produced in rate_search_results()
# into a URLRatingAgentResponse. The format described here uses parentheses because
# that is the format rate_search_results() asks the reasoning model to emit; the
# earlier wording described curly braces, which never appear in that text.
urlRatingAgent = BaseAgent(URLRatingAgentResponse, 
        system_prompt="""Your goal is to extract URL number and the corresponding rating. 
        The input text has the following format: (1, 60), (2,75), (3, 35) .... 
        with  
        (URL number, Rating)""")

def get_document(folder_name: str = 'input_files/',
                 main_filename: str = "Vorhabenbeschreibung_NeuroTrust.txt") -> str:
    """Return the text of the main input document, creating its text cache on first use.

    If ``main_filename`` already exists inside ``folder_name`` its content is
    returned as-is. Otherwise the files in ``folder_name`` are tried in sorted
    order, the first one that MarkItDown converts without raising is used, and its
    text is written to ``main_filename`` so later calls skip the conversion.

    Args:
        folder_name: Directory holding the input files; created if missing.
        main_filename: Name of the cached text file inside ``folder_name``.

    Returns:
        The document text, or ``""`` when no file could be converted.
    """
    # Create directory if it doesn't exist (exist_ok makes a prior check unnecessary).
    makedirs(folder_name, exist_ok=True)

    main_input_file = join(folder_name, main_filename)

    if exists(main_input_file):
        with open(main_input_file, "r", encoding="utf-8") as f:
            return f.read()

    print("file does not exists")

    doc = ""
    # sorted(): listdir() order is arbitrary, which made the chosen file
    # machine-dependent when the folder holds more than one candidate.
    for filename in sorted(listdir(folder_name)):
        filepath = join(folder_name, filename)

        if not os.path.isfile(filepath):
            continue

        try:
            text = MarkItDown().convert(filepath).text_content
        except Exception as e:
            # Best effort: report the failure and try the next file.
            print(f"Error processing {filepath}: {str(e)}")
            continue

        print(f"Filename: {filename}")
        print(f"Number of Words in the document: {word_count(text)}")
        doc = text
        # Only one document is ever used, so stop instead of converting the rest.
        break

    # Never cache an empty result: an empty cache file would be returned by every
    # later call and hide documents that are added to the folder afterwards.
    if doc:
        with open(main_input_file, "w", encoding="utf-8") as f:
            f.write(doc)

    return doc

async def get_search_queries_for_document(doc: str):
    """Derive search-query suggestions for a document.

    The "search_query_recommendation" system prompt is extended with the document
    text and sent to the reasoning chat; the free-form answer is then passed to
    the query-extraction agent.

    Args:
        doc: Full text of the input document.

    Returns:
        The ``data`` attribute of the extraction agent's result.
    """
    prompt = get_system_prompt("search_query_recommendation") + f"\n    # Input Document: \n\n    {doc}\n    "

    reasoning_text = await reasoningAgentChat(prompt)
    extraction = await searchQueryAgent(
        f"Please extract all search queries from this text: {reasoning_text}"
    )
    return extraction.data

async def get_search_query_help(query: str):
    """Ask the reasoning chat for additional Google queries for a user search query.

    The reasoning chat receives instructions plus the user query; its free-form
    answer is then passed to the query-extraction agent.

    Args:
        query: The user's original search query (plain text).

    Returns:
        The ``data`` attribute of the extraction agent's result.
    """
    # Build the prompt in its own variable. The earlier `query += f"...{query}..."`
    # glued the raw user query in front of the instructions in addition to
    # interpolating it under "# User Search Query:", so the model saw it twice and
    # the prompt started with an unlabelled fragment.
    prompt = f"""
    We have to improve the search results given a user search query. 
    Please think of multiple google search queries (plain text) that would increase the quality of the search results, when we combine all the search results.

    # Desired output format:

    Google Search Queries:
    - Search query 1
    - Search query 2
    - etc.

    # User Search Query:
    {query}

    """

    text_response = await reasoningAgentChat(prompt)
    queries = await searchQueryAgent(f"Please extract all search queries from this text: {text_response}")

    return queries.data

async def rate_search_results(content_text: str):
    """Have the reasoning chat rate numbered search results, then parse the ratings.

    Args:
        content_text: Search results as text in which every URL carries a number
            in the form ``Link [<n>]: <url>``.

    Returns:
        The ``data`` attribute of the URL-rating agent's result.
    """
    query = f"""
    Please rate each search result based on relevance (value between 0 and 100).
    Each search result has an URL with an URL number, 
    for example: "Link [143]: https...", where 143 is the URL number.

    Your generated output format should look like this:

    (1, 60), (2,75), (3, 35) ....
    
    with 
    
    (URL number, Rating)  
    
    Here are the search results based on your search query suggestions:
    
    {content_text}
    """
    # `reasoningAgent` is not defined anywhere in this notebook; the reasoning
    # chat instance is called `reasoningAgentChat`, as used by the sibling helpers.
    text_response = await reasoningAgentChat(query)

    url_info = await urlRatingAgent(f"Please extract the url info in this text: {text_response}")

    return url_info.data

async def search_queries(search_queries: list[str]) -> list[str]:
    """Run every query through the basic search agent, one after another.

    Args:
        search_queries: Queries to execute, in order.

    Returns:
        The agent responses in the same order as the queries.
    """
    async def _run_single(text: str):
        # Announce the query before awaiting it so progress is visible.
        print(f"Search query: {text}")
        return await basicSearchAgent(text)

    # Sequential on purpose: each search finishes before the next one starts.
    return [await _run_single(text) for text in search_queries]


## Use generic Google/Perplexity Search

In [13]:
user_search_query = "Find FPGA platforms that can run pretrained transformer models up to 100 million parameters (inference only)."
queries = await get_search_query_help(user_search_query)

results = await search_queries(queries.google_search_queries)


INFO:root:AFC is enabled with max remote calls: 10.
/home/dplaia/Projekte/deepresearchagent/.venv/lib/python3.13/site-packages/google/genai/types.py:2769: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  for field_name, field_value in part.dict(


11:20:05.410 agent run prompt=Please extract all search queries from this text: Google Searc... inference toolchain
- FPGA acceleration transformer inference
11:20:05.411   preparing model and tools run_step=1
11:20:05.413   model request
11:20:05.414     POST /v1beta/models/gemini-2.0-flash:generateContent


INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent "HTTP/1.1 200 OK"


11:20:06.565   handle model response


In [None]:
# Collect one labelled section per search response, then join them once instead
# of growing the string inside the loop.
sections = []

for k, result in enumerate(results):
    sections.append(f"\n    # Result query {k+1}:\n\n    {result}\n\n\n    ")

result_text = "".join(sections)

# To inspect the assembled text: console_print(result_text)

In [36]:
response = await reasoningAgentChat(f"""
Please generate a high quality report text based on the search results (output in Markdown format).
Add a reference section with citations at the end.
Format:
# References
[1] http...
[2] etc.

Add the correct citations in the writen report. 

The text should be as relevant to the user search query as possible. 

# User Search Query:
{user_search_query} 

#Search Results (text):
{result_text}
""")

INFO:root:AFC is enabled with max remote calls: 10.
/home/dplaia/Projekte/deepresearchagent/.venv/lib/python3.13/site-packages/google/genai/types.py:2769: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  for field_name, field_value in part.dict(


In [None]:
# Show the generated report (the reasoning chat's answer from the previous cell).
console_print(response)

## Research PUFs

In [None]:
doc = get_document()
if not (queries := load_data("search_query_suggestions")):
    queries = await get_search_queries_for_document(doc)
    save_data(queries, "search_query_suggestions")

google_search_queries = queries.google_search_queries
google_scholar_queries = queries.google_scholar_queries
document_summary = queries.text_summary
console_print(document_summary)

In [None]:
if not (search_results := load_data("google_search_results")):
    search_results = {}

    for search_query in google_search_queries:
        print(search_query)
        results = await google_general_search_async(search_query)
        search_results[search_query] = results
    
    save_data(search_results, "google_search_results")

if not (scholar_results := load_data("google_scholar_results"))
    scholar_results = {}

    for search_query in google_scholar_queries:
        print(search_query)
        results = await google_scholar_search_async(search_query)
        scholar_results[search_query] = results
    
    save_data(scholar_results, "google_scholar_results")

In [60]:
def _format_result_entry(entry: dict, link_number: int) -> list[str]:
    """Return the text lines for one search hit, tagged with its link number.

    ``title``, ``link`` and ``snippet`` are required keys; ``date`` and
    ``attributes`` are added only when present. The trailing "\\n" element
    separates entries once the lines are joined.
    """
    lines = [
        f"Title: {entry['title']}",
        f"Link [{link_number}]: {entry['link']}",
        f"Snippet: {entry['snippet']}",
    ]
    if 'date' in entry:
        lines.append(f"Date: {entry['date']}")
    if 'attributes' in entry:
        lines.append(f"Attributes: {entry['attributes']}")
    lines.append("\n")
    return lines


# link number -> URL; numbering runs continuously across both result sets so a
# number found in the text identifies exactly one URL.
links = {}
link_counter = 1

content = ["Google Search Results:"]

for query, result in search_results.items():
    content.append(f"\nFor search query: '{result['searchParameters']['q']}'.")

    if 'answerBox' in result:
        content.append(f"\nAnswerBox Text: {result['answerBox']['snippet']}")
    content.append("\nOrganic results:")

    for organic in result['organic']:
        content.extend(_format_result_entry(organic, link_counter))
        links[link_counter] = organic['link']
        link_counter += 1

content2 = ["\nGoogle Scholar Results:"]

for query, result in scholar_results.items():
    # Each scholar result is a list of entries.
    content2.append(f"\nFor search query: '{query}'.\n")
    for entry in result:
        content2.extend(_format_result_entry(entry, link_counter))
        links[link_counter] = entry['link']
        link_counter += 1

# Join both sections. Previously the scholar text replaced the Google text, so
# `content_text` lacked every link numbered before the scholar section although
# `links` still contained them.
content_text = "\n".join(content + content2)