In [16]:
import os
import time
from os.path import join, exists
from os import listdir, makedirs
from datetime import datetime
from google import genai
from google.genai import types
from openai import OpenAI
from openai import AsyncOpenAI
import requests
import json
from pydantic import BaseModel, Field
from crawl4ai import *
from pydantic_ai import Agent, RunContext
from pydantic_ai.models.gemini import GeminiModel
from dataclasses import dataclass
from rich import print as rprint
from rich.console import Console
from rich.markdown import Markdown
from queue import Queue, Empty
import asyncio
import nest_asyncio 
# Add this line to allow nested event loops
nest_asyncio.apply()

from agent_tools import *
from agent_utils import *

from loguru import logger

config = Config()

console = Console()

# Log to a file with custom timestamp format.
# Re-running this cell must not stack a second file sink (every message would
# then be written to agent_output.log more than once), so the sink registered
# by a previous run of this cell is removed before a new one is added.
_previous_sink_id = globals().get("_log_sink_id")
if _previous_sink_id is not None:
    try:
        logger.remove(_previous_sink_id)
    except ValueError:
        # The sink no longer exists (e.g. it was removed by another cell).
        pass
_log_sink_id = logger.add("agent_output.log", format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}")

# Model instance shared by every agent defined in this notebook.
model = GeminiModel(config.FLASH2_MODEL)


In [3]:
class Answer(BaseModel):
    # Structured result returned by the question-answering agent for a single question.
    # Documented with comments rather than a docstring so that the schema
    # description derived from this class stays exactly as it was.

    answer: str = Field(
        description="The answer for the question (if available).",
    )
    notes: list[str] = Field(
        description="Add study notes or pieces of text that helps other assistants to answer the question in the future. The better the quality of the notes, the more likely it is to get better responses in the future.",
    )
    index: int = Field(
        description="Equals question number or index.",
    )
    # Self-assessed quality score, constrained to the inclusive range 0..10.
    rating: int = Field(
        description="Rate the quality of your response between 0 and 10",
        ge=0,
        le=10,
    )

class Question(BaseModel):
    # One question together with the index under which it is stored in Deps.questions.

    question: str = Field(
        description="The question that needs to be answered.",
    )
    index: int = Field(
        description="The question number or index.",
    )

class Deps(BaseModel):
    # Dependency object handed to the agent: the pool of questions still to be answered.

    questions: dict[int, Question] = Field(
        description="A dict with questions that need to be answered (keys: question number, values: question).",
    )

    def get_all_questions(self) -> dict:
        """Return the mapping of question index to Question (the stored dict itself, not a copy)."""
        return self.questions

    def remove_question(self, idx: int) -> bool:
        """Delete the question stored under ``idx``.

        Returns:
            True if a question was removed, False if ``idx`` was not present.
        """
        if idx in self.questions:
            del self.questions[idx]
            return True

        return False

# Sample questions the agent will work through.
questions_list = [
    "Explain the concept of 'artificial intelligence' in a way that a 10-year-old could understand.",
    "Compare and contrast the philosophies of Plato and Aristotle, highlighting their key differences and similarities in their views on ethics and knowledge.",
    "Write a short poem about the feeling of walking through a forest in autumn.",
    "If someone is planning a trip to Italy and enjoys art and history, what are three cities you would recommend they visit and why?",
    "Summarize the main arguments for and against universal basic income.",
]

# Index each question by its position in the list; the same number is stored
# on the Question itself so the agent can report which one it answered.
question_dict = {
    position: Question(question=text, index=position)
    for position, text in enumerate(questions_list)
}

deps = Deps(questions=question_dict)

# Instructions for the question-answering agent: fetch the open questions via
# the tool, pick exactly one, and answer it.
system_prompt="""
You are a helpful assistant.

- First get all questions (use tools)
- Then, choose only one question
- Finally answer the one question
"""

# `deps_type` takes the dependency *class* (Deps), not an instance; the actual
# instance is supplied per run via `agent.run(..., deps=deps)`.
agent = Agent(
    model,
    deps_type=Deps,
    result_type=Answer,
    system_prompt=system_prompt)

@agent.tool
def get_all_questions(ctx: RunContext[Deps]) -> dict:
    """Return all questions that still need to be answered, keyed by question index."""
    # The docstring above doubles as the tool description shown to the model.
    return ctx.deps.get_all_questions()



In [None]:
saved_answered = []

for k in range(5):
    
    result = await agent.run('Get all questions and give an answer to only one question.', deps=deps)
    console.print(Markdown(result.data.answer))
    logger.info(f"LLM Output: {result.data.answer}")

    saved_answered.append(result.data)

    if result.data.rating >= 8:
        # remove question from deps
        deps.remove_question(result.data.index)

    time.sleep(1)

## Google Search Agent

#### Use Google Search to extract some useful links first

In [17]:
# Instructions for the link-finding agent; note this rebinds `system_prompt`,
# which is consumed immediately below when the agent is built.
system_prompt = """
You are a search expert that has access to a search tool/function.
Use the search tool multiple times (if necessary) to find relevant links that might be useful for a given user prompt or search query.
You can add as many links to the output list as you like (but not more than 10).
"""


class SearchResponse(BaseModel):
    # Structured output of the search agent: the URLs it judged relevant.

    links: list[str] = Field(
        description="A list with relevant links (collection of links).",
    )


search_agent = Agent(
    model,
    system_prompt=system_prompt,
    result_type=SearchResponse,
)


@search_agent.tool_plain
async def google_search(search_query: str) -> dict:
    """Use the Google Search API to find results given a search query."""
    # Thin wrapper: delegate to the async search helper and hand its result back unchanged.
    search_results = await google_general_search_async(search_query)
    return search_results

In [None]:
search_query = 'How suited is the H100 GPU from NVIDEA for mixture of experts LLMs/Transformer models?'
result = await search_agent.run(search_query)
rprint(result.data.links)


#### Use web crawling to get the content of the links

In [None]:
page_content_markdown = {}
for link in result.data.links:
    print(f"Link: {link}")
    markdown = await crawl4ai_website_async(link)
    page_content_markdown[link] = markdown


In [39]:
# Build one section per crawled page. Each section is headed by a 1-based
# reference number and the page's URL so the report can cite it as [n].
source_sections = [
    f"From link ([{ref_number}] {url}):\n\n{content}\n\n"
    for ref_number, (url, content) in enumerate(page_content_markdown.items(), start=1)
]
combined_markdown = "".join(source_sections)

# Wrap the sections in the final prompt: the user's query first, then the sources.
combined_markdown = f""" Here is the search query of the user: 
{search_query}

Here is some content that might be useful to answer the user query:

{combined_markdown}
"""

#### Response Agent or Summary Agent

In [41]:
# Instructions for the report-writing agent. The "From link ([n] url)" wording
# matches the section headers used when the crawled pages are combined into the
# prompt, so the model can map each source text to a citation number.
system_prompt = """
You are an expert technical writer (articles, blogs, books, etc.).

After receiving a user query and some files, your goal is to write a report about the user query.
This written report should be technically detailed but comprehensible to general readers.

Please use references in the report (e.g. [1]). You can find the link of a given input text above the text with "From link ([1] http ...)".

Always include a References section at the end of the report.

Write the output strictly in Markdown format.
"""

# Plain-text result: the agent returns the Markdown report as a single string.
summary_agent = Agent(
    model,
    result_type=str,
    system_prompt=system_prompt)

In [42]:
# Ask the summary agent to write the report from the assembled prompt.
# NOTE: this rebinds `result`, which until now held the search agent's output
# (read via `result.data.links` by the crawl cell); after this cell `result.data`
# is the report string, so re-running the crawl cell requires re-running the
# search cell first.
result = await summary_agent.run(combined_markdown)

INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent "HTTP/1.1 200 OK"


In [43]:
# Render the summary agent's output (a string, per result_type=str) as Markdown in the cell output.
console.print(Markdown(result.data))