In [3]:
import os
import time
from os.path import join, exists
from os import listdir, makedirs
from datetime import datetime
from google import genai
from google.genai import types
from openai import OpenAI
from openai import AsyncOpenAI
import requests
import json
from pydantic import BaseModel, Field
from crawl4ai import *
from pydantic_ai import Agent, RunContext
from pydantic_ai.models.gemini import GeminiModel
from dataclasses import dataclass
from rich import print as rprint
import asyncio
import nest_asyncio 
# Add this line to allow nested event loops
nest_asyncio.apply()

In [1]:
def deepseek_R1_call(user_input: str) -> dict:
    """
    Send one user message to the DeepSeek reasoning model and collect the streamed reply.

    Args:
        user_input (str): Text sent as the single user message of the conversation.

    Returns:
        dict: Two string entries:
            "reasoning_content": all streamed reasoning deltas joined together.
            "final_content": all streamed answer deltas joined together.
    """
    # The API key is read from the environment (DEEPSEEK_API_KEY), never hard-coded.
    deepseek_client = OpenAI(
        api_key=os.getenv("DEEPSEEK_API_KEY"),
        base_url="https://api.deepseek.com"
    )

    DEEPSEEK_MODEL = "deepseek-reasoner"

    stream = deepseek_client.chat.completions.create(
        model=DEEPSEEK_MODEL,
        messages=[{"role": "user", "content": user_input}],
        stream=True
    )

    reasoning_parts = []
    answer_parts = []

    for chunk in stream:
        # A chunk with an empty `choices` list carries no delta; indexing [0]
        # on it would raise IndexError, so skip it.
        if not chunk.choices:
            continue

        delta = chunk.choices[0].delta

        # getattr with a default: a delta object without one of these
        # attributes must not abort the whole stream.
        reasoning_piece = getattr(delta, "reasoning_content", None)
        answer_piece = getattr(delta, "content", None)

        # Two independent checks so that a chunk carrying both fields
        # contributes to both outputs.
        if reasoning_piece:
            reasoning_parts.append(reasoning_piece)
        if answer_piece:
            answer_parts.append(answer_piece)

    return {
        "reasoning_content": "".join(reasoning_parts),
        "final_content": "".join(answer_parts)
    }

# Request payload for `ask_reasoning_agent`. The three fields are interpolated
# verbatim into the prompt that is sent to the reasoning model.
class ReasoningModelQuery(BaseModel):
    # What the reasoning model is asked to do ("Agent Instruction" section of the prompt).
    instruction: str = Field(description="The instruction you want give the reasoning model.")
    # Why the calling agent is asking ("Motivation of the agent" section of the prompt).
    motivation: str = Field(description="The main reason why you ask the reasoning model.")
    # Supporting material, inserted under "Additional Context" in the prompt.
    additional_context: str = Field(description="Add the full context here (e.g., crawled website/markdown text).")

def ask_reasoning_agent(query: ReasoningModelQuery) -> dict:
    """
    Ask a reasoning agent to help you solve problems.

    Args:
        query (ReasoningModelQuery): Contains information about the motivation for the request, an instruction to follow and the additional context (e.g., document, article, PDF, crawled website etc.)

    Returns:
        dict: The answer of the AI model with two string entries: "reasoning_content" (the reasoning thoughts) and "final_content" (the final response).
    """

    # The prompt is sent verbatim to the reasoning model; the three fields of
    # `query` are interpolated into their own labelled sections.
    prompt = f"""
    You are working together with other LLM-based agents to answer user questions.
    Agents will ask you questions to help them making decisions.
    You are a reasoning model and know how to resolve problems.
    Take your time thinking about the query of the agent.

    Motivation of the agent:
    {query.motivation}

    Agent Instruction:
    {query.instruction}

    Additional Context:
    {query.additional_context}

    """
    return deepseek_R1_call(prompt)

In [6]:
# One entry of the shared question queue: the question itself plus the notes
# and the completeness rating collected for it so far.
class OpenQuestion(BaseModel):
    # Position of the question in the queue (>= 0); `get_open_questions`
    # overwrites it with the list index on every call.
    question_number: int = Field(description="The question number. Each number has a unique number starting from zero.", ge=0)
    question: str = Field(description="A question that needs to be answered")
    # Free-text findings; replaced by `update_open_question_in_queue`.
    notes: list[str] = Field(description="A list of notes/information related to the question that help to answer the question as good as possible.")
    # Constrained to the inclusive range 0..10 by `ge`/`le`.
    rating: int = Field(description="The quality of the temporary response (0 to 10)", ge=0, le=10)

# Container for all open questions; its single field holds the ordered list
# that the tool functions read and update.
class QuestionQueue(BaseModel):
    # Declared without a default, so it must be supplied when the model is constructed.
    list_of_questions: list[OpenQuestion] = Field(description="A list of questions that need to be answered")

# Module-level queue shared by the tool functions; starts empty.
# `QuestionQueue` declares exactly one (required) field, `list_of_questions`,
# so the initial list has to be passed under that name.
question_queue = QuestionQueue(list_of_questions=[])

def get_open_questions() -> QuestionQueue:
    """
    Returns the current question queue.
    
    The question queue contains a list of OpenQuestion objects that need to be answered.
    Each OpenQuestion object contains a question string, a list of notes that may help to answer the question and a rating.
    The rating is the quality of the temporary response (0 to 10) to the question.
    
    The question queue is a shared object between all agents and is used to keep track of the progress of the question answering process.

    Before the queue is returned, the question number of every question is reset to its position in the list (starting from zero).
    """
    try:
        # Make sure that each question has a correct, unique question number:
        # the number is simply the index of the question in the list.
        for index, open_question in enumerate(question_queue.list_of_questions):
            open_question.question_number = index
    except (AttributeError, TypeError, ValueError):
        # Renumbering is best-effort: a malformed entry must not prevent the
        # queue from being returned. Only data-shape errors are tolerated;
        # interrupts and unrelated failures propagate.
        pass

    return question_queue

# Payload for `update_open_question_in_queue`: selects one question by number
# and carries the notes and rating to store on it.
class UpdateRequestQueue(BaseModel):
    # Used as the index into `question_queue.list_of_questions`.
    question_number: int = Field(description="The question number. Each number has a unique number starting from zero.", ge=0)
    notes: list[str] = Field(description="A list of notes/information related to the question that help to answer the question as good as possible.")
    # Constrained to the inclusive range 0..10 by `ge`/`le`.
    rating: int = Field(description="The quality of the temporary response (0 to 10)", ge=0, le=10)

def update_open_question_in_queue(update_request: UpdateRequestQueue) -> bool:
    """
    Update the notes and the rating of one question in the shared question queue.

    Args:
        update_request (UpdateRequestQueue): Identifies the question by its question number (position in the queue, starting from zero) and carries the new notes and the new rating.

    Returns:
        bool: True if the question number refers to an existing question, False otherwise (the queue is left unchanged in that case).
    """
    question_number = getattr(update_request, 'question_number', None)

    # Compare against None explicitly: 0 is a valid question number (the first
    # question) and must not be treated as "missing".
    if question_number is None:
        return False

    questions = question_queue.list_of_questions

    # Reject out-of-range numbers; the lower bound also prevents a negative
    # number from silently addressing a question from the end of the list.
    if not 0 <= question_number < len(questions):
        return False

    target = questions[question_number]

    # An empty notes list carries no information, so existing notes are kept.
    notes = getattr(update_request, 'notes', None)
    if notes:
        target.notes = notes

    # A rating of 0 is valid, so only None means "no rating supplied".
    rating = getattr(update_request, 'rating', None)
    if rating is not None:
        target.rating = rating

    return True

    

In [3]:
from WebSearchAgent import *
# System prompt for the web search agent. The literal has to open on the
# assignment line: an assignment with nothing after `=` is a syntax error.
# NOTE(review): the text is passed to the model as-is and contains several
# typos (e.g. "usesing", "asker", "Inforrmation"); it is intentionally left
# untouched here because changing it changes what the model receives.
system_prompt = """
    You are a web search expert and your goal is to answer a list of user question using external search tools.
    You don't have to solve all questions, just try to find the best answer using only the tools available to you.

    You belong to a group of experts that try to answer all questions in a question queue. 
    At the beginning, there might be many questions that are too complex for you to handle.
    Collect information usesing the available search tools an save important notes using the save_notes function/tool.
    You will then also give a rating on how complete the answer for each question is, so that the next agent knows which questions to answer next.

    Our goal is to do deep research. This means, we don't need quick responses. It's okay, if it takes more than 15 min. to answer all questions in the queue. 
    You can ask the a reasoning agent to give you feedback about what to search/which tools to use etc., just add all information needed to help you. 

    How to answer the question in the question queue:
        - Pick a question that is currently not answered.
        - Focus only on the most important question and only one question.
        - Don't asker more than one question.
        - Don't answer the question directly, even if you think you know the answer.
        - Use the search tools (at least one) to help you collect information.
        - If the questions seems to be resolved, there is no need to use more search tools.

    Information about the search tools:
        - Sometimes many tools are available to you, sometimes maybe only one.
        - They only provide an overview of search result (except perplexity search, if tool is available).
        - The search usually contain web URLs.
        - The crawl_website_async tool be used to convert a website or a PDF in markdown format.
        - You can use the reason model to help you decide which url should be processed next.
        - You should use each search tool max. once (not more) one question.
        - The search tools are cheap to use (don't worry using them).
    
    Inforrmation about the web crawler:
        - The tool needs a web url that leads to a website or a PDF document (e.g., ArXiv paper).
        - The tool will return a string representing the website or PDF in markdown format.
        - Use it to get more information for a give search result.

    Information about the reasoning agent:
        - This agent is available to you in form as a tool/function call.
        - Use this agent to help you make decisions
        - The context window of this agent is large (arround 64k).
        - Therefore, you can append the crawled markdown text, when asking questions.
        - The agent is cheap to use: 
            $0.14 / million input tokens (cache hit)
            $0.55 / million input tokens (cache miss)
            $2.19 / million output tokens
        - Don't hesitate using this model to help you with everything.
        - You can ask the reasoning agent multiple times in a row.
        - The reasoning model returns the reasoning thoughts  and the final response to your query.
        - Don't dismiss the reasoning thoughts, they can be very useful.
    
    Information about the question queue:
        - Every agent in the loop or chain has access to the question queue (data structure/database).
        - You can get all information about the question queue by using the tool 'get_open_questions'.
        - You should use this function only once and decide which question to answer next (most relevant question first).

    Here is an example how you can process the questions:
        - Start by calling the 'get_open_questions' function to get all open questions.
        - Selected one question that seems to be most relevant (use reasoning model if you're not sure).
        - Based on the nature of the question select the appropriate search tool (can can use multiple in a row) to get an overview.
        - After getting an overview of the search results, consult the reasoning agent, which search results seems most promissing to get good results.
        - Use the crawl tool function to get a markdown response from the URL link.
        - Use the reasoning model again with the additional context to collect notes/information or get a summary of the given context.
        - If you have collected all information/notes for a question, update the question queue by calling the function: 'update_open_question_in_queue'

    """

# Functions exposed to the agent as tools, one per line.
# The three queue/reasoning helpers are defined in this notebook; the search
# and crawl functions, `model` and `Response` are not defined in the visible
# cells - presumably they come from `from WebSearchAgent import *` (verify).
tools_list = [
    ask_reasoning_agent,
    get_open_questions,
    update_open_question_in_queue,
    google_general_search,
    google_scholar_search,
    papers_with_code_search,
    crawl_website_async,
    # perplexity_search,  # currently not registered
]

# Build the agent from the model, the expected result type, the system prompt
# and the selected tools.
agent = Agent(model, result_type=Response, system_prompt=system_prompt, tools=tools_list)

#result = await agent.run('abc')