In [6]:
from autogen_core.tools import FunctionTool
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from autogen_core.tools import FunctionTool
from autogen_core.tools import FunctionTool
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.ui import Console

from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

import os
import time
import markdown2

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

load_dotenv()

def google_search(query: str, num_results: int = 2, max_chars: int = 500) -> list:  # type: ignore[type-arg]
    """Search Google Custom Search and enrich every hit with text scraped from its page.

    Args:
        query: Search terms sent to the Custom Search JSON API.
        num_results: Number of results requested from the API (sent as ``num``).
        max_chars: Upper bound on the length of the scraped ``body`` text per result.

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``snippet`` and ``body``.
        ``body`` is an empty string when the page could not be fetched or parsed.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` or ``GOOGLE_SEARCH_ENGINE_ID`` is not set.
        Exception: If the search API answers with a non-200 status code.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    search_engine_id = os.getenv("GOOGLE_SEARCH_ENGINE_ID")

    if not api_key or not search_engine_id:
        raise ValueError("API key or Search Engine ID not found in environment variables")

    url = "https://www.googleapis.com/customsearch/v1"
    params = {"key": api_key, "cx": search_engine_id, "q": query, "num": num_results}

    # Without a timeout a stalled connection would block the calling agent forever.
    response = requests.get(url, params=params, timeout=10)  # type: ignore[arg-type]

    if response.status_code != 200:
        # The error body is not guaranteed to be JSON; never let a decode
        # failure hide the actual HTTP status from the caller.
        try:
            print(response.json())
        except ValueError:
            print(response.text)
        raise Exception(f"Error in API request: {response.status_code}")

    results = response.json().get("items", [])

    def get_page_content(page_url: str) -> str:
        """Return the visible text of ``page_url`` cut at a word boundary, or "" on failure."""
        try:
            page = requests.get(page_url, timeout=10)
            soup = BeautifulSoup(page.content, "html.parser")
            text = soup.get_text(separator=" ", strip=True)
        except Exception as e:
            # Best effort by design: one unreachable page must not fail the whole search.
            print(f"Error fetching {page_url}: {str(e)}")
            return ""

        picked = []
        used = 0  # characters consumed so far, counting one separator per word
        for word in text.split():
            if used + len(word) + 1 > max_chars:
                break
            picked.append(word)
            used += len(word) + 1
        return " ".join(picked)

    enriched_results = []
    for item in results:
        link = item["link"]
        enriched_results.append(
            {
                # Some results come back without a title or snippet; default to "".
                "title": item.get("title", ""),
                "link": link,
                "snippet": item.get("snippet", ""),
                "body": get_page_content(link),
            }
        )
        time.sleep(1)  # Be respectful to the servers

    return enriched_results


def arxiv_search(query: str, max_results: int = 2) -> list:  # type: ignore[type-arg]
    """
    Query arXiv, sorted by relevance, and return one metadata dict per paper.

    Each dict holds the paper's title, author names, publication date
    (formatted as YYYY-MM-DD), abstract and PDF URL.
    """
    import arxiv

    relevance_query = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )

    return [
        {
            "title": entry.title,
            "authors": [person.name for person in entry.authors],
            "published": entry.published.strftime("%Y-%m-%d"),
            "abstract": entry.summary,
            "pdf_url": entry.pdf_url,
        }
        for entry in arxiv.Client().results(relevance_query)
    ]


def generate_pdf(content: str, filename: str = "output.pdf", output_dir: str = "output") -> str:
    """Render Markdown ``content`` into a PDF file inside ``output_dir``.

    Args:
        content: Markdown text; it is converted to HTML with ``markdown2`` and
            every non-empty HTML line becomes one ReportLab paragraph.
        filename: Desired file name. Only its final path component is used and
            a ``.pdf`` extension is appended when missing.
        output_dir: Directory that receives the file; created if necessary.

    Returns:
        The path of the written PDF file.
    """
    os.makedirs(output_dir, exist_ok=True)

    # The name is supplied by the caller (here: an LLM tool call). Keep only the
    # last path component so an absolute path or "../" cannot escape output_dir.
    filename = os.path.basename(filename) or "output"

    # Ensure the filename has a .pdf extension
    if not filename.lower().endswith(".pdf"):
        filename += ".pdf"

    filepath = os.path.join(output_dir, filename)

    # Convert Markdown to HTML
    html_content = markdown2.markdown(content)

    # Create a PDF document
    doc = SimpleDocTemplate(filepath, pagesize=letter)
    styles = getSampleStyleSheet()
    story = []

    # Convert HTML to ReportLab Paragraphs, one per non-empty line.
    # NOTE(review): Paragraph understands only ReportLab's own inline markup;
    # confirm that block-level tags emitted by markdown2 (lists, headings)
    # render as intended when split line by line.
    for paragraph in html_content.split("\n"):
        if not paragraph.strip():  # Ignore empty lines
            continue
        story.append(Paragraph(paragraph, styles["Normal"]))
        story.append(Spacer(1, 12))  # Add spacing between paragraphs

    # Build the PDF
    doc.build(story)

    return filepath

# Wrap the plain Python functions defined above as FunctionTool objects so they
# can be handed to agents. Each wrapper carries the tool name and the
# description string given here.

# Web search tool backed by google_search().
google_search_tool = FunctionTool(
    google_search,
    name="google_search",
    description="Search Google for information, returning results with snippets and body content."
)

# Academic paper search tool backed by arxiv_search().
arxiv_search_tool = FunctionTool(
    arxiv_search,
    name="arxiv_search",
    description="Search Arxiv for papers related to a given topic, including abstracts."
)

# Report writer tool backed by generate_pdf(); note the tool is registered
# under the name "pdf_generator".
pdf_generator_tool = FunctionTool(
    generate_pdf,
    name="pdf_generator",
    description="Generate a PDF document from given content and save it in the output folder with markdown support."
)

from autogen_ext.models.openai import OpenAIChatCompletionClient
import os

# Gemini is reached through Google's OpenAI-compatible endpoint. The endpoint is
# set explicitly via base_url so the request does not depend on the installed
# autogen-ext version inferring it from the model name. The former
# api_type="google" argument was dropped: it is not a documented option of
# OpenAIChatCompletionClient and did not select the Gemini endpoint.
model_client = OpenAIChatCompletionClient(
    model="gemini-1.5-pro",  # Ensure this model supports the required features
    api_key=os.getenv("GEMINI_API_KEY"),
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    # Capabilities must be declared by hand because the model is not an OpenAI model.
    model_info={
        "vision": True,
        "function_calling": True,
        "json_output": True,
        "family": "unknown",
    },
)


from autogen_agentchat.agents import AssistantAgent

# Agent that answers with web search results obtained through google_search_tool.
google_search_agent = AssistantAgent(
    name="Google_Search_Agent",
    tools=[google_search_tool],
    model_client=model_client,
    description="An agent that searches Google for information, returning results with snippets and body content.",
    system_message="You are a helpful AI assistant. Use the provided tools to retrieve and present information effectively.",
)

# Agent that looks up academic papers through arxiv_search_tool.
arxiv_search_agent = AssistantAgent(
    name="Arxiv_Search_Agent",
    tools=[arxiv_search_tool],
    model_client=model_client,
    description="An agent that searches Arxiv for academic papers related to a given topic, including abstracts.",
    system_message="You are a helpful AI assistant. Utilize the provided tools to find and summarize academic papers relevant to the user's query.",
)

# Agent that writes the final literature review and saves it as a PDF.
# The system message refers to the tool by its registered name ("pdf_generator")
# and ends each instruction with a period so the sentences do not run together.
report_agent_gemini = AssistantAgent(
    name="Report_Agent",
    model_client=model_client,
    description="Generate a report based on a given topic",
    tools=[pdf_generator_tool],
    system_message=(
        "You are a helpful assistant. Your task is to synthesize data extracted into a high-quality "
        "literature review including CORRECT references. You MUST write a final report that is formatted "
        "as a literature review with CORRECT references. Use the pdf_generator tool to save the report as a PDF file. "
        "Your response should end with the word 'TERMINATE'."
    ),
)


from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.conditions import TextMentionTermination

# Stop the conversation as soon as a message mentions "TERMINATE"
# (the report agent's system message asks it to end its response with that word).
# NOTE(review): this is the only stop condition configured here; consider
# whether an additional upper bound on messages is wanted.
termination_condition = TextMentionTermination("TERMINATE")
# Participants are listed in the order: Google search, arXiv search, report writer.
team = RoundRobinGroupChat(
    participants=[google_search_agent, arxiv_search_agent, report_agent_gemini],
    termination_condition=termination_condition
)

from autogen_agentchat.ui import Console

# Run the team on the task and pass its message stream to Console.
# The top-level `await` works in a notebook cell (or another async context);
# in a plain script this call has to live inside an async function.
await Console(
    team.run_stream(
        task="Write a literature review on the topic 'Advance features on Earbuds or Airpods within last 2 years'. You can search for some relevant keywords on google, you can find the publications in arxiv archives too if required.",
    )
)



---------- user ----------
Write a literature review on the topic 'Advance features on Earbuds or Airpods within last 2 years'. You can search for some relevant keywords on google, you can find the publications in arxiv archives too if required.


Error processing publish message for Google_Search_Agent/3eb19c67-a23b-4ebe-a1cd-a7eaa06523a1
Traceback (most recent call last):
  File "/Users/hissain/git/github/llm/llm_literature/.venv/lib/python3.11/site-packages/autogen_core/_single_threaded_agent_runtime.py", line 505, in _on_message
    return await agent.on_message(
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/hissain/git/github/llm/llm_literature/.venv/lib/python3.11/site-packages/autogen_core/_base_agent.py", line 113, in on_message
    return await self.on_message_impl(message, ctx)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/hissain/git/github/llm/llm_literature/.venv/lib/python3.11/site-packages/autogen_agentchat/teams/_group_chat/_sequential_routed_agent.py", line 48, in on_message_impl
    return await super().on_message_impl(message, ctx)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/hissain/git/github/llm/llm_literature/.venv/lib/python3.11/site-packages/autogen_core/

TaskResult(messages=[TextMessage(source='user', models_usage=None, content="Write a literature review on the topic 'Advance features on Earbuds or Airpods within last 2 years'. You can search for some relevant keywords on google, you can find the publications in arxiv archives too if required.", type='TextMessage')], stop_reason=None)