In [27]:
import os
import time
import markdown2
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from autogen_core.tools import FunctionTool
from autogen_core.tools import FunctionTool
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.ui import Console

from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from autogen_ext.models.openai import OpenAIChatCompletionClient
import requests
from bs4 import BeautifulSoup

from dotenv import load_dotenv

# Pull GEMINI_API_KEY / GOOGLE_* settings from a local .env file into the
# process environment before anything below reads them.
load_dotenv()

# Shared chat-completion client used by every agent in this notebook.
model_client = OpenAIChatCompletionClient(
    api_type="google",
    api_key=os.environ.get("GEMINI_API_KEY"),
    model="gemini-1.5-pro",  # Ensure this model supports the required features
    # Capabilities are declared by hand for this model name.
    model_info=dict(
        vision=True,
        function_calling=True,
        json_output=True,
        family="unknown",
    ),
)

def generate_pdf(content: str, filename: str = "output.pdf", output_dir: str = "output") -> str:
    """Render Markdown text to a PDF file and return the path that was written.

    The Markdown is converted to HTML with ``markdown2``; every non-blank line
    of that HTML becomes one ReportLab ``Paragraph`` followed by a 12-point
    ``Spacer``.

    Args:
        content: Markdown (or plain) text to render.
        filename: Name of the PDF inside ``output_dir``. Any directory part is
            discarded, a blank name falls back to ``output.pdf``, and ``.pdf``
            is appended when missing.
        output_dir: Directory to write into; created if it does not exist.

    Returns:
        The path of the generated PDF (``output_dir`` joined with the
        sanitised file name).
    """
    import re

    os.makedirs(output_dir, exist_ok=True)

    # The name arrives from a model-generated tool call, so treat it as
    # untrusted: keep only the last path component so that absolute paths or
    # "../" segments cannot place the file outside output_dir.
    safe_name = os.path.basename(filename.replace("\\", "/").strip())
    if safe_name in ("", ".", ".."):
        safe_name = "output.pdf"

    # Ensure the filename has a .pdf extension
    if not safe_name.lower().endswith(".pdf"):
        safe_name += ".pdf"

    filepath = os.path.join(output_dir, safe_name)

    # Convert Markdown to HTML
    html_content = markdown2.markdown(content)

    # Create a PDF document
    doc = SimpleDocTemplate(filepath, pagesize=letter)
    body_style = getSampleStyleSheet()["Normal"]
    story = []

    # Convert HTML to ReportLab paragraphs, one per non-blank HTML line.
    for line in html_content.split("\n"):
        if not line.strip():  # Ignore empty lines
            continue
        try:
            flowable = Paragraph(line, body_style)
        except ValueError:
            # NOTE(review): splitting on newlines can cut an HTML element in
            # half (e.g. a list or a multi-line paragraph), which ReportLab's
            # markup parser may reject. Fall back to the text with the tags
            # removed so one bad line does not abort the whole document.
            plain = re.sub(r"<[^>]+>", "", line).strip()
            if not plain:
                continue
            flowable = Paragraph(plain, body_style)
        story.append(flowable)
        story.append(Spacer(1, 12))  # Add spacing between paragraphs

    # Build the PDF
    doc.build(story)

    return filepath

def google_search(query: str, num_results: int = 2, max_chars: int = 500) -> list:  # type: ignore[type-arg]
    """Search Google for patents, keywords and return the results.

    Queries the Google Custom Search JSON API and then downloads each hit to
    attach a short plain-text excerpt of the page.

    Args:
        query: Search terms.
        num_results: How many hits to request. Clamped to 1..10, the range the
            Custom Search API accepts for ``num``.
        max_chars: Upper bound on the length of each page excerpt; the excerpt
            is cut on a word boundary.

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``snippet`` and
        ``body``. ``body`` is an empty string when the page could not be
        fetched.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` or ``GOOGLE_SEARCH_ENGINE_ID`` is
            missing from the environment.
        RuntimeError: If the search API answers with a non-200 status.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    search_engine_id = os.getenv("GOOGLE_SEARCH_ENGINE_ID")

    if not api_key or not search_engine_id:
        raise ValueError("API key or Search Engine ID not found in environment variables")

    # The value comes from a model-generated tool call; out-of-range values
    # would otherwise be rejected by the API.
    num_results = max(1, min(int(num_results), 10))

    url = "https://www.googleapis.com/customsearch/v1"
    params = {"key": api_key, "cx": search_engine_id, "q": query, "num": num_results}

    # A timeout keeps a stalled connection from hanging the agent forever.
    response = requests.get(url, params=params, timeout=10)  # type: ignore[arg-type]

    if response.status_code != 200:
        # Print the raw body: an error response is not guaranteed to be JSON.
        print(response.text)
        raise RuntimeError(f"Error in API request: {response.status_code}")

    results = response.json().get("items", [])

    def get_page_content(page_url: str) -> str:
        """Return up to ``max_chars`` characters of visible text from a page."""
        # Deliberately best-effort: any failure yields an empty body so one
        # unreachable page does not sink the whole search.
        try:
            page = requests.get(page_url, timeout=10)
            page.raise_for_status()  # do not scrape HTTP error pages as content
            soup = BeautifulSoup(page.content, "html.parser")
            words = soup.get_text(separator=" ", strip=True).split()
        except Exception as e:
            print(f"Error fetching {page_url}: {str(e)}")
            return ""

        kept = []
        used = 0
        for word in words:
            # Every word after the first also costs one separating space.
            cost = len(word) + (1 if kept else 0)
            if used + cost > max_chars:
                break
            kept.append(word)
            used += cost
        return " ".join(kept)

    enriched_results = []
    last_index = len(results) - 1
    for index, item in enumerate(results):
        link = item["link"]
        enriched_results.append(
            {
                "title": item.get("title", ""),
                "link": link,
                # Not every result carries a snippet.
                "snippet": item.get("snippet", ""),
                "body": get_page_content(link),
            }
        )
        if index < last_index:
            time.sleep(1)  # Be respectful to the servers

    return enriched_results


def arxiv_search(query: str, max_results: int = 2) -> list:  # type: ignore[type-arg]
    """Look up papers on arXiv, ranked by relevance.

    Args:
        query: Search expression passed to arXiv.
        max_results: Maximum number of papers to return.

    Returns:
        One dict per paper with the keys ``title``, ``authors`` (list of
        names), ``published`` (``YYYY-MM-DD``), ``abstract`` and ``pdf_url``.
    """
    import arxiv

    arxiv_client = arxiv.Client()
    relevance_search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )

    return [
        {
            "title": entry.title,
            "authors": [person.name for person in entry.authors],
            "published": entry.published.strftime("%Y-%m-%d"),
            "abstract": entry.summary,
            "pdf_url": entry.pdf_url,
        }
        for entry in arxiv_client.results(relevance_search)
    ]

# Wrap the plain functions above as tools the agents can call. The ``name``
# given here is the identifier the model must use in its function calls.

# Web search with page excerpts.
google_search_tool = FunctionTool(
    google_search,
    description="Search Google for information, returning results with snippets and body content.",
    name="google_search",
)

# Academic paper search.
arxiv_search_tool = FunctionTool(
    arxiv_search,
    description="Search Arxiv for publications related to a given topic, including abstracts.",
    name="arxiv_search",
)

# Markdown-to-PDF writer.
pdf_generator_tool = FunctionTool(
    generate_pdf,
    description="Generate a PDF document from given content and save it in the output folder with markdown support.",
    name="pdf_generator",
)

# Search-only agent. NOTE(review): the team assembled later in this file lists
# only report_agent_gemini as a participant, so this agent is currently unused.
research_agent = AssistantAgent(
    name="Researcher",
    model_client=model_client,
    description="Research specialist for finding information",
    tools=[google_search_tool, arxiv_search_tool],
    system_message="You are a research specialist. Use the provided tools to find relevant information."
)

# Agent that both researches and writes the PDF. The prompt refers to the
# tools by their registered names (google_search, arxiv_search, pdf_generator)
# so the model does not invent names that are not available, and each sentence
# fragment ends with a space so the concatenated prompt reads as separate
# sentences.
report_agent_gemini = AssistantAgent(
    name="Report_Agent",
    model_client=model_client,
    description="Generate a report based on a given topic",
    tools=[pdf_generator_tool, google_search_tool, arxiv_search_tool],
    system_message=(
        "You are a helpful assistant. Your task is to synthesize data into a high-quality "
        "literature review and generate a PDF report. "
        "First use the google_search and arxiv_search tools to gather material for the technical report, "
        "then call the pdf_generator tool, passing the complete report as a single Markdown string "
        "in 'content', to save the final PDF report. "
        "Your response should end with the word 'TERMINATE'."
    )
)

from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.conditions import TextMentionTermination

termination_condition = TextMentionTermination("TERMINATE")
team = RoundRobinGroupChat(
    participants=[report_agent_gemini],
    termination_condition=termination_condition
)

await Console(
    team.run_stream(
        task="Write a literature review on 'Advanced features on Earbuds or AirPods within the last 2 years' and save it as a PDF.",
    )
)

---------- user ----------
Write a literature review on 'Advanced features on Earbuds or AirPods within the last 2 years' and save it as a PDF.
---------- Report_Agent ----------
[FunctionCall(id='', arguments='{"query":"Advanced features on Earbuds or AirPods within the last 2 years"}', name='research'), FunctionCall(id='', arguments='{"filename":"Advanced Features on Earbuds or AirPods.pdf","content":{"query":"Advanced features on Earbuds or AirPods within the last 2 years"}}', name='pdf_generator')]
---------- Report_Agent ----------
[FunctionExecutionResult(content="Error: The tool 'research' is not available.", call_id='', is_error=True), FunctionExecutionResult(content="Error: 1 validation error for pdf_generatorargs\ncontent\n  Input should be a valid string [type=string_type, input_value={'query': 'Advanced featu...ithin the last 2 years'}, input_type=dict]\n    For further information visit https://errors.pydantic.dev/2.10/v/string_type", call_id='', is_error=True)]
----------

  model_result = await self._model_client.create(
  model_result = await self._model_client.create(


---------- Researcher ----------
[FunctionCall(id='', arguments='{"query":"Advanced features on Earbuds or AirPods within the last 2 years","num_results":10}', name='google_search'), FunctionCall(id='', arguments='{"query":"Earbuds noise cancellation OR AirPods advanced features","max_results":5}', name='arxiv_search')]
---------- Researcher ----------
[FunctionExecutionResult(content='[{\'title\': \'Mass production of two new AirPods models to start in May ...\', \'link\': \'https://www.reddit.com/r/apple/comments/1bh2wxd/mass_production_of_two_new_airpods_models_to/\', \'snippet\': "Mar 17, 2024 ... Really wish they\'d release the gen 1/2 form factor with some of the new features. ... They\'ve been my bedside earbuds for the past year and a half\\xa0...", \'body\': \'\'}, {\'title\': \'Apple introduces AirPods 4 and a hearing health experience with ...\', \'link\': \'https://www.apple.com/newsroom/2024/09/apple-introduces-airpods-4-and-a-hearing-health-experience-with-airpods-pro-2/\

TaskResult(messages=[TextMessage(source='user', models_usage=None, content="Write a literature review on 'Advanced features on Earbuds or AirPods within the last 2 years' and save it as a PDF.", type='TextMessage'), ToolCallRequestEvent(source='Report_Agent', models_usage=RequestUsage(prompt_tokens=123, completion_tokens=46), content=[FunctionCall(id='', arguments='{"query":"Advanced features on Earbuds or AirPods within the last 2 years"}', name='research'), FunctionCall(id='', arguments='{"filename":"Advanced Features on Earbuds or AirPods.pdf","content":{"query":"Advanced features on Earbuds or AirPods within the last 2 years"}}', name='pdf_generator')], type='ToolCallRequestEvent'), ToolCallExecutionEvent(source='Report_Agent', models_usage=None, content=[FunctionExecutionResult(content="Error: The tool 'research' is not available.", call_id='', is_error=True), FunctionExecutionResult(content="Error: 1 validation error for pdf_generatorargs\ncontent\n  Input should be a valid strin