In [1]:
%pip install arxiv

Collecting arxiv
  Downloading arxiv-2.1.3-py3-none-any.whl.metadata (6.1 kB)
Collecting feedparser~=6.0.10 (from arxiv)
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting sgmllib3k (from feedparser~=6.0.10->arxiv)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Downloading arxiv-2.1.3-py3-none-any.whl (11 kB)
Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
Building wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (pyproject.toml) ... [?25ldone
[?25h  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6089 sha256=f30466ae84a13ea68b1a868a8f65c1f55f1e2f9299397dc954e7c8b267d55e68
  Stored in directory: /Users/andy/Library/Caches/pip/wheels/3b/25/2a/105d6a15df6914f4d15047691c6c28f9052cc1173e40285d03
Successfully built sgmllib3k
Installing collecte

In [2]:
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.ui import Console
from autogen_core.tools import FunctionTool
from autogen_ext.models.openai import AzureOpenAIChatCompletionClient
from dotenv import load_dotenv
import os

# load_dotenv() runs first so the environment lookups below can see values it loads.
load_dotenv()

# Azure OpenAI connection settings; each is None when the variable is not set.
api_version = os.environ.get("AZURE_OPENAI_API_VERSION")
api_key = os.environ.get("AZURE_OPENAI_API_KEY")
azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")

# Single model client, reused by every agent defined in this notebook.
azure_openai_chat_completion_client = AzureOpenAIChatCompletionClient(
    model="gpt-4o",
    azure_endpoint=azure_endpoint,
    api_version=api_version,
    api_key=api_key,
)


In [3]:
def google_search(query: str, num_results: int = 2, max_chars: int = 500) -> list:  # type: ignore[type-arg]
    """Search Google through SerpAPI and attach scraped page text to each hit.

    Args:
        query: Search phrase, sent to SerpAPI as ``q``.
        num_results: Number of results requested from SerpAPI (``num``).
        max_chars: Character budget used when truncating each page body on
            word boundaries.

    Returns:
        A list of dicts with ``title``, ``link``, ``snippet`` and ``body`` keys,
        one per organic result that carries a link. ``body`` is an empty string
        when the page could not be fetched or parsed.

    Raises:
        ValueError: If ``SERPAPI_KEY`` is not set in the environment.
        Exception: If SerpAPI answers with a non-200 status code.
    """
    import os
    import time

    import requests
    from bs4 import BeautifulSoup
    from dotenv import load_dotenv

    load_dotenv()

    serp_api_key = os.getenv("SERPAPI_KEY")

    if not serp_api_key:
        raise ValueError("SerpAPI key not found in environment variables")

    # requests has no default timeout; without one a stalled connection would
    # block the calling agent indefinitely.
    search_response = requests.get(
        "https://serpapi.com/search",
        params={
            "engine": "google",
            "q": query,
            "num": num_results,
            "api_key": serp_api_key,
        },
        timeout=30,
    )

    if search_response.status_code != 200:
        # Error bodies are not guaranteed to be JSON; fall back to the raw text
        # so a decode failure cannot hide the status error raised below.
        try:
            print(search_response.json())
        except ValueError:
            print(search_response.text)
        raise Exception(f"Error in API request: {search_response.status_code}")

    results = search_response.json().get("organic_results", [])

    def get_page_content(url: str) -> str:
        """Fetch ``url`` and return its visible text, cut on a word boundary."""
        try:
            page = requests.get(url, timeout=10)
            soup = BeautifulSoup(page.content, "html.parser")
            words = soup.get_text(separator=" ", strip=True).split()
            kept = []
            used = 0  # each kept word costs len(word) + 1 (the joining space)
            for word in words:
                if used + len(word) + 1 > max_chars:
                    break
                kept.append(word)
                used += len(word) + 1
            return " ".join(kept)
        except Exception as e:
            # Best effort: one unreachable page must not fail the whole search.
            print(f"Error fetching {url}: {str(e)}")
            return ""

    enriched_results = []
    for item in results:
        link = item.get("link")
        if not link:
            # Nothing to fetch or cite for an entry without a link.
            continue
        enriched_results.append(
            {
                "title": item.get("title", ""),
                "link": link,
                "snippet": item.get("snippet", ""),
                "body": get_page_content(link),
            }
        )
        time.sleep(1)  # Be respectful to the servers

    print(enriched_results)

    return enriched_results


In [None]:
def arxiv_search(query: str, max_results: int = 2) -> list:  # type: ignore[type-arg]
    """
    Query arXiv and collect metadata, including the abstract, for each hit.

    Args:
        query: Search string passed to ``arxiv.Search``.
        max_results: Upper bound on results passed to ``arxiv.Search``.

    Returns:
        A list of dicts with ``title``, ``authors`` (list of names),
        ``published`` (formatted ``YYYY-MM-DD``), ``abstract`` and ``pdf_url``.
        The same list is also printed before being returned.
    """
    import arxiv

    def to_record(entry):
        # Keep only the fields this function returns to its caller.
        return {
            "title": entry.title,
            "authors": [person.name for person in entry.authors],
            "published": entry.published.strftime("%Y-%m-%d"),
            "abstract": entry.summary,
            "pdf_url": entry.pdf_url,
        }

    client = arxiv.Client()
    relevance_search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )
    results = [to_record(entry) for entry in client.results(relevance_search)]

    print(results)
    return results

In [5]:
# Wrap the two search functions as tools; the description is passed alongside each function.
google_search_tool = FunctionTool(
    google_search,
    description="Search Google for information, returns results with a snippet and body content",
)

arxiv_search_tool = FunctionTool(
    arxiv_search,
    description="Search Arxiv for papers related to a given topic, including abstracts",
)

In [6]:
# Three agents share one model client: two are given a search tool, the third has none.

# Web search agent, backed by the Google search tool.
google_search_agent = AssistantAgent(
    name="Google_Search_Agent",
    tools=[google_search_tool],
    model_client=azure_openai_chat_completion_client,
    description="An agent that can search Google for information, returns results with a snippet and body content",
    system_message="You are a helpful AI assistant. Solve tasks using your tools.",
)

# Paper search agent, backed by the Arxiv search tool.
# The system prompt previously read "academi papers"; the typo is fixed because
# this text is sent to the model.
arxiv_search_agent = AssistantAgent(
    name="Arxiv_Search_Agent",
    tools=[arxiv_search_tool],
    model_client=azure_openai_chat_completion_client,
    description="An agent that can search Arxiv for papers related to a given topic, including abstracts",
    system_message="You are a helpful AI assistant. Solve tasks using your tools. Specifically, you can take into consideration the user's request and craft a search query that is most likely to return relevant academic papers.",
)


# Report agent: no tools. Its prompt asks it to end with 'TERMINATE', the word
# the team's termination condition watches for.
report_agent = AssistantAgent(
    name="Report_Agent",
    model_client=azure_openai_chat_completion_client,
    description="Generate a report based on a given topic",
    system_message="You are a helpful assistant. Your task is to synthesize data extracted into a high quality literature review including CORRECT references. You MUST write a final report that is formatted as a literature review with CORRECT references.  Your response should end with the word 'TERMINATE'",
)

In [7]:
# Termination condition keyed on the word "TERMINATE", which the report agent's prompt asks it to end with.
termination = TextMentionTermination("TERMINATE")

# Round-robin team over the Google agent, the Arxiv agent and the report agent, listed in that order.
team = RoundRobinGroupChat(
    participants=[
        google_search_agent,
        arxiv_search_agent,
        report_agent,
    ],
    termination_condition=termination,
)

In [8]:
await Console(
    team.run_stream(
        # task="멀티 에이전트 AI 시스템 구축을 위한한 논문 리뷰 리포트 작성",
        task="최신 RAG 기술에 대한 동향 및 논문 리뷰",
    )
)

---------- user ----------
최신 RAG 기술에 대한 동향 및 논문 리뷰
---------- Google_Search_Agent ----------
[FunctionCall(id='call_JAOwPSZHLvT5ves4U6jWSMF6', arguments='{"query": "최신 RAG 기술 동향", "num_results": 2, "max_chars": 500}', name='google_search'), FunctionCall(id='call_K0f4UfIxcsCD6SUxqclfdr68', arguments='{"query": "최신 RAG 논문 리뷰", "num_results": 2, "max_chars": 500}', name='google_search')]


  model_result = await model_client.create(


[{'title': '2024 Year Of The RAG :: RAG가 주목 받는 이유와 미래 동향', 'link': 'https://www.skelterlabs.com/blog/2024-year-of-the-rag', 'snippet': 'RAG의 미래는 개인화와 확장성 그리고 정확성에 초점을 맞추며, 다양한 데이터를 처리하고 실시간으로 반응하는 더 똑똑한 시스템으로 발전할 것으로 전망됩니다. 그러나 ...', 'body': '2024 Year Of The RAG :: RAG가 주목 받는 이유와 미래 동향 Company Gen ai Offerings AI Agent 를 활용한 비즈니스 자동화 기업의 복잡한 업무를 독립적으로 수행하는 AI Agent 모델을 구축합니다. RAG 기반 맞춤형 챗봇 등 애플리케이션 개발 RAG 기술로 할루시네이션을 제어하고, 기업 데이터 기반으로 동작해 정확도와 효율을 극대화합니다. Private Data 기반, 기업에 꼭 맞는 LLM 모델 구축 모델 학습에 기업 데이터를 활용해 보안성과 성능을 모두 갖춘 기업향 LLM 모델을 구축합니다. 대고객용 노코드 AI 챗봇 빌더 구축 및 운영 지원 코딩 없이 구축 가능한 AI 챗봇으로, 신속한 고객 응대를 통해 비즈니스 민첩성을 강화합니다. AI 컨택센터 구축을 위한 실시간 STT 및 TTS 모델 제공 한국어 특화 음성 인식과 자연스러운 음성 합성 기술로 고객 커뮤니케이션을 지원합니다. USECASE Newsroom Blog'}, {'title': 'RAG의 짧은 역사 훑어보기(첫 논문부터 최근 동향까지)', 'link': 'https://medium.com/rate-labs/rag%EC%9D%98-%EC%A7%A7%EC%9D%80-%EC%97%AD%EC%82%AC-%ED%9B%91%EC%96%B4%EB%B3%B4%EA%B8%B0-%EC%B2%AB-%EB%85%BC%EB%AC%B8%EB%B6%80%ED%84%B0-%EC%B5%9C%EA%B7%BC-%EB%8F%99%ED%96%A5%EA%B

TaskResult(messages=[TextMessage(source='user', models_usage=None, metadata={}, content='최신 RAG 기술에 대한 동향 및 논문 리뷰', type='TextMessage'), ToolCallRequestEvent(source='Google_Search_Agent', models_usage=RequestUsage(prompt_tokens=122, completion_tokens=76), metadata={}, content=[FunctionCall(id='call_JAOwPSZHLvT5ves4U6jWSMF6', arguments='{"query": "최신 RAG 기술 동향", "num_results": 2, "max_chars": 500}', name='google_search'), FunctionCall(id='call_K0f4UfIxcsCD6SUxqclfdr68', arguments='{"query": "최신 RAG 논문 리뷰", "num_results": 2, "max_chars": 500}', name='google_search')], type='ToolCallRequestEvent'), ToolCallExecutionEvent(source='Google_Search_Agent', models_usage=None, metadata={}, content=[FunctionExecutionResult(content="[{'title': '2024 Year Of The RAG :: RAG가 주목 받는 이유와 미래 동향', 'link': 'https://www.skelterlabs.com/blog/2024-year-of-the-rag', 'snippet': 'RAG의 미래는 개인화와 확장성 그리고 정확성에 초점을 맞추며, 다양한 데이터를 처리하고 실시간으로 반응하는 더 똑똑한 시스템으로 발전할 것으로 전망됩니다. 그러나 ...', 'body': '2024 Year Of The RAG :: RAG