In [5]:
import requests
from bs4 import BeautifulSoup
from typing import Any, Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.retrievers import WikipediaRetriever
from langchain.document_loaders import WebBaseLoader

# LLM configuration (OpenAI chat model through LangChain).
# Only the sampling temperature is set (0.1); every other option, including the
# model name, is left at the library default.
llm = ChatOpenAI(
    temperature=0.1,
)

# Persist the research output to a text file
def save_the_file(docs, filename="research_result.txt"):
    """Write the research text to a UTF-8 file and report where it went.

    Args:
        docs: The complete research text to store (a single string, despite
            the plural name).
        filename: Destination path. Defaults to ``research_result.txt`` in the
            current working directory, the value that used to be hard-coded.

    Returns:
        A confirmation message naming the file that was written.
    """
    # Mode "w" truncates: each call replaces any earlier research output.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(docs)
    return f"Research saved to {filename}"

# Wikipedia search
def search_wikipedia(keyword, lang="ko", top_k_results=3):
    """Fetch Wikipedia articles for ``keyword`` and return them as one string.

    Args:
        keyword: Search term handed to the retriever.
        lang: Wikipedia language edition to query. Defaults to ``"ko"``, the
            value that used to be hard-coded.
        top_k_results: Maximum number of articles to request (default 3, as
            before).

    Returns:
        A string starting with the ``"Wikipedia Results"`` header, followed by
        the content of every retrieved page, each terminated by a blank line.
        If nothing is retrieved, only the header is returned.
    """
    retriever = WikipediaRetriever(top_k_results=top_k_results, lang=lang)
    documents = retriever.get_relevant_documents(keyword)

    # Join once instead of growing the string with += inside a loop.
    body = "".join(f"{doc.page_content}\n\n" for doc in documents)
    return f"Wikipedia Results\n\n{body}"

# Wikipedia tool definition.
# Argument schema used by WikipediaTool: one required string field, `keyword`.
# The Field description is the text attached to that argument in the schema.
class WikipediaToolArgsSchema(BaseModel):
    keyword: str = Field(description="Search keyword for Wikipedia")

# LangChain tool that exposes search_wikipedia() to the agent.
class WikipediaTool(BaseTool):
    # `name` and `description` override annotated fields of BaseTool, so they
    # carry explicit annotations: pydantic v2 rejects a bare assignment here,
    # and pydantic v1 accepts either form.
    name: str = "Wikipedia"
    description: str = """
    A tool to search Wikipedia for a specific keyword. The 'keyword' argument is required.
    """
    args_schema: Type[WikipediaToolArgsSchema] = WikipediaToolArgsSchema

    def _run(self, keyword: str) -> str:
        """Return the formatted Wikipedia results for ``keyword``."""
        return search_wikipedia(keyword)

# DuckDuckGo search (plain HTTP via requests) that collects result URLs
def _resolve_duckduckgo_href(href):
    """Turn a DuckDuckGo result href into a URL that can be fetched directly.

    The HTML endpoint usually wraps results in a redirect such as
    ``//duckduckgo.com/l/?uddg=<percent-encoded target>&rut=...``; the real
    destination is the ``uddg`` query parameter. Anything that is not such a
    redirect is returned unchanged, except that a scheme-relative ``//host/..``
    link gets an ``https:`` prefix.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(href)
    target = parse_qs(parsed.query).get("uddg")
    if target and parsed.netloc.endswith("duckduckgo.com"):
        # parse_qs has already percent-decoded the value.
        return target[0]
    if href.startswith("//"):
        return f"https:{href}"
    return href


def search_duckduckgo(query, timeout=10):
    """Search DuckDuckGo's HTML endpoint and collect the result links.

    Args:
        query: Search text sent as the ``q`` form field.
        timeout: Seconds to wait for DuckDuckGo before ``requests`` raises
            (default 10). Without it a stalled connection would block forever.

    Returns:
        A ``(results, urls)`` tuple: ``urls`` is the list of result links in
        page order and ``results`` is the same links as text, one per line,
        each followed by a newline. Both are empty when no result anchors are
        found.
    """
    url = "https://html.duckduckgo.com/html/"
    params = {'q': query}
    headers = {'User-Agent': 'Mozilla/5.0'}

    response = requests.post(url, data=params, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Result titles are the anchors carrying the "result__a" class.
    anchors = soup.find_all('a', {'class': 'result__a'}, href=True)
    urls = [_resolve_duckduckgo_href(a['href']) for a in anchors]
    results = "".join(f"{link}\n" for link in urls)

    return results, urls

# Extract the readable text from a web page
def extract_website_content(url, timeout=10):
    """Download ``url`` and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up (default 10).

    Returns:
        The paragraph texts joined by newlines. On any failure (invalid URL,
        network error, timeout, HTTP error status, parse error) a
        ``"Failed to extract content from <url>: <reason>"`` message is
        returned instead of raising, so the caller always gets text back.
    """
    try:
        # Same browser-like User-Agent as the search request.
        response = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=timeout,
        )
        # Report 4xx/5xx as a failure rather than scraping the error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Keep only paragraph text.
        paragraphs = soup.find_all('p')
        return "\n".join(para.get_text() for para in paragraphs)
    except Exception as e:
        # Deliberately broad: this is a best-effort helper whose result is fed
        # back to the agent as plain text.
        return f"Failed to extract content from {url}: {e}"

# DuckDuckGo tool definition.
# Argument schema used by DuckDuckGoSearchTool: one required string field, `query`.
# The Field description is the text attached to that argument in the schema.
class DuckDuckGoSearchToolArgsSchema(BaseModel):
    query: str = Field(description="Query for DuckDuckGo search")

# LangChain tool that searches DuckDuckGo and scrapes the first hit.
class DuckDuckGoSearchTool(BaseTool):
    # `name` and `description` override annotated fields of BaseTool, so they
    # carry explicit annotations: pydantic v2 rejects a bare assignment here,
    # and pydantic v1 accepts either form.
    name: str = "DuckDuckGoSearchTool"
    description: str = """
    A tool to search DuckDuckGo for a specific query and return the results.
    """
    args_schema: Type[DuckDuckGoSearchToolArgsSchema] = DuckDuckGoSearchToolArgsSchema

    def _run(self, query: str) -> str:
        """Search for ``query`` and return the links plus the first page's text.

        Returns the fixed "No results found in DuckDuckGo" message when the
        search yields no links.
        """
        results, urls = search_duckduckgo(query)
        if not urls:
            return "No results found in DuckDuckGo"

        # Only the first result is downloaded and turned into text.
        first_url = urls[0]
        content = extract_website_content(first_url)
        return f"DuckDuckGo Search Results:\n{results}\n\nExtracted Content from {first_url}:\n\n{content}"

# Agent initialization: an OpenAI-functions agent wired to the two tools above.
# The tool order matters: run_research() picks tools by position
# (index 0 = Wikipedia, index 1 = DuckDuckGo).
# NOTE(review): handle_parsing_errors=True presumably lets the executor recover
# from malformed model output instead of raising; confirm against the
# installed LangChain version.
agent = initialize_agent(
    llm=llm,
    verbose=True,
    agent=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    tools=[
        WikipediaTool(),
        DuckDuckGoSearchTool(),
    ],
)

# Research driver
def run_research(query):
    """Collect Wikipedia and DuckDuckGo results for ``query`` and save them.

    Both tools are invoked directly and in a fixed order; the LLM agent is not
    asked to plan or summarize anything here.

    Args:
        query: Search text handed unchanged to both tools.

    Returns:
        The confirmation message produced by ``save_the_file``.
    """
    # agent.tools keeps the order given to initialize_agent:
    # index 0 is the Wikipedia tool, index 1 the DuckDuckGo tool.
    print("Searching Wikipedia...")
    # Go through the public BaseTool.run() entry point, not the private _run() hook.
    wikipedia_result = agent.tools[0].run(query)

    # DuckDuckGo search plus text extraction from the first hit.
    print("Searching DuckDuckGo...")
    duckduckgo_result = agent.tools[1].run(query)

    # Store both sections in one file, separated by a blank line, and pass the
    # status message on to the caller instead of dropping it.
    research_content = f"{wikipedia_result}\n\n{duckduckgo_result}"
    return save_the_file(research_content)

# Run the research pipeline: "Research about the XZ backdoor"
run_research("XZ backdoor")

Searching Wikipedia...
Searching DuckDuckGo...
