In [None]:
from typing import Any, Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.retrievers import WikipediaRetriever
from langchain_community.tools import DuckDuckGoSearchResults
from langchain.document_loaders import WebBaseLoader

# Chat model shared by the agent built further down. Only the sampling
# temperature is set; no model name is passed, so the library default applies.
llm = ChatOpenAI(
    temperature=0.1,
    # Uncomment the next line to pin a specific model.
    # model="gpt-4o"
    )

In [None]:
# Helper for saving tool output to disk

def save_the_file(docs):
    """Write ``docs`` to ``./file.txt``, replacing any previous content.

    Args:
        docs: Text to persist. It is written as-is, encoded as UTF-8.

    The file is opened in a ``with`` block so the handle is flushed and
    closed deterministically, including when the write raises.
    """
    with open("./file.txt", "w", encoding="utf-8") as out_file:
        out_file.write(docs)

# Tool built on WikipediaRetriever
def Wikipedia(keyword):
    """Fetch Wikipedia pages for ``keyword`` and return them as one string.

    A ``WikipediaRetriever`` is created with ``top_k_results=3`` and
    ``lang="ko"``; the ``page_content`` of every returned document is
    appended after a fixed "Wikipedia Result" header, each followed by a
    space and a blank line.

    Args:
        keyword: Search term handed to the retriever.

    Returns:
        The header plus the concatenated page contents.
    """
    wiki = WikipediaRetriever(top_k_results=3, lang="ko")
    pages = wiki.get_relevant_documents(keyword)
    body = "".join(f"{page.page_content} \n\n" for page in pages)
    return "Wikipedia Result\n\n" + body

# Argument schema for WikipediaTool: one required string field, `keyword`.
# Documented with comments rather than a docstring so the generated schema
# stays unchanged.
class WikipediaToolArgsSchema(BaseModel):
    # The description string is runtime text attached to the field's schema.
    keyword: str = Field(description="'Search Keyword' to Use for Wikipedia Search")

class WikipediaTool(BaseTool):
    """Agent tool that searches Wikipedia and saves the result text to disk.

    The search is delegated to ``Wikipedia`` and the text it returns is
    written out with ``save_the_file`` before being handed back to the agent.
    """

    # Field overrides carry explicit annotations: pydantic v2 refuses to
    # override an inherited field with a bare attribute, while annotated
    # overrides are accepted by both pydantic v1 and v2.
    name: str = "Wikipedia"
    description: str = """
    A tool used by Wikipedia to search for specific keywords. Use 'keyword' as a parameter.
    """
    args_schema: Type[WikipediaToolArgsSchema] = WikipediaToolArgsSchema

    def _run(self, keyword: str) -> str:
        """Search Wikipedia for ``keyword``, persist the text, and return it.

        Args:
            keyword: Search term supplied by the agent.

        Returns:
            The text produced by ``Wikipedia(keyword)``.
        """
        result_text = Wikipedia(keyword)
        # Side effect: every call overwrites the saved file with this result.
        save_the_file(result_text)
        return result_text
    

# Tool built on DuckDuckGoSearchAPIWrapper

# NOTE: this helper was meant to extract the contents of the sites that
# DuckDuckGo found, but every attempt failed because the loaded documents
# were too large. For that reason the tool only saves the short search output
# together with its sources.
def parse_DuckDuckGo(docs):
    """Load the page behind the first ``https://`` link found in ``docs``.

    Args:
        docs: Search-result text expected to contain at least one
            ``https://`` URL.

    Returns:
        A list with the ``page_content`` of each document that
        ``WebBaseLoader`` loads from the first URL, or an empty list when
        ``docs`` contains no ``https://`` link (previously an ``IndexError``).
    """
    _, scheme, remainder = docs.partition("https://")
    if not scheme:
        # No link in the text: nothing to load.
        return []

    # Keep only the text up to the next link, then cut it at the first
    # ")\n" or "," so that what remains is treated as the URL.
    first_link = remainder.split("https://", 1)[0]
    first_link = first_link.split(")\n")[0].split(",")[0]
    url = f"https://{first_link}"

    loader = WebBaseLoader(web_path=url)
    return [document.page_content for document in loader.load()]

# Argument schema for DuckDuckGoSearchTool: one required string field, `query`.
# Documented with comments rather than a docstring so the generated schema
# stays unchanged.
class DuckDuckGoSearchToolArgsSchema(BaseModel):
    # The description string is runtime text attached to the field's schema.
    query: str = Field(
        description="The query you will search for.Example query: WW2"
    )

class DuckDuckGoSearchTool(BaseTool):
    """Agent tool that runs a DuckDuckGo search and saves the output to disk.

    The query is passed to ``DuckDuckGoSearchResults`` and the text it
    returns is written out with ``save_the_file`` before being handed back
    to the agent.
    """

    # Field overrides carry explicit annotations: pydantic v2 refuses to
    # override an inherited field with a bare attribute, while annotated
    # overrides are accepted by both pydantic v1 and v2.
    name: str = "DuckDuckGoSearchTool"
    description: str = """
    A tool that uses DuckDuckGo to search for specific keywords and return the results to the user.
    """
    args_schema: Type[
        DuckDuckGoSearchToolArgsSchema
    ] = DuckDuckGoSearchToolArgsSchema

    def _run(self, query: str) -> str:
        """Search DuckDuckGo for ``query``, persist the output, and return it.

        Args:
            query: Search text supplied by the agent.

        Returns:
            Whatever ``DuckDuckGoSearchResults().run(query)`` produced.
        """
        search = DuckDuckGoSearchResults()
        search_output = search.run(query)
        # Side effect: every call overwrites the saved file with this result.
        save_the_file(search_output)
        return search_output


# Build an OpenAI-functions agent around the shared `llm` and register the two
# custom tools (Wikipedia and DuckDuckGo search).
agent = initialize_agent(
    llm=llm,
    verbose=True,
    agent=AgentType.OPENAI_FUNCTIONS,
    # NOTE(review): presumably lets the executor recover from malformed model
    # output instead of raising — confirm against the LangChain docs.
    handle_parsing_errors=True,
    tools=[
        WikipediaTool(),
        DuckDuckGoSearchTool()
    ],
)

# The prompt names Wikipedia explicitly.
prompt = "Tell me about WW2 using Wikipedia"

# Runs the agent. Each tool call also overwrites ./file.txt via save_the_file.
agent.invoke(prompt)

In [None]:
# Scratch cell: run a DuckDuckGo search directly, outside the agent, and
# collect the output under a "DuckDuckGo" header.
rs = "DuckDuckGo\n\n"
ddg = DuckDuckGoSearchResults()
keyword = "ww2"
data_list = ddg.run(f"what is {keyword}")
if isinstance(data_list, str):
    # run() returned a single formatted string; looping over it would walk it
    # character by character, so append it once as a whole.
    rs += f"{data_list} \n\n"
else:
    # Still handle a sequence of results, one entry per item.
    for page_content in data_list:
        rs += f"{page_content} \n\n"
# Last expression of the cell: display the raw search output.
data_list