In [1]:
import requests
from bs4 import BeautifulSoup
from typing import Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper

# Shared chat model used by the agent below; low temperature keeps answers
# close to deterministic.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=0.1,
)

# Argument schema shared by the search tools: a single required free-text query.
# (Documented with comments rather than a docstring so the generated schema
# stays exactly as before.)
class ResearchToolArgsSchema(BaseModel):
    query: str = Field(
        ...,
        description=(
            "The query you will search for. Example query: Research about the XZ backdoor"
        ),
    )


class DuckDuckSearchTool(BaseTool):
    """Agent tool that forwards a query to ``DuckDuckGoSearchAPIWrapper``."""

    name = "DuckDuckSearchTool"
    description = """
    Use this tool to search for the provided query and provide one url from the searching.
    """
    args_schema: Type[ResearchToolArgsSchema] = ResearchToolArgsSchema

    def _run(self, query):
        """Run ``query`` through a fresh DuckDuckGo wrapper and return its output."""
        return DuckDuckGoSearchAPIWrapper().run(query)

class WikiSearchTool(BaseTool):
    """Agent tool that forwards a query to ``WikipediaAPIWrapper``."""

    name = "WikiSearchTool"
    # The description is what the model reads when choosing a tool, so it must
    # name Wikipedia explicitly; previously it was a verbatim copy of the
    # DuckDuckGo tool's description and the two tools were indistinguishable.
    description = """
    Use this tool to search Wikipedia for the provided query and provide one url from the searching.
    """
    args_schema: Type[
        ResearchToolArgsSchema
    ] = ResearchToolArgsSchema

    def _run(self, query):
        """Run ``query`` through a fresh Wikipedia wrapper and return its output."""
        wiki = WikipediaAPIWrapper()
        return wiki.run(query)

# Argument schema for the website loader: a single required URL string.
# (Documented with comments rather than a docstring so the generated schema
# stays exactly as before.)
class URLSearchToolArgsSchema(BaseModel):
    url: str = Field(
        ...,
        description=(
            "The url you will search. Example url: https://www.techrepublic.com/article/xz-backdoor-linux/"
        ),
    )

class LoadWebsiteTool(BaseTool):
    """Agent tool that downloads a page and returns the text of its <title> tag."""

    name = "LoadWebsiteTool"
    description = """
    Use this tool to extract one website from either Wikipedia or DuckDuckGo.
    """
    args_schema: Type[URLSearchToolArgsSchema] = URLSearchToolArgsSchema

    def _run(self, url):
        """Fetch ``url`` and return its page title.

        Args:
            url: Address of the page to download.

        Returns:
            The text of the page's ``<title>`` tag, ``'Title tag not found'``
            when the page has none, or a human-readable error string when the
            request or parsing fails. Errors are returned rather than raised
            so the agent receives them as an observation.
        """
        print(url)
        try:
            # A timeout keeps an unresponsive host from hanging the agent.
            response = requests.get(url, timeout=10)
            # Check the status before parsing so error pages are never parsed.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")
            title_tag = soup.find("title")
            if title_tag is None:
                return "Title tag not found"
            return title_tag.text
        except requests.RequestException as e:
            return f"Failed to load URL: {str(e)}"
        except ValueError as e:
            # No JSON is parsed here; report what actually went wrong.
            return f"Invalid URL or unparseable response: {str(e)}"
        
# Argument schema for the file-saving tool: the single required text payload.
# (Documented with comments rather than a docstring so the generated schema
# stays exactly as before.)
class SaveToTextFileToolArgsSchema(BaseModel):
    text: str = Field(
        ...,
        description=(
            "the extracted content from the website will be saved to .txt file."
        ),
    )
class SaveToTextFileTool(BaseTool):
    """Agent tool that writes the given text to ``MyResearch.txt``."""

    name = "SaveToTextFileTool"
    description = """
    Use this tool to save the text file.
    """
    args_schema: Type[SaveToTextFileToolArgsSchema] = SaveToTextFileToolArgsSchema

    def _run(self, text):
        """Overwrite ``MyResearch.txt`` in the working directory with ``text``.

        Args:
            text: Content to write to the file.

        Returns:
            A short confirmation message naming the file, so the agent gets a
            meaningful observation instead of ``None``.
        """
        file_path = "MyResearch.txt"
        # Explicit UTF-8 so non-ASCII research text is written the same way on
        # every platform instead of depending on the locale's default encoding.
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(text)
        return f"Research saved to {file_path}"
        


# Tools offered to the agent, in registration order.
research_tools = [
    DuckDuckSearchTool(),
    WikiSearchTool(),
    LoadWebsiteTool(),
    SaveToTextFileTool(),
]

# OpenAI-functions agent wired to the shared model and the tools above.
agent = initialize_agent(
    tools=research_tools,
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
    handle_parsing_errors=True,
)

prompt = "손"
agent.invoke(prompt)





[1m> Entering new AgentExecutor chain...[0m


Failed to post https://api.smith.langchain.com/runs in LangSmith API. HTTPError('403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs', '{"detail":"Forbidden"}')


[32;1m[1;3m"손"은 한국어로 "hand"를 의미합니다. 손에 대해 어떤 정보를 원하시나요? 예를 들어, 손의 구조, 손의 기능, 손의 건강 관리 등에 대해 설명할 수 있습니다.[0m

[1m> Finished chain.[0m


{'input': '손',
 'output': '"손"은 한국어로 "hand"를 의미합니다. 손에 대해 어떤 정보를 원하시나요? 예를 들어, 손의 구조, 손의 기능, 손의 건강 관리 등에 대해 설명할 수 있습니다.'}

Failed to patch https://api.smith.langchain.com/runs/10f142c1-c42b-42fa-8095-29ccc1248000 in LangSmith API. HTTPError('403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/10f142c1-c42b-42fa-8095-29ccc1248000', '{"detail":"Forbidden"}')
