<!-- 새로운 Jupyter notebook에서 리서치 AI 에이전트를 만들고 커스텀 도구를 부여합니다.
에이전트는 다음 작업을 수행할 수 있어야 합니다:
Wikipedia에서 검색
DuckDuckGo에서 검색
웹사이트의 텍스트를 스크랩하고 추출합니다.
리서치 결과를 .txt 파일에 저장하기
다음 쿼리로 에이전트를 실행합니다: "Research about the XZ backdoor". 이 쿼리로 실행하면 에이전트는 Wikipedia 또는 DuckDuckGo에서 검색을 시도하고, DuckDuckGo에서 웹사이트를 찾으면 해당 웹사이트에 들어가서 콘텐츠를 추출한 다음, 조사 내용을 .txt 파일에 저장하는 것으로 완료해야 합니다.
https://nomadcoders.co/c/gpt-challenge/lobby
-->

<!-- 새로운 Jupyter notebook에서 리서치 AI 에이전트를 만들고 커스텀 도구를 부여합니다.
에이전트는 다음 작업을 수행할 수 있어야 합니다:
Wikipedia에서 검색
DuckDuckGo에서 검색
웹사이트의 텍스트를 스크랩하고 추출합니다.
리서치 결과를 .txt 파일에 저장하기
다음 쿼리로 에이전트를 실행합니다: "Research about the XZ backdoor". 이 쿼리로 실행하면 에이전트는 Wikipedia 또는 DuckDuckGo에서 검색을 시도하고, DuckDuckGo에서 웹사이트를 찾으면 해당 웹사이트에 들어가서 콘텐츠를 추출한 다음, 조사 내용을 .txt 파일에 저장하는 것으로 완료해야 합니다. -->

In [23]:
from langchain.utilities import WikipediaAPIWrapper, DuckDuckGoSearchAPIWrapper
from langchain.tools import StructuredTool, Tool, BaseTool
import wikipedia
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from pydantic import BaseModel, Field
from typing import Any, Type

# wikipedia.set_lang("en")
# page = wikipedia.page("java 8")
# full_content = page.content
# print(full_content)

# Single chat model instance (temperature 0.1) used both to drive the agent
# and inside SummarizeSearchResultsTool._run.
llm = ChatOpenAI(temperature=0.1)
# Argument schema for SearchDataFromWebSiteTool: one free-text search query.
# NOTE(review): documented with comments instead of a class docstring because
# pydantic is expected to copy a model docstring into the generated schema
# description, which could change what the agent sees — confirm for the
# installed pydantic version.
class SearchDataFromWebSiteSchema(BaseModel):
    query: str = Field(description="The query you will search for.Example query: Research about the XZ backdoor")

class SearchDataFromWebSiteTool(BaseTool):
    """Agent tool that sends one query to DuckDuckGo and to Wikipedia.

    Both raw result strings are returned together in one labelled text
    block; no summarising or filtering happens here.
    """

    name = "SearchDataFromWebSiteTool"
    description = """ 
        Use both Wikipedia and DuckDuckGoSearch tools to gather information. Provide a query as an argument, and include the retrieved content from both tools exactly as found, without any modifications or summarization.
    """
    args_schema: Type[SearchDataFromWebSiteSchema] = SearchDataFromWebSiteSchema

    def _run(self, query):
        """Search both sources for ``query`` and return the combined text.

        Args:
            query: Search phrase; its spaces are replaced by underscores to
                form the ``Title`` line of the result.

        Returns:
            A string holding the title, then the DuckDuckGo result, then the
            Wikipedia result.
        """
        # DuckDuckGo is queried first, Wikipedia second.
        duck_text = DuckDuckGoSearchAPIWrapper().run(query)
        wiki_text = WikipediaAPIWrapper().run(query)
        underscored = query.replace(" ", "_")
        return (
            f"Title:{underscored} \n\n"
            f"DuckDuckGoSearch Result : {duck_text}\n\n"
            f" Wikipedia Result: {wiki_text}"
        )

# Argument schema for SummarizeSearchResultsTool: the combined search text in
# the "Title / DuckDuckGoSearch Result / Wikipedia Result" layout.
# NOTE(review): comments are used instead of a class docstring because
# pydantic is expected to copy a model docstring into the generated schema
# description — confirm for the installed pydantic version.
class SummarizeSearchResultsSchema(BaseModel):
    content: str = Field(description="Results of Searching. Example: Title: ... \n\n DuckDuckGoSearch Result : .... \n\n Wikipedia Result: ...")

class SummarizeSearchResultsTool(BaseTool):
    """Agent tool that merges the two search results into one article.

    The merge is done by the module-level ``llm``.
    """

    name="SummarizeSearchResultsTool"
    description=""" 
       Combine results from DuckDuckGoSearch and Wikipedia into a one article. 
       Ensure the article has both results meanings. do not delete any of meaningful content. 
       mix distinct and important information from both sources without redundant repetition. 
       blending them to form a unified and comprehensive explanation.
       And Leave Title the way they are. don't change.

       Example
       title : XZ_backdoor
       result : The XZ backdoor incident ...
    """
    args_schema: Type[SummarizeSearchResultsSchema] = SummarizeSearchResultsSchema

    def _run(self, content):
        """Ask the LLM to merge ``content`` following the tool instructions.

        Args:
            content: Combined search text (title plus both search results).

        Returns:
            The text content of the LLM response.
        """
        # The description is only exposed to the agent for tool selection;
        # the model invoked here receives nothing but this prompt, so the
        # merge instructions have to be sent along with the search text.
        prompt = f"{self.description.strip()}\n\n{content}"
        response = llm.invoke(prompt)
        return response.content

# Argument schema for SaveToTxtTool: the file name stem and the text to write.
# NOTE(review): comments are used instead of a class docstring because
# pydantic is expected to copy a model docstring into the generated schema
# description — confirm for the installed pydantic version.
class SaveToTxtToolSchema(BaseModel):
    title: str = Field(
        description="the text file name. Example: XZ_backdoor",
    )
    result: str = Field(
        description="the search result content. Example: The XZ backdoor incident ...",
    )

class SaveToTxtTool(BaseTool):
    """Agent tool that writes the research text to ``<title>.txt``."""

    name="SaveToTxtTool"
    description=""" 
        saving result data exactly to text file
    """
    args_schema: Type[SaveToTxtToolSchema] = SaveToTxtToolSchema

    def _run(self, title, result):
        """Write ``result`` to ``{title}.txt``, overwriting any existing file.

        Args:
            title: File name without the ``.txt`` extension.
            result: Text to store in the file.

        Returns:
            A success message naming the file, or an error message if the
            write failed. Failures are reported as text rather than raised so
            the agent can read them as the tool observation.
        """
        try:
            # Explicit UTF-8: without it the locale default codec is used,
            # which may be unable to encode non-ASCII characters in the text.
            with open(f"{title}.txt", "w", encoding="utf-8") as file:
                file.write(result)
        except (OSError, ValueError) as e:
            # OSError: path/permission problems. ValueError covers encoding
            # errors and invalid file names such as embedded NUL bytes.
            return f"An error occurred: {str(e)}"
        return f"Text saved successfully to {title}.txt."

# Build an OpenAI-functions agent that can search, summarise and save to file.
agent = initialize_agent(
    llm=llm,
    verbose=True,
    agent=AgentType.OPENAI_FUNCTIONS,
    # The executor option is spelled with a plural "errors"; the singular
    # spelling used previously is not a recognised option, so it did not
    # enable parsing-error handling.
    handle_parsing_errors=True,
    tools=[
        SearchDataFromWebSiteTool(),
        SummarizeSearchResultsTool(),
        SaveToTxtTool(),
    ],
)

prompt = "Research about the XZ backdoor"

# Last expression of the cell: the notebook displays the returned dict.
agent.invoke(prompt)





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `SearchDataFromWebSiteTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mTitle:XZ_backdoor 

DuckDuckGoSearch Result : What does the backdoor do? Malicious code added to xz Utils versions 5.6.0 and 5.6.1 modified the way the software functions. The backdoor manipulated sshd, the executable file used to make remote ... The xz-utils package, used by many Linux distributions, had a backdoor that could compromise sshd authentication. Learn how to check and update your Kali installation to avoid this issue. Now that the XZ backdoor has control over sshd, attackers can possess the encryption key used to make the SSH connections hide their malicious code in the SSH login certificate and use it as an entry point to execute various malicious actions on the compromised device - such as stealing files or installing malware (e.g., ransomware ... CVE-2024-3094 is a vulnerability discovered in the open-source library XZ Ut

{'input': 'Research about the XZ backdoor',
 'output': 'I have researched information about the XZ backdoor. The XZ backdoor is a malicious code added to the XZ Utils versions 5.6.0 and 5.6.1, compromising sshd authentication and enabling remote code execution on affected Linux systems. Backdoors are covert methods of bypassing normal authentication or encryption in a computer system. It is crucial to be aware of such vulnerabilities and take steps to protect systems. \n\nI have saved the detailed information in a text file named "XZ_backdoor.txt".'}