In [8]:
from langchain.schema import SystemMessage
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from langchain.tools import WikipediaQueryRun, DuckDuckGoSearchRun
from langchain.utilities import WikipediaAPIWrapper
from langchain.document_loaders import WebBaseLoader
from typing import Type
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
import os


# Argument schema for the Wikipedia search tool: one required string field.
# (Kept as a `#` comment rather than a docstring so the generated pydantic
# schema carries no extra description.)
class WikipediaSearchToolArgsSchema(BaseModel):
    # `...` marks the field as required, exactly as omitting a default does.
    query: str = Field(
        ...,
        description="Search query for Wikipedia.",
    )


class WikipediaSearchTool(BaseTool):
    """Tool that looks up a query on Wikipedia through ``WikipediaQueryRun``."""

    # ``name`` and ``description`` hold string *instances*, so they are
    # annotated ``str``. ``Type[str]`` would mean "the class ``str`` itself".
    name: str = "WikipediaSearchTool"
    description: str = "Searches Wikipedia for a given query."
    # This one really is a class object, hence ``Type[...]``.
    args_schema: Type[WikipediaSearchToolArgsSchema] = (
        WikipediaSearchToolArgsSchema
    )

    def _run(self, query: str) -> str:
        """Run a Wikipedia search for ``query``.

        Args:
            query: Search text forwarded to ``WikipediaQueryRun.run``.

        Returns:
            The result produced by ``WikipediaQueryRun.run``.
        """
        # A fresh wrapper is built on every call; no state is kept on the tool.
        wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
        return wiki.run(query)


# Argument schema for the DuckDuckGo search tool: one required string field.
# (Kept as a `#` comment rather than a docstring so the generated pydantic
# schema carries no extra description.)
class DuckDuckGoSearchToolArgsSchema(BaseModel):
    # `...` marks the field as required, exactly as omitting a default does.
    query: str = Field(
        ...,
        description="Search query for DuckDuckGo.",
    )


class DuckDuckGoSearchTool(BaseTool):
    """Tool that runs a web search through ``DuckDuckGoSearchRun``."""

    # ``name`` and ``description`` hold string *instances*, so they are
    # annotated ``str``. ``Type[str]`` would mean "the class ``str`` itself".
    name: str = "DuckDuckGoSearchTool"
    description: str = "Searches DuckDuckGo for a given query."
    # This one really is a class object, hence ``Type[...]``.
    args_schema: Type[DuckDuckGoSearchToolArgsSchema] = (
        DuckDuckGoSearchToolArgsSchema
    )

    def _run(self, query: str) -> str:
        """Run a DuckDuckGo search for ``query``.

        Args:
            query: Search text forwarded to ``DuckDuckGoSearchRun.run``.

        Returns:
            The result produced by ``DuckDuckGoSearchRun.run``.
        """
        # A fresh search runner is built on every call; the tool is stateless.
        ddg = DuckDuckGoSearchRun()
        return ddg.run(query)


# Argument schema for the web content extractor: one required URL string.
# (Kept as a `#` comment rather than a docstring so the generated pydantic
# schema carries no extra description.)
class WebContentExtractorArgsSchema(BaseModel):
    # `...` marks the field as required, exactly as omitting a default does.
    url: str = Field(
        ...,
        description="URL of the website to extract content from.",
    )


class WebContentExtractorTool(BaseTool):
    """Tool that loads a web page with ``WebBaseLoader`` and returns its text."""

    # ``name`` and ``description`` hold string *instances*, so they are
    # annotated ``str``. ``Type[str]`` would mean "the class ``str`` itself".
    name: str = "WebContentExtractorTool"
    description: str = "Extracts text content from a given website URL."
    # This one really is a class object, hence ``Type[...]``.
    args_schema: Type[WebContentExtractorArgsSchema] = (
        WebContentExtractorArgsSchema
    )

    def _run(self, url: str) -> str:
        """Load ``url`` and return the text of every loaded document.

        Args:
            url: Address handed to ``WebBaseLoader``.

        Returns:
            The ``page_content`` of each loaded document, joined by newlines.
        """
        documents = WebBaseLoader(url).load()
        # A generator is enough here; no intermediate list is needed for join.
        return "\n".join(doc.page_content for doc in documents)


# Argument schema for the save-to-file tool: the text to write and the name
# of the file to write it to, both required strings.
# (Kept as a `#` comment rather than a docstring so the generated pydantic
# schema carries no extra description.)
class SaveToFileArgsSchema(BaseModel):
    # `...` marks each field as required, exactly as omitting a default does.
    content: str = Field(
        ...,
        description="Content to save to a file.",
    )
    filename: str = Field(
        ...,
        description="Filename to save the content to.",
    )


class SaveToFileTool(BaseTool):
    """Tool that writes text content to a file inside ``output_files``."""

    # ``name`` and ``description`` hold string *instances*, so they are
    # annotated ``str``. ``Type[str]`` would mean "the class ``str`` itself".
    name: str = "SaveToFileTool"
    description: str = "Saves content to a .txt file."
    # This one really is a class object, hence ``Type[...]``.
    args_schema: Type[SaveToFileArgsSchema] = SaveToFileArgsSchema

    def _run(self, content: str, filename: str = "output.txt") -> str:
        """Write ``content`` to ``output_files/<filename>`` as UTF-8 text.

        Args:
            content: Text to write; any existing file is overwritten.
            filename: Requested file name. Only its final path component is
                used, so the file always lands directly in ``output_files``.

        Returns:
            A confirmation message containing the path that was written.
        """
        target_dir = "output_files"
        os.makedirs(target_dir, exist_ok=True)

        # The file name is chosen by the model, so treat it as untrusted:
        # drop any directory part ("../x", "/etc/x") to keep writes inside
        # target_dir, and fall back to the default when nothing usable remains.
        safe_name = os.path.basename(filename)
        if safe_name in ("", ".", ".."):
            safe_name = "output.txt"

        save_path = os.path.join(target_dir, safe_name)
        with open(save_path, "w", encoding="utf-8") as file:
            file.write(content)
        return f"Content saved to {save_path}"


# Chat model that drives the agent; a low temperature keeps output focused.
llm = ChatOpenAI(temperature=0.1, model_name="gpt-4o-mini")

# OpenAI-functions agent with the four custom tools defined in this file.
agent = initialize_agent(
    llm=llm,
    verbose=True,
    agent=AgentType.OPENAI_FUNCTIONS,
    tools=[
        WikipediaSearchTool(),
        DuckDuckGoSearchTool(),
        WebContentExtractorTool(),
        SaveToFileTool(),
    ],
    agent_kwargs={
        # The stray closing double quote that used to end the last sentence
        # was a typo that leaked into the prompt text; it has been removed.
        "system_message": SystemMessage(
            content="""
            You are a research assistant. Your task is to gather information from various sources
            like Wikipedia and DuckDuckGo. If a relevant website is found, extract its content.
            Finally, save all gathered data into a text file.
        """
        )
    },
)


# Run one research request through the agent and print what invoke returns.
result = agent.invoke("Research about the XZ backdoor")
print(result)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution capabilities on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2024