In [38]:
from bs4 import BeautifulSoup
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from typing import Type
from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain.document_loaders import AsyncChromiumLoader
from langchain.document_transformers import Html2TextTransformer
import os
import requests

# Chat model passed to initialize_agent() further down: gpt-3.5-turbo with
# temperature 0.1.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.1,
)

# Argument schema referenced by WikipediaSearchTool.args_schema: one string
# field holding the search query.
class WikipediaSearchToolArgsSchema(BaseModel):
    # Search text; the Field description is the hint attached to this argument.
    query: str = Field(description="The Query you will search for")
    

class WikipediaSearchTool(BaseTool):
    """Agent tool that runs a query through LangChain's Wikipedia wrapper."""

    name: str = "WikipediaSearchTool"
    description: str = """
    Use this tool to search in Wikipedia.
    It takes a query as an argument.
    Example query: Apple compnay's products
    """
    args_schema: Type[WikipediaSearchToolArgsSchema] = WikipediaSearchToolArgsSchema

    def _run(self, query):
        """Search Wikipedia for ``query`` and return what the wrapper's run() yields.

        A fresh WikipediaQueryRun (backed by a default WikipediaAPIWrapper) is
        built on every call.
        """
        return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()).run(query)


# Argument schema referenced by DuckDuckGoSearchTool.args_schema: one string
# field holding the search query.
class DuckDuckGoSearchToolArgsSchema(BaseModel):
    # Search text; the Field description is the hint attached to this argument.
    query: str = Field(description="The Query you will search for")
    

class DuckDuckGoSearchTool(BaseTool):
    """Agent tool that runs a query through LangChain's DuckDuckGo wrapper."""

    name: str = "DuckDuckGoSearchTool"
    description: str = """
    Use this tool to search in DuckDuckGo.
    It takes a query as an argument.
    Example query: Apple compnay's products
    """
    args_schema: Type[DuckDuckGoSearchToolArgsSchema] = DuckDuckGoSearchToolArgsSchema

    def _run(self, query):
        """Search DuckDuckGo for ``query`` and return what the wrapper's run() yields.

        A new DuckDuckGoSearchAPIWrapper is created on every call.
        """
        return DuckDuckGoSearchAPIWrapper().run(query)


# Argument schema referenced by WebScrapTool.args_schema: one string field
# holding the address of the page to fetch.
class WebScrapToolArgsSchema(BaseModel):
    # Page address; the Field description carries an example value.
    url: str = Field(description="URL of the webpage to scrape. Example: https://www.apple.com")


# Module-level Html2TextTransformer instance. NOTE(review): nothing in the
# visible code references it -- confirm whether it is still needed.
html2text = Html2TextTransformer()

class WebScrapTool(BaseTool):
    """Agent tool that downloads a web page and returns its readable text."""

    name: str = "WebScrapTool"
    description: str = """
    Use this to scrape content from a webpage.
    You should enter the URL of the webpage you want to scrape.
    """
    args_schema: Type[WebScrapToolArgsSchema] = WebScrapToolArgsSchema

    def _run(self, url):
        """Fetch ``url`` and return its main text, one non-empty line per row.

        Args:
            url: Address of the page to download.

        Returns:
            The text of the first ``<article>``, else ``<main>``, else
            ``<body>``, else the whole document, with ``<header>``,
            ``<footer>`` and ``<nav>`` elements removed. If the request
            fails, a short failure message is returned instead of raising,
            so the agent can read the error and carry on.
        """
        headers = {"User-Agent": "Mozilla/5.0 (compatible; WebPageTextTool/1.0)"}

        # Keep the try body limited to the network call: only it can raise
        # the request errors handled below.
        try:
            # Send the custom User-Agent (it was previously built but never
            # used) and bound the wait so a stalled server cannot hang the agent.
            r = requests.get(url, headers=headers, timeout=15)
            r.raise_for_status()
        except requests.exceptions.HTTPError as e:
            status_code = e.response.status_code
            return f"Page collection failed due to a {status_code} response status"
        except requests.exceptions.RequestException as e:
            # Connection errors, timeouts, malformed URLs, etc.
            return f"Page collection failed: {e}"

        # Name the parser explicitly so results do not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(r.text, "html.parser")

        # Drop page chrome that is not part of the content.
        for tag in soup(["header", "footer", "nav"]):
            tag.decompose()

        main = soup.find("article") or soup.find("main") or soup.body or soup
        text = main.get_text(separator="\n", strip=True)

        # Discard whitespace-only lines and trim the rest.
        lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
        return "\n".join(lines)

# Argument schema referenced by FileStoreTool.args_schema: the target file
# name and the text to put in it.
class FileStoreToolArgsSchema(BaseModel):
    # Name of the file to create; the Field description carries an example.
    filename: str = Field(description="Filename to create. Example: sample.txt")
    # Text written to the file.
    content: str = Field(description="Content to save to the file.")

class FileStoreTool(BaseTool):
    """Agent tool that writes text to a file relative to the working directory."""

    name: str = "FileStoreTool"
    description: str = """
    Use this to save content to a file.
    You should enter the filename and the content to save.
    """
    args_schema: Type[FileStoreToolArgsSchema] = FileStoreToolArgsSchema

    def _run(self, filename, content):
        """Write ``content`` plus a trailing newline to ``./<filename>``.

        Args:
            filename: Name of the file to create; an existing file is overwritten.
            content: Text to store.

        Returns:
            A confirmation string of the form ``"./<filename> created"``.
        """
        path = f"./{filename}"
        # NOTE(review): filename is used as given, so values containing
        # "../" or directory parts can write outside the working directory.
        # Explicit UTF-8 keeps non-ASCII content from failing on platforms
        # whose default encoding is narrower. The with-block closes the file,
        # so no separate close() call is needed.
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
            f.write("\n")
        return f"{path} created"

# Assemble the agent: the four custom tools defined above, driven by `llm`
# using the OPENAI_FUNCTIONS agent type.
agent = initialize_agent(
    tools=[
        WikipediaSearchTool(),
        DuckDuckGoSearchTool(),
        WebScrapTool(),
        FileStoreTool(),
    ],
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    verbose=True,
)

# Instruction given to the agent; adjacent literals are concatenated into a
# single string.
prompt = (
    "'Research about the XZ backdoor', the agent should try to search in "
    "Wikipedia or DuckDuckGo, if it finds a website in DuckDuckGo it should "
    "enter the website and extract it's content, then it should finish by "
    "saving the research to a .txt file."
)

# Run the agent on the prompt; the "output" entry of the returned value is
# read below.
result = agent.invoke(prompt)

from IPython.display import display, Markdown

# Render the agent's final answer as Markdown in the notebook.
display(Markdown(result["output"]))




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2

I have researched about the XZ backdoor and saved the information to a file. You can download the research from the following link: [Download XZ backdoor research](sandbox:/XZ_backdoor_research.txt)