In [10]:
import requests
from bs4 import BeautifulSoup
from typing import Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper

llm = ChatOpenAI(temperature=0.1, model_name="gpt-3.5-turbo-1106")

class ResearchToolArgsSchema(BaseModel):
    query: str = Field(
        description="The query you will search for. Example query: Research about the XZ backdoor"
    )


class DuckDuckSearchTool(BaseTool):
    name = "DuckDuckSearchTool"
    description = """
    Use this tool to search for the provided query and provide one url from the searching.
    """
    args_schema: Type[
        ResearchToolArgsSchema
    ] = ResearchToolArgsSchema

    def _run(self, query):
        ddg = DuckDuckGoSearchAPIWrapper()
        return ddg.run(query)

class WikiSearchTool(BaseTool):
    name = "WikiSearchTool"
    description = """
    Use this tool to search for the provided query and provide one url from the searching.
    
    """
    args_schema: Type[
        ResearchToolArgsSchema
    ] = ResearchToolArgsSchema

    def _run(self, query):
        wiki = WikipediaAPIWrapper()
        return wiki.run(query)

class URLSearchToolArgsSchema(BaseModel):
    url: str = Field(
        description="The url you will search. Example url: https://www.techrepublic.com/article/xz-backdoor-linux/",
    )

class LoadWebsiteTool(BaseTool):

    name = "LoadWebsiteTool"
    description = """
    Use this tool to extract one website from either Wikipedia or DuckDuckGo.
    """
    args_schema: Type[URLSearchToolArgsSchema] = URLSearchToolArgsSchema

    def _run(self, url):
        print(url)
        try:
            response = requests.get(url)
            soup = BeautifulSoup(response.content, "html.parser")
            response.raise_for_status()  # Raises an error for bad responses
            title_text = soup.find('title').text if soup.find('title') else 'Title tag not found'
            return title_text
        except requests.RequestException as e:
            return f"Failed to load URL: {str(e)}"
        except ValueError:
            return "URL did not return a JSON response."
        
class SaveToTextFileToolArgsSchema(BaseModel):
    text: str = Field(
        description="the extracted content from the website will be saved to .txt file.",
    ) 
class SaveToTextFileTool(BaseTool):
    name = "SaveToTextFileTool"
    description = """
    Use this tool to save the text file.
    """
    args_schema: Type[SaveToTextFileToolArgsSchema] = SaveToTextFileToolArgsSchema

    def _run(self, text):
        file_path = f"MyResearch.txt"
        with open(file_path, 'w') as f:
            f.write(text)
        


agent = initialize_agent(
    llm=llm,
    verbose=True,
    agent=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    tools=[
            DuckDuckSearchTool(),
            WikiSearchTool(),
            LoadWebsiteTool(),
            SaveToTextFileTool(),
    ],
)

prompt = "Research about the XZ backdoor"
agent.invoke(prompt)





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `DuckDuckSearchTool` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mThe XZ Backdoor: Everything You Need to Know. Details are starting to emerge about a stunning supply chain attack that sent the open source software community reeling. On Friday, a lone Microsoft ... On March 28, 2024 a backdoor was identified in XZ Utils. This vulnerability, CVE-2024-3094 with a CVSS score of 10 is a result of a software supply chain compromise impacting versions 5.6.0 and 5.6.1 of XZ Utils. The U.S. Cybersecurity and Infrastructure Security Agency (CISA) has recommended organizations to downgrade to a previous non-compromised XZ Utils version. That operation matches the style of the XZ Utils backdoor far more than the cruder supply chain attacks of APT41 or Lazarus, by comparison. "It could very well be someone else," says Aitel. The first piece of the backdoor is the m4/build-to-host.m4 file. This file orchestrates minor mod

{'input': 'Research about the XZ backdoor',
 'output': 'The XZ backdoor is a supply chain attack that affected the open source software community. It was identified in XZ Utils, with the vulnerability CVE-2024-3094 and a CVSS score of 10. The U.S. Cybersecurity and Infrastructure Security Agency (CISA) has recommended organizations to downgrade to a previous non-compromised version of XZ Utils. The backdoor involves the m4/build-to-host.m4 file and the execution of the Stage 1 backdoor file, bad-3-corrupt_lzma2.xz. It is advised to carefully check the version of XZ Utils and revert to a previous known safe version, such as 5.4. If you would like to learn more details, I can provide you with a specific website to read further.'}