In [None]:
import requests
from typing import Type
from bs4 import BeautifulSoup
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.utilities import DuckDuckGoSearchAPIWrapper, WikipediaAPIWrapper

# === LLM Setup ===
# Shared chat model for the agent; the low temperature keeps sampling
# close to deterministic.
llm = ChatOpenAI(temperature=0.1)

# === Tool 1: Wikipedia Search ===
# Argument schema for the Wikipedia tool: one required string field.
# (Documented with comments rather than a docstring so the generated
# JSON schema is left untouched.)
class WikiSearchArgs(BaseModel):
    query: str = Field(
        ...,
        description="Search term for Wikipedia",
    )

# Agent tool that forwards a query to WikipediaAPIWrapper.
class WikipediaSearchTool(BaseTool):
    # Overrides of inherited pydantic fields carry explicit annotations:
    # pydantic v2 rejects un-annotated overrides at class-creation time,
    # and the annotated form is equally valid under pydantic v1.
    name: str = "WikipediaSearch"
    description: str = "Search Wikipedia and return relevant summary."
    args_schema: Type[WikiSearchArgs] = WikiSearchArgs

    def _run(self, query: str) -> str:
        """Run ``query`` through a fresh WikipediaAPIWrapper.

        Args:
            query: Search term to look up.

        Returns:
            Whatever ``WikipediaAPIWrapper.run`` returns for the query.
        """
        wiki = WikipediaAPIWrapper()
        return wiki.run(query)

# === Tool 2: DuckDuckGo Search ===
# Argument schema for the DuckDuckGo tool: one required string field.
# (Documented with comments rather than a docstring so the generated
# JSON schema is left untouched.)
class DuckSearchArgs(BaseModel):
    query: str = Field(
        ...,
        description="Search query for DuckDuckGo",
    )

# Agent tool that forwards a query to DuckDuckGoSearchAPIWrapper.
class DuckDuckGoSearchTool(BaseTool):
    # Overrides of inherited pydantic fields carry explicit annotations:
    # pydantic v2 rejects un-annotated overrides at class-creation time,
    # and the annotated form is equally valid under pydantic v1.
    name: str = "DuckDuckGoSearch"
    description: str = "Search DuckDuckGo and return URLs or snippets."
    args_schema: Type[DuckSearchArgs] = DuckSearchArgs

    def _run(self, query: str) -> str:
        """Run ``query`` through a fresh DuckDuckGoSearchAPIWrapper.

        Args:
            query: Search query text.

        Returns:
            Whatever ``DuckDuckGoSearchAPIWrapper.run`` returns for the query.
        """
        ddg = DuckDuckGoSearchAPIWrapper()
        return ddg.run(query)

# === Tool 3: Website Scraper ===
# Argument schema for the scraper tool: one required string field.
# (Documented with comments rather than a docstring so the generated
# JSON schema is left untouched.)
class ScrapeWebsiteArgs(BaseModel):
    url: str = Field(
        ...,
        description="A full URL of the website to scrape",
    )

# Agent tool that downloads a page and returns its human-visible text.
class WebsiteScraperTool(BaseTool):
    # Overrides of inherited pydantic fields carry explicit annotations:
    # pydantic v2 rejects un-annotated overrides at class-creation time,
    # and the annotated form is equally valid under pydantic v1.
    name: str = "WebsiteScraper"
    description: str = "Extract visible text from a given webpage URL"
    args_schema: Type[ScrapeWebsiteArgs] = ScrapeWebsiteArgs

    def _run(self, url: str) -> str:
        """Fetch ``url`` and return up to 5000 characters of visible text.

        Args:
            url: Address of the page to download.

        Returns:
            The page text with script/style content and blank lines
            removed, truncated to 5000 characters; or an
            ``"Error scraping ..."`` message if anything fails. Errors are
            returned rather than raised so a bad URL does not abort the
            agent run.
        """
        max_chars = 5000  # limit text to prevent token overflow
        try:
            response = requests.get(url, timeout=10)
            # Treat HTTP 4xx/5xx as failures instead of scraping the
            # server's error page as if it were the requested content.
            response.raise_for_status()

            soup = BeautifulSoup(response.text, "html.parser")
            # get_text() would otherwise include inline JavaScript and CSS,
            # which is not visible text.
            for hidden in soup(["script", "style", "noscript"]):
                hidden.decompose()

            # Drop empty lines and surrounding whitespace so the character
            # budget is spent on content rather than layout padding.
            stripped = (
                line.strip()
                for line in soup.get_text(separator="\n").splitlines()
            )
            text = "\n".join(line for line in stripped if line)
            return text[:max_chars]
        except Exception as e:
            # Deliberately broad: this is the tool boundary, and the agent
            # needs a readable message rather than a traceback.
            return f"Error scraping {url}: {e}"

# === Tool 4: Save to File ===
# Argument schema for the save tool: one required string field.
# (Documented with comments rather than a docstring so the generated
# JSON schema is left untouched.)
class SaveFileArgs(BaseModel):
    content: str = Field(
        ...,
        description="Text content to save",
    )

# Agent tool that writes the collected research to a fixed text file.
class SaveResearchTool(BaseTool):
    # Overrides of inherited pydantic fields carry explicit annotations:
    # pydantic v2 rejects un-annotated overrides at class-creation time,
    # and the annotated form is equally valid under pydantic v1.
    name: str = "SaveResearch"
    description: str = "Save research content to a .txt file"
    args_schema: Type[SaveFileArgs] = SaveFileArgs

    def _run(self, content: str) -> str:
        """Write ``content`` to ``xz_backdoor_research.txt``.

        The file is opened in ``"w"`` mode, so each call replaces whatever
        a previous call wrote.

        Args:
            content: Text to store.

        Returns:
            A confirmation message naming the file, or an error message if
            the file could not be written.
        """
        path = "xz_backdoor_research.txt"
        try:
            with open(path, "w", encoding="utf-8") as f:
                f.write(content)
        except OSError as e:
            # Report the failure to the agent instead of aborting the run.
            return f"Error saving research to {path}: {e}"
        return f"Research saved to {path}"
    
# Build an OpenAI-functions agent around the shared LLM and the four
# custom tools defined above.
agent = initialize_agent(
    [
        WikipediaSearchTool(),
        DuckDuckGoSearchTool(),
        WebsiteScraperTool(),
        SaveResearchTool(),
    ],
    llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
    handle_parsing_errors=True,
)

# Research task handed to the agent
prompt = "Research about the XZ backdoor. Use Wikipedia or DuckDuckGo. If you find websites, extract content. Then save everything to a .txt file."

agent.invoke(prompt)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `WikipediaSearch` with `{'query': 'XZ backdoor'}`


[0m[36;1m[1;3mPage: XZ Utils backdoor
Summary: In February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution through OpenSSH on the affected Linux system. The issue has been given the Common Vulnerabilities and Exposures number CVE-2024-3094 and has been assigned a CVSS score of 10.0, the highest possible score.
While xz is commonly present in most Linux distributions, at the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions. The backdoor was discovered by the software developer Andres Freund, who announced his findings on 29 March 2024.

{'input': 'Research about the XZ backdoor. Use Wikipedia or DuckDuckGo. If you find websites, extract content. Then save everything to a .txt file.',
 'output': 'I have researched about the XZ backdoor. Here is a summary of the information:\n\nIn February 2024, a malicious backdoor was introduced to the Linux build of the xz utility within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". The backdoor allows an attacker with a specific Ed448 private key to execute remote code through OpenSSH on the affected Linux system. The issue has been assigned the Common Vulnerabilities and Exposures number CVE-2024-3094 and has a CVSS score of 10.0.\n\nThe backdoor was discovered by software developer Andres Freund, who reported it after investigating a performance regression in Debian Sid. The attacker made efforts to obfuscate the code, and once the compromised version is incorporated into the operating system, it alters the behavior of OpenSSH\'s SSH serve