In [1]:
from langchain.schema import SystemMessage
import requests
from bs4 import BeautifulSoup
from typing import Type
from langchain_openai import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain_community.utilities import WikipediaAPIWrapper
from langchain.tools import DuckDuckGoSearchResults
from dotenv import load_dotenv

# Pull settings from a local .env file into the process environment before
# the chat model client is constructed.
load_dotenv()

# Shared chat model handed to the agent; a low temperature keeps the
# sampling close to deterministic.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)

class SearchToolArgsSchema(BaseModel):
    # Argument schema used by the search tools in this file: one required,
    # free-text query string.
    query: str = Field(
        ...,
        description="The query you will search for",
    )

class DuckDuckGoSearchTool(BaseTool):
    # Agent tool that forwards a query to DuckDuckGo and hands back whatever
    # the search wrapper returns.
    #
    # `name` and `description` are explicitly annotated: pydantic v2-based
    # LangChain releases raise an error when a base-class field is overridden
    # by a non-annotated attribute, and the annotation is harmless on older
    # releases.
    name: str = "DuckDuckGoSearchTool"
    description: str = """
    Use this tool to gather and summarize relevant research, articles, and comments from a given search term.
    It takes a query as an argument.
    Example query: Research about the XZ backdoor
    """
    args_schema: Type[SearchToolArgsSchema] = SearchToolArgsSchema

    def _run(self, query: str) -> str:
        """Run ``query`` through ``DuckDuckGoSearchResults`` and return its output."""
        # A new wrapper is built on every call, so the tool holds no state.
        ddg = DuckDuckGoSearchResults()
        return ddg.run(query)
    
class WikipediaSearchTool(BaseTool):
    # Agent tool that looks a query up through the Wikipedia API wrapper and
    # hands back whatever the wrapper returns.
    #
    # `name` and `description` are explicitly annotated: pydantic v2-based
    # LangChain releases raise an error when a base-class field is overridden
    # by a non-annotated attribute, and the annotation is harmless on older
    # releases.
    name: str = "WikipediaSearchTool"
    description: str = """
    Use this tool to summarize the details on wikipedia with relevant keywords.
    Example query: Research about the XZ backdoor
    """
    args_schema: Type[SearchToolArgsSchema] = SearchToolArgsSchema

    def _run(self, query: str) -> str:
        """Run ``query`` through ``WikipediaAPIWrapper`` and return its output."""
        # A new wrapper is built on every call, so the tool holds no state.
        wiki = WikipediaAPIWrapper()
        return wiki.run(query)

class ResearchArgsSchema(BaseModel):
    # Argument schema for the page-overview tool: one required URL string.
    # The description below is sent to the model verbatim, so its wording and
    # whitespace are left untouched.
    url: str = Field(
        ...,
        description="""
        url of the research.
        Example: https://www.akamai.com/blog/security-research/critical-linux-backdoor-xz-utils-discovered-what-to-know,
        https://en.wikipedia.org/wiki/XZ_Utils_backdoor,
        https://news.ycombinator.com/item?id=39891607
        """,
    )

class ResearchOverviewTool(BaseTool):
    # Agent tool that downloads a web page and returns the text of its
    # headings (h1-h6) and paragraphs, one element per line.
    #
    # `name` and `description` are explicitly annotated: pydantic v2-based
    # LangChain releases raise an error when a base-class field is overridden
    # by a non-annotated attribute, and the annotation is harmless on older
    # releases.
    name: str = "ResearchOverview"
    description: str = """
    Use this to get an overview of the research.
    You should enter a url.
    """
    args_schema: Type[ResearchArgsSchema] = ResearchArgsSchema

    def _run(self, url: str) -> str:
        """Fetch ``url`` and return its heading and paragraph text.

        Args:
            url: Address of the page to download.

        Returns:
            The stripped text of every h1-h6 and p element joined with
            newlines, or a short failure message when the page cannot be
            fetched. A string is always returned so the agent receives a
            usable observation instead of ``None`` or an exception.
        """
        try:
            # Without a timeout an unresponsive host would block the agent
            # indefinitely.
            response = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            return f"Failed to fetch {url}: {exc}"

        if response.status_code != 200:
            return f"Failed to fetch {url}: HTTP status {response.status_code}"

        soup = BeautifulSoup(response.content, "html.parser")
        titles_and_paragraphs = soup.find_all(
            ["h1", "h2", "h3", "h4", "h5", "h6", "p"]
        )
        return "\n".join(
            element.get_text(strip=True) for element in titles_and_paragraphs
        )

# Build the research agent: an OpenAI-functions agent driven by `llm` that can
# call the three tools defined above.
agent = initialize_agent(
    llm=llm,
    # Print the intermediate tool calls and observations while running.
    verbose=True,
    agent=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    # One instance of each custom tool; the names the model sees come from
    # each tool's `name` attribute.
    tools=[
        DuckDuckGoSearchTool(),
        WikipediaSearchTool(),
        ResearchOverviewTool()
    ],
    agent_kwargs={
        # System prompt: sets the persona, asks for sources and citations, and
        # tells the model to use all three tools.
        "system_message": SystemMessage(content="""
            You're an academic who specializes in IT-related research materials.
            You are well-versed in the latest developments in IT and security-related issues and trends. 
                                        
            You should demonstrate as much scholarly research as possible on the given keywords, and be sure to provide sources and citations. 
                                        
            You should use all three tools we've provided (DuckDuckGoSearchTool,WikipediaSearchTool, and ResearchOverviewTool) and include both the search results, the results of extracting those pages, and the Wikipedia results.
                                        
            Please be as descriptive as possible to save your answers as a txt file.
        """)
    }
)


# Run the agent on one research topic and save whatever it returns to a text
# file named after the topic.
keyword = {"input": "Research about the XZ backdoor"}
result = agent.invoke(keyword)

file_path = f"research_result_{keyword['input']}.txt"

# Build the full payload BEFORE opening the file: opening in "w" mode creates
# or truncates the file immediately, so serializing inside the `with` block
# would leave an empty file behind if json.dumps raised.
if isinstance(result, str):
    payload = result
    saved_message = f"Research result saved to {file_path}"
else:
    import json

    # Non-string results (e.g. a dict) are stored as pretty-printed JSON with
    # non-ASCII characters kept readable.
    payload = json.dumps(result, ensure_ascii=False, indent=4)
    saved_message = f"Research result saved in JSON format to {file_path}"

# "utf-8-sig" writes a BOM at the start of the file, as the original did.
with open(file_path, "w", encoding="utf-8-sig") as file:
    file.write(payload)
print(saved_message)


  warn_deprecated(




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `DuckDuckGoSearchTool` with `{'query': 'XZ backdoor research'}`


[0m[36;1m[1;3m[snippet: What does the backdoor do? Malicious code added to xz Utils versions 5.6.0 and 5.6.1 modified the way the software functions. The backdoor manipulated sshd, the executable file used to make remote ..., title: What we know about the xz Utils backdoor that almost infected the world ..., link: https://arstechnica.com/security/2024/04/what-we-know-about-the-xz-utils-backdoor-that-almost-infected-the-world/], [snippet: That XZ Utils backdoor, it's now clear, was inserted by none other than the lead open source steward of XZ Utils, a developer who went by the name Jia Tan. ... who until last year served as the most senior researcher and head of the global research and analysis team at Russian cybersecurity firm Kaspersky. "I'd say this is a nation-state ..., title: Unraveling the Mystery: 'Jia Tan,' the Mastermind Behind XZ Backd



[33;1m[1;3mPage: XZ Utils backdoor
Summary: On 29 March 2024, software developer Andres Freund reported that he had found a maliciously introduced backdoor in the Linux utility xz within the liblzma library in versions 5.6.0 and 5.6.1 released in February 2024.While xz is commonly present in most Linux distributions, the backdoor only targeted Debian- and RPM-based systems running on the x86-64 architecture. At the time of discovery the backdoored version had not yet been widely deployed to production systems, but was present in development versions of major distributions.The backdoor gives an attacker who possesses a specific Ed448 private key remote code execution capabilities on the affected Linux systems. The issue has been assigned a CVSS score of 10.0, the highest possible score.

Page: XZ Utils
Summary: XZ Utils (previously LZMA Utils) is a set of free software command-line lossless data compressors, including the programs lzma and xz, for Unix-like operating systems and, from