In [31]:
import time
from altair.vegalite import Description
import requests
from typing import Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.tools import DuckDuckGoSearchResults
from langchain.retrievers import WikipediaRetriever
from langchain.document_loaders import AsyncChromiumLoader
from langchain.document_transformers import Html2TextTransformer
import nest_asyncio

from tempfile import TemporaryDirectory
nest_asyncio.apply()



llm = ChatOpenAI(temperature=0.1)

class SearchToolArgsSchema(BaseModel):
    query: str = Field(
        description="The query you will search for."
    )

class WikipediaSearchTool(BaseTool):
    name = "WikipediaSearchTool"
    description = """
    Use this tool to research.
    It takes a query as an argument.
    """
    args_schema: Type[
        SearchToolArgsSchema
    ] = SearchToolArgsSchema

    def _run(self, query):
        retriever = WikipediaRetriever(top_k_results=1)
        return retriever.get_relevant_documents(query)


class DuckDuckGoSearchTool(BaseTool):
    name = "DuckDuckGoSearchTool"
    description = """
    Use this tool to find the ONLY ONE link to the most relevant result for my research.
    It takes a query as an argument.
    """
    args_schema: Type[
        SearchToolArgsSchema
    ] = SearchToolArgsSchema

    def _run(self, query):
        wrapper = DuckDuckGoSearchAPIWrapper(max_results=1)
        search = DuckDuckGoSearchResults(api_wrapper=wrapper)
        return search.run(query)

class LinkScrapeToolArgsSchema(BaseModel):
    link: list[str] = Field(description="The ONE link you will extract it's content. Example: ['https://www.wired.com/story/xz-backdoor-everything-you-need-to-know/']")
    
class LinkScrapeTool(BaseTool):
    name="LinkScrapeTool"
    description="""
        If you found the website link in DuckDuckGo,
        Use this to get the content of the link for my research.
        You shoud enter list of ONLY ONE link.
        """
    args_schema: Type[LinkScrapeToolArgsSchema]=(LinkScrapeToolArgsSchema)
    def _run(self, link):
        loader = AsyncChromiumLoader(link)
        docs = loader.load()
        html2text = Html2TextTransformer()
        docs_transformed = html2text.transform_documents(docs)
        return docs
    
class SaveTXTfileToolArgsSchema(BaseModel):
    doc: str = Field(description="the content extracted from the link and you will save to .txt file.")

class SaveTXTfileTool(BaseTool):
    name="SaveTXTfileTool"
    description="""
        If you found the website link in DuckDuckGo and extracted the content,
        Use this to save the content to a .txt file for my research.
        You shoud enter a document that you extracted from the website.
    """
    args_schema: Type[SaveTXTfileToolArgsSchema] = SaveTXTfileToolArgsSchema
    def _run(self, doc):
        file_path = f"./files/{time.strftime('%H%M%S')}.txt"
        with open(file_path, 'w') as f:
            f.write(doc)
        

agent = initialize_agent(
    llm=llm,
    verbose=True,
    agent=AgentType.OPENAI_FUNCTIONS,
    handle_parsing_errors=True,
    tools=[
        WikipediaSearchTool(),
        DuckDuckGoSearchTool(),
        LinkScrapeTool(),
        SaveTXTfileTool(),
    ],
)

prompt = "Research about the XZ backdoor, search in duckduckgo"

agent.invoke(prompt)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `DuckDuckGoSearchTool` with `{'query': 'XZ backdoor'}`


[0m[33;1m[1;3m[snippet: The XZ Backdoor: Everything You Need to Know. Details are starting to emerge about a stunning supply chain attack that sent the open source software community reeling. On Friday, a lone Microsoft ..., title: The XZ Backdoor: Everything You Need to Know | WIRED, link: https://www.wired.com/story/xz-backdoor-everything-you-need-to-know/], [snippet: That operation matches the style of the XZ Utils backdoor far more than the cruder supply chain attacks of APT41 or Lazarus, by comparison. "It could very well be someone else," says Aitel., title: The Mystery of 'Jia Tan,' the XZ Backdoor Mastermind | WIRED, link: https://www.wired.com/story/jia-tan-xz-backdoor/], [snippet: The version of XZ should be carefully checked, as versions 5.6.0 and 5.6.1 contain the backdoor. It is advised to revert to a previous known safe version of XZ Utils, s

{'input': 'Research about the XZ backdoor, search in duckduckgo',
 'output': 'I have extracted the information about the XZ backdoor from the article. The document has been saved successfully. If you need further information or assistance, feel free to ask!'}