In [6]:
import os
import requests
from bs4 import BeautifulSoup
from typing import Type
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool
from pydantic import BaseModel, Field
from langchain.agents import initialize_agent, AgentType
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.utilities import WikipediaAPIWrapper

openai_api_key = os.getenv("OPENAI_API_KEY")

llm = ChatOpenAI(temperature=0.1, openai_api_key=openai_api_key, model="gpt-3.5-turbo")

class WikipediaSearchTool(BaseTool):
    name = "WikipediaSearch"
    description = "Search information from Wikipedia."

    class WikipediaArgs(BaseModel):
        query: str = Field(description="Search query for Wikipedia.")

    args_schema: Type[WikipediaArgs] = WikipediaArgs

    def _run(self, query: str):
        wiki = WikipediaAPIWrapper()
        return wiki.run(query)

class DuckDuckGoSearchTool(BaseTool):
    name = "DuckDuckGoSearch"
    description = "Search the web using DuckDuckGo."

    class DuckDuckGoArgs(BaseModel):
        query: str = Field(description="Search query for DuckDuckGo.")

    args_schema: Type[DuckDuckGoArgs] = DuckDuckGoArgs

    def _run(self, query: str):
        ddg = DuckDuckGoSearchAPIWrapper()
        return ddg.run(query)

class WebScrapingTool(BaseTool):
    name = "WebScraping"
    description = "Scrape and extract text from a given website URL."

    class ScrapingArgs(BaseModel):
        url: str = Field(description="The URL of the website to scrape.")

    args_schema: Type[ScrapingArgs] = ScrapingArgs

    def _run(self, url: str):
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        text = soup.get_text(separator="\n")
        return text

class WebScrapingTool(BaseTool):
    name = "WebScraping"
    description = "Scrape and extract text from a given website URL."

    class ScrapingArgs(BaseModel):
        url: str = Field(description="The URL of the website to scrape.")

    args_schema: Type[ScrapingArgs] = ScrapingArgs

    def _run(self, url: str):
        try:
            response = requests.get(url)
            response.raise_for_status() 
        except requests.exceptions.HTTPError as http_err:
            return f"HTTP error occurred: {http_err}"
        except Exception as err:
            return f"An error occurred: {err}"

        soup = BeautifulSoup(response.text, 'html.parser')
        text = soup.get_text(separator="\n")
        return text

def save_to_file(content: str, filename: str = "research.txt"):
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(content)
    return f"Content saved to {filename}"

agent = initialize_agent(
    tools=[
        WikipediaSearchTool(),
        DuckDuckGoSearchTool(),
        WebScrapingTool(),
    ],
    llm=llm,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)

query = "Research about the XZ backdoor"

response = agent.run(query)

save_message = save_to_file(response)
print(save_message)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should start by searching for information on the XZ backdoor to gather relevant details.
Action: DuckDuckGoSearch
Action Input: XZ backdoor[0m

HTTPError: 