In [None]:
from langchain.agents import initialize_agent, AgentType
from langchain.tools import Tool
from langchain.chat_models import ChatOpenAI
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
import wikipedia

# Chat model handed to the agent; a low temperature is requested explicitly.
llm = ChatOpenAI(
    model_name="gpt-4-1106-preview",
    temperature=0.1,
)

def search_wikipedia(query):
    """Return a three-sentence Wikipedia summary for *query*.

    Args:
        query: Title or search phrase passed to ``wikipedia.summary``.

    Returns:
        The summary text on success. If the lookup raises
        ``DisambiguationError``, a message embedding the exception's
        ``options``; if it raises ``PageError``, the string
        ``"No page found."``. Any other exception propagates to the caller.
    """
    try:
        summary = wikipedia.summary(query, sentences=3)
    except wikipedia.exceptions.DisambiguationError as ambiguous:
        return f"Disambiguation error: {ambiguous.options}"
    except wikipedia.exceptions.PageError:
        return "No page found."
    return summary

def search_duckduckgo(query):
    """Run a DuckDuckGo text search and return the result links.

    Args:
        query: Search phrase passed to ``DDGS.text``.

    Returns:
        A list with the ``'href'`` value of each result (at most three are
        requested). If anything raises, a one-element list containing an
        ``"Error fetching search results: ..."`` message is returned instead
        of propagating the exception.
    """
    try:
        with DDGS() as client:
            hits = client.text(query, max_results=3)
            # Consume the results while the client is still open.
            links = [hit['href'] for hit in hits]
    except Exception as exc:
        return [f"Error fetching search results: {str(exc)}"]
    return links

def scrape_website(url, timeout=10):
    """Download a web page and return the text of its paragraphs.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without one, requests waits indefinitely on an unresponsive
            server, which would stall the whole research run.

    Returns:
        The text of every ``<p>`` element joined with single spaces and cut
        to the first 2000 characters. On any failure (network error,
        timeout, HTTP error status, invalid URL, ...) an
        ``"Error scraping <url>: ..."`` message is returned instead of
        raising.
    """
    try:
        response = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=timeout,
        )
        # Treat 4xx/5xx responses as failures so that error pages are not
        # reported as if they were the requested content.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
        return text[:2000]
    except Exception as e:
        # Deliberately broad: this function is best-effort and callers expect
        # a string back rather than an exception.
        return f"Error scraping {url}: {str(e)}"

def save_to_file(content, filename="research_result.txt"):
    """Write *content* to *filename* as UTF-8, replacing any existing file.

    Args:
        content: Text to write.
        filename: Destination path; defaults to ``research_result.txt``.

    Returns:
        A confirmation message naming the file that was written.
    """
    confirmation = f"Results saved to {filename}"
    with open(filename, mode="w", encoding="utf-8") as out:
        out.write(content)
    return confirmation

# Tools offered to the agent. The names are plain identifiers (letters,
# digits, underscores) because the OPENAI_FUNCTIONS agent type forwards each
# tool name to the OpenAI API as a function name, and that API rejects names
# containing spaces.
tools = [
    Tool(name="wikipedia_search", func=search_wikipedia, description="Search Wikipedia for summaries."),
    Tool(name="duckduckgo_search", func=search_duckduckgo, description="Search DuckDuckGo for web results."),
    Tool(name="web_scraper", func=scrape_website, description="Scrape a website's text content."),
    Tool(name="file_saver", func=save_to_file, description="Save research results to a text file."),
]

# Build the agent from the tool list and chat model, using the
# OPENAI_FUNCTIONS agent type with verbose output enabled.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)

# Run the research steps directly (without going through the agent): fetch a
# Wikipedia summary, scrape the top DuckDuckGo hits, and save everything to a
# text file.
query = "Research about the XZ backdoor"
wikipedia_result = search_wikipedia(query)
duckduckgo_results = search_duckduckgo(query)

# search_duckduckgo() reports failure as an error message inside the returned
# list rather than raising. Only entries that look like web URLs are scraped;
# anything else is copied into the report verbatim instead of being handed to
# the scraper as if it were a URL.
scraped_content = "".join(
    (scrape_website(entry) if entry.startswith(("http://", "https://")) else entry)
    + "\n\n"
    for entry in duckduckgo_results
)

research_result = f"Wikipedia Result:\n{wikipedia_result}\n\nScraped Web Content:\n{scraped_content}"

save_to_file(research_result)


'Results saved to research_result.txt'