# Notebook cell In [7]:
import requests
from bs4 import BeautifulSoup
from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
from langchain.tools import WikipediaQueryRun
import openai
from dotenv import load_dotenv
import os

# Load environment variables from the .env file
load_dotenv()

# Configure the OpenAI library with the API key taken from the environment
# (None when OPENAI_API_KEY is not set)
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Wikipedia search function (uses the Wikipedia API wrapper)
def search_wikipedia(query):
    """Run a Wikipedia lookup for *query* and return the tool's text output.

    ``WikipediaQueryRun`` is documented to take a ``WikipediaAPIWrapper`` as
    its ``api_wrapper``; the DuckDuckGo wrapper that was passed before is a
    different tool and does not query Wikipedia.

    Args:
        query: Search phrase to look up.

    Returns:
        Whatever ``WikipediaQueryRun.run`` returns for the query.
    """
    # Local import so this block is self-contained; the file already depends
    # on the same ``langchain.utilities`` package.
    from langchain.utilities.wikipedia import WikipediaAPIWrapper

    # top_k_results=1 keeps the original intent of a single result.
    wrapper = WikipediaAPIWrapper(top_k_results=1)
    wiki_query = WikipediaQueryRun(api_wrapper=wrapper)
    return wiki_query.run(query)

# DuckDuckGo search function (with exception handling)
def search_duckduckgo(query, max_results=3):
    """Search DuckDuckGo and return structured results.

    ``DuckDuckGoSearchAPIWrapper.run`` returns a single joined string, which
    callers cannot tell apart from the error strings returned below. This
    function therefore uses ``results()`` and returns one dict per hit.

    Args:
        query: Search phrase.
        max_results: Maximum number of hits to request (default 3).

    Returns:
        On success, a list of ``{"title", "url", "snippet"}`` dicts (possibly
        empty). On failure, an error message string.
    """
    search = DuckDuckGoSearchAPIWrapper(max_results=max_results)
    try:
        raw_results = search.results(query, max_results)
    except requests.exceptions.HTTPError as e:
        return f"HTTPError occurred: {str(e)}"
    except Exception as e:
        # Deliberate best-effort boundary: any search failure is reported as
        # a message instead of aborting the whole research run.
        return f"An error occurred during DuckDuckGo search: {str(e)}"

    # The wrapper reports the target address under "link"; expose it as "url".
    # Entries without a link (e.g. a "no result found" placeholder) are dropped.
    return [
        {
            "title": item.get("title", ""),
            "url": item["link"],
            "snippet": item.get("snippet", ""),
        }
        for item in raw_results
        if item.get("link")
    ]

# Manual DuckDuckGo search (backup method)
def manual_duckduckgo_search(query):
    """Query DuckDuckGo's HTML endpoint directly and parse the result links.

    Args:
        query: Search phrase.

    Returns:
        On success, a list of ``{"title", "url"}`` dicts, one per anchor with
        class ``result__a``. On a request failure, an error message string.
    """
    url = "https://duckduckgo.com/html/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # Pass the query via ``params`` so requests URL-encodes it; building
        # the URL by hand breaks on characters such as '&', '#' or '+'.
        response = requests.get(
            url, params={"q": query}, headers=headers, timeout=10
        )
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error in manual DuckDuckGo search: {str(e)}"

    soup = BeautifulSoup(response.text, "html.parser")
    return [
        {"title": anchor.get_text(), "url": anchor.get("href")}
        for anchor in soup.find_all("a", class_="result__a")
    ]

# Website scraping function
def scrape_website(url):
    """Download *url* and return its visible text.

    ``<header>``, ``<footer>`` and ``<nav>`` elements are removed before the
    text is extracted.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text with one fragment per line, or an error message string
        when the request fails.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error scraping {url}: {str(e)}"

    # When the server declares no charset, ``response.text`` falls back to
    # ISO-8859-1 for text content and garbles UTF-8 pages. In that case give
    # BeautifulSoup the raw bytes so it can detect the encoding itself.
    content_type = response.headers.get("Content-Type", "")
    if "charset" in content_type.lower():
        markup = response.text
    else:
        markup = response.content

    soup = BeautifulSoup(markup, "html.parser")
    for boilerplate in soup.find_all(["header", "footer", "nav"]):
        boilerplate.decompose()
    return soup.get_text(separator="\n", strip=True)

# Generate an answer to a question with the GPT-4o-mini model
def ask_gpt(question, model="gpt-4o-mini"):
    """Send *question* to an OpenAI chat model and return the reply text.

    Works with both the legacy (<1.0) and the current (>=1.0) ``openai``
    package: ``openai.ChatCompletion`` was removed in 1.0, so the module-level
    v1 client is used when it is available.

    Args:
        question: User message to send.
        model: Chat model name (default ``"gpt-4o-mini"``).

    Returns:
        The content of the first choice's message.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": question},
    ]
    if hasattr(openai, "OpenAI"):
        # openai>=1.0: the module-level client picks up ``openai.api_key``.
        response = openai.chat.completions.create(model=model, messages=messages)
    else:
        # openai<1.0 legacy interface.
        response = openai.ChatCompletion.create(model=model, messages=messages)
    # Attribute access is supported by the response objects of both versions.
    return response.choices[0].message.content

# Save the research results to a .txt file
def save_research_to_file(content, filename="research.txt"):
    """Write *content* to *filename* (UTF-8, overwriting) and print a notice.

    Args:
        content: Text to store.
        filename: Destination path (default ``"research.txt"``).
    """
    out_file = open(filename, mode="w", encoding="utf-8")
    with out_file:
        out_file.write(content)
    print(f"Research saved to {filename}")

# Agent entry point
def run_research_agent(query):
    """Gather research on *query*, summarise it with GPT and save the report.

    Steps: Wikipedia lookup, DuckDuckGo search, scrape of the first
    DuckDuckGo hit, GPT summary, then write everything to the default
    research file.

    Args:
        query: Topic to research.
    """
    # Report fragments are collected in a list and joined once at the end.
    sections = []

    # 1. Wikipedia search
    wiki_result = search_wikipedia(query)
    sections.append(f"### Wikipedia Search Result:\n\n{wiki_result}\n\n")

    # 2. DuckDuckGo search (a string result is an error message)
    ddg_results = search_duckduckgo(query)
    if isinstance(ddg_results, str):
        sections.append(f"### DuckDuckGo Search Results:\nError: {ddg_results}\n")
    else:
        sections.append("### DuckDuckGo Search Results:\n")
        for position, result in enumerate(ddg_results, start=1):
            sections.append(f"{position}. {result['title']}: {result['url']}\n")

        # 3. Scrape the page behind the first DuckDuckGo result
        if ddg_results:
            first_result_url = ddg_results[0]['url']
            scraped_content = scrape_website(first_result_url)
            sections.append(
                f"\n### Scraped Content from {first_result_url}:\n\n{scraped_content}\n"
            )

    # 4. Ask GPT-4o-mini for the final summary. The gathered research must be
    #    part of the prompt; otherwise the model has nothing to base the
    #    summary on.
    gathered_research = "".join(sections)
    prompt = (
        f"Based on this research, can you summarize the findings about {query}?"
        f"\n\n{gathered_research}"
    )
    gpt_response = ask_gpt(prompt, model="gpt-4o-mini")
    sections.append(f"\n### GPT-4o-mini Summary:\n\n{gpt_response}\n")

    # 5. Save the report to a .txt file
    save_research_to_file("".join(sections))

# Example query. Guarded so that importing this file for its helper functions
# does not trigger network requests or OpenAI API calls; running it as a
# script or notebook cell behaves as before.
if __name__ == "__main__":
    query = "Research about the XZ backdoor"
    run_research_agent(query)


# Output: Research saved to research.txt
