In [4]:
!pip install openai beautifulsoup4 requests python-dotenv





[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import os
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Connect to OpenAI
openai = OpenAI(api_key=api_key)


In [6]:
def search_web(query, max_results=3):
    print(f"🔍 Searching DuckDuckGo for: {query}")
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(f"https://html.duckduckgo.com/html/?q={query}", headers=headers)

    soup = BeautifulSoup(response.text, "html.parser")
    results = []

    for link in soup.find_all("a", class_="result__a", href=True):
        url = link["href"]
        if url.startswith("http"):
            results.append(url)
        if len(results) >= max_results:
            break

    return results


In [7]:
def scrape_url(url):
    try:
        print(f"🧽 Scraping URL: {url}")
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")
        paragraphs = soup.find_all("p")
        text = " ".join([para.get_text() for para in paragraphs])
        return text[:5000]  # limit to 5000 characters
    except Exception as e:
        print("❌ Error scraping:", e)
        return ""


In [8]:
def summarize_text(text):
    try:
        print("🧠 Summarizing content...")
        response = openai.chat.completions.create(
            model="GPT-4o mini",
            messages=[
                {"role": "system", "content": "You are a helpful academic assistant."},
                {"role": "user", "content": f"Summarize the following text:\n{text}"},
            ]
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print("❌ Error summarizing:", e)
        return ""


In [9]:
def organize_report(topic, summaries):
    report = f"# Research Summary: {topic}\n\n"
    for i, item in enumerate(summaries, 1):
        report += f"## Source {i}: {item['url']}\n{item['summary']}\n\n"
    return report


In [10]:
def save_markdown_report(report, filename="output.md"):
    with open(filename, "w", encoding="utf-8") as f:
        f.write(report)
    print(f"✅ Markdown report saved as {filename}")


In [11]:
import subprocess

def convert_to_pdf(input_file="output.md", output_file="output.pdf"):
    try:
        subprocess.run(["pandoc", input_file, "-o", output_file], check=True)
        print(f"✅ PDF saved as {output_file}")
    except Exception as e:
        print("❌ PDF conversion failed:", e)


In [12]:
# Step-by-step execution
topic = "Future of Artificial Intelligence"
urls = search_web(topic)

summaries = []
for url in urls:
    text = scrape_url(url)
    if text:
        summary = summarize_text(text)
        summaries.append({"url": url, "summary": summary})

report = organize_report(topic, summaries)
save_markdown_report(report)

# Optional: Convert to PDF
# convert_to_pdf()


🔍 Searching DuckDuckGo for: Future of Artificial Intelligence
✅ Markdown report saved as output.md
