In [1]:
import os
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd

# Import fetch_website_contents from scraper module
from scraper import fetch_website_contents

# Load OpenAI API key from .env
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)


In [2]:
def scrape_headlines(url, limit=10):
    """Scrape top website headlines."""

    html = fetch_website_contents(url)
    
    if not html:
        return []
    
    soup = BeautifulSoup(html, "html.parser")
    headlines = []

    for h2 in soup.find_all("h2", limit=limit):
        text = h2.get_text().strip()
        if text:
            headlines.append(text)
    
    return headlines

In [3]:
def summarize_headlines_with_gpt(headlines):
    """Summarize headlines using OpenAI GPT."""
    if not headlines:
        return "No headlines found to summarize."

    system_prompt = "You are an AI assistant that analyses content of a website"
    user_prompt = (
        "Summarize the following headlines "
        "into a short news overview:\n\n"
        + "\n".join(f"- {h}" for h in headlines)
    )

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        max_tokens=200
    )

    return response.choices[0].message.content.strip()

In [4]:
headlines = scrape_headlines("https://www.bbc.com/news")
print(f"📰 Found {len(headlines)} headlines:\n")
for h in headlines:
    print("-", h)

summary = summarize_headlines_with_gpt(headlines)
print("\n🧾 GPT Summary:\n")
print(summary)

📰 Found 10 headlines:

- King Charles becomes first head of Church of England to pray publicly with Pope
- Myanmar's army is taking back territory with relentless air strikes - and China's help
- Strong winds in New Zealand leave tens of thousands without power
- Toxic air is back again in Delhi - here's why it's so hard to stop it
- British man drowns in Australia after surfboard snaps in wild weather
- Myanmar's army is taking back territory with relentless air strikes - and China's help
- Trump says Putin talks 'don't go anywhere' as he imposes new sanctions
- Person of interest in British girl's disappearance named in Australian state parliament
- Gen Z rising? Why young Indians aren't taking to the streets
- Strong winds in New Zealand leave tens of thousands without power

🧾 GPT Summary:

In recent news, King Charles has made history as the first head of the Church of England to pray publicly alongside Pope Francis. Meanwhile, the Myanmar army is advancing in territorial control,

In [5]:
df = pd.DataFrame({"Headline": headlines})
df.to_csv("headlines.csv", index=False)
print("✅ Headlines saved to headlines.csv")

✅ Headlines saved to headlines.csv
