In [1]:
import os
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd

# Import fetch_website_contents from scraper module
from scraper import fetch_website_contents

# Load OpenAI API key from .env
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)


In [2]:
def scrape_bbc_headlines(limit=10):
    """Scrape top BBC News headlines."""
    url = "https://www.bbc.com/news"
    html = fetch_website_contents(url)
    
    if not html:
        return []
    
    soup = BeautifulSoup(html, "html.parser")
    headlines = []

    for h2 in soup.find_all("h2", limit=limit):
        text = h2.get_text().strip()
        if text:
            headlines.append(text)
    
    return headlines

In [3]:
def summarize_headlines_with_gpt(headlines):
    """Summarize BBC headlines using OpenAI GPT."""
    if not headlines:
        return "No headlines found to summarize."

    system_prompt = "You are an AI assistant that analyses content of a website"
    user_prompt = (
        "Summarize the following BBC headlines "
        "into a short news overview:\n\n"
        + "\n".join(f"- {h}" for h in headlines)
    )

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        max_tokens=200
    )

    return response.choices[0].message.content.strip()

In [4]:
headlines = scrape_bbc_headlines()
print(f"📰 Found {len(headlines)} BBC headlines:\n")
for h in headlines:
    print("-", h)

summary = summarize_headlines_with_gpt(headlines)
print("\n🧾 GPT Summary:\n")
print(summary)

📰 Found 10 BBC headlines:

- Security camera didn't cover area where jewel thieves broke into Louvre, director says
- Children among victims in Russian strikes, hours after Trump-Putin talks shelved
- SpaceX says it has cut Starlink services to Myanmar scam camps
- Effects of antidepressants on physical health ranked for first time
- Verifying aftermath pictures of devastating Uganda bus crash
- Children among victims in Russian strikes, hours after Trump-Putin talks shelved
- Why Trump made breakthrough in Gaza but can't with Putin over Ukraine
- Gaza health crisis will last for generations, WHO chief tells BBC
- UN's top court says Israel obliged to allow UN aid into Gaza
- SpaceX says it has cut Starlink services to Myanmar scam camps

🧾 GPT Summary:

Recent news highlights include the following:

The director of the Louvre revealed that security cameras did not cover the area where jewel thieves broke in. In Russia, strikes that resulted in child casualties occurred shortly after d

In [5]:
df = pd.DataFrame({"Headline": headlines})
df.to_csv("bbc_headlines.csv", index=False)
print("✅ Headlines saved to bbc_headlines.csv")

✅ Headlines saved to bbc_headlines.csv
