In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# BBC sections to scrape
sections_to_scrape = {
    "World": "https://www.bbc.com/news/world",
    "Business": "https://www.bbc.com/news/business",
    "Technology": "https://www.bbc.com/news/technology",
    "Sports": "https://www.bbc.com/sport",
    "Entertainment": "https://www.bbc.com/news/entertainment_and_arts",
    "Science": "https://www.bbc.com/news/science_and_environment"
}

# Empty lists
headlines, sections, categories, links = [], [], [], []

for category, url in sections_to_scrape.items():
    print(f"🔎 Scraping {category} section...")
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    
    # Get headline blocks (BBC uses h2 a lot, sometimes h3 for sport)
    articles = soup.find_all(["h2", "h3"])
    
    for art in articles:
        headline = art.get_text(strip=True)
        if not headline or len(headline) < 10:  # filter out junk
            continue

        # Try to grab parent link
        parent_a = art.find_parent("a")
        if parent_a and parent_a.get("href"):
            href = parent_a.get("href")
            full_link = "https://www.bbc.com" + href if href.startswith("/") else href
            # dynamic section (from url)
            parts = href.split("/")
            section = parts[2] if len(parts) > 2 else "General"
        else:
            full_link = "N/A"
            section = "General"

        headlines.append(headline)
        sections.append(section.capitalize())
        categories.append(category)  # mapped from dict
        links.append(full_link)

# Build DataFrame
df = pd.DataFrame({
    "S.No": range(1, len(headlines) + 1),
    "Headline": headlines,
    "Section": sections,
    "Category": categories,
    "Link": links
})

print(df.head(20))
print(f"\n✅ Scraped total {len(df)} headlines from multiple sections")

# Save
df.to_csv("bbc_multi_section_news.csv", index=False)
print("💾 Dataset saved as bbc_multi_section_news.csv")


🔎 Scraping World section...
🔎 Scraping Business section...
🔎 Scraping Technology section...
🔎 Scraping Sports section...
🔎 Scraping Entertainment section...
🔎 Scraping Science section...
    S.No                                           Headline   Section  \
0      1  Charlie Kirk suspect confessed in hidden note ...  Articles   
1      2  'The bombing has been insane': Palestinians sc...  Articles   
2      3  Canadian writer Robert Munsch approved for ass...  Articles   
3      4  Syria's worst drought in decades pushes millio...  Articles   
4      5  Judge dismisses terrorism charges against Luig...  Articles   
5      6  'The bombing has been insane': Palestinians sc...  Articles   
6      7  Israel has committed genocide in Gaza, UN comm...  Articles   
7      8          Acting legend Robert Redford dies aged 89  Articles   
8      9  Canadian writer Robert Munsch approved for ass...  Articles   
9     10  Coup plot charges for former Romanian presiden...  Articles   
10    11  