# **Web Scraper for News Headlines.**

In [8]:
import requests
from bs4 import BeautifulSoup

In [9]:
def scrape_headlines(url, selectors, timeout=10):
    """Scrape headline text from a web page using CSS selectors.

    Args:
        url: Address of the page to download.
        selectors: CSS selector strings, applied in the order given. A single
            string is accepted and treated as a one-element list.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Without it a stalled connection would block
            forever.

    Returns:
        A list of the non-empty, whitespace-stripped text of every matching
        element, grouped by selector order. Duplicates are NOT removed; an
        element matched by several selectors appears once per match.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status (via
            ``raise_for_status``).
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(selectors, str):
        selectors = [selectors]

    response = requests.get(
        url,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    headlines = []
    for sel in selectors:
        for tag in soup.select(sel):
            text = tag.get_text(strip=True)
            if text:  # skip elements that contain no visible text
                headlines.append(text)
    return headlines

In [10]:
if __name__ == "__main__":
    URL = "https://www.bbc.com/news"
    # Common selectors on BBC News homepage
    selectors = [
        "h3.gs-c-promo-heading__title",  # headlines
        "h2",  # fallback generic
    ]
    # Strings of this length or shorter are discarded
    # (presumably section labels rather than real headlines - verify).
    MIN_HEADLINE_CHARS = 20
    # Upper bound on how many headlines are written to the output file.
    MAX_HEADLINES = 20

    try:
        headlines = scrape_headlines(URL, selectors)

        # dict.fromkeys drops repeats while keeping first-seen order, so the
        # result matches a manual "append if not already present" loop
        # without the quadratic list membership test.
        unique = [
            h for h in dict.fromkeys(headlines)
            if len(h) > MIN_HEADLINE_CHARS
        ]
        selected = unique[:MAX_HEADLINES]

        # Plain ASCII codec name, matching the cell that reads the file back.
        with open("headlines.txt", "w", encoding="utf-8") as f:
            for i, title in enumerate(selected, 1):
                f.write(f"{i}. {title}\n")

        print(f"✅ Scraped and saved {len(selected)} headlines to 'headlines.txt'.")
    except Exception as e:
        # Best-effort demo cell: report network/parse/file errors instead of
        # showing a traceback.
        print("⚠️ Error scraping:", e)

✅ Scraped and saved 20 headlines to 'headlines.txt'.


In [11]:
# Show the saved file with the shell's `cat` command (IPython `!` shell escape).
!cat headlines.txt

1. Putin and Trump's relationship has soured - but behind the posturing, a Ukraine deal is still possible
2. Texas Republicans vote to arrest Democrats blocking redistricting plan
3. Watch: Fireworks ignite as blaze hits Japanese festival launch boats
4. BP says it makes biggest oil and gas find in 25 years
5. US may demand $15,000 deposit for visas
6. Disfigured, shamed and forgotten: BBC visits the Korean survivors of the Hiroshima bomb
7. US attorney general orders grand jury hearings on Trump-Russia probe
8. Aboriginal group launches legal bid to stop Brisbane Olympic stadium
9. Why Donald Trump is talking about actress Sydney Sweeney
10. India calls Trump's tariff threat over Russian oil 'unjustified'
11. Danish zoo asks for unwanted pets to feed its predators
12. Convicted rapist fighting to remain in parliament in Australia
13. China reports 7,000 cases of chikungunya virus
14. Miracle product or pollution nightmare? Why a deal to cut plastic waste is hard to reach
15. Why were 

In [12]:
# Load the saved headlines back from disk, then echo them to the cell output
with open("headlines.txt", "r", encoding="utf-8") as fh:
    contents = fh.read()
print(contents)

1. Putin and Trump's relationship has soured - but behind the posturing, a Ukraine deal is still possible
2. Texas Republicans vote to arrest Democrats blocking redistricting plan
3. Watch: Fireworks ignite as blaze hits Japanese festival launch boats
4. BP says it makes biggest oil and gas find in 25 years
5. US may demand $15,000 deposit for visas
6. Disfigured, shamed and forgotten: BBC visits the Korean survivors of the Hiroshima bomb
7. US attorney general orders grand jury hearings on Trump-Russia probe
8. Aboriginal group launches legal bid to stop Brisbane Olympic stadium
9. Why Donald Trump is talking about actress Sydney Sweeney
10. India calls Trump's tariff threat over Russian oil 'unjustified'
11. Danish zoo asks for unwanted pets to feed its predators
12. Convicted rapist fighting to remain in parliament in Australia
13. China reports 7,000 cases of chikungunya virus
14. Miracle product or pollution nightmare? Why a deal to cut plastic waste is hard to reach
15. Why were 

In [13]:
# Download headlines.txt to your device (Google Colab only; uncomment the two lines below to use)
#from google.colab import files
#files.download("headlines.txt")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>