# 📰 BBC News

## 📌 Instructions

1. Enter a **search term** (e.g., `"economy"`, `"sports"`, `"technology"`).  
2. Define the **page range** (`start_page` and `end_page`) to scrape multiple pages of results.  
3. The script retrieves:
   - Title  
   - Date  
   - Link  
   - Full article content  
4. The results are stored in a **pandas DataFrame** and can be exported to CSV:

```python
bbc_df.to_csv("data_bbc_df.csv", index=False)

In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import urllib.parse
import time

def fetch_bbc_article_content(url):
    try:
        soup = BeautifulSoup(requests.get(url).text, "html.parser")
        return " ".join(p.get_text(strip=True) for p in soup.select("article p")) or None
    except Exception as e:
        print(f"Failed to fetch {url}: {e}")
        return None

def fetch_bbc_articles(query, start_page=0, end_page=1, delay=1.5):
    encoded_query = urllib.parse.quote(query)
    base_url = (
        f"https://www.bbc.com/search?q={encoded_query}&page={{}}&edgeauth=eyJhbGciOiAiSFMyNTYiLCAidHlwIjogIkpXVCJ9.eyJrZXkiOiAiZmFzdGx5LXVyaS10b2tlbi0xIiwiZXhwIjogMTc0ODQwNTc5MCwibmJmIjogMTc0ODQwNTQzMCwicmVxdWVzdHVyaSI6ICIlMkZzZWFyY2glM0ZxJTNE{encoded_query}JTI2cGFnZSUzRCU3YnBhZ2UlN0QifQ.fYFU7wbP7okGx82xU39zD-En8ipHt1B0AIyKaIUZEVM"
    )
    records = []

    for page in range(start_page, end_page + 1):
        try:
            soup = BeautifulSoup(requests.get(base_url.format(page)).text, "html.parser")
            titles = [h2.get_text(strip=True) for h2 in soup.select("h2")]
            dates = [span.get_text(strip=True) for span in soup.select('span[data-testid="card-metadata-lastupdated"]')]
            links = ["https://www.bbc.com" + a.get("href") for a in soup.select('a[data-testid="internal-link"]') if a.get("href", "").startswith("/news")]

            for title, date, link in zip(titles, dates, links):
                content = fetch_bbc_article_content(link)
                records.append({"Title": title, "Date": date, "Link": link, "Content": content})
                time.sleep(delay)

        except Exception as e:
            print(f"Error on page {page}: {e}")

    return pd.DataFrame(records)

# Usage
search_term = "covid" # change to "sports", "technology", etc.
bbc_df = fetch_bbc_articles(search_term, start_page=0, end_page=1)
bbc_df["Date"] = pd.to_datetime(bbc_df["Date"], format="%d %b %Y", errors="coerce")

print(f"\nBBC Articles Data (Keyword: '{search_term}'):")
bbc_df.head(10)


BBC Articles Data (Keyword: 'covid'):


Unnamed: 0,Title,Date,Link,Content
0,Home schooling almost triples since Covid,2025-09-04,https://www.bbc.com/news/articles/cwyrjxz3pn1o,The number of children being home schooled in ...
1,"Pubs adjusting to post-Covid world, managers say",2025-09-03,https://www.bbc.com/news/articles/cj6y69jy343o,Pubgoers' habits have changed significantly si...
2,Reform criticised over doctor's Covid jab clai...,2025-09-06,https://www.bbc.com/news/articles/c62z4rd87nlo,Reform UK has distanced itself from a conferen...
3,Covid and the story of a boat under lockdown,2025-09-06,https://www.bbc.com/news/articles/c9d08xq37jxo,"""As scary as [the lockdown] was, I didn't pani..."
4,The photographer who covered Covid from hospital,2025-08-27,https://www.bbc.com/news/articles/cwy0z8pv9evo,A man who was bedridden for four years due to ...
5,Covid loss businesses take action against insurer,2025-08-27,https://www.bbc.com/news/articles/c0l654l571no,A West Sussex man who contracted Covid-19 in 2...
6,"'Covid left me bedridden, now I've hiked 500 m...",2025-09-01,https://www.bbc.com/news,The panel finds that four of the five genocida...
7,Home schooling almost triples since Covid,2025-09-04,https://www.bbc.com/news/articles/cwyrjxz3pn1o,The number of children being home schooled in ...
8,"Pubs adjusting to post-Covid world, managers say",2025-09-03,https://www.bbc.com/news/articles/cj6y69jy343o,Pubgoers' habits have changed significantly si...
9,Reform criticised over doctor's Covid jab clai...,2025-09-06,https://www.bbc.com/news/articles/c62z4rd87nlo,Reform UK has distanced itself from a conferen...


In [7]:
bbc_df.to_csv("data_bbc_df.csv", index=False)