# 📰 CNBC

## 📌 Instructions

1. Enter a **search term** by changing the `query` variable (e.g., `"economy"`, `"sports"`, `"technology"`).  
2. The script will open CNBC’s search page and **scroll down** to load more articles.  
   - The number of results depends on how many times the script scrolls (`for _ in range(5):`).  
   - Increase this number to scrape more results.  
3. The script retrieves:
   - Title  
   - Date  
   - Link  
   - Full article content  
4. The results are stored in a **pandas DataFrame** and can be exported to CSV:

```python
cnbc_df.to_csv("data_cnbc_df.csv", index=False)
```

In [None]:
import time
from urllib.parse import quote_plus

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Scrape CNBC search results (title, date, link) for `query` into `cnbc_df`.
query = "inflation"  # change to "sports", "technology", etc.

# URL-encode the term so multi-word queries or characters such as "&" do not
# corrupt the query string.
encoded_query = quote_plus(query)
search_url = f"https://www.cnbc.com/search/?query={encoded_query}&qsearchterm={encoded_query}"

# Run Chrome without a visible window.
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")

data = []

driver = webdriver.Chrome(options=options)
try:
    driver.get(search_url)
    time.sleep(5)  # fixed wait so the first batch of results can render

    # Scroll to the bottom repeatedly; the results page is expected to append
    # more entries on each scroll. Increase the range to collect more results.
    for _ in range(5):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)

    articles = driver.find_elements(By.CLASS_NAME, "SearchResult-searchResult")

    for article in articles:
        try:
            link_elem = article.find_element(By.CLASS_NAME, "resultlink")
            url = link_elem.get_attribute("href")

            # Skip entries without a link and video pages.
            if not url or "/video/" in url:
                continue

            title_elem = article.find_element(By.CLASS_NAME, "Card-title")
            title = title_elem.text.strip()

            date_elem = article.find_element(By.CLASS_NAME, "SearchResult-publishedDate")
            date_full = date_elem.text.strip()
            # Keep only the first whitespace-separated token of the date text;
            # an empty date raises IndexError and the entry is skipped below.
            date_only = date_full.split()[0]

            data.append({
                "Title": title,
                "Date": date_only,
                "Link": url
            })

        except (WebDriverException, IndexError) as e:
            # Best effort: report the malformed entry and move on to the next.
            print(f"Error processing an article: {e}")
            continue
finally:
    # Always release the browser process, even if scraping failed part-way.
    driver.quit()

# Fix the column set so an empty result still yields the expected columns.
cnbc_df = pd.DataFrame(data, columns=["Title", "Date", "Link"])

def extract_article_content(url):
    """Download an article page and return the text of its body.

    Args:
        url: Address of the article page to fetch.

    Returns:
        The text of every ``<p>`` inside the ``ArticleBody-articleBody``
        container, joined by single spaces; ``None`` when the page has no
        such container; or a string starting with ``"Error: "`` when the
        request fails or the server answers with an HTTP error status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error: {e}"

    soup = BeautifulSoup(response.content, 'html.parser')

    article_div = soup.find("div", class_="ArticleBody-articleBody")
    if article_div is None:
        return None

    # get_text(strip=True) strips every text fragment, which glues words to
    # adjacent inline links ("directorPhillip"). Take the raw text instead and
    # collapse runs of whitespace so the original word spacing is preserved.
    paragraphs = (
        " ".join(p.get_text().split()) for p in article_div.find_all("p")
    )
    return " ".join(text for text in paragraphs if text)

# Fetch the article text for every scraped link (one HTTP request per row).
cnbc_df["Content"] = cnbc_df["Link"].apply(extract_article_content)

print(f"\nCNBC Articles (Keyword: '{query}'):")

# Convert the scraped date strings to datetimes; values that cannot be parsed
# are coerced rather than raising.
cnbc_df["Date"] = pd.to_datetime(cnbc_df["Date"], errors="coerce")
cnbc_df.head()


CNBC Articles (Keyword: 'inflation'):


Unnamed: 0,Title,Date,Link,Content
0,"Floods, heavy rains unlikely to push India’s i...",2025-09-16,https://www.cnbc.com/2025/09/16/floods-heavy-r...,Large swathes of Indian farmland have been aff...
1,"Trump tariffs are fueling inflation, congressi...",2025-09-15,https://www.cnbc.com/2025/09/15/trump-trade-in...,Congressional Budget Office directorPhillip Sw...
2,"India’s inflation rises to 2.07% in August, in...",2025-09-12,https://www.cnbc.com/2025/09/12/india-cpi-infl...,"After easing for nine straight months,India's ..."
3,CNBC Daily Open: Hopes of lower rates overshad...,2025-09-12,https://www.cnbc.com/2025/09/12/cnbc-daily-ope...,"In this article Taken from CNBC’s Daily Open, ..."
4,Here’s the inflation breakdown for August 2025...,2025-09-11,https://www.cnbc.com/2025/09/11/inflation-brea...,Inflationpicked upin August amid higher prices...


In [None]:
# Write the scraped table to disk; index=False keeps the row index out of the file.
output_path = "data_cnbc_df.csv"
cnbc_df.to_csv(output_path, index=False)