# 📰 Reuters

## 📌 Instructions

1. Enter your **search term** by changing the `query` variable (e.g., `"inflation"`, `"interest rates"`).  
2. The script will open Reuters search results for that query.  
3. It retrieves:  
   - Title  
   - Date  
   - Link  
   - Full article content  
4. The results are stored in a **pandas DataFrame** and can be exported to CSV:

```python
reuters_df.to_csv("data_reuters_df.csv", index=False)
```

In [None]:
import time
from urllib.parse import quote_plus

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Scrape Reuters site-search results for `query` into `reuters_df`
# (columns: Title, Date, Link). Article bodies are also fetched into
# `contents`, one entry per link.

query = "economy"  # change to "sports", "technology", etc.

# URL-encode the term so multi-word queries such as "interest rates", or
# terms containing characters like "&" or "#", still form a valid URL.
url = f"https://www.reuters.com/site-search/?query={quote_plus(query)}"
base_url = "https://www.reuters.com"

# Chrome options
options = Options()
# options.add_argument("--headless")  # to run in headless mode

# Parallel lists: index i of titles/dates/links describes the same article,
# and contents[i] is the text fetched from links[i].
titles = []
dates = []
links = []
contents = []

# Start browser
driver = webdriver.Chrome(service=Service(), options=options)
try:
    driver.get(url)
    time.sleep(5)  # fixed delay before querying the result list

    # Grab article blocks
    article_blocks = driver.find_elements(
        By.CSS_SELECTOR, 'a[data-testid="TitleLink"]'
    )

    # Extract title, date, and link
    for block in article_blocks:
        try:
            # Construct full link (relative hrefs are resolved against base_url)
            href = block.get_attribute("href")
            link = href if href.startswith("https") else base_url + href

            # Get title
            title_span = block.find_element(
                By.CSS_SELECTOR, 'span[data-testid="TitleHeading"]'
            )
            title = title_span.text.strip()

            # Get date from the <time> tag inside the enclosing <li>
            parent = block.find_element(By.XPATH, "./ancestor::li")
            time_tag = parent.find_element(
                By.CSS_SELECTOR, 'time[data-testid="DateLineText"]'
            )
            date = time_tag.text.strip()
        except Exception as e:
            # Best effort: a result missing any field is skipped entirely so
            # the three lists never get out of step.
            print("Skipped one due to missing element:", e)
        else:
            titles.append(title)
            dates.append(date)
            links.append(link)

    # Visit each link to get article content
    for link in links:
        try:
            driver.get(link)
            time.sleep(3)

            paragraphs = driver.find_elements(
                By.CSS_SELECTOR,
                'div.article-body__content__17Yit div[data-testid^="paragraph-"]',
            )

            article_text = "\n".join(
                [p.text.strip() for p in paragraphs if p.text.strip()]
            )
            contents.append(article_text)
        except Exception as e:
            print(f"Failed to extract content from {link}:", e)
            # Keep contents aligned with links even when a page fails.
            contents.append("")
finally:
    # Always close the browser, even if scraping raised part-way through,
    # so no Chrome process is left running.
    driver.quit()

# NOTE(review): `contents` is collected above but is not part of the frame;
# add `"Content": contents` here if the article text should be exported too.
reuters_df = pd.DataFrame({
    "Title": titles,
    "Date": dates,
    "Link": links,
})

# Normalise the scraped date text to ISO "YYYY-MM-DD" strings.
reuters_df['Date'] = pd.to_datetime(reuters_df['Date']).dt.strftime('%Y-%m-%d')

print(f"\nReuters Articles Data (Keyword: '{query}'):")
reuters_df.head(10)


Reuters Articles Data (Keyword: 'economy'):


Unnamed: 0,Title,Date,Link
0,War Economy,2024-10-23,https://www.reuters.com/podcasts/war-economy-2...
1,German economy expected to contract again in 2...,2024-10-09,https://www.reuters.com/markets/europe/german-...
2,Tracking Trump’s economy,2025-07-12,https://www.reuters.com/data/tracking-trumps-e...
3,"Cuban economy continues five-year decline, eco...",2025-07-15,https://www.reuters.com/world/americas/cuban-e...
4,CEE ECONOMY Hungary's economy falls back in fi...,2025-04-30,https://www.reuters.com/markets/europe/cee-eco...
5,Russian economy is showing first signs of cool...,2025-02-17,https://www.reuters.com/markets/europe/russian...
6,CEE ECONOMY Czech economy accelerates in first...,2025-05-30,https://www.reuters.com/markets/europe/cee-eco...
7,Russia's economy will need to adapt to new US ...,2024-11-29,https://www.reuters.com/markets/europe/russias...
8,Peru economy undershoots expectations in February,2025-04-15,https://www.reuters.com/world/americas/peru-ec...
9,"Swedish economy in recession, preliminary data...",2024-10-29,https://www.reuters.com/markets/europe/swedish...


In [None]:
# Write the scraped results to disk; index=False leaves the row index out
# of the CSV.
output_path = "data_reuters_df.csv"
reuters_df.to_csv(output_path, index=False)