Stock Price Data (Multiple Companies)
--> Primary Companies (Oil & Gas):
1.  ExxonMobil (XOM) – since you already have it.
2. Chevron (CVX)
3. BP (BP)
4. Shell (SHEL)
5. Sector ETF --> 
    Energy Select Sector SPDR Fund (XLE) – This helps show how the entire energy sector behaves compared to individual stocks.

In [70]:
import pandas as pd
import yfinance as yf
import os

# Define a folder path for all your files
FDS_FOLDER = "FDS project"

# 1. Ensure the folder exists (creates it if not present)
os.makedirs(FDS_FOLDER, exist_ok=True)


# STEP 1: DOWNLOAD & SAVE INDIVIDUAL CSV FILES TO "FDS project" FOLDER

def download_and_save_data(ticker, start_date="2019-01-01", end_date="2025-01-31"):
    
    df = yf.download(ticker, start=start_date, end=end_date, interval="1d")
    
    # We save with index=True so the dates go in the first column.
    csv_path = os.path.join(FDS_FOLDER, f"{ticker}_data.csv")
    df.to_csv(csv_path, index=True)
    print(f"{ticker} data saved to {csv_path}")

# List of tickers
tickers = ["XOM", "CVX", "BP", "SHEL", "SPY"]

# Download each ticker’s data
for t in tickers:
    download_and_save_data(t)


# STEP 2: MERGE ALL CSV FILES SIDE-BY-SIDE (WIDE FORMAT)
#         WITH "Date" AS THE FIRST COLUMN


def load_and_rename(csv_file, ticker):
    """
    Reads a CSV file (which has 2 extra header rows in your case, 
    so adjust skiprows if needed), renames 'Price' to 'Date', 
    parses it as a datetime column, and appends the ticker symbol 
    to other columns (e.g. XOM_Open, XOM_Close).
    """
    
    df = pd.read_csv(csv_file, skiprows=0)  

    if "Unnamed: 0" in df.columns:
        df.rename(columns={"Unnamed: 0": "Date"}, inplace=True)
    else:
        # If your CSV actually has a "Price" column, rename that instead
        if "Price" in df.columns:
            df.rename(columns={"Price": "Date"}, inplace=True)
    
    # Convert Date column to datetime
    df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d", errors="coerce")
    
    # Rename columns to XOM_Open, XOM_Close, etc.
    rename_map = {
        "Open":      f"{ticker}_Open",
        "High":      f"{ticker}_High",
        "Low":       f"{ticker}_Low",
        "Close":     f"{ticker}_Close",
        "Adj Close": f"{ticker}_Adj_Close",
        "Volume":    f"{ticker}_Volume",
    }
    df.rename(columns=rename_map, inplace=True)
    
    return df

merged_df = None

for t in tickers:
    # Path to each CSV in the FDS project folder
    csv_path = os.path.join(FDS_FOLDER, f"{t}_data.csv")
    temp_df = load_and_rename(csv_path, t)
    
    if merged_df is None:
        # First ticker: just take it as our base
        merged_df = temp_df
    else:
        # Merge side-by-side on 'Date'
        merged_df = pd.merge(merged_df, temp_df, on="Date", how="outer")

# Sort by date
merged_df.sort_values("Date", inplace=True)


# STEP 3 : REORDER COLUMNS (Date first, then XOM_Open, XOM_High, etc.)


base_cols = ["Open", "High", "Low", "Close", "Adj_Close", "Volume"]


final_cols = ["Date"]
for t in tickers:
    for c in base_cols:
        col_name = f"{t}_{c}"
        final_cols.append(col_name)

# Keep only existing columns in that order
existing_cols = [col for col in final_cols if col in merged_df.columns]
merged_df = merged_df[existing_cols]

# STEP 4 : SAVE THE FINAL COMBINED CSV IN THE FDS project FOLDER

combined_csv_path = os.path.join(FDS_FOLDER, "combined_stocks.csv")
merged_df.to_csv(combined_csv_path, index=False)
print(f"Combined wide-format CSV saved to {combined_csv_path}")

# Quick check
print("Columns in final DataFrame:")
print(merged_df.columns.tolist())
print("First few rows:")
print(merged_df.head())


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


XOM data saved to FDS project\XOM_data.csv
CVX data saved to FDS project\CVX_data.csv


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

BP data saved to FDS project\BP_data.csv
SHEL data saved to FDS project\SHEL_data.csv





SPY data saved to FDS project\SPY_data.csv
Combined wide-format CSV saved to FDS project\combined_stocks.csv
Columns in final DataFrame:
['Date', 'XOM_Open', 'XOM_High', 'XOM_Low', 'XOM_Close', 'XOM_Volume', 'CVX_Open', 'CVX_High', 'CVX_Low', 'CVX_Close', 'CVX_Volume', 'BP_Open', 'BP_High', 'BP_Low', 'BP_Close', 'BP_Volume', 'SHEL_Open', 'SHEL_High', 'SHEL_Low', 'SHEL_Close', 'SHEL_Volume', 'SPY_Open', 'SPY_High', 'SPY_Low', 'SPY_Close', 'SPY_Volume']
First few rows:
         Date            XOM_Open            XOM_High             XOM_Low  \
32 2019-01-02    49.9832638804862  51.853464940383276   49.91647381653701   
33 2019-01-03  51.949938965958026   52.13547446226502  50.732826563151676   
34 2019-01-04  51.682765994382216    52.8924553590896   51.47496715009814   
35 2019-01-07   52.87761184365416   53.50843134375624    52.4991178787555   
36 2019-01-08   53.83498398194337  53.872087686494346  53.389699892670826   

             XOM_Close XOM_Volume           CVX_Open           CV

In [105]:
import yfinance as yf
import pandas as pd
import os

FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

start_date = "2019-01-01"
end_date   = "2025-01-31"
interval   = "1d"


def flatten_columns(df):
    """
    If the DataFrame has multi-level columns, flatten them into a single level.
    For example, ("Adj Close", "BZ=F") becomes "Adj Close_BZ=F".
    """
    if isinstance(df.columns, pd.MultiIndex):
        # Convert multi-level column tuples to a single string
        df.columns = df.columns.to_flat_index()
        df.columns = ["_".join([str(c) for c in col if c]) for col in df.columns]
    return df

def rename_brent_columns(df):
    """
    Renames columns that contain 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'
    to 'Brent_Open', 'Brent_High', etc. Also ensures there's a 'Date' column.
    """
    rename_map = {}
    for col in df.columns:
        col_lower = col.lower()  # for easy matching
        if "open" in col_lower:
            rename_map[col] = "Brent_Open"
        elif "high" in col_lower:
            rename_map[col] = "Brent_High"
        elif "low" in col_lower:
            rename_map[col] = "Brent_Low"
        elif "adj close" in col_lower or "adjclose" in col_lower:
            rename_map[col] = "Brent_Adj_Close"
        elif "close" in col_lower and "adj" not in col_lower:
            rename_map[col] = "Brent_Close"
        elif "volume" in col_lower:
            rename_map[col] = "Brent_Volume"
        elif "date" in col_lower:
            rename_map[col] = "Date"
    df.rename(columns=rename_map, inplace=True)
    
    # Keep only the columns we need
    keep_cols = ["Date","Brent_Open","Brent_High","Brent_Low","Brent_Close","Brent_Adj_Close","Brent_Volume"]
    df = df[[c for c in keep_cols if c in df.columns]]
    return df

def rename_wti_columns(df):
    """
    Renames columns that contain 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'
    to 'WTI_Open', 'WTI_High', etc. Also ensures there's a 'Date' column.
    """
    rename_map = {}
    for col in df.columns:
        col_lower = col.lower()
        if "open" in col_lower:
            rename_map[col] = "WTI_Open"
        elif "high" in col_lower:
            rename_map[col] = "WTI_High"
        elif "low" in col_lower:
            rename_map[col] = "WTI_Low"
        elif "adj close" in col_lower or "adjclose" in col_lower:
            rename_map[col] = "WTI_Adj_Close"
        elif "close" in col_lower and "adj" not in col_lower:
            rename_map[col] = "WTI_Close"
        elif "volume" in col_lower:
            rename_map[col] = "WTI_Volume"
        elif "date" in col_lower:
            rename_map[col] = "Date"
    df.rename(columns=rename_map, inplace=True)
    
    # Keep only the columns we need
    keep_cols = ["Date","WTI_Open","WTI_High","WTI_Low","WTI_Close","WTI_Adj_Close","WTI_Volume"]
    df = df[[c for c in keep_cols if c in df.columns]]
    return df


# 3) DOWNLOAD & PROCESS BRENT


df_brent = yf.download(
    tickers="BZ=F",
    start=start_date,
    end=end_date,
    interval=interval
)

# Flatten columns if multi-level
df_brent = flatten_columns(df_brent)

# Convert index to a normal column
df_brent.reset_index(inplace=True)

# Rename columns for Brent
df_brent = rename_brent_columns(df_brent)


# 4) DOWNLOAD & PROCESS WTI


df_wti = yf.download(
    tickers="CL=F",
    start=start_date,
    end=end_date,
    interval=interval
)

df_wti = flatten_columns(df_wti)
df_wti.reset_index(inplace=True)
df_wti = rename_wti_columns(df_wti)


# 5) MERGE & SAVE


df_merged = pd.merge(df_brent, df_wti, on="Date", how="outer")
df_merged.sort_values("Date", inplace=True)

output_csv = os.path.join(FDS_FOLDER, "Brent_WTI_data.csv")
df_merged.to_csv(output_csv, index=False)

print(f"Merged Brent + WTI data saved to: {output_csv}")
print("Columns:", df_merged.columns.tolist())
print("Preview of the merged DataFrame:")
print(df_merged.head())

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Merged Brent + WTI data saved to: FDS project\Brent_WTI_data.csv
Columns: ['Date', 'Brent_Open', 'Brent_High', 'Brent_Low', 'Brent_Close', 'Brent_Volume', 'WTI_Open', 'WTI_High', 'WTI_Low', 'WTI_Close', 'WTI_Volume']
Preview of the merged DataFrame:
        Date  Brent_Open  Brent_High  Brent_Low  Brent_Close  Brent_Volume  \
0 2019-01-02   54.250000   56.560001  52.500000    54.910000         43517   
1 2019-01-03   54.770000   56.290001  53.930000    55.950001         36535   
2 2019-01-04   55.580002   58.299999  55.360001    57.060001         42426   
3 2019-01-07   57.369999   58.919998  57.279999    57.330002         41677   
4 2019-01-08   57.630001   58.860001  57.110001    58.720001         34135   

    WTI_Open   WTI_High    WTI_Low  WTI_Close  WTI_Volume  
0  45.799999  47.779999  44.349998  46.540001    850480.0  
1  46.259998  47.490002  45.349998  47.090000    788718.0  
2  46.900002  49.220001  46.650002  47.959999    817277.0  
3  48.299999  49.790001  48.110001  48.52

In [107]:
import pandas as pd
import pandas_datareader.data as web
import os
from datetime import datetime

# 1) Define FRED economic indicators
indicators = {
    "Inflation_Rate": "CPIAUCSL",  # Consumer Price Index (CPI)
    "Interest_Rate": "DFF",        # Effective Federal Funds Rate
    "GDP": "GDP"                   # Real Gross Domestic Product
}

# 2) Define date range
start_date = "2019-01-01"
end_date = "2025-01-31"

# 3) Fetch data from FRED
macro_data = pd.DataFrame()
for name, fred_code in indicators.items():
    macro_data[name] = web.DataReader(fred_code, "fred", start_date, end_date)


FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

csv_path = os.path.join(FDS_FOLDER, "Macroeconomic_Data.csv")
macro_data.to_csv(csv_path)

print(macro_data.head(30))
print(f"\nMacroeconomic data saved to: {csv_path}")


            Inflation_Rate  Interest_Rate        GDP
DATE                                                
2019-01-01         252.561           2.40  21111.600
2019-02-01         253.319           2.40        NaN
2019-03-01         254.277           2.40        NaN
2019-04-01         255.233           2.41  21397.938
2019-05-01         255.296           2.45        NaN
2019-06-01         255.213           2.40        NaN
2019-07-01         255.802           2.39  21717.171
2019-08-01         256.036           2.14        NaN
2019-09-01         256.430           2.13        NaN
2019-10-01         257.155           1.88  21933.217
2019-11-01         257.879           1.57        NaN
2019-12-01         258.630           1.56        NaN
2020-01-01         259.127           1.55  21727.657
2020-02-01         259.250           1.59        NaN
2020-03-01         258.076           1.58        NaN
2020-04-01         256.032           0.06  19935.444
2020-05-01         255.802           0.05     

In [120]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your desired date range
start_date = "2019-01-01"
end_date   = "2019-12-31"  

# 3) Build RSS feed URLs that incorporate the date filters
#    For example: ExxonMobil after:2019-01-01 before:2020-01-01
rss_feeds = [
   f"https://news.google.com/rss/search?q=ExxonMobil+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=XOM+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Fetch all articles from these feeds
all_news = []
for feed_url in rss_feeds:
    print(f"Fetching XOM news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Avoid rate-limiting

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to a DataFrame and remove duplicates (optional)
df_xom = pd.DataFrame(all_news)

# 6) Save to CSV (name it to reflect the date range)
csv_path = os.path.join(FDS_FOLDER, "XOM_News_2019.csv")
df_xom.to_csv(csv_path, index=False)
print(f"Saved ExxonMobil news to {csv_path}")

# 7) Quick preview
print(df_xom.head())
print("\nCount of articles:", df_xom.shape[0])


Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+stock+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+news+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=XOM+stock+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+market+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+earnings+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching

In [122]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your desired date range
start_date = "2020-01-01"
end_date   = "2020-12-31"  # Note: "before:2020-01-01" will cover up to Dec 31, 2019

# 3) Build RSS feed URLs that incorporate the date filters
#    For example: ExxonMobil after:2019-01-01 before:2020-01-01
rss_feeds = [
   f"https://news.google.com/rss/search?q=ExxonMobil+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=XOM+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Fetch all articles from these feeds
all_news = []
for feed_url in rss_feeds:
    print(f"Fetching XOM news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Avoid rate-limiting

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to a DataFrame and remove duplicates (optional)
df_xom = pd.DataFrame(all_news)

# 6) Save to CSV (name it to reflect the date range)
csv_path = os.path.join(FDS_FOLDER, "XOM_News_2020.csv")
df_xom.to_csv(csv_path, index=False)
print(f"Saved ExxonMobil news to {csv_path}")

# 7) Quick preview
print(df_xom.head())
print("\nCount of articles:", df_xom.shape[0])


Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+stock+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+news+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=XOM+stock+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+market+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+earnings+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching

In [124]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your desired date range
start_date = "2021-01-01"
end_date   = "2021-12-31"  # Note: "before:2020-01-01" will cover up to Dec 31, 2019

# 3) Build RSS feed URLs that incorporate the date filters
#    For example: ExxonMobil after:2019-01-01 before:2020-01-01
rss_feeds = [
   f"https://news.google.com/rss/search?q=ExxonMobil+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=XOM+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Fetch all articles from these feeds
all_news = []
for feed_url in rss_feeds:
    print(f"Fetching XOM news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Avoid rate-limiting

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to a DataFrame and remove duplicates (optional)
df_xom = pd.DataFrame(all_news)

# 6) Save to CSV (name it to reflect the date range)
csv_path = os.path.join(FDS_FOLDER, "XOM_News_2021.csv")
df_xom.to_csv(csv_path, index=False)
print(f"Saved ExxonMobil news to {csv_path}")

# 7) Quick preview
print(df_xom.head())
print("\nCount of articles:", df_xom.shape[0])


Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+stock+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+news+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=XOM+stock+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+market+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+earnings+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching

In [126]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your desired date range
start_date = "2022-01-01"
end_date   = "2022-12-31"  # Note: "before:2020-01-01" will cover up to Dec 31, 2019

# 3) Build RSS feed URLs that incorporate the date filters
#    For example: ExxonMobil after:2019-01-01 before:2020-01-01
rss_feeds = [
   f"https://news.google.com/rss/search?q=ExxonMobil+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=XOM+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Fetch all articles from these feeds
all_news = []
for feed_url in rss_feeds:
    print(f"Fetching XOM news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Avoid rate-limiting

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to a DataFrame and remove duplicates (optional)
df_xom = pd.DataFrame(all_news)

# 6) Save to CSV (name it to reflect the date range)
csv_path = os.path.join(FDS_FOLDER, "XOM_News_2022.csv")
df_xom.to_csv(csv_path, index=False)
print(f"Saved ExxonMobil news to {csv_path}")

# 7) Quick preview
print(df_xom.head())
print("\nCount of articles:", df_xom.shape[0])


Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+stock+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+news+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=XOM+stock+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+market+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+earnings+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching

In [128]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your desired date range
start_date = "2023-01-01"
end_date   = "2023-12-31"  # Note: "before:2020-01-01" will cover up to Dec 31, 2019

# 3) Build RSS feed URLs that incorporate the date filters
#    For example: ExxonMobil after:2019-01-01 before:2020-01-01
rss_feeds = [
   f"https://news.google.com/rss/search?q=ExxonMobil+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=XOM+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Fetch all articles from these feeds
all_news = []
for feed_url in rss_feeds:
    print(f"Fetching XOM news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Avoid rate-limiting

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to a DataFrame and remove duplicates (optional)
df_xom = pd.DataFrame(all_news)

# 6) Save to CSV (name it to reflect the date range)
csv_path = os.path.join(FDS_FOLDER, "XOM_News_2023.csv")
df_xom.to_csv(csv_path, index=False)
print(f"Saved ExxonMobil news to {csv_path}")

# 7) Quick preview
print(df_xom.head())
print("\nCount of articles:", df_xom.shape[0])


Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+stock+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+news+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=XOM+stock+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+market+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+earnings+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching

In [130]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your desired date range
start_date = "2024-01-01"
end_date   = "2024-12-31"  # Note: "before:2020-01-01" will cover up to Dec 31, 2019

# 3) Build RSS feed URLs that incorporate the date filters
#    For example: ExxonMobil after:2019-01-01 before:2020-01-01
rss_feeds = [
   f"https://news.google.com/rss/search?q=ExxonMobil+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=XOM+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Fetch all articles from these feeds
all_news = []
for feed_url in rss_feeds:
    print(f"Fetching XOM news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Avoid rate-limiting

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to a DataFrame and remove duplicates (optional)
df_xom = pd.DataFrame(all_news)

# 6) Save to CSV (name it to reflect the date range)
csv_path = os.path.join(FDS_FOLDER, "XOM_News_2024.csv")
df_xom.to_csv(csv_path, index=False)
print(f"Saved ExxonMobil news to {csv_path}")

# 7) Quick preview
print(df_xom.head())
print("\nCount of articles:", df_xom.shape[0])


Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+stock+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+news+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=XOM+stock+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+market+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+earnings+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching

In [132]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your desired date range
start_date = "2025-01-01"
end_date   = "2025-12-31"  # Note: "before:2020-01-01" will cover up to Dec 31, 2019

# 3) Build RSS feed URLs that incorporate the date filters
#    For example: ExxonMobil after:2019-01-01 before:2020-01-01
rss_feeds = [
   f"https://news.google.com/rss/search?q=ExxonMobil+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=XOM+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
   f"https://news.google.com/rss/search?q=ExxonMobil+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Fetch all articles from these feeds
all_news = []
for feed_url in rss_feeds:
    print(f"Fetching XOM news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Avoid rate-limiting

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to a DataFrame and remove duplicates (optional)
df_xom = pd.DataFrame(all_news)

# 6) Save to CSV (name it to reflect the date range)
csv_path = os.path.join(FDS_FOLDER, "XOM_News_2025.csv")
df_xom.to_csv(csv_path, index=False)
print(f"Saved ExxonMobil news to {csv_path}")

# 7) Quick preview
print(df_xom.head())
print("\nCount of articles:", df_xom.shape[0])


Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+stock+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+news+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=XOM+stock+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+market+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+earnings+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+finance+analysis+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching XOM news from: https://news.google.com/rss/search?q=ExxonMobil+quarterly+results+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching

In [134]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2019-01-01"
end_date   = "2020-01-01"  # "before:2020-01-01" covers 2019

# 3) Build multiple RSS feeds for Shell
#    We add "after:2019-01-01 before:2020-01-01" to some queries to target 2019
rss_feeds = [
    f"https://news.google.com/rss/search?q=Shell+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+plc+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",

    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Shell+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Shell news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_shell = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Shell_News_2019.csv")
df_shell.to_csv(csv_path, index=False)
print(f"\nSaved Shell news to {csv_path}")

# 7) Quick preview
print(df_shell.head())
print("\nArticle count:", len(df_shell))


Fetching Shell news from: https://news.google.com/rss/search?q=Shell+stock+after:2019-01-01+before:2020-01-01&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+news+after:2019-01-01+before:2020-01-01&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+oil+market+after:2019-01-01+before:2020-01-01&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:2019-01-01+before:2020-01-01&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+plc+after:2019-01-01+before:2020-01-01&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+earnings+after:2019-01-01+before:2020-01-01&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+environment+after:2019-01-01+before:2020-01-01&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https

In [136]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2020-01-01"
end_date   = "2020-12-31"  


rss_feeds = [
    f"https://news.google.com/rss/search?q=Shell+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+plc+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",

    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Shell+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Shell news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_shell = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Shell_News_2020.csv")
df_shell.to_csv(csv_path, index=False)
print(f"\nSaved Shell news to {csv_path}")

# 7) Quick preview
print(df_shell.head())
print("\nArticle count:", len(df_shell))


Fetching Shell news from: https://news.google.com/rss/search?q=Shell+stock+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+news+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+oil+market+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+plc+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+earnings+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+environment+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https

In [138]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2021-01-01"
end_date   = "2021-12-31"  


rss_feeds = [
    f"https://news.google.com/rss/search?q=Shell+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+plc+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",

    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Shell+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Shell news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_shell = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Shell_News_2021.csv")
df_shell.to_csv(csv_path, index=False)
print(f"\nSaved Shell news to {csv_path}")

# 7) Quick preview
print(df_shell.head())
print("\nArticle count:", len(df_shell))


Fetching Shell news from: https://news.google.com/rss/search?q=Shell+stock+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+news+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+oil+market+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+plc+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+earnings+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+environment+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https

In [140]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2022-01-01"
end_date   = "2022-12-31"  


rss_feeds = [
    f"https://news.google.com/rss/search?q=Shell+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+plc+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",

    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Shell+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Shell news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_shell = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Shell_News_2022.csv")
df_shell.to_csv(csv_path, index=False)
print(f"\nSaved Shell news to {csv_path}")

# 7) Quick preview
print(df_shell.head())
print("\nArticle count:", len(df_shell))


Fetching Shell news from: https://news.google.com/rss/search?q=Shell+stock+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+news+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+oil+market+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+plc+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+earnings+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+environment+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https

In [142]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2023-01-01"
end_date   = "2023-12-31"  


rss_feeds = [
    f"https://news.google.com/rss/search?q=Shell+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+plc+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",

    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Shell+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Shell news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_shell = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Shell_News_2023.csv")
df_shell.to_csv(csv_path, index=False)
print(f"\nSaved Shell news to {csv_path}")

# 7) Quick preview
print(df_shell.head())
print("\nArticle count:", len(df_shell))


Fetching Shell news from: https://news.google.com/rss/search?q=Shell+stock+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+news+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+oil+market+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+plc+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+earnings+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+environment+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https

In [144]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2024-01-01"
end_date   = "2024-12-31"  


rss_feeds = [
    f"https://news.google.com/rss/search?q=Shell+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+plc+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",

    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Shell+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Shell news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_shell = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Shell_News_2024.csv")
df_shell.to_csv(csv_path, index=False)
print(f"\nSaved Shell news to {csv_path}")

# 7) Quick preview
print(df_shell.head())
print("\nArticle count:", len(df_shell))


Fetching Shell news from: https://news.google.com/rss/search?q=Shell+stock+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+news+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+oil+market+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+plc+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+earnings+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+environment+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https

In [146]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2025-01-01"
end_date   = "2025-12-31"  


rss_feeds = [
    f"https://news.google.com/rss/search?q=Shell+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+plc+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",

    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Shell+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Shell+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Shell news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_shell = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Shell_News_2025.csv")
df_shell.to_csv(csv_path, index=False)
print(f"\nSaved Shell news to {csv_path}")

# 7) Quick preview
print(df_shell.head())
print("\nArticle count:", len(df_shell))


Fetching Shell news from: https://news.google.com/rss/search?q=Shell+stock+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+news+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+oil+market+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Royal+Dutch+Shell+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+plc+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+earnings+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https://news.google.com/rss/search?q=Shell+environment+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Shell news from: https

In [148]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2019-01-01"
end_date   = "2019-12-31"  # "before:2020-01-01" covers 2019

# 3) Build multiple RSS feeds for Chevron (2019 coverage)
#    Using synonyms like "Chevron Corporation" or "CVX"
rss_feeds = [
    f"https://news.google.com/rss/search?q=Chevron+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+corporation+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=CVX+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    
    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Chevron+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Chevron news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_chevron = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Chevron_News_2019.csv")
df_chevron.to_csv(csv_path, index=False)
print(f"\nSaved Chevron news to {csv_path}")

# 7) Quick preview
print(df_chevron.head())
print("\nArticle count:", len(df_chevron))


Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+stock+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+news+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+oil+market+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+corporation+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=CVX+stock+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+earnings+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+environment+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetch

In [150]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2020-01-01"
end_date   = "2020-12-31"  # "before:2020-01-01" covers 2019

# 3) Build multiple RSS feeds for Chevron (2019 coverage)
#    Using synonyms like "Chevron Corporation" or "CVX"
rss_feeds = [
    f"https://news.google.com/rss/search?q=Chevron+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+corporation+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=CVX+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    
    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Chevron+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Chevron news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_chevron = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Chevron_News_2020.csv")
df_chevron.to_csv(csv_path, index=False)
print(f"\nSaved Chevron news to {csv_path}")

# 7) Quick preview
print(df_chevron.head())
print("\nArticle count:", len(df_chevron))


Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+stock+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+news+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+oil+market+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+corporation+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=CVX+stock+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+earnings+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+environment+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetch

In [152]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2021-01-01"
end_date   = "2021-12-31"  # "before:2020-01-01" covers 2019

# 3) Build multiple RSS feeds for Chevron (2019 coverage)
#    Using synonyms like "Chevron Corporation" or "CVX"
rss_feeds = [
    f"https://news.google.com/rss/search?q=Chevron+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+corporation+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=CVX+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    
    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Chevron+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Chevron news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_chevron = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Chevron_News_2021.csv")
df_chevron.to_csv(csv_path, index=False)
print(f"\nSaved Chevron news to {csv_path}")

# 7) Quick preview
print(df_chevron.head())
print("\nArticle count:", len(df_chevron))


Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+stock+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+news+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+oil+market+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+corporation+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=CVX+stock+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+earnings+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+environment+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetch

In [154]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2022-01-01"
end_date   = "2022-12-31"  # "before:2020-01-01" covers 2019

# 3) Build multiple RSS feeds for Chevron (2019 coverage)
#    Using synonyms like "Chevron Corporation" or "CVX"
rss_feeds = [
    f"https://news.google.com/rss/search?q=Chevron+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+corporation+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=CVX+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    
    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Chevron+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Chevron news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_chevron = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Chevron_News_2022.csv")
df_chevron.to_csv(csv_path, index=False)
print(f"\nSaved Chevron news to {csv_path}")

# 7) Quick preview
print(df_chevron.head())
print("\nArticle count:", len(df_chevron))


Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+stock+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+news+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+oil+market+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+corporation+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=CVX+stock+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+earnings+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+environment+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetch

In [156]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2023-01-01"
end_date   = "2023-12-31"  # "before:2020-01-01" covers 2019

# 3) Build multiple RSS feeds for Chevron (2019 coverage)
#    Using synonyms like "Chevron Corporation" or "CVX"
rss_feeds = [
    f"https://news.google.com/rss/search?q=Chevron+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+corporation+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=CVX+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    
    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Chevron+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Chevron news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_chevron = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Chevron_News_2023.csv")
df_chevron.to_csv(csv_path, index=False)
print(f"\nSaved Chevron news to {csv_path}")

# 7) Quick preview
print(df_chevron.head())
print("\nArticle count:", len(df_chevron))


Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+stock+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+news+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+oil+market+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+corporation+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=CVX+stock+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+earnings+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+environment+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetch

In [158]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2024-01-01"
end_date   = "2024-12-31"  # "before:2020-01-01" covers 2019

# 3) Build multiple RSS feeds for Chevron (2019 coverage)
#    Using synonyms like "Chevron Corporation" or "CVX"
rss_feeds = [
    f"https://news.google.com/rss/search?q=Chevron+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+corporation+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=CVX+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    
    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Chevron+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Chevron news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_chevron = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Chevron_News_2024.csv")
df_chevron.to_csv(csv_path, index=False)
print(f"\nSaved Chevron news to {csv_path}")

# 7) Quick preview
print(df_chevron.head())
print("\nArticle count:", len(df_chevron))


Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+stock+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+news+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+oil+market+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+corporation+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=CVX+stock+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+earnings+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+environment+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetch

In [160]:
import feedparser
import pandas as pd
import time
import os

# 1) Create or ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (for the query strings)
start_date = "2025-01-01"
end_date   = "2025-12-31"  # "before:2020-01-01" covers 2019

# 3) Build multiple RSS feeds for Chevron (2019 coverage)
#    Using synonyms like "Chevron Corporation" or "CVX"
rss_feeds = [
    f"https://news.google.com/rss/search?q=Chevron+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+news+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+oil+market+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+corporation+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=CVX+stock+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    
    # Additional queries for broader coverage
    f"https://news.google.com/rss/search?q=Chevron+earnings+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+environment+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+energy+sector+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+finance+analysis+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+global+markets+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en",
    f"https://news.google.com/rss/search?q=Chevron+expansions+after:{start_date}+before:{end_date}&hl=en-US&gl=US&ceid=US:en"
]

# 4) Parse all feeds and collect articles
all_news = []

for feed_url in rss_feeds:
    print(f"Fetching Chevron news from: {feed_url}")
    feed = feedparser.parse(feed_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title":          entry.get("title", ""),
            "summary":        entry.get("summary", ""),
            "link":           entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_chevron = pd.DataFrame(all_news)

# 6) Save to CSV in the FDS project folder
csv_path = os.path.join(FDS_FOLDER, "Chevron_News_2025.csv")
df_chevron.to_csv(csv_path, index=False)
print(f"\nSaved Chevron news to {csv_path}")

# 7) Quick preview
print(df_chevron.head())
print("\nArticle count:", len(df_chevron))


Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+stock+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+news+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+oil+market+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+corporation+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=CVX+stock+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+earnings+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Chevron news from: https://news.google.com/rss/search?q=Chevron+environment+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetch

In [162]:
import feedparser
import pandas as pd
import time
import os

FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

start_date = "2019-01-01"
end_date   = "2019-12-31"

# BP synonyms
bp_synonyms = [
    f"BP+stock+after:{start_date}+before:{end_date}",
    f"BP+news+after:{start_date}+before:{end_date}",
    f"BP+oil+market+after:{start_date}+before:{end_date}",
    f"BP+corporation+after:{start_date}+before:{end_date}",
    f"BP+plc+after:{start_date}+before:{end_date}",
    f"BP+earnings+after:{start_date}+before:{end_date}",
    f"BP+environment+after:{start_date}+before:{end_date}",
    f"BP+energy+sector+after:{start_date}+before:{end_date}",
    f"BP+finance+analysis+after:{start_date}+before:{end_date}",
    f"BP+global+markets+after:{start_date}+before:{end_date}",
    f"BP+expansions+after:{start_date}+before:{end_date}"
]

all_news = []
for term in bp_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching BP news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)
    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

df_bp = pd.DataFrame(all_news)

csv_path = os.path.join(FDS_FOLDER, "BP_News_2019.csv")
df_bp.to_csv(csv_path, index=False)
print(f"\nSaved BP news to {csv_path}")
print("Article count:", len(df_bp))
print(df_bp.head())
print("\nArticle count:", len(df_bp))


Fetching BP news from: https://news.google.com/rss/search?q=BP+stock+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+news+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+oil+market+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+corporation+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+plc+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+earnings+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+environment+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+energy+sec

In [164]:
import feedparser
import pandas as pd
import time
import os

FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

start_date = "2020-01-01"
end_date   = "2020-12-31"

# BP synonyms
bp_synonyms = [
    f"BP+stock+after:{start_date}+before:{end_date}",
    f"BP+news+after:{start_date}+before:{end_date}",
    f"BP+oil+market+after:{start_date}+before:{end_date}",
    f"BP+corporation+after:{start_date}+before:{end_date}",
    f"BP+plc+after:{start_date}+before:{end_date}",
    f"BP+earnings+after:{start_date}+before:{end_date}",
    f"BP+environment+after:{start_date}+before:{end_date}",
    f"BP+energy+sector+after:{start_date}+before:{end_date}",
    f"BP+finance+analysis+after:{start_date}+before:{end_date}",
    f"BP+global+markets+after:{start_date}+before:{end_date}",
    f"BP+expansions+after:{start_date}+before:{end_date}"
]

all_news = []
for term in bp_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching BP news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)
    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

df_bp = pd.DataFrame(all_news)

csv_path = os.path.join(FDS_FOLDER, "BP_News_2020.csv")
df_bp.to_csv(csv_path, index=False)
print(f"\nSaved BP news to {csv_path}")
print("Article count:", len(df_bp))
print(df_bp.head())
print("\nArticle count:", len(df_bp))


Fetching BP news from: https://news.google.com/rss/search?q=BP+stock+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+news+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+oil+market+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+corporation+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+plc+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+earnings+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+environment+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+energy+sec

In [166]:
import feedparser
import pandas as pd
import time
import os

FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

start_date = "2021-01-01"
end_date   = "2021-12-31"

# BP synonyms
bp_synonyms = [
    f"BP+stock+after:{start_date}+before:{end_date}",
    f"BP+news+after:{start_date}+before:{end_date}",
    f"BP+oil+market+after:{start_date}+before:{end_date}",
    f"BP+corporation+after:{start_date}+before:{end_date}",
    f"BP+plc+after:{start_date}+before:{end_date}",
    f"BP+earnings+after:{start_date}+before:{end_date}",
    f"BP+environment+after:{start_date}+before:{end_date}",
    f"BP+energy+sector+after:{start_date}+before:{end_date}",
    f"BP+finance+analysis+after:{start_date}+before:{end_date}",
    f"BP+global+markets+after:{start_date}+before:{end_date}",
    f"BP+expansions+after:{start_date}+before:{end_date}"
]

all_news = []
for term in bp_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching BP news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)
    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

df_bp = pd.DataFrame(all_news)

csv_path = os.path.join(FDS_FOLDER, "BP_News_2021.csv")
df_bp.to_csv(csv_path, index=False)
print(f"\nSaved BP news to {csv_path}")
print("Article count:", len(df_bp))
print(df_bp.head())
print("\nArticle count:", len(df_bp))


Fetching BP news from: https://news.google.com/rss/search?q=BP+stock+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+news+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+oil+market+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+corporation+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+plc+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+earnings+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+environment+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+energy+sec

In [168]:
import feedparser
import pandas as pd
import time
import os

FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

start_date = "2022-01-01"
end_date   = "2022-12-31"

# BP synonyms
bp_synonyms = [
    f"BP+stock+after:{start_date}+before:{end_date}",
    f"BP+news+after:{start_date}+before:{end_date}",
    f"BP+oil+market+after:{start_date}+before:{end_date}",
    f"BP+corporation+after:{start_date}+before:{end_date}",
    f"BP+plc+after:{start_date}+before:{end_date}",
    f"BP+earnings+after:{start_date}+before:{end_date}",
    f"BP+environment+after:{start_date}+before:{end_date}",
    f"BP+energy+sector+after:{start_date}+before:{end_date}",
    f"BP+finance+analysis+after:{start_date}+before:{end_date}",
    f"BP+global+markets+after:{start_date}+before:{end_date}",
    f"BP+expansions+after:{start_date}+before:{end_date}"
]

all_news = []
for term in bp_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching BP news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)
    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

df_bp = pd.DataFrame(all_news)

csv_path = os.path.join(FDS_FOLDER, "BP_News_2022.csv")
df_bp.to_csv(csv_path, index=False)
print(f"\nSaved BP news to {csv_path}")
print("Article count:", len(df_bp))
print(df_bp.head())
print("\nArticle count:", len(df_bp))


Fetching BP news from: https://news.google.com/rss/search?q=BP+stock+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+news+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+oil+market+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+corporation+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+plc+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+earnings+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+environment+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+energy+sec

In [170]:
import feedparser
import pandas as pd
import time
import os

FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

start_date = "2023-01-01"
end_date   = "2023-12-31"

# BP synonyms
bp_synonyms = [
    f"BP+stock+after:{start_date}+before:{end_date}",
    f"BP+news+after:{start_date}+before:{end_date}",
    f"BP+oil+market+after:{start_date}+before:{end_date}",
    f"BP+corporation+after:{start_date}+before:{end_date}",
    f"BP+plc+after:{start_date}+before:{end_date}",
    f"BP+earnings+after:{start_date}+before:{end_date}",
    f"BP+environment+after:{start_date}+before:{end_date}",
    f"BP+energy+sector+after:{start_date}+before:{end_date}",
    f"BP+finance+analysis+after:{start_date}+before:{end_date}",
    f"BP+global+markets+after:{start_date}+before:{end_date}",
    f"BP+expansions+after:{start_date}+before:{end_date}"
]

all_news = []
for term in bp_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching BP news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)
    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

df_bp = pd.DataFrame(all_news)

csv_path = os.path.join(FDS_FOLDER, "BP_News_2023.csv")
df_bp.to_csv(csv_path, index=False)
print(f"\nSaved BP news to {csv_path}")
print("Article count:", len(df_bp))
print(df_bp.head())
print("\nArticle count:", len(df_bp))


Fetching BP news from: https://news.google.com/rss/search?q=BP+stock+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+news+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+oil+market+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+corporation+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+plc+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+earnings+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+environment+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+energy+sec

In [172]:
import feedparser
import pandas as pd
import time
import os

FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

start_date = "2024-01-01"
end_date   = "2024-12-31"

# BP synonyms
bp_synonyms = [
    f"BP+stock+after:{start_date}+before:{end_date}",
    f"BP+news+after:{start_date}+before:{end_date}",
    f"BP+oil+market+after:{start_date}+before:{end_date}",
    f"BP+corporation+after:{start_date}+before:{end_date}",
    f"BP+plc+after:{start_date}+before:{end_date}",
    f"BP+earnings+after:{start_date}+before:{end_date}",
    f"BP+environment+after:{start_date}+before:{end_date}",
    f"BP+energy+sector+after:{start_date}+before:{end_date}",
    f"BP+finance+analysis+after:{start_date}+before:{end_date}",
    f"BP+global+markets+after:{start_date}+before:{end_date}",
    f"BP+expansions+after:{start_date}+before:{end_date}"
]

all_news = []
for term in bp_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching BP news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)
    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

df_bp = pd.DataFrame(all_news)
csv_path = os.path.join(FDS_FOLDER, "BP_News_2024.csv")
df_bp.to_csv(csv_path, index=False)
print(f"\nSaved BP news to {csv_path}")
print("Article count:", len(df_bp))
print(df_bp.head())
print("\nArticle count:", len(df_bp))


Fetching BP news from: https://news.google.com/rss/search?q=BP+stock+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+news+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+oil+market+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+corporation+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+plc+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+earnings+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+environment+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+energy+sec

In [174]:
import feedparser
import pandas as pd
import time
import os

FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

start_date = "2025-01-01"
end_date   = "2025-12-31"

# BP synonyms
bp_synonyms = [
    f"BP+stock+after:{start_date}+before:{end_date}",
    f"BP+news+after:{start_date}+before:{end_date}",
    f"BP+oil+market+after:{start_date}+before:{end_date}",
    f"BP+corporation+after:{start_date}+before:{end_date}",
    f"BP+plc+after:{start_date}+before:{end_date}",
    f"BP+earnings+after:{start_date}+before:{end_date}",
    f"BP+environment+after:{start_date}+before:{end_date}",
    f"BP+energy+sector+after:{start_date}+before:{end_date}",
    f"BP+finance+analysis+after:{start_date}+before:{end_date}",
    f"BP+global+markets+after:{start_date}+before:{end_date}",
    f"BP+expansions+after:{start_date}+before:{end_date}"
]

all_news = []
for term in bp_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching BP news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)
    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

df_bp = pd.DataFrame(all_news)

csv_path = os.path.join(FDS_FOLDER, "BP_News_2025.csv")
df_bp.to_csv(csv_path, index=False)
print(f"\nSaved BP news to {csv_path}")
print("Article count:", len(df_bp))
print(df_bp.head())
print("\nArticle count:", len(df_bp))


Fetching BP news from: https://news.google.com/rss/search?q=BP+stock+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+news+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+oil+market+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+corporation+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+plc+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+earnings+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+environment+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching BP news from: https://news.google.com/rss/search?q=BP+energy+sec

In [178]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (example: only January 2019)
start_date = "2019-01-01"
end_date   = "2019-12-31"  # "before:2019-02-01" covers January 2019

# 3) Expanded synonyms for SPY
spy_synonyms = [
    f"SPY+stock+after:{start_date}+before:{end_date}",
    f"SPDR+S%26P+500+ETF+after:{start_date}+before:{end_date}",  # Use %26 for '&'
    f"SPY+ETF+after:{start_date}+before:{end_date}",
    f"SPY+finance+analysis+after:{start_date}+before:{end_date}",
    f"SPY+S%26P+500+after:{start_date}+before:{end_date}",
    f"SPY+volatility+after:{start_date}+before:{end_date}",
    f"SPY+returns+after:{start_date}+before:{end_date}",
    f"SPY+performance+after:{start_date}+before:{end_date}",
    f"SPY+holdings+after:{start_date}+before:{end_date}",
    f"SPY+dividends+after:{start_date}+before:{end_date}",
    f"S%26P+500+ETF+after:{start_date}+before:{end_date}",
    f"SPY+analysis+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in spy_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching SPY news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_spy = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "SPY_News_2019.csv")
df_spy.to_csv(csv_path, index=False)

print(f"\nSaved SPY news to {csv_path}")
print("Article count:", len(df_spy))
print(df_spy.head())


Fetching SPY news from: https://news.google.com/rss/search?q=SPY+stock+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPDR+S%26P+500+ETF+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+ETF+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+finance+analysis+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+S%26P+500+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+volatility+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+returns+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com

In [180]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (example: only January 2019)
start_date = "2020-01-01"
end_date   = "2020-12-31"  # "before:2019-02-01" covers January 2019

# 3) Expanded synonyms for SPY
spy_synonyms = [
    f"SPY+stock+after:{start_date}+before:{end_date}",
    f"SPDR+S%26P+500+ETF+after:{start_date}+before:{end_date}",  # Use %26 for '&'
    f"SPY+ETF+after:{start_date}+before:{end_date}",
    f"SPY+finance+analysis+after:{start_date}+before:{end_date}",
    f"SPY+S%26P+500+after:{start_date}+before:{end_date}",
    f"SPY+volatility+after:{start_date}+before:{end_date}",
    f"SPY+returns+after:{start_date}+before:{end_date}",
    f"SPY+performance+after:{start_date}+before:{end_date}",
    f"SPY+holdings+after:{start_date}+before:{end_date}",
    f"SPY+dividends+after:{start_date}+before:{end_date}",
    f"S%26P+500+ETF+after:{start_date}+before:{end_date}",
    f"SPY+analysis+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in spy_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching SPY news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_spy = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "SPY_News_2020.csv")
df_spy.to_csv(csv_path, index=False)

print(f"\nSaved SPY news to {csv_path}")
print("Article count:", len(df_spy))
print(df_spy.head())


Fetching SPY news from: https://news.google.com/rss/search?q=SPY+stock+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPDR+S%26P+500+ETF+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+ETF+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+finance+analysis+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+S%26P+500+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+volatility+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+returns+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com

In [182]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (example: only January 2019)
start_date = "2021-01-01"
end_date   = "2021-12-31"  # "before:2019-02-01" covers January 2019

# 3) Expanded synonyms for SPY
spy_synonyms = [
    f"SPY+stock+after:{start_date}+before:{end_date}",
    f"SPDR+S%26P+500+ETF+after:{start_date}+before:{end_date}",  # Use %26 for '&'
    f"SPY+ETF+after:{start_date}+before:{end_date}",
    f"SPY+finance+analysis+after:{start_date}+before:{end_date}",
    f"SPY+S%26P+500+after:{start_date}+before:{end_date}",
    f"SPY+volatility+after:{start_date}+before:{end_date}",
    f"SPY+returns+after:{start_date}+before:{end_date}",
    f"SPY+performance+after:{start_date}+before:{end_date}",
    f"SPY+holdings+after:{start_date}+before:{end_date}",
    f"SPY+dividends+after:{start_date}+before:{end_date}",
    f"S%26P+500+ETF+after:{start_date}+before:{end_date}",
    f"SPY+analysis+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in spy_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching SPY news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_spy = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "SPY_News_2021.csv")
df_spy.to_csv(csv_path, index=False)

print(f"\nSaved SPY news to {csv_path}")
print("Article count:", len(df_spy))
print(df_spy.head())


Fetching SPY news from: https://news.google.com/rss/search?q=SPY+stock+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPDR+S%26P+500+ETF+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+ETF+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+finance+analysis+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+S%26P+500+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+volatility+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+returns+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com

In [184]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (example: only January 2019)
start_date = "2022-01-01"
end_date   = "2022-12-31"  # "before:2019-02-01" covers January 2019

# 3) Expanded synonyms for SPY
spy_synonyms = [
    f"SPY+stock+after:{start_date}+before:{end_date}",
    f"SPDR+S%26P+500+ETF+after:{start_date}+before:{end_date}",  # Use %26 for '&'
    f"SPY+ETF+after:{start_date}+before:{end_date}",
    f"SPY+finance+analysis+after:{start_date}+before:{end_date}",
    f"SPY+S%26P+500+after:{start_date}+before:{end_date}",
    f"SPY+volatility+after:{start_date}+before:{end_date}",
    f"SPY+returns+after:{start_date}+before:{end_date}",
    f"SPY+performance+after:{start_date}+before:{end_date}",
    f"SPY+holdings+after:{start_date}+before:{end_date}",
    f"SPY+dividends+after:{start_date}+before:{end_date}",
    f"S%26P+500+ETF+after:{start_date}+before:{end_date}",
    f"SPY+analysis+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in spy_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching SPY news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_spy = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "SPY_News_2022.csv")
df_spy.to_csv(csv_path, index=False)

print(f"\nSaved SPY news to {csv_path}")
print("Article count:", len(df_spy))
print(df_spy.head())


Fetching SPY news from: https://news.google.com/rss/search?q=SPY+stock+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPDR+S%26P+500+ETF+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+ETF+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+finance+analysis+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+S%26P+500+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+volatility+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+returns+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com

In [186]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (example: only January 2019)
start_date = "2023-01-01"
end_date   = "2023-12-31"  # "before:2019-02-01" covers January 2019

# 3) Expanded synonyms for SPY
spy_synonyms = [
    f"SPY+stock+after:{start_date}+before:{end_date}",
    f"SPDR+S%26P+500+ETF+after:{start_date}+before:{end_date}",  # Use %26 for '&'
    f"SPY+ETF+after:{start_date}+before:{end_date}",
    f"SPY+finance+analysis+after:{start_date}+before:{end_date}",
    f"SPY+S%26P+500+after:{start_date}+before:{end_date}",
    f"SPY+volatility+after:{start_date}+before:{end_date}",
    f"SPY+returns+after:{start_date}+before:{end_date}",
    f"SPY+performance+after:{start_date}+before:{end_date}",
    f"SPY+holdings+after:{start_date}+before:{end_date}",
    f"SPY+dividends+after:{start_date}+before:{end_date}",
    f"S%26P+500+ETF+after:{start_date}+before:{end_date}",
    f"SPY+analysis+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in spy_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching SPY news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_spy = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "SPY_News_2023.csv")
df_spy.to_csv(csv_path, index=False)

print(f"\nSaved SPY news to {csv_path}")
print("Article count:", len(df_spy))
print(df_spy.head())


Fetching SPY news from: https://news.google.com/rss/search?q=SPY+stock+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPDR+S%26P+500+ETF+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+ETF+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+finance+analysis+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+S%26P+500+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+volatility+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+returns+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com

In [188]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (example: only January 2019)
start_date = "2024-01-01"
end_date   = "2024-12-31"  # "before:2019-02-01" covers January 2019

# 3) Expanded synonyms for SPY
spy_synonyms = [
    f"SPY+stock+after:{start_date}+before:{end_date}",
    f"SPDR+S%26P+500+ETF+after:{start_date}+before:{end_date}",  # Use %26 for '&'
    f"SPY+ETF+after:{start_date}+before:{end_date}",
    f"SPY+finance+analysis+after:{start_date}+before:{end_date}",
    f"SPY+S%26P+500+after:{start_date}+before:{end_date}",
    f"SPY+volatility+after:{start_date}+before:{end_date}",
    f"SPY+returns+after:{start_date}+before:{end_date}",
    f"SPY+performance+after:{start_date}+before:{end_date}",
    f"SPY+holdings+after:{start_date}+before:{end_date}",
    f"SPY+dividends+after:{start_date}+before:{end_date}",
    f"S%26P+500+ETF+after:{start_date}+before:{end_date}",
    f"SPY+analysis+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in spy_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching SPY news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_spy = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "SPY_News_2024.csv")
df_spy.to_csv(csv_path, index=False)

print(f"\nSaved SPY news to {csv_path}")
print("Article count:", len(df_spy))
print(df_spy.head())


Fetching SPY news from: https://news.google.com/rss/search?q=SPY+stock+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPDR+S%26P+500+ETF+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+ETF+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+finance+analysis+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+S%26P+500+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+volatility+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+returns+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com

In [190]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (example: only January 2019)
start_date = "2025-01-01"
end_date   = "2025-12-31"  # "before:2019-02-01" covers January 2019

# 3) Expanded synonyms for SPY
spy_synonyms = [
    f"SPY+stock+after:{start_date}+before:{end_date}",
    f"SPDR+S%26P+500+ETF+after:{start_date}+before:{end_date}",  # Use %26 for '&'
    f"SPY+ETF+after:{start_date}+before:{end_date}",
    f"SPY+finance+analysis+after:{start_date}+before:{end_date}",
    f"SPY+S%26P+500+after:{start_date}+before:{end_date}",
    f"SPY+volatility+after:{start_date}+before:{end_date}",
    f"SPY+returns+after:{start_date}+before:{end_date}",
    f"SPY+performance+after:{start_date}+before:{end_date}",
    f"SPY+holdings+after:{start_date}+before:{end_date}",
    f"SPY+dividends+after:{start_date}+before:{end_date}",
    f"S%26P+500+ETF+after:{start_date}+before:{end_date}",
    f"SPY+analysis+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in spy_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching SPY news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_spy = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "SPY_News_2025.csv")
df_spy.to_csv(csv_path, index=False)

print(f"\nSaved SPY news to {csv_path}")
print("Article count:", len(df_spy))
print(df_spy.head())


Fetching SPY news from: https://news.google.com/rss/search?q=SPY+stock+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPDR+S%26P+500+ETF+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+ETF+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+finance+analysis+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+S%26P+500+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+volatility+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com/rss/search?q=SPY+returns+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching SPY news from: https://news.google.com

In [192]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2019-01-01"
end_date   = "2019-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for Brent
brent_synonyms = [
    f"Brent+crude+oil+after:{start_date}+before:{end_date}",
    f"Brent+oil+price+after:{start_date}+before:{end_date}",
    f"Brent+oil+market+after:{start_date}+before:{end_date}",
    f"Brent+global+markets+after:{start_date}+before:{end_date}",
    f"Brent+energy+sector+after:{start_date}+before:{end_date}",
    f"Brent+volatility+after:{start_date}+before:{end_date}",
    f"Brent+futures+after:{start_date}+before:{end_date}",
    f"Brent+analysis+after:{start_date}+before:{end_date}",
    f"Brent+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in brent_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching Brent news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_brent = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "Brent_News_2019.csv")
df_brent.to_csv(csv_path, index=False)

print(f"\nSaved Brent news to {csv_path}")
print("Article count:", len(df_brent))
print(df_brent.head())

Fetching Brent news from: https://news.google.com/rss/search?q=Brent+crude+oil+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+price+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+market+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+global+markets+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+energy+sector+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+volatility+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+futures+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Br

In [194]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2020-01-01"
end_date   = "2020-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for Brent
brent_synonyms = [
    f"Brent+crude+oil+after:{start_date}+before:{end_date}",
    f"Brent+oil+price+after:{start_date}+before:{end_date}",
    f"Brent+oil+market+after:{start_date}+before:{end_date}",
    f"Brent+global+markets+after:{start_date}+before:{end_date}",
    f"Brent+energy+sector+after:{start_date}+before:{end_date}",
    f"Brent+volatility+after:{start_date}+before:{end_date}",
    f"Brent+futures+after:{start_date}+before:{end_date}",
    f"Brent+analysis+after:{start_date}+before:{end_date}",
    f"Brent+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in brent_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching Brent news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_brent = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "Brent_News_2020.csv")
df_brent.to_csv(csv_path, index=False)

print(f"\nSaved Brent news to {csv_path}")
print("Article count:", len(df_brent))
print(df_brent.head())


Fetching Brent news from: https://news.google.com/rss/search?q=Brent+crude+oil+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+price+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+market+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+global+markets+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+energy+sector+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+volatility+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+futures+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Br

In [196]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2021-01-01"
end_date   = "2021-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for Brent
brent_synonyms = [
    f"Brent+crude+oil+after:{start_date}+before:{end_date}",
    f"Brent+oil+price+after:{start_date}+before:{end_date}",
    f"Brent+oil+market+after:{start_date}+before:{end_date}",
    f"Brent+global+markets+after:{start_date}+before:{end_date}",
    f"Brent+energy+sector+after:{start_date}+before:{end_date}",
    f"Brent+volatility+after:{start_date}+before:{end_date}",
    f"Brent+futures+after:{start_date}+before:{end_date}",
    f"Brent+analysis+after:{start_date}+before:{end_date}",
    f"Brent+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in brent_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching Brent news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_brent = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "Brent_News_2021.csv")
df_brent.to_csv(csv_path, index=False)

print(f"\nSaved Brent news to {csv_path}")
print("Article count:", len(df_brent))
print(df_brent.head())


Fetching Brent news from: https://news.google.com/rss/search?q=Brent+crude+oil+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+price+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+market+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+global+markets+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+energy+sector+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+volatility+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+futures+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Br

In [198]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2022-01-01"
end_date   = "2022-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for Brent
brent_synonyms = [
    f"Brent+crude+oil+after:{start_date}+before:{end_date}",
    f"Brent+oil+price+after:{start_date}+before:{end_date}",
    f"Brent+oil+market+after:{start_date}+before:{end_date}",
    f"Brent+global+markets+after:{start_date}+before:{end_date}",
    f"Brent+energy+sector+after:{start_date}+before:{end_date}",
    f"Brent+volatility+after:{start_date}+before:{end_date}",
    f"Brent+futures+after:{start_date}+before:{end_date}",
    f"Brent+analysis+after:{start_date}+before:{end_date}",
    f"Brent+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in brent_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching Brent news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_brent = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "Brent_News_2022.csv")
df_brent.to_csv(csv_path, index=False)

print(f"\nSaved Brent news to {csv_path}")
print("Article count:", len(df_brent))
print(df_brent.head())


Fetching Brent news from: https://news.google.com/rss/search?q=Brent+crude+oil+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+price+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+market+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+global+markets+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+energy+sector+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+volatility+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+futures+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Br

In [200]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2023-01-01"
end_date   = "2023-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for Brent
brent_synonyms = [
    f"Brent+crude+oil+after:{start_date}+before:{end_date}",
    f"Brent+oil+price+after:{start_date}+before:{end_date}",
    f"Brent+oil+market+after:{start_date}+before:{end_date}",
    f"Brent+global+markets+after:{start_date}+before:{end_date}",
    f"Brent+energy+sector+after:{start_date}+before:{end_date}",
    f"Brent+volatility+after:{start_date}+before:{end_date}",
    f"Brent+futures+after:{start_date}+before:{end_date}",
    f"Brent+analysis+after:{start_date}+before:{end_date}",
    f"Brent+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in brent_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching Brent news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_brent = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "Brent_News_2023.csv")
df_brent.to_csv(csv_path, index=False)

print(f"\nSaved Brent news to {csv_path}")
print("Article count:", len(df_brent))
print(df_brent.head())


Fetching Brent news from: https://news.google.com/rss/search?q=Brent+crude+oil+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+price+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+market+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+global+markets+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+energy+sector+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+volatility+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+futures+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Br

In [202]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2024-01-01"
end_date   = "2024-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for Brent
brent_synonyms = [
    f"Brent+crude+oil+after:{start_date}+before:{end_date}",
    f"Brent+oil+price+after:{start_date}+before:{end_date}",
    f"Brent+oil+market+after:{start_date}+before:{end_date}",
    f"Brent+global+markets+after:{start_date}+before:{end_date}",
    f"Brent+energy+sector+after:{start_date}+before:{end_date}",
    f"Brent+volatility+after:{start_date}+before:{end_date}",
    f"Brent+futures+after:{start_date}+before:{end_date}",
    f"Brent+analysis+after:{start_date}+before:{end_date}",
    f"Brent+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in brent_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching Brent news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_brent = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "Brent_News_2024.csv")
df_brent.to_csv(csv_path, index=False)

print(f"\nSaved Brent news to {csv_path}")
print("Article count:", len(df_brent))
print(df_brent.head())


Fetching Brent news from: https://news.google.com/rss/search?q=Brent+crude+oil+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+price+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+market+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+global+markets+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+energy+sector+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+volatility+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+futures+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Br

In [204]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2025-01-01"
end_date   = "2025-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for Brent
brent_synonyms = [
    f"Brent+crude+oil+after:{start_date}+before:{end_date}",
    f"Brent+oil+price+after:{start_date}+before:{end_date}",
    f"Brent+oil+market+after:{start_date}+before:{end_date}",
    f"Brent+global+markets+after:{start_date}+before:{end_date}",
    f"Brent+energy+sector+after:{start_date}+before:{end_date}",
    f"Brent+volatility+after:{start_date}+before:{end_date}",
    f"Brent+futures+after:{start_date}+before:{end_date}",
    f"Brent+analysis+after:{start_date}+before:{end_date}",
    f"Brent+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in brent_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching Brent news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_brent = pd.DataFrame(all_news)
# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "Brent_News_2025.csv")
df_brent.to_csv(csv_path, index=False)

print(f"\nSaved Brent news to {csv_path}")
print("Article count:", len(df_brent))
print(df_brent.head())


Fetching Brent news from: https://news.google.com/rss/search?q=Brent+crude+oil+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+price+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+oil+market+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+global+markets+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+energy+sector+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+volatility+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Brent news from: https://news.google.com/rss/search?q=Brent+futures+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching Br

In [206]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2019-01-01"
end_date   = "2019-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for WTI
wti_synonyms = [
    f"WTI+crude+oil+after:{start_date}+before:{end_date}",
    f"WTI+oil+price+after:{start_date}+before:{end_date}",
    f"WTI+oil+market+after:{start_date}+before:{end_date}",
    f"WTI+global+markets+after:{start_date}+before:{end_date}",
    f"WTI+energy+sector+after:{start_date}+before:{end_date}",
    f"WTI+volatility+after:{start_date}+before:{end_date}",
    f"WTI+futures+after:{start_date}+before:{end_date}",
    f"WTI+analysis+after:{start_date}+before:{end_date}",
    f"WTI+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in wti_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching WTI news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_wti = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "WTI_News_2019.csv")
df_wti.to_csv(csv_path, index=False)

print(f"\nSaved WTI news to {csv_path}")
print("Article count:", len(df_wti))
print(df_wti.head())


Fetching WTI news from: https://news.google.com/rss/search?q=WTI+crude+oil+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+price+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+market+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+global+markets+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+energy+sector+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+volatility+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+futures+after:2019-01-01+before:2019-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.go

In [208]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2020-01-01"
end_date   = "2020-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for WTI
wti_synonyms = [
    f"WTI+crude+oil+after:{start_date}+before:{end_date}",
    f"WTI+oil+price+after:{start_date}+before:{end_date}",
    f"WTI+oil+market+after:{start_date}+before:{end_date}",
    f"WTI+global+markets+after:{start_date}+before:{end_date}",
    f"WTI+energy+sector+after:{start_date}+before:{end_date}",
    f"WTI+volatility+after:{start_date}+before:{end_date}",
    f"WTI+futures+after:{start_date}+before:{end_date}",
    f"WTI+analysis+after:{start_date}+before:{end_date}",
    f"WTI+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in wti_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching WTI news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_wti = pd.DataFrame(all_news)
# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "WTI_News_2020.csv")
df_wti.to_csv(csv_path, index=False)

print(f"\nSaved WTI news to {csv_path}")
print("Article count:", len(df_wti))
print(df_wti.head())


Fetching WTI news from: https://news.google.com/rss/search?q=WTI+crude+oil+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+price+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+market+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+global+markets+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+energy+sector+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+volatility+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+futures+after:2020-01-01+before:2020-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.go

In [210]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2021-01-01"
end_date   = "2021-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for WTI
wti_synonyms = [
    f"WTI+crude+oil+after:{start_date}+before:{end_date}",
    f"WTI+oil+price+after:{start_date}+before:{end_date}",
    f"WTI+oil+market+after:{start_date}+before:{end_date}",
    f"WTI+global+markets+after:{start_date}+before:{end_date}",
    f"WTI+energy+sector+after:{start_date}+before:{end_date}",
    f"WTI+volatility+after:{start_date}+before:{end_date}",
    f"WTI+futures+after:{start_date}+before:{end_date}",
    f"WTI+analysis+after:{start_date}+before:{end_date}",
    f"WTI+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in wti_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching WTI news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_wti = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "WTI_News_2021.csv")
df_wti.to_csv(csv_path, index=False)

print(f"\nSaved WTI news to {csv_path}")
print("Article count:", len(df_wti))
print(df_wti.head())


Fetching WTI news from: https://news.google.com/rss/search?q=WTI+crude+oil+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+price+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+market+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+global+markets+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+energy+sector+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+volatility+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+futures+after:2021-01-01+before:2021-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.go

In [212]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2022-01-01"
end_date   = "2022-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for WTI
wti_synonyms = [
    f"WTI+crude+oil+after:{start_date}+before:{end_date}",
    f"WTI+oil+price+after:{start_date}+before:{end_date}",
    f"WTI+oil+market+after:{start_date}+before:{end_date}",
    f"WTI+global+markets+after:{start_date}+before:{end_date}",
    f"WTI+energy+sector+after:{start_date}+before:{end_date}",
    f"WTI+volatility+after:{start_date}+before:{end_date}",
    f"WTI+futures+after:{start_date}+before:{end_date}",
    f"WTI+analysis+after:{start_date}+before:{end_date}",
    f"WTI+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in wti_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching WTI news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_wti = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "WTI_News_2022.csv")
df_wti.to_csv(csv_path, index=False)

print(f"\nSaved WTI news to {csv_path}")
print("Article count:", len(df_wti))
print(df_wti.head())


Fetching WTI news from: https://news.google.com/rss/search?q=WTI+crude+oil+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+price+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+market+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+global+markets+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+energy+sector+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+volatility+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+futures+after:2022-01-01+before:2022-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.go

In [214]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2023-01-01"
end_date   = "2023-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for WTI
wti_synonyms = [
    f"WTI+crude+oil+after:{start_date}+before:{end_date}",
    f"WTI+oil+price+after:{start_date}+before:{end_date}",
    f"WTI+oil+market+after:{start_date}+before:{end_date}",
    f"WTI+global+markets+after:{start_date}+before:{end_date}",
    f"WTI+energy+sector+after:{start_date}+before:{end_date}",
    f"WTI+volatility+after:{start_date}+before:{end_date}",
    f"WTI+futures+after:{start_date}+before:{end_date}",
    f"WTI+analysis+after:{start_date}+before:{end_date}",
    f"WTI+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in wti_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching WTI news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_wti = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "WTI_News_2023.csv")
df_wti.to_csv(csv_path, index=False)

print(f"\nSaved WTI news to {csv_path}")
print("Article count:", len(df_wti))
print(df_wti.head())


Fetching WTI news from: https://news.google.com/rss/search?q=WTI+crude+oil+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+price+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+market+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+global+markets+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+energy+sector+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+volatility+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+futures+after:2023-01-01+before:2023-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.go

In [216]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2024-01-01"
end_date   = "2024-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for WTI
wti_synonyms = [
    f"WTI+crude+oil+after:{start_date}+before:{end_date}",
    f"WTI+oil+price+after:{start_date}+before:{end_date}",
    f"WTI+oil+market+after:{start_date}+before:{end_date}",
    f"WTI+global+markets+after:{start_date}+before:{end_date}",
    f"WTI+energy+sector+after:{start_date}+before:{end_date}",
    f"WTI+volatility+after:{start_date}+before:{end_date}",
    f"WTI+futures+after:{start_date}+before:{end_date}",
    f"WTI+analysis+after:{start_date}+before:{end_date}",
    f"WTI+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in wti_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching WTI news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_wti = pd.DataFrame(all_news)

# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "WTI_News_2024.csv")
df_wti.to_csv(csv_path, index=False)

print(f"\nSaved WTI news to {csv_path}")
print("Article count:", len(df_wti))
print(df_wti.head())


Fetching WTI news from: https://news.google.com/rss/search?q=WTI+crude+oil+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+price+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+market+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+global+markets+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+energy+sector+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+volatility+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+futures+after:2024-01-01+before:2024-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.go

In [218]:
import feedparser
import pandas as pd
import time
import os

# 1) Create/ensure the "FDS project" folder
FDS_FOLDER = "FDS project"
os.makedirs(FDS_FOLDER, exist_ok=True)

# 2) Define your date range (e.g., full 2019)
start_date = "2025-01-01"
end_date   = "2025-12-31"  # "before:2020-01-01" covers 2019

# 3) Expanded synonyms for WTI
wti_synonyms = [
    f"WTI+crude+oil+after:{start_date}+before:{end_date}",
    f"WTI+oil+price+after:{start_date}+before:{end_date}",
    f"WTI+oil+market+after:{start_date}+before:{end_date}",
    f"WTI+global+markets+after:{start_date}+before:{end_date}",
    f"WTI+energy+sector+after:{start_date}+before:{end_date}",
    f"WTI+volatility+after:{start_date}+before:{end_date}",
    f"WTI+futures+after:{start_date}+before:{end_date}",
    f"WTI+analysis+after:{start_date}+before:{end_date}",
    f"WTI+production+after:{start_date}+before:{end_date}"
]

# 4) Parse all feeds
all_news = []

for term in wti_synonyms:
    rss_url = f"https://news.google.com/rss/search?q={term}&hl=en-US&gl=US&ceid=US:en"
    print(f"Fetching WTI news from: {rss_url}")
    feed = feedparser.parse(rss_url)
    time.sleep(2)  # Sleep to avoid hitting rate limits

    for entry in feed.entries:
        all_news.append({
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "summary": entry.get("summary", ""),
            "link": entry.get("link", "")
        })

# 5) Convert to DataFrame & remove duplicates
df_wti = pd.DataFrame(all_news)
# 6) Save to CSV
csv_path = os.path.join(FDS_FOLDER, "WTI_News_2025.csv")
df_wti.to_csv(csv_path, index=False)

print(f"\nSaved WTI news to {csv_path}")
print("Article count:", len(df_wti))
print(df_wti.head())


Fetching WTI news from: https://news.google.com/rss/search?q=WTI+crude+oil+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+price+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+oil+market+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+global+markets+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+energy+sector+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+volatility+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.google.com/rss/search?q=WTI+futures+after:2025-01-01+before:2025-12-31&hl=en-US&gl=US&ceid=US:en
Fetching WTI news from: https://news.go