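# BLS: Polling Bureau of Labor Statistics Releases and Data

This notebook walks through fetching and parsing a BLS RSS feed, querying the BLS API, scheduling a polling function, storing data to CSV, and parsing CLI arguments.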

In [13]:
!pip install -r requirements.txt

Collecting beautifulsoup4 (from -r requirements.txt (line 3))
  Downloading beautifulsoup4-4.14.3-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>=1.6.1 (from beautifulsoup4->-r requirements.txt (line 3))
  Downloading soupsieve-2.8.1-py3-none-any.whl.metadata (4.6 kB)
Downloading beautifulsoup4-4.14.3-py3-none-any.whl (107 kB)
Downloading soupsieve-2.8.1-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4

   -------------------- ------------------- 1/2 [beautifulsoup4]
   ---------------------------------------- 2/2 [beautifulsoup4]

Successfully installed beautifulsoup4-4.14.3 soupsieve-2.8.1


## Fetching and Parsing a BLS RSS Feed

In [None]:
import feedparser
import datetime

rss_url = "https://www.bls.gov/feed/bls_latest.rss"  # e.g., Latest Numbers feed
feed = feedparser.parse(rss_url)

# feedparser flags a feed it could not fetch/parse cleanly via `bozo` instead of
# raising, so report it here; otherwise the loop below just prints nothing.
if feed.bozo:
    print("Warning: feed did not parse cleanly:", feed.get("bozo_exception"))

for entry in feed.entries:
    # Entries are dict-like; use .get() so an entry that lacks a field does not
    # abort the whole loop with AttributeError.
    title = entry.get("title", "(untitled)")       # e.g. "Employment Situation"
    published = entry.get("published", "(no date)")  # e.g. "Fri, 09 Jan 2026 08:30:00 -0500"
    link = entry.get("link")                       # URL to detailed news release
    summary = entry.get("summary")                 # HTML summary of the release
    print(f"Release: {title}, Date: {published}")
    # Here you could compare published date with last seen date to identify new items


In [None]:
import requests
import feedparser

URL = "https://www.bls.gov/feed/bls_latest.rss"

# Browser-like request headers: a common User-Agent fares much better than the
# default python-requests signature.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept": "application/rss+xml, application/xml;q=0.9, */*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://www.bls.gov/",
}

# Download the feed ourselves, failing loudly on HTTP errors such as 403.
resp = requests.get(URL, timeout=20, headers=headers)
resp.raise_for_status()

# Hand the raw bytes to feedparser and report a quick health summary.
feed = feedparser.parse(resp.content)
print(f"bozo: {feed.bozo}")
print(f"entries: {len(feed.entries)}")


In [None]:
# Inspect the parsed feed object produced by feedparser.parse above
# (bare last expression, so the notebook displays it).
feed

In [None]:
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs

def _find_link_href(block, link_text):
    """Return the href of the first <a> in `block` whose text contains `link_text`.

    Returns None when no such anchor exists or when the anchor has no
    href attribute.
    """
    anchor = block.find("a", string=lambda s: s and link_text in s)
    # .get() instead of ["href"]: an anchor without href must not raise KeyError.
    return anchor.get("href") if anchor else None


def parse_latest_numbers_summary(html: str) -> list:
    """
    Parse the latest numbers summary from the BLS RSS feed.

    Each <p> element whose text contains a ":" is treated as one indicator;
    paragraphs that yield no data text, series ID, or news release link are
    skipped.

    Args:
        html: The HTML content of the latest numbers summary. An empty or
            None value yields an empty list.
    Returns:
        A list of dictionaries, each containing the following keys:
            - label: The label of the indicator (text before the first ":").
            - latest_text: The text of the <span class="data"> element, or None.
            - data_definition: The "title" attribute of that span, or None.
            - series_id: The "series_id" query parameter of the historical
              data URL, or None.
            - news_release_url: The href of the "News Release" link, or None.
            - historical_url: The href of the "Historical Data" link, or None.

    Usage:
        ```python
        html = feed['entries'][0]['summary']
        data = parse_latest_numbers_summary(html)
        ```
    """
    # Nothing to parse (missing/empty summary) -> no indicators.
    if not html:
        return []

    soup = BeautifulSoup(html, "html.parser")

    out = []
    # Each indicator appears to be in its own <p> block
    for p in soup.find_all("p"):
        # label is the text up to ":" usually
        raw_text = p.get_text(" ", strip=True)
        if ":" not in raw_text:
            continue
        label = raw_text.split(":", 1)[0].strip()

        data_span = p.find("span", class_="data")
        latest_text = data_span.get_text(" ", strip=True) if data_span else None
        title_attr = data_span.get("title") if data_span else None

        news_url = _find_link_href(p, "News Release")
        hist_url = _find_link_href(p, "Historical Data")

        # The series ID is carried as a query parameter of the historical link.
        series_id = None
        if hist_url:
            qs = parse_qs(urlparse(hist_url).query)
            series_id = (qs.get("series_id") or [None])[0]

        if latest_text or series_id or news_url:
            out.append({
                "label": label,
                "latest_text": latest_text,
                "data_definition": title_attr,   # what the number means
                "series_id": series_id,
                "news_release_url": news_url,
                "historical_url": hist_url,
            })

    return out

In [22]:
# Pass the HTML summary string of the first feed entry (not the whole feed
# object): parse_latest_numbers_summary expects HTML text.
parse_latest_numbers_summary(feed['entries'][0]['summary'])

[{'label': 'Unemployment Rate',
  'latest_text': '4.4%  in Dec 2025',
  'data_definition': 'In percent, seasonally adjusted',
  'series_id': 'LNS14000000',
  'news_release_url': 'https://www.bls.gov/news.release/empsit.toc.htm',
  'historical_url': 'https://data.bls.gov/pdq/SurveyOutputServlet?data_tool=latest_numbers&series_id=LNS14000000'},
 {'label': 'Payroll Employment',
  'latest_text': '+50,000(p)  in Dec 2025',
  'data_definition': 'Number of jobs, 1-month net change, seasonally adjusted',
  'series_id': 'CES0000000001',
  'news_release_url': 'https://www.bls.gov/news.release/empsit.toc.htm',
  'historical_url': 'https://data.bls.gov/pdq/SurveyOutputServlet?data_tool=latest_numbers&series_id=CES0000000001&output_view=net_1mth'},
 {'label': 'Average Hourly Earnings',
  'latest_text': '+$0.12(p)  in Dec 2025',
  'data_definition': 'For all employees, seasonally adjusted',
  'series_id': 'CES0500000003',
  'news_release_url': 'https://www.bls.gov/news.release/empsit.toc.htm',
  'hi

## Querying the BLS API for Data

In [None]:
import requests
import json

series_to_get = ["CUSR0000SA0", "LNS14000000"]  # CPI-U all items, and unemployment rate series IDs
payload = {
    "seriesid": series_to_get,
    "startyear": "2025",
    "endyear": "2026",
    # If v2 requires it, add "registrationkey": <your key> here; load the key
    # from an environment variable rather than hard-coding it in the notebook.
}
response = requests.post("https://api.bls.gov/publicAPI/v2/timeseries/data/",
                         json=payload, timeout=10)
# Fail on HTTP errors first (as the RSS cell does) so an error page is not
# handed to the JSON parser and reported as a confusing decode failure.
response.raise_for_status()
data = response.json()  # parse JSON response to dict

if data.get("status") == "REQUEST_SUCCEEDED":
    series_list = data["Results"]["series"]
    for series in series_list:
        sid = series["seriesID"]
        print(f"Data for series {sid}:")
        for item in series["data"]:
            year = item["year"]
            period = item["period"]
            value = item["value"]
            print(f" {year}-{period}: {value}")
else:
    print("API request failed or returned an error:", data.get("message"))


## Scheduling the Polling Function

In [None]:
import schedule
import time

def poll_bls():
    """Run one polling cycle.

    Placeholder implementation: announces the poll and prints the current
    local timestamp. The real RSS and/or API fetching (check feeds, download
    new data, save to files) is meant to be called from here.
    """
    from datetime import datetime

    print("Polling BLS for updates...")
    # For demonstration, only the timestamp of the poll is reported.
    print("Polled at", datetime.now())

# Schedule the poll_bls job every 6 hours.
# The job is tagged and any earlier job with the same tag is cleared first, so
# re-running this cell does not register a duplicate job on the default
# scheduler (which would make every poll fire multiple times).
POLL_JOB_TAG = "bls-poll"
schedule.clear(POLL_JOB_TAG)
schedule.every(6).hours.do(poll_bls).tag(POLL_JOB_TAG)

# Keep the script running to execute scheduled tasks.
# NOTE: this loop never returns; in a notebook, interrupt the kernel to stop it.
while True:
    schedule.run_pending()
    time.sleep(60)  # sleep a bit between checks


## Storing Data to CSV

In [None]:
import csv

new_data = {"series": "Unemployment Rate", "date": "2025-12", "value": 4.4}
csv_file = "unemployment_rate.csv"
columns = ["series", "date", "value"]

# Write the header when the file does not exist OR exists but is empty
# (e.g. created by a previous failed run); otherwise the CSV would end up
# with data rows and no header.
import os
file_exists = os.path.isfile(csv_file)
needs_header = not file_exists or os.path.getsize(csv_file) == 0
with open(csv_file, mode='a', newline='') as f:
    writer = csv.writer(f)
    if needs_header:
        writer.writerow(columns)
    writer.writerow([new_data[column] for column in columns])


## CLI Argument Parsing (using argparse)

In [None]:
import argparse

# Build the command-line interface: two boolean switches.
parser = argparse.ArgumentParser(description="BLS Data Polling CLI")
for flag, help_text in (
    ("--once", "Run a single poll and exit"),
    ("--use-api", "Fetch data via API calls (default uses RSS triggers)"),
):
    parser.add_argument(flag, action="store_true", help=help_text)
args = parser.parse_args()

if not args.once:
    # Continuous mode: optionally switch to API-only, then start the scheduler.
    # NOTE(review): --use-api is only consulted on this branch, so it has no
    # effect when combined with --once; confirm that is intended.
    if args.use_api:
        configure_mode(api_only=True)
    start_scheduler()  # assume this sets up schedule as above
else:
    # Single-shot mode: run one iteration of polling and fall through to exit.
    poll_bls()  # assume this function encapsulates one full poll cycle
