In [4]:
import requests
from bs4 import BeautifulSoup
import re

def parse_nyfed_speech(url):
    r = requests.get(url)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    # Title
    title = soup.find("div", class_="ts-title")
    title = title.get_text(strip=True) if title else ""

    # Subtitle or date
    date_block = soup.find("div", class_="ts-contact-info")
    date_text = date_block.get_text(" ", strip=True) if date_block else ""

    # ---- MAIN CONTENT HERE ----
    article_div = soup.find("div", class_="ts-article-text")
    paragraphs = []

    if article_div:
        for p in article_div.find_all("p"):
            text = p.get_text(" ", strip=True)
            if text:
                paragraphs.append(text)

    full_text = "\n\n".join(paragraphs)

    # Clean whitespace
    full_text = re.sub(r"\n{2,}", "\n\n", full_text).strip()

    return {
        "url": url,
        "title": title,
        "date": date_text,
        "text": full_text,
        "length": len(full_text)
    }

# Test
url = "https://www.newyorkfed.org/newsevents/speeches/2012/dud120106.html"
data = parse_nyfed_speech(url)
print(data["date"])
print(data["text"], "...")



January 6, 2012
In a recent speech 1 I talked about a number of the challenges facing the economy as we seek to secure the recovery and build for the future. Today I will focus on one of these: the problems in the U.S. housing market. In focusing today on housing, I would emphasize that this is only one factor behind the frustratingly slow economic recovery. Nevertheless, it is an important one that deserves our attention. As always, what I have to say today reflects my own views and not necessarily those of the Federal Open Market Committee (FOMC) or the Federal Reserve System.

The New York Fed is deeply committed to contributing to efforts to resolve the housing crisis that still afflicts our nation. Our economists monitor the housing market and analyze its impact on the national economy. My outreach staff work with community groups and housing practitioners to support local programs that aid distressed homeowners. Our lawyers perform pro-bono work for homeowners facing foreclosure

In [2]:
import requests
from bs4 import BeautifulSoup
import re

def parse_board_html(url):
    """Parse Board of Governors (federalreserve.gov) speech HTML."""
    r = requests.get(url)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    # ---- Extract title ----
    # Usually first <h3> or <h4>
    title_tag = soup.find(["h3", "h4"])
    title = title_tag.get_text(" ", strip=True) if title_tag else ""

    # ---- Extract date ----
    # Usually appears as <p class="date">
    date_tag = soup.find("p", class_="date")
    if date_tag:
        date = date_tag.get_text(" ", strip=True)
    else:
        # fallback: date is often the FIRST paragraph before the article section
        p_tags = soup.find_all("p")
        date = p_tags[0].get_text(strip=True) if p_tags else ""

    # ---- Extract main speech text ----
    article_div = soup.find("div", id="article")
    paragraphs = []

    if article_div:
        for p in article_div.find_all("p"):
            txt = p.get_text(" ", strip=True)
            if txt:
                paragraphs.append(txt)

    full_text = "\n\n".join(paragraphs)
    full_text = re.sub(r"\n{2,}", "\n\n", full_text).strip()

    return {
        "title": title,
        "date": date,
        "text": full_text,
        "length": len(full_text)
    }

# Test
url = "https://www.federalreserve.gov/newsevents/speech/powell20230825a.htm"
data = parse_board_html(url)

print(data["date"])
print(data["text"], "...")


An official website of the United States Government
August 25, 2023

Chair Jerome H. Powell

At âStructural Shifts in the Global Economy,â an economic policy symposium sponsored by the Federal Reserve Bank of Kansas City, Jackson Hole, Wyoming

Good morning. At last year's Jackson Hole symposium, I delivered a brief, direct message. My remarks this year will be a bit longer, but the message is the same: It is the Fed's job to bring inflation down to our 2 percent goal, and we will do so. We have tightened policy significantly over the past year. Although inflation has moved down from its peakâa welcome developmentâit remains too high. We are prepared to raise rates further if appropriate, and intend to hold policy at a restrictive level until we are confident that inflation is moving sustainably down toward our objective.

Today I will review our progress so far and discuss the outlook and the uncertainties we face as we pursue our dual mandate goals. I will conclude with a sum

In [7]:
import requests
from bs4 import BeautifulSoup
import re
from dateutil import parser

def extract_dallasfed_html(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # --- Extract DATE ---
    # Example: "June 23, 2016 New York"
    date_box = soup.select_one("div.dal-inline-list")
    raw_date = ""
    iso_date = ""

    if date_box:
        text = date_box.get_text(" ", strip=True)
        # capture: Month DD, YYYY
        m = re.search(r"[A-Za-z]+\s+\d{1,2},\s+\d{4}", text)
        if m:
            raw_date = m.group(0)
            try:
                iso_date = parser.parse(raw_date).date().isoformat()
            except:
                iso_date = ""

    # --- Extract MAIN SPEECH TEXT ---
    main = soup.select_one("div.dal-main-content")
    parts = []

    if main:
        for tag in main.find_all(["h1", "h2", "h3", "p"]):
            t = tag.get_text(" ", strip=True)
            if t:
                parts.append(t)

    full_text = "\n".join(parts)

    return {
        "date": iso_date,
        "raw_date": raw_date,
        "text": full_text,
        "length": len(full_text),
    }


# Example usage
url = "https://www.dallasfed.org/research/economics/2019/0305"
data = extract_dallasfed_html(url)

print(data["date"])        # -> 2016-06-23
print(data["text"])



2019-03-05
Corporate debt as a potential amplifier in a slowdown
March 05, 2019
As a central bank policymaker, I closely monitor various types of excesses and imbalances that may be developing in the economy. One of the areas I monitor is the level and growth of indebtedness in the household, government and corporate sectors. In previous essays , I have commented on the positive impacts of the deleveraging of the household sector since the Great Recession, and I have raised a cautionary flag regarding the growth of U.S. government debt, including a substantial increase in the present value of unfunded entitlements.
The purpose of this essay is to focus on trends in corporate debt growth and credit quality in the U.S. and discuss potential implications for economic conditions and financial stability.
Background on indebtedness in the U.S.
In the decade since the Great Recession, there has been an improvement in household balance sheets. In particular, it is estimated that household debt

In [16]:
import requests
from bs4 import BeautifulSoup
import re
from dateutil import parser

def extract_chicagofed_html(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # -------- Extract DATE ----------
    raw_date = ""
    iso_date = ""

    date_div = soup.select_one("div.cfedDetail__lastUpdated")
    if date_div:
        txt = date_div.get_text(" ", strip=True)
        # Find mm/dd/yy or mm/dd/yyyy
        m = re.search(r"\b\d{1,2}/\d{1,2}/\d{2,4}\b", txt)
        if m:
            raw_date = m.group(0)
            try:
                iso_date = parser.parse(raw_date).date().isoformat()
            except:
                iso_date = ""

    # -------- Extract TITLE ----------
    title = ""
    title_div = soup.select_one("div.cfedDetail__title h1")
    if title_div:
        title = title_div.get_text(" ", strip=True)

    # -------- Extract TEXT ----------
    paragraphs = []

    # MAIN CONTENT: each section is under cfedContent__body
    for body in soup.select("div.cfedContent__body"):
        # Section heading (h3)
        h = body.find("h3")
        if h:
            paragraphs.append(h.get_text(" ", strip=True))
        # Paragraphs inside cfedContent__text
        for txt_div in body.select("div.cfedContent__text"):
            for p in txt_div.find_all("p"):
                t = p.get_text(" ", strip=True)
                if t:
                    paragraphs.append(t)

    full_text = "\n\n".join(paragraphs)

    return {
        "title": title,
        "date": iso_date,
        "raw_date": raw_date,
        "text": full_text,
        "length": len(full_text)
    }


# Test
url = "https://www.chicagofed.org/publications/speeches/2009/02-11-iowa-speech"
data = extract_chicagofed_html(url)

print("Title:", data["title"])
print("Date:", data["date"])
print("Length:", data["length"])
print(data["text"])




Title: Economic Outlook and Policy Challenges
Date: 2009-11-30
Length: 20723
Introduction

Good afternoon and thank you for inviting me to speak to you today. And thank you, Phelps Hoyt [Vice President of the CFA Society of Iowa] for that kind introduction and arranging my visit.

Currently, we find ourselves in the midst of a serious recession—one that appears headed towards an experience more like the large downturns in the 1970s and 1980s than the moderate contractions of 1990 and 2001. Today I will discuss the events that brought us to this point, the outlook for the economy, and the policy challenges confronting the Fed during these troubling times. I should note that these are, of course, my own views and not necessarily those of my colleagues in the Federal Reserve System.

Background

Over the past 18 months the economy has experienced deteriorating housing markets, large-scale disruptions to our financial services industry, and plummeting consumer and business confidence. Thes

In [17]:
import requests
from bs4 import BeautifulSoup
import re
from dateutil import parser

def extract_clevelandfed_html(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # -------- Extract TITLE ----------
    title = ""
    title_tag = soup.select_one("div.component_content__title h1")
    if title_tag:
        title = title_tag.get_text(" ", strip=True)

    # -------- Extract DATE ----------
    raw_date = ""
    iso_date = ""

    intro = soup.select_one("div.component_content__intro")
    if intro:
        txt = intro.get_text(" ", strip=True)

        # many Cleveland pages use DD.MM.YYYY or MM.DD.YYYY with dots
        # capture formats like 11.20.2014 or 20.11.2014
        m = re.search(r"\b\d{1,2}\.\d{1,2}\.\d{4}\b", txt)
        if m:
            raw_date = m.group(0)
            try:
                iso_date = parser.parse(raw_date.replace(".", "/")).date().isoformat()
            except:
                iso_date = ""

    # -------- Extract TEXT ----------
    paragraphs = []

    # All main content appears under repeated: <div class="component_content__body">
    for body in soup.select("div.component_content__body"):
        for p in body.find_all("p"):
            t = p.get_text(" ", strip=True)
            if t:
                paragraphs.append(t)

    full_text = "\n\n".join(paragraphs)

    return {
        "title": title,
        "date": iso_date,
        "raw_date": raw_date,
        "text": full_text,
        "length": len(full_text),
    }


# Test
url = "https://www.clevelandfed.org/collections/speeches/2014/sp-20141120-forward-guidance-and-communications"
data = extract_clevelandfed_html(url)

print("Title:", data["title"])
print("Date:", data["date"])
print("Raw date:", data["raw_date"])
print("Length:", data["length"])
print()
print(data["text"][:500])


Title: 
Date: 
Raw date: 
Length: 0




In [19]:
import requests
from bs4 import BeautifulSoup
import re
from dateutil import parser

def extract_clevelandfed_html(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # ---------- TITLE ----------
    title = ""
    h1 = soup.select_one("main h1")
    if h1:
        title = h1.get_text(" ", strip=True)

    # ---------- DATE (line that starts with XX.XX.XXXX) ----------
    raw_date = ""
    iso_date = ""

    # Find date-like patterns in the first <p> under main
    for p in soup.select("main p"):
        text = p.get_text(" ", strip=True)
        m = re.match(r"\d{1,2}\.\d{1,2}\.\d{4}", text)
        if m:
            raw_date = m.group(0)
            try:
                iso_date = parser.parse(raw_date.replace(".", "/")).date().isoformat()
            except:
                iso_date = ""
            break

    # ---------- SPEECH TEXT ----------
    paragraphs = []

    # All paragraphs under component content__body
    for body in soup.select("div.component.content__body"):
        for p in body.find_all("p"):
            txt = p.get_text(" ", strip=True)
            if txt:
                paragraphs.append(txt)

    full_text = "\n\n".join(paragraphs)

    return {
        "title": title,
        "date": iso_date,
        "raw_date": raw_date,
        "text": full_text,
        "length": len(full_text)
    }


# Test
url = "https://www.clevelandfed.org/collections/speeches/2014/sp-20141120-forward-guidance-and-communications"
data = extract_clevelandfed_html(url)

print("TITLE:", data["title"])
print("DATE:", data["date"])
print("LENGTH:", data["length"])
print(data["text"][:600])


TITLE: Forward Guidance and Communications in U.S. Monetary Policy
DATE: 
LENGTH: 0



In [21]:
import requests
from bs4 import BeautifulSoup
import re
from dateutil import parser

def extract_clevelandfed_html(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # ---------- TITLE ----------
    title = ""
    h1 = soup.select_one("h1.field-title")
    if h1:
        title = h1.get_text(" ", strip=True)

    # ---------- DATE ----------
    raw_date = ""
    iso_date = ""

    date_tag = soup.select_one("span.field-release-date")
    if date_tag:
        raw_date = date_tag.get_text(" ", strip=True)
        raw_date = raw_date.strip()
        try:
            iso_date = parser.parse(raw_date.replace(".", "/")).date().isoformat()
        except:
            iso_date = ""

    # ---------- MAIN TEXT ----------
    paragraphs = []

    # This is the real speech container
    rich_text_blocks = soup.select("div.component.rich-text div.component-content")

    for block in rich_text_blocks:
        for tag in block.find_all(["p", "h2", "h3"]):
            text = tag.get_text(" ", strip=True)
            if text:
                paragraphs.append(text)

    full_text = "\n\n".join(paragraphs)

    return {
        "title": title,
        "date": iso_date,
        "text": full_text,
        "length": len(full_text)
    }


# Test
url = "https://www.clevelandfed.org/collections/speeches/2014/sp-20141120-forward-guidance-and-communications"
data = extract_clevelandfed_html(url)

print("TITLE:", data["title"])
print("DATE:", data["date"])
print("LENGTH:", data["length"])
print(data["text"][:600])


TITLE: Forward Guidance and Communications in U.S. Monetary Policy
DATE: 2014-11-20
LENGTH: 24617
Good evening and thank you very much for the invitation to speak in the Imperial Business Insights Series. I have learned that this successful series is now in its third year and it has brought speakers to the podium to discuss a wide range of topics in the major themes of finance, innovation, and entrepreneurship. Tonight I will speak about forward guidance and monetary policy communications. I think it is clear that this topic is related to finance, but I submit that it is also related to the two other themes of your series: innovation and entrepreneurship. Since the onset of the 2008 financ


In [23]:
import requests
from bs4 import BeautifulSoup
import re
from dateutil import parser

def extract_philadelphiafed_html(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # ---------- TITLE ----------
    title = ""
    h1 = soup.select_one("h1")
    if h1:
        title = h1.get_text(" ", strip=True)

    # ---------- DATE ----------
    raw_date = ""
    iso_date = ""

    date_tag = soup.select_one(".article__meta-date")
    if date_tag:
        raw_date = date_tag.get_text(" ", strip=True).strip()

        # normalize weird apostrophe formats (e.g. "02 Jun ’21")
        raw_date_clean = raw_date.replace("’", "'")
        try:
            iso_date = parser.parse(raw_date_clean).date().isoformat()
        except:
            iso_date = ""

    # ---------- SPEECH TEXT ----------
    paragraphs = []

    body = soup.select_one("div.article-body")
    if body:
        for tag in body.find_all(["p", "h2", "h3"]):
            t = tag.get_text(" ", strip=True)
            if t:
                paragraphs.append(t)

    full_text = "\n\n".join(paragraphs)

    return {
        "title": title,
        "date": iso_date,
        "raw_date": raw_date,
        "text": full_text,
        "length": len(full_text),
    }


# Test
url = "https://www.philadelphiafed.org/community-development/workforce-and-economic-development/210602-women-in-housing-and-finance"
data = extract_philadelphiafed_html(url)

print("TITLE:", data["title"])
print("DATE:", data["date"])
print("LENGTH:", data["length"])
print()
print(data["text"])


TITLE: The Economy, Inflation, and Forbearance
DATE: 
LENGTH: 10492

Good afternoon! Thank you so much  for that introduction. It’s great to be here with this group —  and as much as I’d like to have been there in person, it’s pretty great that I  didn’t have to fight the traffic on I-95. I’m really looking forward to our discussion today.

My plan is to talk about where we  are economically and then to share some of the Philadelphia Fed’s research on  housing that I think will be of particular interest to this group. Then we can  open things up for a Q&A.

But before we do any of that, I  need to give you my standard Fed disclaimer: The views I express today are my  own and do not necessarily reflect those of anyone else on the Federal Open  Market Committee or in the Federal Reserve System.

Economic Outlook

Maybe it’s the summer weather or  the fact that the Phillies are a couple of games ahead of the Nationals in the  NL East, but I’m pleased to note that there is a palpable sense

In [1]:
import requests
from bs4 import BeautifulSoup
from dateutil import parser

def extract_stlouisfed_html(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # ---------- TITLE ----------
    title = ""
    h1 = soup.select_one("div.component.content h1")
    if h1:
        title = h1.get_text(" ", strip=True)

    # ---------- DATE ----------
    raw_date = ""
    iso_date = ""

    # Typically the first <p> after title block is the date
    date_tag = soup.select_one("div.component.content p")
    if date_tag:
        raw_date = date_tag.get_text(" ", strip=True)
        try:
            iso_date = parser.parse(raw_date).date().isoformat()
        except:
            iso_date = ""

    # ---------- MAIN TEXT ----------
    paragraphs = []

    body = soup.select_one("div.field-content div.wrapper")
    if body:
        for tag in body.find_all(["p", "h2", "h3"]):
            t = tag.get_text(" ", strip=True)
            if t:
                paragraphs.append(t)

    full_text = "\n\n".join(paragraphs)

    return {
        "title": title,
        "date": iso_date,
        "raw_date": raw_date,
        "text": full_text,
        "length": len(full_text),
    }


# Test
url = "https://www.stlouisfed.org/on-the-economy/2020/march/bullard-expected-us-macroeconomic-performance-pandemic-adjustment-period"
data = extract_stlouisfed_html(url)

print("TITLE:", data["title"])
print("DATE:", data["date"])
print("LENGTH:", data["length"])
print(data["text"][:600])


TITLE: Expected U.S. Macroeconomic Performance during the Pandemic Adjustment Period
DATE: 
LENGTH: 11988
Introduction Any views expressed are my own and do not necessarily reflect the views of the Federal Open Market Committee.

The coronavirus has the potential to create catastrophic health outcomes in the U.S. I take as a baseline for my analysis Ferguson et al. “Impact of Non-Pharmaceutical Interventions (NPIs) to Reduce COVID-19 Mortality and Healthcare Demand.” Imperial College, COVID-19 Report 9, March 16, 2020. In order to mitigate this, public health officials have recommended a variety of social-distancing policies to slow the spread of the virus. In addition, social interaction has decl


In [2]:
import requests
from bs4 import BeautifulSoup
from dateutil import parser

def extract_bostonfed_html(url):
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # ---------- TITLE ----------
    title = ""
    t = soup.select_one("div.title-container h1")
    if t:
        title = t.get_text(" ", strip=True)

    # ---------- AUTHOR ----------
    author = ""
    a = soup.select_one("div.author-container")
    if a:
        author = a.get_text(" ", strip=True).replace("By ", "").strip()

    # ---------- DATE ----------
    raw_date = ""
    iso_date = ""

    d = soup.select_one("div.date-container")
    if d:
        raw_date = d.get_text(" ", strip=True)
        try:
            iso_date = parser.parse(raw_date).date().isoformat()
        except:
            iso_date = ""

    # ---------- SPEECH TEXT ----------
    paragraphs = []

    for p in soup.select("div.bodytextlist p"):
        txt = p.get_text(" ", strip=True)
        if txt:
            paragraphs.append(txt)

    full_text = "\n\n".join(paragraphs)

    return {
        "title": title,
        "author": author,
        "date": iso_date,
        "text": full_text,
        "length": len(full_text)
    }


# Test
url = "https://www.bostonfed.org/news-and-events/speeches/opening-remarks-prevention-containment-and-policy-change-ndash-lessons-from-history.aspx"
data = extract_bostonfed_html(url)

print("TITLE:", data["title"])
print("AUTHOR:", data["author"])
print("DATE:", data["date"])
print("LENGTH:", data["length"])
print()
print(data["text"][:600])


TITLE: Opening Remarks: Prevention, Containment, and Policy Change – Lessons from History
AUTHOR: 
DATE: 
LENGTH: 12454

The Federal Reserve Bank of Boston's 54th Economic Conference: " After the Fall - Re-evaluating Supervisory, Regulatory, and Monetary Policy " Chatham, MA

Good afternoon. I'd like to welcome everyone to this conference, which is the Federal Reserve Bank of Boston's 54th economic conference. I'd like to thank my colleagues from the Bank for their work in putting together such a dynamic and timely program.

The decision to move our conference to the fall came temporarily into question when it started snowing this weekend. But the weather on Sunday didn't seem to bother our football team, the Pa
