### Get news from regulators' RSS feeds

In [1]:
import feedparser
import pandas as pd
from bs4 import BeautifulSoup
import requests # <-- Import requests
from datetime import datetime
import time # Optional: for adding delays between requests
import ssl # <-- Import ssl for potential context modification if needed
import openai
import re

# Notebook display settings: lift the row/column caps (None), leave the
# display width unset (None), and cap each cell at 130 characters.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.width = None
pd.options.display.max_colwidth = 130

# RSS feed URLs to poll, grouped by publishing host.
# The list order is the order in which the feeds are fetched.
rss_urls = []

# bankingsupervision.europa.eu
rss_urls += [
    "https://www.bankingsupervision.europa.eu/rss/press.html",
    "https://www.bankingsupervision.europa.eu/rss/pub.html",
    "https://www.bankingsupervision.europa.eu/rss/speeches.html",
]

# ecb.europa.eu
rss_urls += [
    "https://www.ecb.europa.eu/rss/blog.html",
    "https://www.ecb.europa.eu/rss/statpress.html",
    "https://www.ecb.europa.eu/rss/wppub.html",
]

# EBA (ec.europa.eu newsroom feed and eba.europa.eu)
rss_urls += [
    "https://ec.europa.eu/newsroom/eba/feed?item_type_id=1642&lang=en",
    "https://www.eba.europa.eu/news-press/news/rss.xml",
]

# bis.org and federalreserve.gov
rss_urls += [
    "https://www.bis.org/doclist/rss_all_categories.rss",
    "https://www.federalreserve.gov/feeds/press_all.xml",
]

# bundesbank.de and dnb.nl
rss_urls += [
    "https://www.bundesbank.de/service/rss/en/633292/feed.rss",
    "https://www.bundesbank.de/service/rss/en/633306/feed.rss",
    "https://www.dnb.nl/en/rss/16451/6882",
]

# bankofengland.co.uk
rss_urls += [
    "https://www.bankofengland.co.uk/rss/statistics",
    "https://www.bankofengland.co.uk/rss/news",
    "https://www.bankofengland.co.uk/rss/prudential-regulation-publications",
    "https://www.bankofengland.co.uk/rss/publications",
]

# rba.gov.au and bankofcanada.ca
rss_urls += [
    "https://www.rba.gov.au/rss/rss-cb-rdp.xml",
    "https://www.rba.gov.au/rss/rss-cb-speeches.xml",
    "https://www.rba.gov.au/rss/rss-cb-media-releases.xml",
    "https://www.bankofcanada.ca/content_type/technical-reports/feed/",
]

def clean_html(html_content):
    """Strips HTML markup from a string and collapses whitespace.

    Falsy input (None, empty string) yields an empty string. Otherwise the
    markup is parsed with BeautifulSoup's built-in 'html.parser', the text
    nodes are joined with spaces, and every run of whitespace is reduced to
    a single space.
    """
    if html_content:
        parsed = BeautifulSoup(html_content, 'html.parser')
        words = parsed.get_text(separator=' ', strip=True).split()
        # Re-joining the split words normalizes internal whitespace
        return ' '.join(words)
    return ""

def _from_struct_time(parsed):
    """Converts a feedparser ``*_parsed`` time tuple to a naive datetime, or None."""
    if not parsed:
        return None
    try:
        # Only year..second are needed; the remaining tuple fields are ignored
        return datetime(*parsed[:6])
    except (ValueError, TypeError):
        return None


def _from_date_string(raw):
    """Parses a free-form date string to a naive UTC timestamp, or None."""
    if not raw:
        return None
    try:
        # utc=True converts offset-aware strings to UTC and treats naive
        # strings as UTC, so every result shares one time base.
        parsed = pd.to_datetime(raw, errors='coerce', utc=True)
    except Exception:
        # Best-effort by design: an unparseable date must not drop the entry
        return None
    # errors='coerce' signals failure with NaT, which is NOT None; check it
    # explicitly instead of relying on truthiness.
    if not isinstance(parsed, pd.Timestamp) or pd.isna(parsed):
        return None
    # Drop the tz info so the value can be mixed with the naive datetimes
    # built from the *_parsed tuples.
    return parsed.tz_convert(None)


def parse_date(entry):
    """Attempts to parse an entry's date from several candidate fields.

    Fields are tried in this order, and the first one that yields a valid
    date wins: ``published_parsed``, ``published``, ``updated_parsed``,
    ``updated``. The ``*_parsed`` fields are time tuples produced by
    feedparser (documented by feedparser as UTC); the plain fields are the
    raw strings, parsed with pandas and normalised to naive UTC.

    Args:
        entry: A feed entry exposing the fields above as attributes. Any of
            them may be missing or empty.

    Returns:
        A timezone-naive ``datetime`` (possibly a ``pandas.Timestamp``, which
        is a ``datetime`` subclass), or ``None`` when no field could be
        parsed. ``NaT`` is never returned.
    """
    candidates = (
        ('published_parsed', _from_struct_time),
        ('published', _from_date_string),
        ('updated_parsed', _from_struct_time),
        ('updated', _from_date_string),
    )
    for field_name, convert in candidates:
        result = convert(getattr(entry, field_name, None))
        if result is not None:
            return result
    return None

def fetch_and_parse_feeds(urls):
    """Downloads each RSS feed and flattens its entries into a list of dicts.

    Every URL is fetched with requests (20 s timeout, custom User-Agent) and
    the response bytes are handed to feedparser. Each entry becomes a dict
    with the keys 'title', 'link', 'published_date', 'description',
    'source_feed' and 'source_url'. Progress and problems are reported with
    print(); a failing feed is reported and skipped, never raised.

    Args:
        urls: Iterable of feed URLs.

    Returns:
        A list with one dict per entry, in fetch order.
    """
    # Identify ourselves to the servers we poll
    request_headers = {'User-Agent': 'MyRSSConsolidator/1.0 (+http://example.com)'}
    collected = []

    for url in urls:
        print(f"Fetching feed via requests: {url}...")
        try:
            response = requests.get(url, headers=request_headers, timeout=20)
            # Turn 4xx/5xx responses into exceptions handled below
            response.raise_for_status()

            # feedparser accepts the raw bytes of the response body
            parsed_feed = feedparser.parse(response.content)

            # bozo marks malformed XML; entries may still have been extracted,
            # so this is only a warning
            if parsed_feed.bozo:
                print(f"  Warning: Feed XML may be malformed - {parsed_feed.bozo_exception}")

            # Fall back to the URL when the feed carries no title
            source_name = parsed_feed.feed.get('title', url)
            entries = parsed_feed.entries

            if not entries:
                print(f"  No entries found in parsed feed content: {url}")
                continue

            print(f"  Found {len(entries)} entries from '{source_name}'")

            for entry in entries:
                # Appending one by one keeps already-processed entries even if
                # a later entry of the same feed raises
                collected.append({
                    'title': entry.get('title', 'N/A'),
                    'link': entry.get('link', 'N/A'),
                    'published_date': parse_date(entry),
                    # The text may live under 'summary' or 'description'
                    'description': clean_html(
                        entry.get('summary', entry.get('description', ''))
                    ),
                    'source_feed': source_name,
                    'source_url': url,
                })

            # Brief pause after a successfully processed feed
            time.sleep(0.5)

        except requests.exceptions.RequestException as exc:
            # Connection errors, timeouts and HTTP error statuses
            print(f"  Error fetching {url}: {exc}")
        except Exception as exc:
            # Anything unexpected while parsing or processing this feed
            print(f"  An unexpected error occurred processing {url}: {exc}")

    return collected

# --- Main Execution ---
if __name__ == "__main__":
    print("Starting RSS feed consolidation...")
    feed_items = fetch_and_parse_feeds(rss_urls)

    # Always (re)bind df, even when nothing was fetched. Otherwise later
    # cells either fail with NameError or silently reuse a stale df left
    # over from an earlier run. The explicit column list gives the empty
    # frame the same schema as a populated one.
    item_columns = ['title', 'link', 'published_date', 'description', 'source_feed', 'source_url']
    df = pd.DataFrame(feed_items, columns=item_columns)

    # Convert 'published_date' column to datetime objects (if not already)
    df['published_date'] = pd.to_datetime(df['published_date'], errors='coerce')

    # Newest first; undated items go to the end
    df = df.sort_values(by='published_date', ascending=False, na_position='last')
    df = df.reset_index(drop=True)

    if not feed_items:
        print("No items were successfully fetched from any feed.")
    else:
        print(f"\nSuccessfully created DataFrame with {len(df)} items.")

Starting RSS feed consolidation...
Fetching feed via requests: https://www.bankingsupervision.europa.eu/rss/press.html...
  Found 15 entries from 'ECB - European Central Bank'
Fetching feed via requests: https://www.bankingsupervision.europa.eu/rss/pub.html...
  Found 15 entries from 'ECB - European Central Bank'
Fetching feed via requests: https://www.bankingsupervision.europa.eu/rss/speeches.html...
  Found 15 entries from 'ECB - European Central Bank'
Fetching feed via requests: https://www.ecb.europa.eu/rss/blog.html...
  Found 15 entries from 'ECB - European Central Bank'
Fetching feed via requests: https://www.ecb.europa.eu/rss/statpress.html...
  Found 15 entries from 'ECB - European Central Bank'
Fetching feed via requests: https://www.ecb.europa.eu/rss/wppub.html...
  Found 15 entries from 'ECB - European Central Bank'
Fetching feed via requests: https://ec.europa.eu/newsroom/eba/feed?item_type_id=1642&lang=en...
  Found 100 entries from 'EBA external communications'
Fetching 

In [2]:
# Review results: the five most recent items, keeping one row per
# (published_date, title) pair
cols = ['title', 'published_date', 'source_feed', 'source_url']
(
    df.loc[:, cols]
    .sort_values(by='published_date', ascending=False)
    .drop_duplicates(subset=['published_date', 'title'])
    .head(5)
)

Unnamed: 0,title,published_date,source_feed,source_url
0,"The risk sensitivity of global liquidity flows: Heterogeneity, evolution and drivers",2025-04-22 12:00:00,All categories,https://www.bis.org/doclist/rss_all_categories.rss
1,Federal Treasury notes – Auction result,2025-04-22 09:45:00,Latest,https://www.bundesbank.de/service/rss/en/633306/feed.rss
2,SS1/25 – Step-in Risk,2025-04-22 09:00:00,Prudential regulation,https://www.bankofengland.co.uk/rss/prudential-regulation-publications
3,PS5/25 – Identification and management of step-in risk,2025-04-22 09:00:00,Prudential regulation,https://www.bankofengland.co.uk/rss/prudential-regulation-publications
4,Consistent commercial real estate market indicators: Methodology and an application to the German office market | Discussion p...,2025-04-22 08:00:00,Discussion Papers,https://www.bundesbank.de/service/rss/en/633292/feed.rss


In [3]:
# --- Configuration ---
# Column of df whose text is sent to the model as the headline
HEADINGS_COLUMN_NAME = 'title'
# Number of headline indices requested from the model and rows shown afterwards
N_TOP_HEADINGS = 10

# --- Initialize OpenAI Client (Requires openai>=1.0.0) ---
# No credentials are passed explicitly; presumably the SDK reads them from the
# environment (e.g. OPENAI_API_KEY) — confirm for your setup.
client = openai.OpenAI()

# --- Function to Format Headings for OpenAI ---
def format_headings_for_prompt(dataframe: pd.DataFrame, col_name: str) -> str:
    """Formats DataFrame headings as one ``index: heading`` line per row.

    Whitespace inside each heading (including embedded newlines) is collapsed
    to single spaces so that every row occupies exactly one line, which is
    the format the model is told to expect.

    Args:
        dataframe: Frame holding the headings; its index labels become the
            identifiers shown to the model.
        col_name: Name of the column that contains the heading text.

    Returns:
        The newline-joined lines, or an empty string for an empty frame.
    """
    # Iterate the single column directly instead of building a Series per row
    return "\n".join(
        f"{index}: {' '.join(str(heading).split())}"
        for index, heading in zip(dataframe.index, dataframe[col_name])
    )

# --- Function to Call OpenAI API ---
def get_relevant_indices_from_openai(formatted_headings: str, n_results: int) -> list[int]:
    """Sends formatted headings to OpenAI and returns the top relevant indices.

    Args:
        formatted_headings: Headlines, one ``index: headline`` per line.
        n_results: Maximum number of indices to return.

    Returns:
        Up to ``n_results`` distinct indices in the order the model listed
        them. When ``formatted_headings`` contains parseable ``index:``
        prefixes, indices the model returned that were never offered are
        discarded. Returns an empty list, without calling the API, when there
        are no headlines or ``n_results`` is not positive.
    """
    # Nothing to rank: skip the API call entirely
    if n_results <= 0 or not formatted_headings.strip():
        return []

    # Indices actually offered to the model, used to reject invented ones.
    # If none can be parsed (unexpected format), no filtering is applied.
    offered = {int(m) for m in re.findall(r'^(\d+):', formatted_headings, flags=re.MULTILINE)}

    # NOTE(review): "credit risk risk" below looks like a typo; the prompt is
    # left untouched here because changing it changes what the model sees.
    system_prompt = f"""
You are an expert financial analyst specializing in credit risk.
Analyze the provided list of headlines (index: headline).
Identify headlines most relevant to credit risk and credit risk modelling. Look for words like 
credit risk risk, models, risk assessments, guidelines, fines. Also include impact from AI and Climate Change,
but in this case be very selective as these key words are common. Prioritise articles that
cover the whole system as opposed to individual banks.

Return ONLY a comma-separated list of the indices for the top {n_results} most relevant headlines.

Example output: 15, 42, 3, 88, 101, 5, 23, 9, 67, 12
"""
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": formatted_headings}
        ],
        temperature=0.1, # Low temp for near-deterministic output
        max_tokens=100,
    )
    # content can be None (e.g. a refusal); treat that as an empty answer
    raw_response = (completion.choices[0].message.content or "").strip()

    # Parse the response: every run of digits is a candidate index
    indices: list[int] = []
    seen: set[int] = set()
    for token in re.findall(r'\d+', raw_response):
        idx = int(token)
        if idx in seen or (offered and idx not in offered):
            continue  # Drop repeats and indices that were never offered
        seen.add(idx)
        indices.append(idx)
        if len(indices) == n_results:
            break  # Take only up to n_results
    return indices

# --- Main Execution ---
# Only the first 200 rows (the most recent items after sorting) are offered to the model
formatted_prompt_text = format_headings_for_prompt(df.head(200), HEADINGS_COLUMN_NAME)
top_indices = get_relevant_indices_from_openai(formatted_prompt_text, N_TOP_HEADINGS)

# Keep each returned label once, and only if it exists in df: a label the
# model invented would otherwise make df.loc raise KeyError.
valid_indices = [idx for idx in dict.fromkeys(top_indices) if idx in df.index]

cols = ['title', 'link' , 'published_date']
selected = df.loc[valid_indices, cols]
# sample() shuffles the selected rows. n is capped at the number of rows
# available because sampling more than the population raises ValueError,
# which happens whenever the model returns fewer than N_TOP_HEADINGS indices.
selected.sample(n=min(N_TOP_HEADINGS, len(selected))).reset_index(drop=True)

Unnamed: 0,title,link,published_date
0,Federal Reserve Board requests comment on a proposal to reduce the volatility of the capital requirements stemming from the Bo...,https://www.federalreserve.gov/newsevents/pressreleases/bcreg20250417a.htm,2025-04-17 21:30:00
1,Financial Stability in Focus: Artificial intelligence in the financial system,https://www.bankofengland.co.uk/financial-stability-in-focus/2025/april-2025,2025-04-09 09:30:00
2,The EBA publishes its annual assessment of banks’ internal approaches for the calculation of capital requirements,https://www.eba.europa.eu/publications-and-media/press-releases/eba-publishes-its-annual-assessment-banks-internal-approaches-...,2025-04-04 12:00:00
3,ECB sanctions SEB Baltics for breaching ECB requirements on internal models,https://www.bankingsupervision.europa.eu//press/pr/date/2025/html/ssm.pr250418~21f6067cbc.en.html,2025-04-18 16:00:00
4,Systemic Risk Survey Results - 2025 H1,https://www.bankofengland.co.uk/systemic-risk-survey/2025/2025-h1,2025-04-09 09:30:00
5,Credit Conditions Survey - 2025 Q1,https://www.bankofengland.co.uk/credit-conditions-survey/2025/2025-q1,2025-04-17 08:30:00
6,The EBA updates list of indicators used to perform risk assessments,https://www.eba.europa.eu/publications-and-media/press-releases/eba-updates-list-indicators-used-perform-risk-assessments,2025-04-16 13:15:39
7,Stress testing the UK banking system: Guidance on the 2025 stress test for participants,https://www.bankofengland.co.uk/stress-testing/2025/guidance-for-participants,2025-03-24 10:00:00
8,Stress testing the UK banking system: Key elements of the 2025 Bank Capital stress test,https://www.bankofengland.co.uk/stress-testing/2025/key-elements-bank-capital,2025-03-24 10:00:00
9,Bank of England launches the 2025 Bank Capital Stress Test,https://www.bankofengland.co.uk/news/2025/march/boe-launches-the-2025-bank-capital-stress-test,2025-03-24 10:00:00


In [4]:
# Display the consolidated DataFrame with all of its columns for inspection
df

Unnamed: 0,title,link,published_date,description,source_feed,source_url
0,"The risk sensitivity of global liquidity flows: Heterogeneity, evolution and drivers",https://www.bis.org/publ/work1262.htm,2025-04-22 12:00:00,"by Stefan Avdjiev, Leonardo Gambacorta, Linda S Goldberg, Stefano Schiaffi The period after the Global Financial Crisis (GFC) ...",All categories,https://www.bis.org/doclist/rss_all_categories.rss
1,Federal Treasury notes – Auction result,https://www.bundesbank.de/en/press/press-releases/federal-securities/federal-treasury-notes-auction-result-955954,2025-04-22 09:45:00,,Latest,https://www.bundesbank.de/service/rss/en/633306/feed.rss
2,SS1/25 – Step-in Risk,https://www.bankofengland.co.uk/prudential-regulation/publication/2025/april/step-in-risk-supervisory-statement,2025-04-22 09:00:00,Supervisory statement 1/25,Prudential regulation,https://www.bankofengland.co.uk/rss/prudential-regulation-publications
3,PS5/25 – Identification and management of step-in risk,https://www.bankofengland.co.uk/prudential-regulation/publication/2025/april/step-in-risk-shadow-banking-entities-and-groups-o...,2025-04-22 09:00:00,Policy statement 5/25,Prudential regulation,https://www.bankofengland.co.uk/rss/prudential-regulation-publications
4,Consistent commercial real estate market indicators: Methodology and an application to the German office market | Discussion p...,https://www.bundesbank.de/en/publications/research/discussion-papers/consistent-commercial-real-estate-market-indicators-metho...,2025-04-22 08:00:00,Non-technical summary Research Question The need for information on current developments in the commercial real estate markets...,Discussion Papers,https://www.bundesbank.de/service/rss/en/633292/feed.rss
5,Announcement – Federal Treasury discount paper (Bubills),https://www.bundesbank.de/en/press/press-releases/federal-securities/announcement-federal-treasury-discount-paper-bubills--955940,2025-04-22 07:40:00,,Latest,https://www.bundesbank.de/service/rss/en/633306/feed.rss
6,Invitation to bid by auction – Reopening 10-year Federal bond,https://www.bundesbank.de/en/press/press-releases/federal-securities/invitation-to-bid-by-auction-reopening-10-year-federal-bo...,2025-04-22 07:30:00,,Latest,https://www.bundesbank.de/service/rss/en/633306/feed.rss
7,Investor sentiment and dynamic connectedness in European markets: insights from the covid-19 and Russia-Ukraine conflict,https://www.ecb.europa.eu//pub/pdf/scpwps/ecb.wp3050~eb33ab7ed4.en.pdf,2025-04-21 09:00:00,The primary objective of this study is to explore the dynamic relationships between equity returns or volatility and sentiment...,ECB - European Central Bank,https://www.ecb.europa.eu/rss/wppub.html
8,ECB sanctions SEB Baltics for breaching ECB requirements on internal models,https://www.bankingsupervision.europa.eu//press/pr/date/2025/html/ssm.pr250418~21f6067cbc.en.html,2025-04-18 16:00:00,,ECB - European Central Bank,https://www.bankingsupervision.europa.eu/rss/press.html
9,Federal Reserve Board announces approval of application by Capital One Financial Corporation to merge with Discover Financial ...,https://www.federalreserve.gov/newsevents/pressreleases/orders20250418a.htm,2025-04-18 15:30:00,Federal Reserve Board announces approval of application by Capital One Financial Corporation to merge with Discover Financial ...,FRB: Press Release - All Releases,https://www.federalreserve.gov/feeds/press_all.xml
