# Download all of the announcements

## 1. FOMC Statement Download

In [None]:
import os
import requests
import pandas as pd
from datetime import datetime

# Set the path to save the downloaded files
download_folder = "raw_data/announcements"
# exist_ok=True creates the folder (and any missing parents) only when needed,
# without a separate existence check that another process could race against.
os.makedirs(download_folder, exist_ok=True)

# Function to download the PDF
def download_pdf(url, date_str, timeout=30):
    """Download the PDF at *url* and save it in ``download_folder``.

    The file is written as ``monetary{date_str}a1.pdf``. Any
    ``requests`` failure (connection error, timeout, HTTP error status)
    is reported with a printed message instead of being raised, so a
    single bad URL does not stop the caller's loop.

    Args:
        url: Address of the PDF to fetch.
        date_str: Date string embedded in the saved file name.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block forever.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so error pages are not saved.
        response.raise_for_status()
        filename = f"monetary{date_str}a1.pdf"
        filepath = os.path.join(download_folder, filename)
        with open(filepath, 'wb') as f:
            f.write(response.content)
        print(f"Downloaded PDF for {date_str}")
    except requests.exceptions.RequestException as e:
        print(f"Failed to download {url}: {e}")

# Function to download the HTML file
def download_html(url, date_str, timeout=30):
    """Download the page at *url* and save it in ``download_folder``.

    The raw response bytes are written as ``monetary{date_str}a.html``.
    Any ``requests`` failure (connection error, timeout, HTTP error
    status) is reported with a printed message instead of being raised,
    so a single bad URL does not stop the caller's loop.

    Args:
        url: Address of the HTML page to fetch.
        date_str: Date string embedded in the saved file name.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block forever.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so error pages are not saved.
        response.raise_for_status()
        filename = f"monetary{date_str}a.html"
        filepath = os.path.join(download_folder, filename)
        # Written in binary mode: the bytes are stored as served, undecoded.
        with open(filepath, 'wb') as f:
            f.write(response.content)
        print(f"Downloaded HTML for {date_str}")
    except requests.exceptions.RequestException as e:
        print(f"Failed to download {url}: {e}")

# Function to convert the date format from 'MM/DD/YY' to 'YYYYMMDD'
def convert_date_format(date_str):
    """Convert a 'MM/DD/YY' date string to 'YYYYMMDD'.

    Args:
        date_str: The date text to convert. Surrounding whitespace is
            ignored.

    Returns:
        The date formatted as 'YYYYMMDD', or ``None`` when *date_str* is
        not a string (e.g. a NaN produced by an empty CSV cell) or does
        not match the 'MM/DD/YY' format. A message is printed whenever a
        value is skipped.
    """
    # Non-string values would make strptime raise TypeError rather than
    # ValueError, so reject them up front instead of crashing the caller.
    if not isinstance(date_str, str):
        print(f"Skipping invalid date: {date_str}")
        return None
    try:
        date_obj = datetime.strptime(date_str.strip(), "%m/%d/%y")
    except ValueError:
        print(f"Skipping invalid date: {date_str}")
        return None
    return date_obj.strftime("%Y%m%d")

# Load the table of statement dates
csv_file_path = 'dates/fomc_statements.csv'  # Replace with the actual CSV file path
df = pd.read_csv(csv_file_path)


def _statement_html_url(date_str):
    """Return the statement page URL for a 'YYYYMMDD' date string.

    Three URL layouts are used, selected by date:
      * up to and including 2002-03-19: the 'general' press layout;
      * after that and before 2006: the 'monetary' press layout;
      * 2006 onwards: the 'newsevents' layout.
    """
    year = date_str[:4]
    meeting_day = (int(year), int(date_str[4:6]), int(date_str[6:8]))

    # Tuples compare field by field, so this is "on or before 19 Mar 2002".
    if meeting_day <= (2002, 3, 19):
        return f"https://www.federalreserve.gov/boarddocs/press/general/{year}/{date_str}/"
    if meeting_day[0] < 2006:
        return f"https://www.federalreserve.gov/boarddocs/press/monetary/{year}/{date_str}/default.htm"
    return f"https://www.federalreserve.gov/newsevents/pressreleases/monetary{date_str}a.htm"


# Walk through every row of the CSV and fetch the matching statement page
for _, record in df.iterrows():
    # The 'Date' column is expected to hold dates in 'MM/DD/YY' format
    date_str = convert_date_format(record['Date'])
    if not date_str:
        # Conversion failed and was already reported; move on to the next row
        continue

    # The PDF address follows one pattern regardless of date
    pdf_url = f"https://www.federalreserve.gov/monetarypolicy/files/monetary{date_str}a1.pdf"

    # Only the HTML page is fetched by default
    # download_pdf(pdf_url, date_str)  # Uncomment to download PDFs
    download_html(_statement_html_url(date_str), date_str)

Downloaded HTML for 20000202
Downloaded HTML for 20000321
Downloaded HTML for 20000516
Downloaded HTML for 20000628
Downloaded HTML for 20000822
Downloaded HTML for 20001003
Downloaded HTML for 20001115
Downloaded HTML for 20001219
Downloaded HTML for 20010131
Downloaded HTML for 20010320
Downloaded HTML for 20010515
Downloaded HTML for 20010627
Downloaded HTML for 20010821
Downloaded HTML for 20011002
Downloaded HTML for 20011106
Downloaded HTML for 20011211
Downloaded HTML for 20020130
Downloaded HTML for 20020319
Downloaded HTML for 20020507
Downloaded HTML for 20020626
Downloaded HTML for 20020813
Downloaded HTML for 20020924
Downloaded HTML for 20021106
Downloaded HTML for 20021210
Downloaded HTML for 20030129
Downloaded HTML for 20030318
Downloaded HTML for 20030506
Downloaded HTML for 20030625
Downloaded HTML for 20030812
Downloaded HTML for 20030916
Downloaded HTML for 20031028
Downloaded HTML for 20031209
Downloaded HTML for 20040128
Downloaded HTML for 20040316
Downloaded HTM

## 2. Intermeeting Transcripts Download

In [None]:
# alright