# Download all of the announcements

## 1. FOMC Statement Download

In [26]:
import os
import requests
import pandas as pd
from datetime import datetime

# Destination directory for the downloaded FOMC statement files.
download_folder = "raw_data/announcements"
# exist_ok=True creates the directory tree only when it is missing, avoiding a
# separate existence check that could race with another process.
os.makedirs(download_folder, exist_ok=True)

def download_pdf(url, date_str, *, timeout=30):
    """Download the PDF at ``url`` and save it under ``download_folder``.

    The file is written as ``monetary{date_str}a1.pdf``. Any ``requests``
    failure (connection error, timeout, HTTP error status) is reported on
    stdout and the function returns without writing a file, so a batch run
    can continue with the next date.

    Args:
        url: Address of the PDF to fetch.
        date_str: String embedded in the output filename and in the log
            messages.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so error pages are not saved.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to download {url}: {e}")
        return

    filename = f"monetary{date_str}a1.pdf"
    filepath = os.path.join(download_folder, filename)
    with open(filepath, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded PDF for {date_str}")

def download_html(url, date_str, *, timeout=30):
    """Download the HTML page at ``url`` and save it under ``download_folder``.

    The file is written as ``monetary{date_str}a.html``. Any ``requests``
    failure (connection error, timeout, HTTP error status) is reported on
    stdout and the function returns without writing a file, so a batch run
    can continue with the next date.

    Args:
        url: Address of the page to fetch.
        date_str: String embedded in the output filename and in the log
            messages.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so error pages are not saved.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to download {url}: {e}")
        return

    filename = f"monetary{date_str}a.html"
    filepath = os.path.join(download_folder, filename)
    # Raw bytes are written so the page keeps whatever encoding the server used.
    with open(filepath, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded HTML for {date_str}")

def convert_date_format(date_str):
    """Convert an ``MM/DD/YY`` date string to ``YYYYMMDD``.

    Args:
        date_str: Date text in ``%m/%d/%y`` form.

    Returns:
        The date formatted as ``%Y%m%d``, or ``None`` when ``date_str``
        cannot be parsed (a "Skipping invalid date" message is printed).
    """
    try:
        date_obj = datetime.strptime(date_str, "%m/%d/%y")
    except (ValueError, TypeError):
        # ValueError: text that does not match the format.
        # TypeError: non-string input, e.g. the float NaN pandas yields for
        # an empty CSV cell; treat it as invalid instead of crashing the loop.
        print(f"Skipping invalid date: {date_str}")
        return None
    return date_obj.strftime("%Y%m%d")

# Load the statement dates; the 'Date' column is parsed as MM/DD/YY below.
csv_file_path = 'dates/fomc_statements.csv'
df = pd.read_csv(csv_file_path)

for _, row in df.iterrows():
    date_str = convert_date_format(row['Date'])

    # Rows whose date could not be parsed are skipped.
    if not date_str:
        continue

    year = date_str[:4]
    month = int(date_str[4:6])
    day = int(date_str[6:8])

    # The statement URL layout depends on the release date:
    #   up to and including 2002-03-19 -> boarddocs/press/general
    #   after that, before 2006        -> boarddocs/press/monetary
    #   2006 onwards                   -> newsevents/pressreleases
    if (int(year), month, day) <= (2002, 3, 19):
        html_url = f"https://www.federalreserve.gov/boarddocs/press/general/{year}/{date_str}/"
    elif int(year) < 2006:
        html_url = f"https://www.federalreserve.gov/boarddocs/press/monetary/{year}/{date_str}/default.htm"
    else:
        html_url = f"https://www.federalreserve.gov/newsevents/pressreleases/monetary{date_str}a.htm"

    pdf_url = f"https://www.federalreserve.gov/monetarypolicy/files/monetary{date_str}a1.pdf"

    # PDF download is currently disabled; uncomment the next line to enable it.
    # download_pdf(pdf_url, date_str)
    download_html(html_url, date_str)

Downloaded HTML for 20000202
Downloaded HTML for 20000321
Downloaded HTML for 20000516
Downloaded HTML for 20000628
Downloaded HTML for 20000822
Downloaded HTML for 20001003
Downloaded HTML for 20001115
Downloaded HTML for 20001219
Downloaded HTML for 20010131
Downloaded HTML for 20010320
Downloaded HTML for 20010515
Downloaded HTML for 20010627
Downloaded HTML for 20010821
Downloaded HTML for 20011002
Downloaded HTML for 20011106
Downloaded HTML for 20011211
Downloaded HTML for 20020130
Downloaded HTML for 20020319
Downloaded HTML for 20020507
Downloaded HTML for 20020626
Downloaded HTML for 20020813
Downloaded HTML for 20020924
Downloaded HTML for 20021106
Downloaded HTML for 20021210
Downloaded HTML for 20030129
Downloaded HTML for 20030318
Downloaded HTML for 20030506
Downloaded HTML for 20030625
Downloaded HTML for 20030812
Downloaded HTML for 20030916
Downloaded HTML for 20031028
Downloaded HTML for 20031209
Downloaded HTML for 20040128
Downloaded HTML for 20040316
Downloaded HTM

## 2. Intermeeting Transcripts Download

In [1]:
import os
import requests
import pandas as pd
from datetime import datetime

# Destination directory for the downloaded intermeeting minutes files.
download_folder = "raw_data/intermeeting"
# exist_ok=True creates the directory tree only when it is missing, avoiding a
# separate existence check that could race with another process.
os.makedirs(download_folder, exist_ok=True)

def download_minutes_html(url, date_str, *, timeout=30):
    """Download the minutes page at ``url`` and save it under ``download_folder``.

    The file is written as ``fomcminutes{date_str}.html``. Any ``requests``
    failure (connection error, timeout, HTTP error status) is reported on
    stdout and the function returns without writing a file, so a batch run
    can continue with the next date.

    Args:
        url: Address of the minutes page to fetch.
        date_str: String embedded in the output filename and in the log
            messages.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so error pages are not saved.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to download {url}: {e}")
        return

    filename = f"fomcminutes{date_str}.html"
    filepath = os.path.join(download_folder, filename)
    with open(filepath, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded intermeeting minutes HTML for {date_str}")

def convert_date_format(date_str):
    """Parse an ``MM/DD/YY`` date string.

    Args:
        date_str: Date text in ``%m/%d/%y`` form.

    Returns:
        A ``(formatted, parsed)`` tuple where ``formatted`` is the date as a
        ``%Y%m%d`` string and ``parsed`` is the corresponding ``datetime``.
        Returns ``(None, None)`` when ``date_str`` cannot be parsed (a
        "Skipping invalid date" message is printed).
    """
    try:
        date_obj = datetime.strptime(date_str, "%m/%d/%y")
    except (ValueError, TypeError):
        # ValueError: text that does not match the format.
        # TypeError: non-string input, e.g. the float NaN pandas yields for
        # an empty CSV cell; treat it as invalid instead of crashing the loop.
        print(f"Skipping invalid date: {date_str}")
        return None, None
    return date_obj.strftime("%Y%m%d"), date_obj

# Load the meeting dates; the 'Date' column is parsed as MM/DD/YY below.
csv_file_path = 'dates/fomc_statements.csv'
df = pd.read_csv(csv_file_path)

# Dates up to and including this one use the older /fomc/minutes/ URL layout;
# later dates use /monetarypolicy/fomcminutes.
cutoff_date = datetime(2007, 10, 30)

for _, row in df.iterrows():
    date_str, date_obj = convert_date_format(row['Date'])

    # Rows whose date could not be parsed are skipped.
    if not date_str:
        continue

    if date_obj > cutoff_date:
        minutes_url = f"https://www.federalreserve.gov/monetarypolicy/fomcminutes{date_str}.htm"
    else:
        minutes_url = f"https://www.federalreserve.gov/fomc/minutes/{date_str}.htm"

    download_minutes_html(minutes_url, date_str)


Downloaded intermeeting minutes HTML for 20000202
Downloaded intermeeting minutes HTML for 20000321
Downloaded intermeeting minutes HTML for 20000516
Downloaded intermeeting minutes HTML for 20000628
Downloaded intermeeting minutes HTML for 20000822
Downloaded intermeeting minutes HTML for 20001003
Downloaded intermeeting minutes HTML for 20001115
Downloaded intermeeting minutes HTML for 20001219
Downloaded intermeeting minutes HTML for 20010131
Downloaded intermeeting minutes HTML for 20010320
Downloaded intermeeting minutes HTML for 20010515
Downloaded intermeeting minutes HTML for 20010627
Downloaded intermeeting minutes HTML for 20010821
Downloaded intermeeting minutes HTML for 20011002
Downloaded intermeeting minutes HTML for 20011106
Downloaded intermeeting minutes HTML for 20011211
Downloaded intermeeting minutes HTML for 20020130
Downloaded intermeeting minutes HTML for 20020319
Downloaded intermeeting minutes HTML for 20020507
Downloaded intermeeting minutes HTML for 20020626
