In [None]:
import os
import urllib.request
from http.client import IncompleteRead
from io import BytesIO
from zipfile import ZipFile

import pandas as pd
from tqdm import tqdm

# Legacy Call Reports

In [None]:
# Two-digit year codes as they appear in the legacy archive names:
# "76" .. "99" followed by "00" .. "10" (calendar years 1976-2010).
years = [f"{calendar_year % 100:02d}" for calendar_year in range(1976, 2011)]
# Quarter-end months as zero-padded strings: "03", "06", "09", "12".
quarters = [f"{month:02d}" for month in range(3, 13, 3)]

In [None]:
# Download every quarterly legacy Call Report archive and unpack it into
# ./ChicagoFedCallReportsLegacy/.
#
# A truncated transfer (IncompleteRead) is retried, but only a bounded number
# of times so a permanently broken URL cannot hang the notebook forever.
MAX_DOWNLOAD_ATTEMPTS = 5

for year in tqdm(years):
    for quarter in quarters:
        url = f"https://www.chicagofed.org/-/media/others/banking/financial-institution-reports/commercial-bank-data/call{year}{quarter}-zip.zip"
        for attempt in range(1, MAX_DOWNLOAD_ATTEMPTS + 1):
            try:
                # The context manager closes the HTTP response (and its socket)
                # even when the read is cut short.
                with urllib.request.urlopen(url) as response:
                    payload = response.read()
            except IncompleteRead as e:
                print(e)
                if attempt == MAX_DOWNLOAD_ATTEMPTS:
                    # Give up loudly instead of retrying indefinitely.
                    raise
                continue

            # The whole archive is held in memory; nothing is written to disk
            # except the extracted members.
            with ZipFile(BytesIO(payload), "r") as zipObj:
                zipObj.extractall("./ChicagoFedCallReportsLegacy/")
            break


In [None]:
# Normalise the extracted file names to the lowercase pattern
# "call<last 8 characters of the original name>" (e.g. "call7603.xpt").
legacyDir = "./ChicagoFedCallReportsLegacy/"
legacyCallReports = os.listdir(legacyDir)

for file in tqdm(legacyCallReports):
    source = os.path.join(legacyDir, file)
    # Skip sub-directories so a re-run does not rename them as if they were
    # data files.
    if not os.path.isfile(source):
        continue
    # Lowercase only the file name. Lowercasing the whole path would point at
    # a non-existent directory on case-sensitive filesystems.
    target = os.path.join(legacyDir, f"call{file[-8:]}".lower())
    os.rename(source, target)

In [None]:
# Combine the four quarterly legacy files of each year into one CSV per year.
years = [f"{i:02d}" for i in range(76, 100)] + [f"{i:02d}" for i in range(0, 11)]
quarters = ["03", "06", "09", "12"]

# One inner list of four quarterly file names per year, in the same order as `yearsNew`.
legacyCallReports = [[f"call{year}{quarter}.xpt" for quarter in quarters] for year in years]
yearsNew = list(range(1976, 2011))

quarterlyFilePath = "./ChicagoFedCallReportsLegacy/quarterly/"
if not os.path.isdir(quarterlyFilePath):
    # The download and rename cells leave the .xpt files directly in the
    # parent directory; use it when no "quarterly" sub-directory exists.
    quarterlyFilePath = "./ChicagoFedCallReportsLegacy/"

# `total` is passed explicitly because zip() has no length for tqdm to read.
for legacyCallReport, yearNew in tqdm(zip(legacyCallReports, yearsNew), total=len(legacyCallReports)):
    df = pd.concat([pd.read_sas(quarterlyFilePath + file, format="xport") for file in legacyCallReport])
    df.to_csv(f"./ChicagoFedCallReportsLegacy/call{yearNew}.csv")

# Call Reports

In [8]:
# Report years 2001-2023 as four-digit strings.
years = list(map(str, range(2001, 2024)))
# Quarter-end dates as "MM/DD/" prefixes; the year is appended when used.
quarters = [f"{month:02d}/{day}/" for month, day in ((3, 31), (6, 30), (9, 30), (12, 31))]

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By

# Drive Edge through the FFIEC bulk-download page and request one download per
# quarter-end date in `years` x `quarters`.
#
# Raw string: the plain literal contained "\W", "\S" and "\m", which are
# invalid escape sequences and trigger a warning on recent Python versions.
# The resulting path is unchanged.
# NOTE(review): newer Selenium releases expect the driver path via a Service
# object rather than a positional argument; confirm against the installed version.
driver = webdriver.Edge(r"C:\Windows\System32\msedgedriver.exe")
driver.get(url="https://cdr.ffiec.gov/public/PWS/DownloadBulkData.aspx")

# Select Multiple Call Reports option (first option of the first drop-down).
bulkDownloadPanel = driver.find_element(By.ID, "BulkDownloadPanel")
products = bulkDownloadPanel.find_elements(By.TAG_NAME, "select")[0]
products.find_elements(By.TAG_NAME, "option")[0].click()

# Select years
for year in tqdm(years):
    for quarter in quarters:
        # The panel is looked up again on every pass, presumably because the
        # page re-renders after each selection. TODO confirm.
        bulkDownloadPanel = driver.find_element(By.ID, "BulkDownloadPanel")
        yearSelect = bulkDownloadPanel.find_elements(By.TAG_NAME, "select")[1]
        # Typing the date text into the second drop-down picks the matching entry.
        yearSelect.send_keys(f"{quarter}{year}")

        # Download
        driver.find_element(By.NAME, "ctl00$MainContentHolder$TabStrip1$Download_0").click()


In [2]:
# Directory holding the downloaded bulk Call Report archives.
zipsPath = "./ChicagoFedCallReports/zips/"

# os.listdir returns entries in arbitrary order, so sort for a reproducible
# quarter order. Keep only .zip files so stray entries (e.g. partial
# downloads) are never handed to ZipFile.
zipNames = sorted(name for name in os.listdir(zipsPath) if name.lower().endswith(".zip"))

years = [i for i in range(2001, 2024)]
# Map each year to the archives whose file name contains that year.
callReports = {year: [file for file in zipNames if str(year) in file] for year in years}

In [3]:
def getCols(file, zipObj):
    """Read one tab-separated member of an open archive, indexed by IDRSSD.

    Parameters
    ----------
    file : str
        Name of the member inside the archive.
    zipObj : zipfile.ZipFile
        Open archive that contains `file`.

    Returns
    -------
    pandas.DataFrame or None
        The cleaned table, or None when the member has no columns besides the
        IDRSSD index.

    The member is parsed with the C engine first; if that fails for any
    reason, it is re-read from the start with the Python engine. Cleaning then
    drops every row whose raw index value occurs more than once, converts the
    index to numbers, drops rows whose index is not numeric, and keeps only
    the first of any repeated column name.
    """
    readOptions = dict(sep = "\t", index_col = "IDRSSD", on_bad_lines = "skip")

    try:
        # Context managers close the member handle, which was previously leaked.
        with zipObj.open(file) as handle:
            df = pd.read_csv(handle, engine = "c", low_memory = False, **readOptions)
    except Exception:
        # `Exception` rather than a bare except, so Ctrl-C is not swallowed.
        # The member is reopened because the first handle may be partly consumed.
        with zipObj.open(file) as handle:
            df = pd.read_csv(handle, engine = "python", **readOptions)

    if df.shape[1] > 0:
        # keep=False removes *all* rows sharing an index value, not just the repeats.
        df = df[~df.index.duplicated(keep = False)]
        df.index = pd.to_numeric(df.index, errors = "coerce")
        df = df.loc[~df.index.isna()]
        df = df.loc[:,~df.columns.duplicated()]
        return df
    else:
        return None
    
def getQuarter(quarter):
    """Merge every data file of one quarterly archive into a single wide table.

    Parameters
    ----------
    quarter : str
        File name of the archive inside `zipsPath`.

    Returns
    -------
    pandas.DataFrame
        The tables returned by `getCols` for each member, joined side by side
        on their index, with repeated index values and repeated column names
        removed.
    """
    with ZipFile(zipsPath + quarter) as zipObj:
        # Filter the readme out instead of list.remove(), which raises
        # ValueError when the archive does not contain one.
        files = [name for name in zipObj.namelist() if name.lower() != "readme.txt"]

        # pd.concat skips the None results getCols returns for column-less members.
        df = pd.concat([getCols(file, zipObj) for file in files], axis = 1)
        df = df[~df.index.isna()]
        df = df[~df.index.duplicated(keep = False)]
        df = df.loc[:,~df.columns.duplicated()]

        # Overwrite RCON9999 in every row with its most frequent value
        # (presumably the report date; verify against the data dictionary).
        # Skipped when the column holds no values, where index[0] would raise.
        counts = df["RCON9999"].value_counts()
        if not counts.empty:
            df["RCON9999"] = counts.index[0]

    return df

In [76]:
# Stack the quarterly tables of each year and write one CSV per year.
for year, callReport in tqdm(callReports.items()):
    if not callReport:
        # No archives for this year; pd.concat would raise on an empty list.
        continue
    df = pd.concat([getQuarter(quarter) for quarter in callReport], axis = 0, join = "outer", sort = False)
    df.to_csv(f"./ChicagoFedCallReports/call{year}.csv")

100%|██████████| 23/23 [42:54<00:00, 111.92s/it]
