api from https://banks.data.fdic.gov/docs/#/Structure/searchInstitutions

based on work from
https://doi.org/10.1016/j.ribaf.2017.07.104 - Predicting bank failure: An improvement by implementing a machine-learning approach to classical financial ratios
https://doi.org/10.1016/j.dss.2012.11.015 - Partial Least Square Discriminant Analysis for bankruptcy prediction
https://doi.org/10.1016/j.eswa.2008.01.053 - Effects of feature construction on classification performance: An empirical study in bank failure prediction

In [2]:
import requests
import pandas as pd
from io import StringIO
from urllib.parse import quote_plus

# pd.set_option('display.max_rows', None)

## Construct API call

In [98]:
# function to get data from BankFind API
def getData(url: str, filter: str, fields: str, sortby: str = 'CERT', order: str = 'ASC', n: int = 10000, k: int = 0, suffix: str = '&format=csv&download=false&filename=data_file') -> pd.DataFrame:
    request = requests.get(url + 'filters=' + quote_plus(filter) + '&fields=' + quote_plus(fields) + '&sort_by=' + sortby + '&sort_order=' + order + '&limit=' + str(n) + '&offset=' + str(k) + suffix).text
    return pd.read_csv(StringIO(request))

In [29]:
# reset index and drop redundant columns for bank dataframe
def cleanbankdata(df: pd.DataFrame) -> None:
    df.set_index('CERT', inplace=True)
    df.drop('ID', axis = 1, inplace = True)

### Get lists of all banks 2006-present

In [203]:
n = 1000

In [204]:
# strings for getting lists of active and failed banks
institutionurl = 'https://banks.data.fdic.gov/api/institutions?'
filtersfailed = 'ACTIVE:0 AND DATEUPDT:[\"2006-01-01\" TO \"2023-12-31\"]'
filtersactive = 'ACTIVE:1 AND DATEUPDT:[\"2006-01-01\" TO \"2023-12-31\"]'
bankfields = 'STALP,NAME,ACTIVE,CERT,DATEUPDT'


# get failed banks
failedbanks = getData(institutionurl, filtersfailed, bankfields, n = n)
cleanbankdata(failedbanks)

# get active banks
activebanks = getData(institutionurl, filtersactive, bankfields, n = n)
cleanbankdata(activebanks)

In [196]:
#failedbanks.head()

In [197]:
#activebanks.head()

In [205]:
print(f'number of failed banks: {len(failedbanks)}')
print(f'number of active banks: {len(activebanks)}')

number of failed banks: 1000
number of active banks: 1000


### Get financials of banks

In [206]:
# global information
# financials

# gets financial data for all banks currently active from their year-end report in 2021
financialsurl = 'https://banks.data.fdic.gov/api/financials?'
# filters = ['ACTIVE:1', 'REPYEAR:[2000 TO 2023]']
featureslist =  ['NAME,RISDATE,CERT,REPYEAR,',
                'LNATRESR,ELNLOS,NIM,EAMINTAN,LNLSGRS,NTLNLS,EQ,AVASSETJ'] #,
                # 'RBCT1,EQTOTR,EQV,LNLSNET,LIAB,LIABEQR,LIABEQ,DEP,',
                # 'NIMY,NIMR,NONIXR,PTAXNETINCR,ITAX,',
                # 'ROA,ROE,NETINC,EEFFR,CHBAL']
features = ''.join(featureslist)

featurenames = {'LNATRESR': 'LOAN LOSS RESERVE/GROSS LN&LS',
                'ELNLOS' : 'PROVISIONS FOR LN & LEASE LOSSES',
                'NIM' : 'NET INTEREST INCOME',
                'EAMINTAN' : 'AMORT & IMPAIR LOSS AST',
                'LNLSGRS' : 'LOANS AND LEASES, GROSS',
                'NTLNLS' : 'TOTAL LN&LS NET CHARGE-OFFS',
                'EQ' : 'Equity Capital',
                'AVASSETJ' : 'AVERAGE ASSETS-ADJUSTED-PCA',
                'RBCT1' : 'TIER 1 RBC-PCA',
                'EQTOTR' : 'TOTAL EQUITY CAPITAL RATIO',
                'EQV' : 'BANK EQUITY CAPITAL/ASSETS',
                'LNLSNET' : 'LOANS AND LEASES-NET',
                'CUSLI' : 'CUSTOMERS ACCEPTANCES',
                'LIAB' : 'TOTAL LIABILITIES',
                'LIABEQR' : 'TOTAL LIABILITIES & CAPITAL RATIO',
                'LIABEQ' : 'TOTAL LIABILITIES & CAPITAL',
                'DEP' : 'Total deposits',
                'NIMY' : 'NET INTEREST MARGIN',
                'NIMR' : 'NET INTEREST INCOME RATIO',
                'IOTHFEE' : 'OTHER FEE INCOME',
                'NONIXR' : 'TOTAL NONINTEREST EXPENSE RATIO',
                'PTAXNETINCR' : 'PRE-TAX NET INCOME OPERATING INCOME RATIO',
                'ITAX' : 'APPLICABLE INCOME TAXES',
                'ROA' : 'Return on assets (ROA)',
                'ROE' : 'Return on equity (ROE)',
                'NETINC' : 'Net income',
                'EEFFR' : 'EFFICIENCY RATIO',
                'CHBAL' : 'CASH & DUE FROM DEPOSITORY INST'

                }

#### Active Banks

In [207]:
activefilters = 'RISDATE:20211231'
activefinancials = getData(financialsurl, activefilters, features)

In [208]:
activefinancials.head()

Unnamed: 0,AVASSETJ,CERT,EAMINTAN,ELNLOS,EQ,ID,LNATRESR,LNLSGRS,NAME,NIM,NTLNLS,REPYEAR,RISDATE
0,290403000.0,14,243000.0,-29000.0,27821000.0,14_20211231,0.266454,32654000,STATE STREET BANK&TRUST CO,1996000.0,2000.0,2021,20211231
1,1070188.0,35,0.0,-600.0,100951.0,35_20211231,1.074301,460499,AUBURNBANK,24035.0,79.0,2021,20211231
2,410344.0,39,0.0,300.0,36189.0,39_20211231,1.574606,267178,ROBERTSON BANKING CO,12091.0,-8.0,2021,20211231
3,277748.0,41,0.0,100.0,36064.0,41_20211231,1.498753,74195,PHENIX-GIRARD BANK,6880.0,41.0,2021,20211231
4,384528.0,46,0.0,24.0,35665.0,46_20211231,1.259482,190237,MERCHANTS BANK OF ALABAMA,11151.0,45.0,2021,20211231


In [210]:
print(f'number of financial reports (banks): {len(activefinancials)}')

number of financial reports (banks): 4904


In [211]:
# join bank data with financials
activedata = activebanks.merge(activefinancials, on = 'CERT', how = 'left', suffixes=['_b', '_f'])
# drop NAs
activedata.dropna(inplace= True)

In [212]:
activedata

Unnamed: 0,CERT,ACTIVE,DATEUPDT,NAME_b,STALP,AVASSETJ,EAMINTAN,ELNLOS,EQ,ID,LNATRESR,LNLSGRS,NAME_f,NIM,NTLNLS,REPYEAR,RISDATE
0,14,1,06/05/2023,State Street Bank and Trust Company,MA,290403000.0,243000.0,-29000.0,27821000.0,14_20211231,0.266454,32654000,STATE STREET BANK&TRUST CO,1996000.0,2000.0,2021,20211231
1,35,1,09/02/2022,AuburnBank,AL,1070188.0,0.0,-600.0,100951.0,35_20211231,1.074301,460499,AUBURNBANK,24035.0,79.0,2021,20211231
2,39,1,03/28/2023,Robertson Banking Company,AL,410344.0,0.0,300.0,36189.0,39_20211231,1.574606,267178,ROBERTSON BANKING CO,12091.0,-8.0,2021,20211231
3,41,1,08/31/2022,Phenix-Girard Bank,AL,277748.0,0.0,100.0,36064.0,41_20211231,1.498753,74195,PHENIX-GIRARD BANK,6880.0,41.0,2021,20211231
4,49,1,08/31/2022,Bank of Evergreen,AL,75044.0,0.0,40.0,8994.0,49_20211231,2.295371,26165,BANK OF EVERGREEN,2205.0,13.0,2021,20211231
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,6954,1,08/31/2022,Cayuga Lake National Bank,NY,234006.0,0.0,904.0,21996.0,6954_20211231,1.152082,146691,CAYUGA LAKE NATIONAL BANK,6761.0,38.0,2021,20211231
996,6959,1,08/31/2022,Ballston Spa National Bank,NY,774458.0,0.0,756.0,60741.0,6959_20211231,1.516075,533572,BALLSTON SPA NATIONAL BANK,21626.0,50.0,2021,20211231
997,6976,1,08/31/2022,Dime Community Bank,NY,12212662.0,2622.0,6212.0,1366797.0,6976_20211231,0.906504,9250154,DIME COMMUNITY BANK,366036.0,9276.0,2021,20211231
998,6985,1,08/31/2022,The Canandaigua National Bank and Trust Company,NY,4095595.0,0.0,7416.0,342695.0,6985_20211231,1.073041,3197082,CANANDAIGUA NB&T CO,129119.0,3851.0,2021,20211231


#### Failed banks

In [213]:
# create a new column containing one year prior to the year of failure

failedbanks['prevYr'] = failedbanks.DATEUPDT.str.rsplit('/', expand = True, n = 1)[1].astype(int) - 1
failedbanks['targetdate'] = failedbanks.prevYr * 10000 + 1231


In [214]:
# get all financial data in the selected date range for the banks in the list of failed banks
bankIDstring = ' OR '.join(failedbanks.index.astype(str))
failedfilter = f'CERT:({bankIDstring}) AND REPYEAR:[2005 TO 2023]'
failedfinancials = getData(financialsurl, failedfilter, features)

## TODO: BankFind cuts off at 10k rows, but since there are 72 quarters in the date range, there will be many more entries. need to segment API call and repeat until all data is collected


In [215]:
failedfinancials

Unnamed: 0,AVASSETJ,CERT,EAMINTAN,ELNLOS,EQ,ID,LNATRESR,LNLSGRS,NAME,NIM,NTLNLS,REPYEAR,RISDATE
0,493443,9,0,-215,33724,9_20050331,1.205722,328351,UNION TRUST CO,4050,21,2005,20050331
1,514500,9,0,-215,34610,9_20050630,1.086249,342739,UNION TRUST CO,8301,42,2005,20050630
2,523842,9,0,-215,34179,9_20050930,1.081034,355863,UNION TRUST CO,12425,46,2005,20050930
3,521004,9,0,-215,34053,9_20051231,1.106170,356003,UNION TRUST CO,16340,41,2005,20051231
4,525932,9,0,0,36862,9_20060331,1.104080,364104,UNION TRUST CO,3904,-34,2006,20060331
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,292443,2824,0,1130,38180,2824_20061231,1.209774,214131,FIRST NB SCOTTSBORO ALABAMA,13463,408,2006,20061231
9996,296123,2824,0,190,39457,2824_20070331,1.257171,216872,FIRST NB SCOTTSBORO ALABAMA,3357,26,2007,20070331
9997,289253,2824,0,371,40109,2824_20070630,1.337247,213430,FNB BANK,6770,108,2007,20070630
9998,290625,2824,0,501,41942,2824_20070930,1.296314,224353,FNB BANK,10254,184,2007,20070930


In [216]:
faileddata = failedbanks.merge(failedfinancials, left_on = ['CERT', 'targetdate'], right_on = ['CERT', 'RISDATE'], how = 'left', suffixes=['_b', '_f'])

In [217]:
faileddata

Unnamed: 0,CERT,ACTIVE,DATEUPDT,NAME_b,STALP,prevYr,targetdate,AVASSETJ,EAMINTAN,ELNLOS,EQ,ID,LNATRESR,LNLSGRS,NAME_f,NIM,NTLNLS,REPYEAR,RISDATE
0,9,0,02/27/2008,Union Trust Company,ME,2007,20071231,534469.0,0.0,520.0,31112.0,9_20071231,1.106118,373378.0,UNION TRUST CO,15562.0,402.0,2007.0,20071231.0
1,46,0,01/07/2022,Merchants Bank of Alabama,AL,2021,20211231,384528.0,0.0,24.0,35665.0,46_20211231,1.259482,190237.0,MERCHANTS BANK OF ALABAMA,11151.0,45.0,2021.0,20211231.0
2,47,0,07/10/2020,Traders & Farmers Bank,AL,2019,20191231,367707.0,0.0,200.0,59782.0,47_20191231,1.227792,164523.0,TRADERS&FARMERS BANK,12351.0,193.0,2019.0,20191231.0
3,57,0,11/21/2006,Community Bank,AL,2005,20051231,551950.0,0.0,796.0,45473.0,57_20051231,1.392045,340221.0,COMMUNITY BANK,20685.0,685.0,2005.0,20051231.0
4,59,0,01/09/2013,The Citizens Bank,AL,2012,20121231,167353.0,0.0,430.0,28947.0,59_20121231,2.873206,43401.0,CITIZENS BANK,5021.0,366.0,2012.0,20121231.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,10186,0,12/21/2015,First State Bank of Fertile,MN,2014,20141231,,,,,,,,,,,,
996,10192,0,12/19/2017,Peoples Bank of Commerce,MN,2016,20161231,,,,,,,,,,,,
997,10196,0,03/26/2021,Northern Sky Bank,MN,2020,20201231,,,,,,,,,,,,
998,10198,0,08/16/2018,State Bank of Delano,MN,2017,20171231,,,,,,,,,,,,
