api from https://banks.data.fdic.gov/docs/#/Structure/searchInstitutions

based on work from
https://doi.org/10.1016/j.ribaf.2017.07.104 - Predicting bank failure: An improvement by implementing a machine-learning approach to classical financial ratios
https://doi.org/10.1016/j.dss.2012.11.015 - Partial Least Square Discriminant Analysis for bankruptcy prediction
https://doi.org/10.1016/j.eswa.2008.01.053 - Effects of feature construction on classification performance: An empirical study in bank failure prediction

In [1]:
import requests
import pandas as pd
from io import StringIO
from urllib.parse import quote_plus

pd.set_option('display.max_rows', None)

## Construct API call

In [98]:
# strings for getting lists of active and failed banks
institutionurl = 'https://banks.data.fdic.gov/api/institutions?'
filtersfailed = 'ACTIVE:0 AND DATEUPDT:[\"2006-01-01\" TO \"2023-12-31\"]'
filtersactive = 'ACTIVE:1 AND DATEUPDT:[\"2006-01-01\" TO \"2023-12-31\"]'
bankfields = 'UNTY,STALP,NAME,ACTIVE,CERT,DATEUPDT'

filtersfailed_string = 'filters=' + quote_plus(filtersfailed)
filtersactive_string = 'filters=' + quote_plus(filtersactive)
bankfields_string = '&fields=' + quote_plus(bankfields)
bank_suffix = '&sort_by=NAME&sort_order=DESC&limit=10000&offset=0&format=csv&download=false&filename=data_file'



In [99]:
# get failed banks
failedbanksGET = requests.get(institutionurl +  filtersfailed_string +  bankfields_string + bank_suffix).text
failedbanks = pd.read_csv(StringIO(failedbanksGET))

# get active banks
activebanksGET = requests.get(institutionurl +  filtersactive_string +  bankfields_string + bank_suffix).text
activebanks = pd.read_csv(StringIO(activebanksGET))

In [100]:
failedbanks.describe()

Unnamed: 0,ACTIVE,CERT,ID
count,4938.0,4938.0,4938.0
mean,0.0,26602.856622,26602.856622
std,0.0,17957.209622,17957.209622
min,0.0,9.0,9.0
25%,0.0,12391.25,12391.25
50%,0.0,24801.5,24801.5
75%,0.0,34662.5,34662.5
max,0.0,91363.0,91363.0


In [101]:
activebanks.describe()

Unnamed: 0,ACTIVE,CERT,ID
count,4620.0,4620.0,4620.0
mean,1.0,20548.378788,20548.378788
std,0.0,17167.994844,17167.994844
min,1.0,14.0,14.0
25%,1.0,8625.0,8625.0
50%,1.0,15935.5,15935.5
75%,1.0,28865.75,28865.75
max,1.0,91325.0,91325.0


In [78]:
url = 'https://banks.data.fdic.gov/api/financials?'
filters = ['ACTIVE:1', 'REPYEAR:[2000 TO 2023]']
features = ['NAME', 'RISDATE', 'CERT',
            'LNATRESR', 'ELNLOS', 'NIM', 'EAMINTAN', 'LNLSGRS', 'NTLNLS', 'EQ', 'AVASSETJ'
            'RBCT1CER', 'EQTOTR', 'EQV', 'LNLSNET', 'CUSLI', 'LIAB', 'LIABEQR', 'LIABEQ', 'DEP',
            'NIMY', 'NIMR', 'IOTHFEE', 'NONIXR', 'PTAXNETINCR', 'ITAX',
            'ROA', 'ROE', 'NETINC', 'EEFFR', 'CHBAL' ]
featurenames = {'LNATRESR': 'LOAN LOSS RESERVE/GROSS LN&LS',
                'ELNLOS' : 'PROVISIONS FOR LN & LEASE LOSSES',
                'NIM' : 'NET INTEREST INCOME',
                'EAMINTAN' : 'AMORT & IMPAIR LOSS AST',
                'LNLSGRS' : 'LOANS AND LEASES, GROSS',
                'NTLNLS' : 'TOTAL LN&LS NET CHARGE-OFFS',
                'EQ' : 'Equity Capital',
                'AVASSETJ' : 'AVERAGE ASSETS-ADJUSTED-PCA',
                'RBCT1CER' : 'COMMON EQUITY TIER 1 RATIO',
                'EQTOTR' : 'TOTAL EQUITY CAPITAL RATIO',
                'EQV' : 'BANK EQUITY CAPITAL/ASSETS',
                'LNLSNET' : 'LOANS AND LEASES-NET',
                'CUSLI' : 'CUSTOMERS ACCEPTANCES',
                'LIAB' : 'TOTAL LIABILITIES',
                'LIABEQR' : 'TOTAL LIABILITIES & CAPITAL RATIO',
                'LIABEQ' : 'TOTAL LIABILITIES & CAPITAL',
                'DEP' : 'Total deposits',
                'NIMY' : 'NET INTEREST MARGIN',
                'NIMR' : 'NET INTEREST INCOME RATIO',
                'IOTHFEE' : 'OTHER FEE INCOME',
                'NONIXR' : 'TOTAL NONINTEREST EXPENSE RATIO',
                'PTAXNETINCR' : 'PRE-TAX NET INCOME OPERATING INCOME RATIO',
                'ITAX' : 'APPLICABLE INCOME TAXES',
                'ROA' : 'Return on assets (ROA)',
                'ROE' : 'Return on equity (ROE)',
                'NETINC' : 'Net income',
                'EEFFR' : 'EFFICIENCY RATIO',
                'CHBAL' : 'CASH & DUE FROM DEPOSITORY INST'

                }



In [79]:
# convert to a URL friendly format for the API call
filterstring = quote_plus(','.join(filters))
featurestring = quote_plus(','.join(features))

In [80]:
response = requests.get(url + 'filters=' + filterstring + '&fields=' + featurestring + '&limit=3000&offset=0&format=csv&download=false&filename=data_file').text
df = pd.read_csv(StringIO(response))

In [82]:
df

Unnamed: 0,"{""errors"":[{""status"":400","links:{""about"":{""href"":""https://banks.data.fdic.gov/docs""","meta:{""section"":""Filter Syntax""}}}","title:""Invalid request input","please double check your search query syntax""","detail:""search_phase_execution_exception: [query_shard_exception] Reason: failed to create query: {\n \""function_score\"" : {\n \""query\"" : {\n \""bool\"" : {\n \""must\"" : [\n {\n \""bool\"" : {\n \""filter\"" : [\n {\n \""query_string\"" : {\n \""query\"" : \""ACTIVE:1","REPYEAR:[2000 TO 2023]\""","\n \""default_field\"" : \""NAME\""","\n \""fields\"" : [ ]","\n \""type\"" : \""best_fields\""",...,"\n \""adjust_pure_negative\"" : true","\n \""boost\"" : 1.0\n }\n }\n ]","\n \""adjust_pure_negative\"" : true.1","\n \""boost\"" : 1.0\n }\n }","\n \""functions\"" : [ ]","\n \""score_mode\"" : \""multiply\""","\n \""max_boost\"" : 3.4028235E38","\n \""boost\"" : 1.0\n }\n}""","source:{""parameter"":""filters""}","meta:{""timestamp"":""2023-11-17T03:29:16.936Z""}}]}"


In [81]:
df[['CERT', 'NAME', "RISDATE"]]

KeyError: "None of [Index(['CERT', 'NAME', 'RISDATE'], dtype='object')] are in the [columns]"

In [51]:
print(df.isna().sum())

CHBAL             0
CUSLI          3000
DEP               0
EAMINTAN          0
EEFFR             0
ELNLOS            0
EQ                0
EQTOTR            0
EQV               0
ID                0
IOTHFEE        3000
ITAX              0
LIAB              0
LIABEQ            0
LIABEQR           0
LNATRESR          0
LNLSGRS           0
LNLSNET           0
NAME              0
NETINC            0
NIM               0
NIMR              0
NIMY              0
NONIXR            0
NTLNLS            0
PTAXNETINCR       0
RISDATE           0
ROA               0
ROE               0
dtype: int64
