In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from ffipy import FFIEC_Client
from io import StringIO
import time

In [2]:
# Open an FFIEC client and ask it for the filers of the 'Call' series, using
# 9/30/2010 as both the reporting period end and the last-update date.
client = FFIEC_Client()
filers_RSSD_09302010 = client.retrieve_filers_since_date(
    ds_name='Call',
    reporting_pd_end='9/30/2010',
    last_update_date='9/30/2010',
)
# Display how many filers came back (the saved output of this cell shows 7094).
len(filers_RSSD_09302010)

7094

In [3]:
# Smoke test: request the Call report of a single filer and parse it.
ds_name = 'Call'         # report series to request
end_date = '9/30/2010'   # reporting period end date
fiID_type = 'ID_RSSD'    # kind of financial-institution identifier being passed
fiID = 688556            # identifier of the test filer (original note: Wyoming Bank and Trust -- TODO confirm)
fmt = 'SDF'              # facsimile format; the original note lists PDF, XBRL or SDF as choices
outfile = None           # no output file path
return_result = True     # ask the method to return the data
results_bytes = client.retrieve_facsimile(
    ds_name=ds_name,
    reporting_pd_end=end_date,
    fiID_type=fiID_type,
    fiID=fiID,
    facsimile_fmt=fmt,
    outfile=outfile,
    return_result=return_result,
)
# Decode the payload as UTF-8 and read it as semicolon-separated text.
report = StringIO(str(results_bytes, 'utf-8'))
report_df = pd.read_csv(report, sep=';')



In [5]:
# Preview the first ten rows of the parsed single-filer report.
report_df.head(10)

Unnamed: 0,Call Date,Bank RSSD Identifier,MDRM #,Value,Last Update,Short Definition,Call Schedule,Line Number
0,20100930,688556,RCON0010,14201,20101012,Cash and balances dues from depository institu...,RCR,34
1,20100930,688556,RCON0020,0,20101012,Cash items in process of collection and unpost...,RCA,1a
2,20100930,688556,RCON0071,12625,20101012,Interest-bearing balances,RC,1b
3,20100930,688556,RCON0073,0,20101012,Foreign branches of other U.S. banks,RCA,3a
4,20100930,688556,RCON0074,0,20101012,Other banks in foreign countries and foreign c...,RCA,3b
5,20100930,688556,RCON0080,1006,20101012,Currency and coin,RCA,1b
6,20100930,688556,RCON0081,1577,20101012,Noninterest-bearing balances and currency and ...,RC,1a
7,20100930,688556,RCON0083,0,20101012,U.S. branches and agencies of foreign banks,RCA,2a
8,20100930,688556,RCON0085,13097,20101012,Other commercial banks in the U.S. and other d...,RCA,2b
9,20100930,688556,RCON0090,98,20101012,Balances due from Federal Reserve Banks,RCA,4


In [3]:
# Map each MDRM code that the CAMELS work needs to a descriptive label.
# Insertion order matters: later cells derive the column list from it.
_financial_code_labels = [
    ('RIAD3210', 'Total equity capital'),           # author's note: RCONG105 is a possible alternative
    ('RCON2170', 'Total assets'),                   # marked ok by the author
    ('RCON3360', 'Total loans'),                    # marked ok by the author
    ('RCON3465', '1-4 family residential loans'),
    ('RCON3466', 'Other real estate loans'),
    ('RCON3387', 'Commercial and industrial loans'),
    ('RCONB561', 'Credit cards'),
    ('RCON3123', 'Allowance for loan losses'),      # marked ok by the author
    ('RIAD4093', 'Total noninterest expense'),      # marked ok by the author
    ('RIAD4300', 'Net Income before'),              # added for the healthy-bank set
    ('RCON2215', 'Total transaction deposits'),     # marked ok by the author
    ('RCON2385', 'Total nontransaction deposits'),  # marked ok by the author
    ('RCON1773', 'Available-for-sale Fair Value'),
    ('RIAD4150', 'Number of full-time employees'),  # added for the healthy-bank set
]
selected_financials = dict(_financial_code_labels)


In [4]:
# Collect the MDRM codes, in dictionary order, as the list of metrics to
# extract from every report; the displayed length is expected to be 14.
columns = [code for code in selected_financials]
len(columns)

14

In [153]:
# Pull the 9/30/2010 Call report of every filer in filers_RSSD_09302010 and keep
# only the metrics listed in `columns` (the keys of selected_financials).
all_data_df = pd.DataFrame()  # not used by this cell; kept so the name stays defined
report_data_dict = {}   # filer id -> {MDRM code: reported value}
failed_pulls = {}       # filer id -> repr of the exception that stopped its pull

# The request settings are the same for every filer, so set them once.
ds_name = 'Call'        # report series to request
end_date = '9/30/2010'  # reporting period end date
fiID_type = 'ID_RSSD'   # kind of identifier passed (original note lists ID_RSSD and CERT)
fmt = 'SDF'             # facsimile format (original note lists PDF, XBRL or SDF)
return_result = True    # ask the method to return the data

for fiID in filers_RSSD_09302010:
    try:
        f = client.retrieve_facsimile(ds_name=ds_name, reporting_pd_end=end_date,
                                      fiID_type=fiID_type, fiID=fiID,
                                      facsimile_fmt=fmt, outfile=False,
                                      return_result=return_result)
        report = StringIO(str(f, 'utf-8'))
        report_df = pd.read_csv(report, sep=';')
        row = {}
        for col in columns:
            matches = report_df.loc[report_df['MDRM #'] == col, 'Value']
            # A code absent from this report becomes NaN instead of aborting the
            # row, so the codes after it are still collected.
            row[col] = matches.iloc[0] if len(matches) else np.nan
    except Exception as exc:
        # Best effort: remember why this filer was skipped and move on.
        # `Exception` (not a bare except) lets Ctrl-C / kernel interrupt through.
        failed_pulls[fiID] = repr(exc)
        continue
    # Store the row only once it is complete, so no half-filled entries remain.
    key = fiID
    report_data_dict[key] = row

In [165]:
# Assemble the pulled values into a DataFrame. With orient='index' every key of
# report_data_dict becomes a row label and every inner-dict key a column.

healthy_after_2010Q3 = pd.DataFrame.from_dict(report_data_dict, orient='index')
# Optional checks, left disabled:
#len(healthy_after_2010Q3.index)
#healthy_after_2010Q3.describe()
#healthy_after_2010Q3.columns
# Disabled save of the frame to disk:
#healthy_after_2010Q3.to_csv('data/healthy_after_2010Q3')


In [173]:
# Replace the MDRM codes in the column index with their descriptive labels.
# rename() matches each column by its code, so the labels stay correct whatever
# the column order or count, and re-running the cell leaves the frame unchanged.
healthy_after_2010Q3 = healthy_after_2010Q3.rename(columns=selected_financials)
# Disabled save of the labelled frame:
#healthy_after_2010Q3.to_csv('data/healthy_after_2010Q3')


In [64]:
# Number of filers divided by 100 -- presumably the average batch size for the
# 100-way split used by the batch pulls (TODO confirm intent).
len(filers_RSSD_09302010)/100

70.94

In [62]:
# Batch pull of the 6/30/2010 Call report for the filers in filers_RSSD_09302010,
# keeping only the metrics listed in `columns`.
filers = filers_RSSD_09302010
report_data_dict = {}   # filer id -> {MDRM code: reported value}
failed_pulls = {}       # filer id -> repr of the exception that stopped its pull
batches_to_pull = np.array_split(filers, 100)

# The request settings are the same for every filer, so set them once.
ds_name = 'Call'        # report series to request
end_date = '6/30/2010'  # reporting period end date
fiID_type = 'ID_RSSD'   # kind of identifier passed (original note lists ID_RSSD and CERT)
fmt = 'SDF'             # facsimile format requested
return_result = True    # ask the method to return the data

for n, batch in enumerate(batches_to_pull):
    for fiID in batch:
        try:
            f = client.retrieve_facsimile(ds_name=ds_name, reporting_pd_end=end_date,
                                          fiID_type=fiID_type, fiID=fiID,
                                          facsimile_fmt=fmt, outfile=False,
                                          return_result=return_result)
            report = StringIO(str(f, 'utf-8'))
            report_df = pd.read_csv(report, sep=';')
            row = {}
            for col in columns:
                matches = report_df.loc[report_df['MDRM #'] == col, 'Value']
                # A code absent from this report becomes NaN instead of aborting
                # the row, so the codes after it are still collected.
                row[col] = matches.iloc[0] if len(matches) else np.nan
        except Exception as exc:
            # Best effort: remember why this filer was skipped and move on.
            # `Exception` (not a bare except) lets a kernel interrupt through.
            failed_pulls[fiID] = repr(exc)
            continue
        # Store the row only once it is complete.
        key = fiID
        report_data_dict[key] = row

    print('Loading batch number ', n)
    # Rebuild the cumulative frame once per batch (not once per filer), so an
    # interrupted run still leaves the completed batches in batch_data_df.
    batch_data_df = pd.DataFrame.from_dict(report_data_dict, orient='index')
   

Loading batch number  0
Loading batch number  1
Loading batch number  2
Loading batch number  3
Loading batch number  4
Loading batch number  5
Loading batch number  6
Loading batch number  7
Loading batch number  8
Loading batch number  9
Loading batch number  10
Loading batch number  11
Loading batch number  12
Loading batch number  13
Loading batch number  14
Loading batch number  15
Loading batch number  16
Loading batch number  17
Loading batch number  18
Loading batch number  19
Loading batch number  20
Loading batch number  21
Loading batch number  22
Loading batch number  23
Loading batch number  24
Loading batch number  25
Loading batch number  26
Loading batch number  27
Loading batch number  28
Loading batch number  29
Loading batch number  30
Loading batch number  31
Loading batch number  32
Loading batch number  33
Loading batch number  34
Loading batch number  35
Loading batch number  36
Loading batch number  37
Loading batch number  38
Loading batch number  39
Loading ba

In [67]:
# Display the column labels of the frame produced by the batch pull.
batch_data_df.columns


Index(['RIAD3210', 'RCON2170', 'RCON3360', 'RCON3465', 'RCON3466', 'RCON3387',
       'RCONB561', 'RCON3123', 'RIAD4093', 'RIAD4300', 'RCON2215', 'RCON2385',
       'RCON1773', 'RIAD4150'],
      dtype='object')

In [72]:
# Replace the MDRM codes in the column index with their descriptive labels.
# rename() matches each column by its code, so the labels stay correct whatever
# the column order or count, and re-running the cell leaves the frame unchanged.
batch_data_df = batch_data_df.rename(columns=selected_financials)

# Store the labelled data as CSV.
batch_data_df.to_csv('data/healthy_after_2010Q2')

In [73]:
# Batch pull of the 3/31/2010 Call report for the filers in filers_RSSD_09302010,
# keeping only the metrics listed in `columns`.
filers = filers_RSSD_09302010
report_data_dict = {}   # filer id -> {MDRM code: reported value}
failed_pulls = {}       # filer id -> repr of the exception that stopped its pull
batches_to_pull = np.array_split(filers, 100)

# The request settings are the same for every filer, so set them once.
ds_name = 'Call'        # report series to request
end_date = '3/31/2010'  # reporting period end date
fiID_type = 'ID_RSSD'   # kind of identifier passed (original note lists ID_RSSD and CERT)
fmt = 'SDF'             # facsimile format requested
return_result = True    # ask the method to return the data

for n, batch in enumerate(batches_to_pull):
    for fiID in batch:
        try:
            f = client.retrieve_facsimile(ds_name=ds_name, reporting_pd_end=end_date,
                                          fiID_type=fiID_type, fiID=fiID,
                                          facsimile_fmt=fmt, outfile=False,
                                          return_result=return_result)
            report = StringIO(str(f, 'utf-8'))
            report_df = pd.read_csv(report, sep=';')
            row = {}
            for col in columns:
                matches = report_df.loc[report_df['MDRM #'] == col, 'Value']
                # A code absent from this report becomes NaN instead of aborting
                # the row, so the codes after it are still collected.
                row[col] = matches.iloc[0] if len(matches) else np.nan
        except Exception as exc:
            # Best effort: remember why this filer was skipped and move on.
            # `Exception` (not a bare except) lets a kernel interrupt through.
            failed_pulls[fiID] = repr(exc)
            continue
        # Store the row only once it is complete.
        key = fiID
        report_data_dict[key] = row

    print('Loading batch number ', n)
    # Rebuild the cumulative frame once per batch (not once per filer), so an
    # interrupted run still leaves the completed batches in batch_data_df.
    batch_data_df = pd.DataFrame.from_dict(report_data_dict, orient='index')

Loading batch number  0
Loading batch number  1
Loading batch number  2
Loading batch number  3
Loading batch number  4
Loading batch number  5
Loading batch number  6
Loading batch number  7
Loading batch number  8
Loading batch number  9
Loading batch number  10
Loading batch number  11
Loading batch number  12
Loading batch number  13
Loading batch number  14
Loading batch number  15
Loading batch number  16
Loading batch number  17
Loading batch number  18
Loading batch number  19
Loading batch number  20
Loading batch number  21
Loading batch number  22
Loading batch number  23
Loading batch number  24
Loading batch number  25
Loading batch number  26
Loading batch number  27
Loading batch number  28
Loading batch number  29
Loading batch number  30
Loading batch number  31
Loading batch number  32
Loading batch number  33
Loading batch number  34
Loading batch number  35
Loading batch number  36
Loading batch number  37
Loading batch number  38
Loading batch number  39
Loading ba

In [80]:
# Replace the MDRM codes in the column index with their descriptive labels.
# rename() matches each column by its code, so the labels stay correct whatever
# the column order or count, and re-running the cell leaves the frame unchanged.
# (The earlier len/describe/columns expressions were never displayed because
# they were not the last expression of the cell, so they are dropped.)
batch_data_df = batch_data_df.rename(columns=selected_financials)

# Store the labelled data as CSV.
batch_data_df.to_csv('data/healthy_after_2010Q1')

In [81]:
# Batch pull of the 12/31/2009 Call report for the filers in filers_RSSD_09302010,
# keeping only the metrics listed in `columns`.
filers = filers_RSSD_09302010
report_data_dict = {}   # filer id -> {MDRM code: reported value}
failed_pulls = {}       # filer id -> repr of the exception that stopped its pull
batches_to_pull = np.array_split(filers, 100)

# The request settings are the same for every filer, so set them once.
ds_name = 'Call'         # report series to request
end_date = '12/31/2009'  # reporting period end date
fiID_type = 'ID_RSSD'    # kind of identifier passed (original note lists ID_RSSD and CERT)
fmt = 'SDF'              # facsimile format requested
return_result = True     # ask the method to return the data

for n, batch in enumerate(batches_to_pull):
    for fiID in batch:
        try:
            f = client.retrieve_facsimile(ds_name=ds_name, reporting_pd_end=end_date,
                                          fiID_type=fiID_type, fiID=fiID,
                                          facsimile_fmt=fmt, outfile=False,
                                          return_result=return_result)
            report = StringIO(str(f, 'utf-8'))
            report_df = pd.read_csv(report, sep=';')
            row = {}
            for col in columns:
                matches = report_df.loc[report_df['MDRM #'] == col, 'Value']
                # A code absent from this report becomes NaN instead of aborting
                # the row, so the codes after it are still collected.
                row[col] = matches.iloc[0] if len(matches) else np.nan
        except Exception as exc:
            # Best effort: remember why this filer was skipped and move on.
            # `Exception` (not a bare except) lets a kernel interrupt through.
            failed_pulls[fiID] = repr(exc)
            continue
        # Store the row only once it is complete.
        key = fiID
        report_data_dict[key] = row

    print('Loading batch number ', n)
    # Rebuild the cumulative frame once per batch (not once per filer), so an
    # interrupted run still leaves the completed batches in batch_data_df.
    batch_data_df = pd.DataFrame.from_dict(report_data_dict, orient='index')

Loading batch number  0
Loading batch number  1
Loading batch number  2
Loading batch number  3
Loading batch number  4
Loading batch number  5
Loading batch number  6
Loading batch number  7
Loading batch number  8
Loading batch number  9
Loading batch number  10
Loading batch number  11
Loading batch number  12
Loading batch number  13
Loading batch number  14
Loading batch number  15
Loading batch number  16
Loading batch number  17
Loading batch number  18
Loading batch number  19
Loading batch number  20
Loading batch number  21
Loading batch number  22
Loading batch number  23
Loading batch number  24
Loading batch number  25
Loading batch number  26
Loading batch number  27
Loading batch number  28
Loading batch number  29
Loading batch number  30
Loading batch number  31
Loading batch number  32
Loading batch number  33
Loading batch number  34
Loading batch number  35
Loading batch number  36
Loading batch number  37
Loading batch number  38
Loading batch number  39
Loading ba

In [86]:
# Replace the MDRM codes in the column index with their descriptive labels.
# rename() matches each column by its code, so the labels stay correct whatever
# the column order or count, and re-running the cell leaves the frame unchanged.
# (The earlier len/describe expressions were never displayed because they were
# not the last expression of the cell, so they are dropped.)
batch_data_df = batch_data_df.rename(columns=selected_financials)

# Store the labelled data as CSV.
batch_data_df.to_csv('data/healthy_after_2009Q4')

In [89]:
# Batch pull of the 12/31/2009 Call report for the filers in filers_RSSD_09302010,
# keeping only the metrics listed in `columns`.
# NOTE(review): this requests the same period (12/31/2009) as the previous batch
# pull in this notebook -- confirm whether a different period was intended.
filers = filers_RSSD_09302010
report_data_dict = {}   # filer id -> {MDRM code: reported value}
failed_pulls = {}       # filer id -> repr of the exception that stopped its pull
batches_to_pull = np.array_split(filers, 100)

# The request settings are the same for every filer, so set them once.
ds_name = 'Call'         # report series to request
end_date = '12/31/2009'  # reporting period end date
fiID_type = 'ID_RSSD'    # kind of identifier passed (original note lists ID_RSSD and CERT)
fmt = 'SDF'              # facsimile format requested
return_result = True     # ask the method to return the data

for n, batch in enumerate(batches_to_pull):
    for fiID in batch:
        try:
            f = client.retrieve_facsimile(ds_name=ds_name, reporting_pd_end=end_date,
                                          fiID_type=fiID_type, fiID=fiID,
                                          facsimile_fmt=fmt, outfile=False,
                                          return_result=return_result)
            report = StringIO(str(f, 'utf-8'))
            report_df = pd.read_csv(report, sep=';')
            row = {}
            for col in columns:
                matches = report_df.loc[report_df['MDRM #'] == col, 'Value']
                # A code absent from this report becomes NaN instead of aborting
                # the row, so the codes after it are still collected.
                row[col] = matches.iloc[0] if len(matches) else np.nan
        except Exception as exc:
            # Best effort: remember why this filer was skipped and move on.
            # `Exception` (not a bare except) lets a kernel interrupt through.
            failed_pulls[fiID] = repr(exc)
            continue
        # Store the row only once it is complete.
        key = fiID
        report_data_dict[key] = row

    print('Loading batch number ', n)
    # Rebuild the cumulative frame once per batch (not once per filer), so an
    # interrupted run still leaves the completed batches in batch_data_df.
    batch_data_df = pd.DataFrame.from_dict(report_data_dict, orient='index')

Loading batch number  0
Loading batch number  1
Loading batch number  2
Loading batch number  3
Loading batch number  4
Loading batch number  5
Loading batch number  6
Loading batch number  7
Loading batch number  8
Loading batch number  9
Loading batch number  10
Loading batch number  11
Loading batch number  12
Loading batch number  13
Loading batch number  14
Loading batch number  15
Loading batch number  16
Loading batch number  17
Loading batch number  18
Loading batch number  19
Loading batch number  20
Loading batch number  21
Loading batch number  22
Loading batch number  23
Loading batch number  24
Loading batch number  25
Loading batch number  26
Loading batch number  27
Loading batch number  28
Loading batch number  29
Loading batch number  30
Loading batch number  31
Loading batch number  32
Loading batch number  33
Loading batch number  34
Loading batch number  35
Loading batch number  36
Loading batch number  37
Loading batch number  38
Loading batch number  39
Loading ba

In [94]:
# Replace the MDRM codes in the column index with their descriptive labels.
# rename() matches each column by its code, so the labels stay correct whatever
# the column order or count, and re-running the cell leaves the frame unchanged.
batch_data_df = batch_data_df.rename(columns=selected_financials)

# Disabled save of the labelled frame:
#batch_data_df.to_csv('data/healthy_after_2010Q3_4Q')

In [13]:
# Map each MDRM code that the CAMELS work needs to a descriptive label.
# Insertion order matters: other cells derive the column list from it.
_financial_code_labels = [
    ('RIAD3210', 'Total equity capital'),           # author's note: RCONG105 is a possible alternative
    ('RCON2170', 'Total assets'),                   # marked ok by the author
    ('RCON3360', 'Total loans'),                    # marked ok by the author
    ('RCON3465', '1-4 family residential loans'),
    ('RCON3466', 'Other real estate loans'),
    ('RCON3387', 'Commercial and industrial loans'),
    ('RCONB561', 'Credit cards'),
    ('RCON3123', 'Allowance for loan losses'),      # marked ok by the author
    ('RIAD4093', 'Total noninterest expense'),      # marked ok by the author
    ('RIAD4300', 'Net Income before'),              # added for the healthy-bank set
    ('RCON2215', 'Total transaction deposits'),     # marked ok by the author
    ('RCON2385', 'Total nontransaction deposits'),  # marked ok by the author
    ('RCON1773', 'Available-for-sale Fair Value'),
    ('RIAD4150', 'Number of full-time employees'),  # added for the healthy-bank set
]
selected_financials = dict(_financial_code_labels)

In [14]:
# Dataset for out-of-sample validation

In [17]:
# Open an FFIEC client and ask it for the filers of the 'Call' series, using
# 09/30/2011 as both the reporting period end and the last-update date.
client = FFIEC_Client()
filers_RSSD_09302011 = client.retrieve_filers_since_date(
    ds_name='Call',
    reporting_pd_end='09/30/2011',
    last_update_date='09/30/2011',
)
# Display how many filers came back (the saved output of this cell shows 6820).
len(filers_RSSD_09302011)

6820

In [18]:
# Batch pull of the 09/30/2011 Call report for the filers in filers_RSSD_09302011,
# keeping only the metrics listed in `columns`.
filers = filers_RSSD_09302011
report_data_dict = {}   # filer id -> {MDRM code: reported value}
failed_pulls = {}       # filer id -> repr of the exception that stopped its pull
batches_to_pull = np.array_split(filers, 100)

# The request settings are the same for every filer, so set them once.
ds_name = 'Call'         # report series to request
end_date = '09/30/2011'  # reporting period end date
fiID_type = 'ID_RSSD'    # kind of identifier passed (original note lists ID_RSSD and CERT)
fmt = 'SDF'              # facsimile format requested
return_result = True     # ask the method to return the data

for n, batch in enumerate(batches_to_pull):
    for fiID in batch:
        try:
            f = client.retrieve_facsimile(ds_name=ds_name, reporting_pd_end=end_date,
                                          fiID_type=fiID_type, fiID=fiID,
                                          facsimile_fmt=fmt, outfile=False,
                                          return_result=return_result)
            report = StringIO(str(f, 'utf-8'))
            report_df = pd.read_csv(report, sep=';')
            row = {}
            for col in columns:
                matches = report_df.loc[report_df['MDRM #'] == col, 'Value']
                # A code absent from this report becomes NaN instead of aborting
                # the row, so the codes after it are still collected.
                row[col] = matches.iloc[0] if len(matches) else np.nan
        except Exception as exc:
            # Best effort: remember why this filer was skipped and move on.
            # `Exception` (not a bare except) lets a kernel interrupt through.
            failed_pulls[fiID] = repr(exc)
            continue
        # Store the row only once it is complete.
        key = fiID
        report_data_dict[key] = row

    print('Loading batch number ', n)
    # Rebuild the cumulative frame once per batch (not once per filer), so an
    # interrupted run still leaves the completed batches in batch_data_df.
    batch_data_df = pd.DataFrame.from_dict(report_data_dict, orient='index')

Loading batch number  0
Loading batch number  1
Loading batch number  2
Loading batch number  3
Loading batch number  4
Loading batch number  5
Loading batch number  6
Loading batch number  7
Loading batch number  8
Loading batch number  9
Loading batch number  10
Loading batch number  11
Loading batch number  12
Loading batch number  13
Loading batch number  14
Loading batch number  15
Loading batch number  16
Loading batch number  17
Loading batch number  18
Loading batch number  19
Loading batch number  20
Loading batch number  21
Loading batch number  22
Loading batch number  23
Loading batch number  24
Loading batch number  25
Loading batch number  26
Loading batch number  27
Loading batch number  28
Loading batch number  29
Loading batch number  30
Loading batch number  31
Loading batch number  32
Loading batch number  33
Loading batch number  34
Loading batch number  35
Loading batch number  36
Loading batch number  37
Loading batch number  38
Loading batch number  39
Loading ba

In [19]:
# Number of rows in batch_data_df, i.e. how many filers have an entry.
len(batch_data_df.index)

6820

In [23]:
# Replace the MDRM codes in the column index with their descriptive labels.
# rename() matches each column by its code, so the labels stay correct whatever
# the column order or count, and re-running the cell leaves the frame unchanged.
# (The earlier describe() call was never displayed because it was not the last
# expression of the cell, so it is dropped.)
batch_data_df = batch_data_df.rename(columns=selected_financials)

# Disabled save of the labelled frame:
#batch_data_df.to_csv('data/healthy_after_2011Q3_OOS')

In [22]:
# Show the first five rows of the pulled data.
batch_data_df.head()

Unnamed: 0,Total equity capital,Total assets,Total loans,1-4 family residential loans,Other real estate loans,Commercial and industrial loans,Credit cards,Allowance for loan losses,Total noninterest expense,Net Income before,Total transaction deposits,Total nontransaction deposits,Available-for-sale Fair Value,Number of full-time employees
37,17998,84857,42000,20420,8043,2583,0,1087,1687,472,13794,52568,30760,25
242,4626,34645,21550,7552,6917,1492,0,169,669,390,10099,15792,9384,12
279,24088,147024,73834,47475,18254,2613,0,477,3470,801,21428,85072,13142,39
354,1062,9967,4088,1097,1189,387,0,194,269,7,2276,6618,5407,4
457,3611,37575,31413,7778,8650,5877,0,464,734,712,6648,24687,0,9


In [None]:
# Replace the MDRM codes in the column index with their descriptive labels.
# rename() matches each column by its code, so the labels stay correct whatever
# the column order or count; if the frame is already labelled it is a no-op.
batch_data_df = batch_data_df.rename(columns=selected_financials)

# Disabled save of the labelled frame:
#batch_data_df.to_csv('data/healthy_after_2019Q3_OOS')