In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from ffipy import FFIEC_Client
from io import StringIO
import time

In [2]:
client = FFIEC_Client()
# get a list of RSSD codes for filers on 9/30/2010
filers_RSSD_09302010 = client.retrieve_filers_since_date(ds_name='Call',
                                   reporting_pd_end='9/30/2010',
                                   last_update_date='9/30/2010')
# check how many filers - 5908
len(filers_RSSD_09302010)

7094

In [3]:
# Testing Call report for 1 filer
ds_name = 'Call'    # Pull Call report data
end_date = '9/30/2010'  # Pull data in reporting pd ending 3/31/17
fiID_type = 'ID_RSSD'  # Type of financial inst identifier
fiID = 688556    # Indentifier for Wyomin Bank and Trust
fmt = 'SDF'  # Pull report as PDF
# <facsimileFormat>PDF or XBRL or SDF</facsimileFormat>
outfile = None  # Output file path
return_result = True  # If True, method returns the data
results_bytes = client.retrieve_facsimile(ds_name=ds_name, reporting_pd_end=end_date,
                              fiID_type=fiID_type, fiID=fiID,
                              facsimile_fmt=fmt, outfile=None,
                              return_result=True)
report = StringIO(str(results_bytes, 'utf-8'))
report_df = pd.read_csv(report, sep=';')



In [4]:
report_df.head()

Unnamed: 0,Call Date,Bank RSSD Identifier,MDRM #,Value,Last Update,Short Definition,Call Schedule,Line Number
0,20100930,688556,RCON0010,14201,20101012,Cash and balances dues from depository institu...,RCR,34
1,20100930,688556,RCON0020,0,20101012,Cash items in process of collection and unpost...,RCA,1a
2,20100930,688556,RCON0071,12625,20101012,Interest-bearing balances,RC,1b
3,20100930,688556,RCON0073,0,20101012,Foreign branches of other U.S. banks,RCA,3a
4,20100930,688556,RCON0074,0,20101012,Other banks in foreign countries and foreign c...,RCA,3b


In [14]:
# Prepare dictionary of selected financials that are needed for CAMELS
selected_financials = {'RIAD3210' : 'Total equity capital', # Alt can be RCONG105
                       'RCON2170' : 'Total assets', # Ok
                       'RCON3360' : 'Total loans', #ok
                       'RCON3465' : '1-4 family residential loans', 
                       'RCON3466' : 'Other real estate loans',
                       'RCON3387' : 'Commercial and industrial loans',
                       'RCONB561' : 'Credit cards',
                       'RCON3123' : 'Allowance for loan losses', # OK
                       'RIAD4093' : 'Total noninterest expense', #OK
                       'RIAD4340' : 'Net Income attributable', # ok
                       'RIAD4300' : 'Net Income before', # new for healthy
                       'RCON2215' : 'Total transaction deposits', # ok
                       'RCON2385' : 'Total nontransaction deposits', # ok
                       'RCON1773' : 'Available-for-sale Fair Value',
                       'RIAD4150' : 'Number of full-time equivalent employees' # new for healthy
                      } 


In [15]:
# Expected 13 columns, store column names in the list columns
len(selected_financials.keys())
columns = list(selected_financials.keys())
len(columns)

15

In [16]:
temp = filers_RSSD_09302010[0:3]

In [None]:
# Pull 7000 reports for filers_RSSD_09302010, drop all values but 13 selected metrics -- returns only 27
all_data_df = pd.DataFrame()
fin_data_dic = {}
for i in range(len(filers_RSSD_09302010)):

    ds_name = 'Call'    # Pull Call report data
    end_date = '9/30/2010'  # Pull data in reporting pd ending 3/31/17
    fiID_type = 'ID_RSSD'  # Type of financial inst identifier (ID_RSSD, CERT )
    fiID = filers_RSSD_09302010[i]   # Indentifier for Wyomin Bank and Trust
    print("counter at ", i, " pulling RSSD# ", fiID)
    fmt = 'SDF'  # Pull report as PDF
    # <facsimileFormat>PDF or XBRL or SDF</facsimileFormat>
    #outfile = 'test8.PDF'  # Output file path
    return_result = True  # If True, method returns the data
    try:
        f = client.retrieve_facsimile(ds_name=ds_name, reporting_pd_end=end_date,
                                      fiID_type=fiID_type, fiID=fiID,
                                      facsimile_fmt=fmt, outfile=False,
                                      return_result=True)
        report = StringIO(str(f, 'utf-8'))
        report_df = pd.read_csv(report, sep=';')
        report_df.columns = ['Call Date', 'Bank RSSD Identifier', 'ValueID', 'Value', 'Last Update',
       'Short Definition', 'Call Schedule', 'Line Number']
        values = list(report_df[report_df.ValueID.isin(columns)].Value)
        fin_data_dic[fiID] = values
        #all_data_df = all_data_df.append(report_df, ignore_index=True)
    except:
        next 

counter at  0  pulling RSSD#  175458
counter at  1  pulling RSSD#  660066
counter at  2  pulling RSSD#  856159
counter at  3  pulling RSSD#  1001451
counter at  4  pulling RSSD#  723158
counter at  5  pulling RSSD#  522669
counter at  6  pulling RSSD#  431846
counter at  7  pulling RSSD#  854351
counter at  8  pulling RSSD#  178057
counter at  9  pulling RSSD#  652753
counter at  10  pulling RSSD#  835743
counter at  11  pulling RSSD#  980438
counter at  12  pulling RSSD#  1216992
counter at  13  pulling RSSD#  371849
counter at  14  pulling RSSD#  913641
counter at  15  pulling RSSD#  734949
counter at  16  pulling RSSD#  97943
counter at  17  pulling RSSD#  365875
counter at  18  pulling RSSD#  24659
counter at  19  pulling RSSD#  898850
counter at  20  pulling RSSD#  630340
counter at  21  pulling RSSD#  910659
counter at  22  pulling RSSD#  595038
counter at  23  pulling RSSD#  116732
counter at  24  pulling RSSD#  489249
counter at  25  pulling RSSD#  386357
counter at  26  pullin

counter at  214  pulling RSSD#  453446
counter at  215  pulling RSSD#  504647
counter at  216  pulling RSSD#  51253
counter at  217  pulling RSSD#  376068
counter at  218  pulling RSSD#  3649624
counter at  219  pulling RSSD#  272151
counter at  220  pulling RSSD#  111559
counter at  221  pulling RSSD#  685733
counter at  222  pulling RSSD#  340443
counter at  223  pulling RSSD#  563457
counter at  224  pulling RSSD#  121651
counter at  225  pulling RSSD#  141350
counter at  226  pulling RSSD#  3571015
counter at  227  pulling RSSD#  242949
counter at  228  pulling RSSD#  85454
counter at  229  pulling RSSD#  641252
counter at  230  pulling RSSD#  601032
counter at  231  pulling RSSD#  472045
counter at  232  pulling RSSD#  856542
counter at  233  pulling RSSD#  976347
counter at  234  pulling RSSD#  552956
counter at  235  pulling RSSD#  825847
counter at  236  pulling RSSD#  426141
counter at  237  pulling RSSD#  715032
counter at  238  pulling RSSD#  924058
counter at  239  pulling 

In [10]:
fin_data_dic

{175458: ['3902',
  '21653',
  '4651',
  '14104',
  '186',
  '12287',
  '1896',
  '4825',
  '1176',
  '0',
  '2405',
  '423',
  '177'],
 660066: ['27329',
  '54633',
  '14683',
  '32036',
  '20',
  '21577',
  '625',
  '3941',
  '12662',
  '0',
  '7822',
  '635',
  '1134'],
 856159: ['3342',
  '17735',
  '5350',
  '9925',
  '171',
  '11137',
  '1074',
  '3899',
  '3894',
  '0',
  '2419',
  '404',
  '210']}

In [11]:
# Inspect the data pulled
healthy_after_2010Q3 = pd.DataFrame.from_dict(fin_data_dic, orient='index')
healthy_after_2010Q3.columns = columns
values = list(selected_financials.values())
healthy_after_2010Q3.columns = values
healthy_after_2010Q3


#store the data
#healthy_after_2010Q3.to_csv('data/healthy_after_2010Q3')

Unnamed: 0,Total equity capital,Total assets,Total loans,1-4 family residential loans,Other real estate loans,Commercial and industrial loans,Credit cards,Allowance for loan losses,Total noninterest expense,Net Income attributable,Total transaction deposits,Total nontransaction deposits,Available-for-sale Fair Value
175458,3902,21653,4651,14104,186,12287,1896,4825,1176,0,2405,423,177
660066,27329,54633,14683,32036,20,21577,625,3941,12662,0,7822,635,1134
856159,3342,17735,5350,9925,171,11137,1074,3899,3894,0,2419,404,210
