In [22]:
# Load packages
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [23]:
# Directory that holds the intermediate legacy call-report CSV files.
# It can be overridden through the CALL_REPORTS_LEGACY_DIR environment variable so
# the notebook also runs on machines where the default local path does not exist;
# when the variable is unset the original location is used unchanged.
path = os.environ.get(
    'CALL_REPORTS_LEGACY_DIR',
    'C:/Users/angel/Documents/Economics/Research/Banking Project/data/intermediate/call_reports_legacy',
)

# Make that directory the working directory, so bare file names resolve against it.
# os.chdir raises an OSError subclass (e.g. FileNotFoundError) if the directory is missing.
os.chdir(path)

In [24]:
# Read the two legacy call-report extracts; the bare file names are resolved
# against the current working directory.
legacy_files = {'riad': 'RIAD_legacy.csv', 'rcfd': 'RCFD_legacy.csv'}
riad = pd.read_csv(legacy_files['riad'])
rcfd = pd.read_csv(legacy_files['rcfd'])

In [25]:
# Give the three RSSD identifier columns readable names, using one shared
# mapping so both tables end up with identical column names:
#   RSSD9999 -> Date, RSSD9010 -> Financial Institution Name, RSSD9001 -> IDRSSD
rssd_labels = {
    'RSSD9999': 'Date',
    'RSSD9010': 'Financial Institution Name',
    'RSSD9001': 'IDRSSD',
}

# Renaming is done in place, so `riad` and `rcfd` keep referring to the same objects.
for report in (riad, rcfd):
    report.rename(columns=rssd_labels, inplace=True)

In [26]:
# Parse the YYYYMMDD report date of each table into a pandas datetime.
# Each frame is converted from its OWN 'Date' column: building rcfd['Date'] from
# riad['Date'] would overwrite the RCFD dates with index-aligned RIAD dates and
# corrupt any later join that uses 'Date' as a key.
riad['Date'] = pd.to_datetime(riad['Date'], format='%Y%m%d')
rcfd['Date'] = pd.to_datetime(rcfd['Date'], format='%Y%m%d')

In [27]:
# Outer-join the two tables on report date, institution id and institution name,
# so rows present in only one of the tables are kept (with NaN for the other side).
merge_keys = ['Date', 'IDRSSD', 'Financial Institution Name']
df = riad.merge(rcfd, how='outer', on=merge_keys)

In [36]:
# Split the report date into calendar year and quarter columns.
date_parts = df['Date'].dt
df['Year'] = date_parts.year
df['Quarter'] = date_parts.quarter

In [28]:
# Replace call-report item codes with descriptive column names.

# RCFD variables (Balance Sheet):
balance_sheet_labels = {
    'RCFD2170': 'Total Assets',
    'RCFD1400': 'Loans',
    'RCFD2200': 'Deposits',
    'RCFD1403': 'Loans Non-Accrual',
    'RCFD1407': 'Loans Past Due 90 Days',
    'RCFD0010': 'Cash',
}

# RIAD variables (Income Statement):
income_statement_labels = {
    'RIAD4170': 'Interest Exp. Deposits',
    'RIAD4180': 'Interest Exp. FedFunds',
    'RIAD4635': 'Charge Off Loans',
    'RIAD4605': 'Recovery Loans',
    'RIAD4130': 'Total Expenses',
    'RIAD4217': 'Expenses on premises and fixed assets',
    'RIAD4135': 'Labor expenses',
    'RIAD4079': 'Total Non-interest Income',
}

# Applied in place; codes that are not present as columns are silently ignored by rename.
df.rename(columns={**balance_sheet_labels, **income_statement_labels}, inplace=True)

In [29]:
# Each of these columns is the element-wise sum of two RIAD items.
# No fill value is used, so a missing value in either item yields NaN.
df['Interest Income Loans'] = df['RIAD4010'].add(df['RIAD4065'])
df['Dividends'] = df['RIAD4470'].add(df['RIAD4460'])

In [30]:
# These balance-sheet items are read from different RCFD columns depending on the
# report year, so each one is chosen row by row with a year cutoff.
report_year = df['Date'].dt.year
before_1994 = report_year < 1994
before_2001 = report_year < 2001
before_2002 = report_year < 2002

fed_funds_purchased_new = df['RCFDB993'] + df['RCFDB995']
df['Federal Funds Purchased'] = np.where(before_2002, df['RCFD2800'], fed_funds_purchased_new)

df['Other Borrowings'] = np.where(before_2001, df['RCFD2835'], df['RCFD3190'])

fed_funds_sold_new = df['RCFDB987'] + df['RCFDB989']
df['FedFunds Sold'] = np.where(before_2002, df['RCFD1350'], fed_funds_sold_new)

treasuries_new = df['RCFD0211'] + df['RCFD1287']
df['US Treasury Securities'] = np.where(before_1994, df['RCFD0400'], treasuries_new)
# Still missing here: US agency obligations.

In [31]:
# Income-statement items whose source columns switch at report year 2001.
report_year = df['Date'].dt.year
pre_2001 = report_year < 2001

df['Interest Income Safe Securities'] = np.where(pre_2001, df['RIAD4027'], df['RIADB488'])

# Both variants add RIADB510; only the first term differs across the cutoff.
equity_issuance_old = df['RIAD4346'] + df['RIADB510']
equity_issuance_new = df['RIADB509'] + df['RIADB510']
df['Equity Issuance'] = np.where(pre_2001, equity_issuance_old, equity_issuance_new)

In [32]:
# 'US Agency Obligations' is assembled from a different set of RCFD columns in each
# of four reporting windows, selected by report year:
#   up to 1993, 1994-2008, 2009-2010, and 2011 onwards.
report_year = df['Date'].dt.year

# Four columns shared by every window from 1994 onwards.
agency_core = df['RCFD1289'] + df['RCFD1294'] + df['RCFD1293'] + df['RCFD1298']

# 1994-2008: core columns plus eight further RCFD1xxx columns.
agency_1994_2008 = (
    agency_core
    + df['RCFD1698'] + df['RCFD1702'] + df['RCFD1703'] + df['RCFD1707']
    + df['RCFD1714'] + df['RCFD1717'] + df['RCFD1718'] + df['RCFD1732']
)

# 2011 onwards: core columns plus the RCFDG300-RCFDG319 group.
# TODO: RCFDK142 and RCFDK145 are still missing from this sum.
agency_from_2011 = (
    agency_core
    + df['RCFDG300'] + df['RCFDG303'] + df['RCFDG304'] + df['RCFDG307']
    + df['RCFDG312'] + df['RCFDG315'] + df['RCFDG316'] + df['RCFDG319']
)

# 2009-2010: the same columns as above, followed by RCFDG324 and RCFDG327.
agency_2009_2010 = agency_from_2011 + df['RCFDG324'] + df['RCFDG327']

# Nested selection: the first cutoff that a row's year falls below decides its value.
df['US Agency Obligations'] = np.where(
    report_year < 1994,
    df['RCFD0600'],
    np.where(
        report_year < 2009,
        agency_1994_2008,
        np.where(report_year < 2011, agency_2009_2010, agency_from_2011),
    ),
)

In [34]:
# Derived variables (the ones that don't require any RCFA variable).
# The statements are order-dependent: later columns reuse columns created just above.
loans = df['Loans']
deposits = df['Deposits']
loan_income = df['Interest Income Loans']
deposit_expense = df['Interest Exp. Deposits']
fedfunds_expense = df['Interest Exp. FedFunds']
fedfunds_purchased = df['Federal Funds Purchased']

# Loan and deposit pricing.
df['Interest Return on Loans'] = loan_income / loans
df['Interest Cost Deposits'] = deposit_expense / deposits
df['Loan Interest Margin'] = loan_income - deposit_expense
df['Cost Fed Funds'] = fedfunds_expense / fedfunds_purchased

# Loan quality, both expressed relative to total loans.
net_charge_offs = df['Charge Off Loans'] - df['Recovery Loans']
df['Charge Off Rate Loans'] = net_charge_offs / loans
troubled_loans = df['Loans Non-Accrual'] + df['Loans Past Due 90 Days']
df['Delinquency Rate Loans'] = troubled_loans / loans

# Safe securities and the overall cost of funding.
df['Safe Securities'] = df['US Treasury Securities'] + df['US Agency Obligations']
funding_expense = deposit_expense + fedfunds_expense
funding_base = deposits + fedfunds_purchased
df['Cost of Funds'] = funding_expense / funding_base
df['Interest Return on Safe Assets'] = df['Interest Income Safe Securities'] / df['Safe Securities']

# Spreads built from the ratios computed above.
df['Return Safe Securities'] = df['Interest Return on Safe Assets'] - df['Cost of Funds']
df['Return on Loans'] = df['Interest Return on Loans'] - df['Charge Off Rate Loans']