In [91]:
# Load packages
import os
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [92]:
path = 'C:/Users/angel/Documents/Economics/Research/Banking Project/data/intermediate/call_reports_wrds'

# set path as working directory:
os.chdir(path)

In [154]:
# load fredgraph data:
aggregates = pd.read_csv('fredgraph.csv')

# rename some variables
aggregates.rename(columns = {'DATE':'Date', 
                             'NGDPSAXDCUSQ': 'Nominal GDP', 
                             'GDPDEF': 'Deflator'}, inplace = True)

# drop the first row:
aggregates = aggregates.iloc[1:,:]

# for all entries with '.' substitute with NaN:
aggregates = aggregates.replace('.', np.nan)

# make 'Deflator' and 'Nominal GDP' floats:
aggregates['Deflator'] = aggregates['Deflator'].astype(float)
aggregates['Nominal GDP'] = aggregates['Nominal GDP'].astype(float)

# make 'Date' a datetime object:
aggregates['Date'] = pd.to_datetime(aggregates['Date'])

# create 'Real GDP' variable:
aggregates['Real GDP'] = aggregates['Nominal GDP'] / aggregates['Deflator']

# adjust the FF rate: 
aggregates['FEDFUNDS'] = aggregates['FEDFUNDS'] / 100
aggregates['Date'] = aggregates['Date'] - pd.DateOffset(days = 1)

# compute the growth rate of Nominal GDP and the Deflator:
aggregates['Growth Rate Nominal GDP'] = aggregates['Nominal GDP'].pct_change(fill_method=None)
aggregates['Growth Rate Deflator'] = aggregates['Deflator'].pct_change(fill_method=None)

# compute the growth rate of Real GDP by subtracting the growth rate of the Deflator from the growth rate of Nominal GDP:
aggregates['Growth Rate Real GDP'] = aggregates['Growth Rate Nominal GDP'] - aggregates['Growth Rate Deflator']

In [None]:
# load call reports data:
loans = pd.read_csv('loans_unbalanced.csv', low_memory=False)
int_income = pd.read_csv('interest_income_unbalanced.csv', low_memory=False)
assets = pd.read_csv('assets_unbalanced.csv', low_memory=False)

# merge the datasets
df = pd.merge(loans[['Date', 'IDRSSD', 'RCON2122', 'Flag']], int_income, 
              on=['IDRSSD', 'Date'], how='outer', suffixes=('_RCON', '_RIAD'))

# adjust the dates
df['Date'] = pd.to_datetime(df['Date'])

# create a 'Flag' that is zero if both are zero, and 1 if either is 1:
df['Flag'] = np.where(df['Flag_RCON'] + df['Flag_RIAD'] > 0, 1, 0)

# drop the two flag columns:
df = df.drop(columns=['Flag_RCON', 'Flag_RIAD'])

# create an 'Agg. Loans' summing all the loans in a given Date:
df['Agg. Loans'] = df.groupby('Date')['RCON2122'].transform('sum')

In [None]:
# merge df with aggregates:
df = pd.merge(df[['Date', 'IDRSSD', 'Agg. Loans', 'RCON2122', 
                  'Int. Income Loans']], aggregates[['Date', 'FEDFUNDS', 'Real GDP', 'Deflator']], on='Date', how='left')

# create 'Agg. Real Loans':
df['Agg. Real Loans'] = df['Agg. Loans'] / df['Deflator']

In [None]:
# create a 'Time' variable grouping 'Dates':
df['Time'] = df.groupby('Date').ngroup()

# run regressions to detrend variables of interest 
df['De-trended Real Loans'] = sm.OLS(np.log(df['Agg. Real Loans']), sm.add_constant(df['Time'])).fit().resid
df['De-trended Real GDP'] = sm.OLS(np.log(df['Real GDP']), sm.add_constant(df['Time'])).fit().resid

In [None]:
# plot the de-trended loans using seaborn:
sns.lineplot(x='Date', y='De-trended Real Loans', data=df, linewidth=2, label='Loans')
sns.lineplot(x='Date', y='De-trended Real GDP', data=df, linewidth=2, label='GDP')
sns.lineplot(x='Date', y='FEDFUNDS', data=df, linewidth=2, label='FF Rate', color='black', linestyle='--')
# add a line at zero:
plt.axhline(y=0, color='r', linestyle='--', linewidth=2)
plt.title('De-trended Loans')
plt.xlabel('Date')
plt.ylabel('Deviations from the trend, %')
plt.legend()
plt.tight_layout()
# add the grids:
plt.grid(True, which='both', linestyle='--', lw=0.5, alpha=0.5, color='lightgrey')
plt.show()

### Computing Betas

In [195]:
df = pd.merge(int_income, assets[['Date', 'IDRSSD', 'RCON2170', 'Flag']], on=['IDRSSD', 'Date'], 
              how='outer', suffixes=('_Inc', '_Assets'))

df.rename(columns={'RCON2170':'Total Assets'}, inplace=True)
df['Date'] = pd.to_datetime(df['Date'])

df['Int. Income Assets'] = 4*df['Int. Income Loans'] / df['Total Assets']

In [196]:
# compute the differente of Int. Income Assets per ID accross time:
df['Int. Income Assets Diff'] = df.groupby('IDRSSD')['Int. Income Assets'].diff()

In [197]:
# select the banks that show up at least in 60 dates in the sample:
df = df[(df['Date'].dt.year < 2018) & (df['Total Assets']>0)]
banks = df['IDRSSD'].value_counts()
banks = banks[banks >= 60].index
df = df[(df['IDRSSD'].isin(banks))]
print('Sample of banks:', df['IDRSSD'].nunique())

Sample of banks: 9295


In [198]:
df = pd.merge(df[['Date', 'IDRSSD', 'Int. Income Assets Diff']].dropna(), aggregates[['Date', 'FEDFUNDS']], 
         on='Date', how='left')

In [199]:
# Step 1: Sort by bank and date to ensure correct ordering
df = df.sort_values(['IDRSSD', 'Date'])
df['Delta FEDFUNDS'] = df['FEDFUNDS'].diff()

# Step 3: Create lagged values of 'Delta FEDFUNDS' for up to 4 lags
for lag in range(1, 4):
    df[f'Delta FEDFUNDS_lag{lag}'] = df['Delta FEDFUNDS'].shift(lag)

# Drop rows with NaN values created by differencing and lagging
df = df.dropna().reset_index(drop=True)

# Step 4: Run the regression for each bank and store the coefficients
results = []

for bank_id, bank_data in df.groupby('IDRSSD'):
    
    # Set up the regression model
    X = bank_data[['Delta FEDFUNDS', 'Delta FEDFUNDS_lag1', 'Delta FEDFUNDS_lag2', 'Delta FEDFUNDS_lag3']]
    X = sm.add_constant(X)  # Add intercept (constant term)
    y = bank_data['Int. Income Assets Diff']
    
    # Fit the model
    model = sm.OLS(y, X).fit()
    
    # Store the coefficients with the bank ID
    coeffs = model.params
    coeffs['IDRSSD'] = bank_id
    results.append(coeffs)

In [200]:
# Convert the results to a DataFrame
results_df = pd.DataFrame(results)

# Sum all the coefficients for the lags of 'Delta FEDFUNDS', put it in a column named 'Deposit Beta':
results_df['Interest Income Beta'] = results_df[['Delta FEDFUNDS', 'Delta FEDFUNDS_lag1', 'Delta FEDFUNDS_lag2', 'Delta FEDFUNDS_lag3']].sum(axis=1)

# merge results_df[['IDRSSD', 'Deposit Beta']] with df on 'IDRSSD':
df = df.merge(results_df[['IDRSSD', 'Interest Income Beta']], on='IDRSSD')

  return umr_sum(a, axis, dtype, out, keepdims, initial, where)


In [204]:
df[(df['Interest Income Beta']<df['Interest Income Beta'].quantile(0.95)) &
   (df['Interest Income Beta']>df['Interest Income Beta'].quantile(0.05))]['Interest Income Beta'].describe()

count    866482.000000
mean          0.158023
std           0.109566
min          -0.035578
25%           0.085900
50%           0.138125
75%           0.204686
max           0.543505
Name: Interest Income Beta, dtype: float64