# Call Reports Analysis

##### Housekeeping and loading data

In [1]:
# Load packages
import os
import statsmodels.api as sm
import statsmodels.formula.api as smf
from linearmodels import PanelOLS
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Data locations. The defaults are the original local folders; set the BANKING_DATA_DIR /
# BANKING_OUTPUT_DIR environment variables to run the notebook on another machine.
path = os.environ.get('BANKING_DATA_DIR',
                      'C:/Users/angel/Documents/Economics/Research/Banking Project/data/clean')
path_output = os.environ.get('BANKING_OUTPUT_DIR',
                             'C:/Users/angel/Documents/Economics/Research/Banking Project/data/output')

# set colorblind theme for plots:
sns.set_theme(context='notebook', style="ticks", palette='colorblind')
sns.set_color_codes(palette='colorblind')

# Make the clean-data folder the working directory so the CSV files can be read by bare file name:
os.chdir(path)

In [3]:
# Read a single data row of 'call_reports.csv' -- enough to inspect the column names cheaply:
sample = pd.read_csv('call_reports.csv', nrows=1)

# Columns whose name contains a merge suffix ('_x' or '_y'); the '_x' columns are listed first:
cols_x = list(filter(lambda name: '_x' in name, sample.columns))
cols_y = list(filter(lambda name: '_y' in name, sample.columns))

problem_cols = [*cols_x, *cols_y]
problem_cols

['RCFD1754_x',
 'RCFD1773_x',
 'RCON1754_x',
 'Unnamed: 79_x',
 'Unnamed: 241_x',
 'Unnamed: 88_x',
 'RCFD1754_y',
 'RCFD1773_y',
 'RCON1754_y',
 'Unnamed: 79_y',
 'Unnamed: 241_y',
 'Unnamed: 88_y']

In [4]:
# Columns to load from 'call_reports.csv' (each name listed once), grouped as:
# RCON/RCFD balance-sheet items, RIAD items, and identifiers.
variables = ['RCON2170', 'RCFD2170', 'RCON2122', 'RCFD2122', 'RCON2200', 'RCON1754', 'RCFD1754',
             'RCON1772', 'RCON0010', 'RCON0071', 'RCON0081', 'RCFD1754_x', 'RCFD1754_y', 'RCON1754_x', 'RCON1754_y',
             'RIAD4073', 'RIAD4200', 'RIAD4185', 'RIAD4180', 'RIAD4172',
             'Date', 'IDRSSD', 'Financial Institution Name']

In [5]:
# Load only the columns listed in `variables`, parsing 'Date' as a datetime,
# and derive the calendar 'Year' of each report date:
main = pd.read_csv('call_reports.csv', usecols=variables, parse_dates=['Date'])
main['Year'] = main['Date'].dt.year

##### Checking important variables

In [6]:
# Earliest and latest 'Date' on which RCFD1754 is reported, first through the suffixed
# columns ('RCFD1754_x' or 'RCFD1754_y'), then through the unsuffixed 'RCFD1754' column:
for reported in (main['RCFD1754_x'].notnull() | main['RCFD1754_y'].notnull(),
                 main['RCFD1754'].notnull()):
    print(main.loc[reported, 'Date'].min())
    print(main.loc[reported, 'Date'].max())

2001-03-31 00:00:00
2018-12-31 00:00:00
2019-03-31 00:00:00
2024-09-30 00:00:00


In [7]:
# Earliest and latest 'Date' on which RCON1754 is reported, first through the suffixed
# columns ('RCON1754_x' or 'RCON1754_y'), then through the unsuffixed 'RCON1754' column:
for reported in (main['RCON1754_x'].notnull() | main['RCON1754_y'].notnull(),
                 main['RCON1754'].notnull()):
    print(main.loc[reported, 'Date'].min())
    print(main.loc[reported, 'Date'].max())

2001-03-31 00:00:00
2018-12-31 00:00:00
2019-03-31 00:00:00
2024-09-30 00:00:00


In [8]:
# For each series, count the rows where both suffixed copies are reported but their values differ:
for code in ('RCON1754', 'RCFD1754'):
    copy_x = main[code + '_x']
    copy_y = main[code + '_y']
    disagree = copy_x.notnull() & copy_y.notnull() & (copy_x - copy_y != 0)
    print(f'The number of rows in which {code}_x and {code}_y are both reported and different is:',
          len(main[disagree]))


The number of rows in which RCON1754_x and RCON1754_y are both reported and different is: 0
The number of rows in which RCFD1754_x and RCFD1754_y are both reported and different is: 0


##### Basic Definitions

###### Balance sheet definitions

In [9]:
# 'Total Assets': use RCON2170 whenever it is reported and fall back to RCFD2170 otherwise,
# so the value is NaN only when both series are missing.
main['Total Assets'] = main['RCON2170'].combine_first(main['RCFD2170'])

# Summary statistics of the two inputs and of the combined series (before zeros are removed):
for column in ('RCON2170', 'RCFD2170', 'Total Assets'):
    print(main[column].describe())

# A value of exactly zero is treated as missing:
main['Total Assets'] = main['Total Assets'].where(main['Total Assets'] != 0)

count    6.351330e+05
mean     9.755156e+05
std      1.244043e+07
min      0.000000e+00
25%      6.975000e+04
50%      1.522690e+05
75%      3.686870e+05
max      1.445545e+09
Name: RCON2170, dtype: float64
count    9.589000e+03
mean     1.036920e+08
std      3.150145e+08
min      4.264000e+03
25%      1.455077e+06
50%      1.235404e+07
75%      6.758438e+07
max      3.584105e+09
Name: RCFD2170, dtype: float64
count    6.399020e+05
mean     2.114301e+06
std      3.944563e+07
min      0.000000e+00
25%      7.020225e+04
50%      1.539290e+05
75%      3.772038e+05
max      3.584105e+09
Name: Total Assets, dtype: float64


In [10]:
# 'Total Loans': RCON2122 when reported, otherwise RCFD2122.
main['Total Loans'] = main['RCON2122'].combine_first(main['RCFD2122'])

# Summary statistics of the two inputs and of the combined series (before zeros are removed):
for column in ('RCON2122', 'RCFD2122', 'Total Loans'):
    print(main[column].describe())

# A value of exactly zero is treated as missing; print the statistics again after the change:
main['Total Loans'] = main['Total Loans'].where(main['Total Loans'] != 0)
print(main['Total Loans'].describe())

count    6.399020e+05
mean     1.115855e+06
std      1.638202e+07
min      0.000000e+00
25%      3.991900e+04
50%      9.623600e+04
75%      2.468268e+05
max      1.219816e+09
Name: RCON2122, dtype: float64
count    9.589000e+03
mean     5.150065e+07
std      1.402738e+08
min      0.000000e+00
25%      7.367520e+05
50%      7.377421e+06
75%      3.715684e+07
max      1.353071e+09
Name: RCFD2122, dtype: float64
count    6.399020e+05
mean     1.115855e+06
std      1.638202e+07
min      0.000000e+00
25%      3.991900e+04
50%      9.623600e+04
75%      2.468268e+05
max      1.219816e+09
Name: Total Loans, dtype: float64
count    6.294910e+05
mean     1.134310e+06
std      1.651630e+07
min      1.000000e+00
25%      4.195200e+04
50%      9.887900e+04
75%      2.515910e+05
max      1.219816e+09
Name: Total Loans, dtype: float64


  main['Total Loans'] = main['RCON2122'].combine_first(main['RCFD2122'])


In [11]:
# 'Total Deposits' is the RCON2200 column under a readable name; exact zeros become NaN.
main = main.rename(columns={'RCON2200': 'Total Deposits'})
main['Total Deposits'] = main['Total Deposits'].where(main['Total Deposits'] != 0)
print(main['Total Deposits'].describe())

count    6.333120e+05
mean     1.459541e+06
std      2.552438e+07
min      1.000000e+00
25%      5.953400e+04
50%      1.298275e+05
75%      3.129310e+05
max      2.201118e+09
Name: Total Deposits, dtype: float64


In [12]:
# 'Cash': RCON0010 when it is reported, otherwise the sum RCON0071 + RCON0081.
# Only strictly positive values are kept; zero and negative values become NaN.
main['Cash'] = main['RCON0010'].combine_first(main['RCON0071'] + main['RCON0081'])
main['Cash'] = main['Cash'].where(main['Cash'] > 0)
print(main['Cash'].describe())

count    6.390710e+05
mean     1.715995e+05
std      4.239690e+06
min      1.000000e+00
25%      3.374000e+03
50%      8.435000e+03
75%      2.253550e+04
max      5.436850e+08
Name: Cash, dtype: float64


In [21]:
# For each series prefer the '_x' copy and fall back to the unsuffixed column
# (the '_y' copy is not consulted here):
main['RCON1754_right'] = main['RCON1754_x'].combine_first(main['RCON1754'])
main['RCFD1754_right'] = main['RCFD1754_x'].combine_first(main['RCFD1754'])

# '1754_right' is the row-wise maximum of the two series. NaNs are skipped, so:
#   - both reported     -> the larger of the two values
#   - only one reported -> that value
#   - neither reported  -> NaN
main['1754_right'] = main[['RCFD1754_right', 'RCON1754_right']].max(axis=1)



In [25]:
# Rows where RCON1754_right is reported and RCFD1754_right is missing. No value comparison is
# needed: with RCFD missing the difference RCON - RCFD is NaN, which always compares != 0.
only_rcon = main['RCON1754_right'].notnull() & main['RCFD1754_right'].isnull()
main.loc[only_rcon, ['Date', 'RCON1754_right', 'RCFD1754_right', '1754_right']]

Unnamed: 0,Date,RCON1754_right,RCFD1754_right,1754_right
0,2001-03-31,0.0,,0.0
1,2001-03-31,38742.0,,38742.0
2,2001-03-31,116.0,,116.0
3,2001-03-31,3191.0,,3191.0
4,2001-03-31,5298.0,,5298.0
...,...,...,...,...
639897,2023-12-31,0.0,,0.0
639898,2023-12-31,0.0,,0.0
639899,2023-12-31,0.0,,0.0
639900,2023-12-31,0.0,,0.0


In [14]:
# 'Securities AC' = '1754_right' + 'RCON1772'. A missing component counts as 0 when the other
# one is reported; the result is NaN only when both are missing (min_count=1 requires at least
# one non-NaN value per row). Vectorised, so no Python function is called per row.
main['Securities AC'] = main[['1754_right', 'RCON1772']].sum(axis=1, min_count=1)

# Only strictly positive values are kept; zero and negative values become NaN:
main['Securities AC'] = main['Securities AC'].mask(main['Securities AC'] <= 0, np.nan)
print(main['Securities AC'].describe())

count    6.165940e+05
mean     2.119523e+05
std      4.478091e+06
min      1.000000e+00
25%      1.113100e+04
50%      2.871750e+04
75%      7.480675e+04
max      6.830540e+08
Name: Securities AC, dtype: float64


In [None]:
# 'Deposit Share': each bank's 'Total Deposits' divided by the sum of 'Total Deposits'
# over all banks reporting on the same 'Date':
deposits_by_date = main.groupby('Date')['Total Deposits'].transform('sum')
main['Deposit Share'] = main['Total Deposits'].div(deposits_by_date)
print(main['Deposit Share'].describe())

###### Income Definitions

In [29]:
# 'Deposit Expenditure' = RIAD4073 minus RIAD4200, RIAD4185, RIAD4180 and RIAD4172,
# with every missing component treated as 0:
deposit_expenditure = main['RIAD4073'].fillna(0)
for component in ('RIAD4200', 'RIAD4185', 'RIAD4180', 'RIAD4172'):
    deposit_expenditure = deposit_expenditure - main[component].fillna(0)
main['Deposit Expenditure'] = deposit_expenditure

In [None]:
# 'Actual Deposit Exp': change in 'Deposit Expenditure' from the previous report of the same bank
# within the same calendar year; the first report of each (bank, year) keeps its level.
# The rows are ordered by bank and date before differencing so the result does not depend on the
# row order of the CSV; the assignment aligns on the index, so `main` itself is not reordered.
ordered = main.sort_values(['IDRSSD', 'Date'])
within_year_change = ordered.groupby(['IDRSSD', 'Year'])['Deposit Expenditure'].diff()
main['Actual Deposit Exp'] = within_year_change.fillna(main['Deposit Expenditure'])

# Only strictly positive values are kept; zero and negative values become NaN:
main['Actual Deposit Exp'] = main['Actual Deposit Exp'].mask(main['Actual Deposit Exp'] <= 0, np.nan)
main['Actual Deposit Exp'].describe()

In [None]:
# 'Deposit Rate': 'Actual Deposit Exp' per unit of 'Total Deposits':
main['Deposit Rate'] = main['Actual Deposit Exp'].div(main['Total Deposits'])
print(main['Deposit Rate'].describe())

In [None]:
# Cross-sectional mean and standard deviation of 'Deposit Rate' on each 'Date', and the
# standardised rate 'R_hat' = (rate - mean on that date) / std on that date:
rate_by_date = main.groupby('Date')['Deposit Rate']
main['Avg. Deposit Rate'] = rate_by_date.transform('mean')
main['Std. Deposit Rate'] = rate_by_date.transform('std')
main['R_hat'] = main['Deposit Rate'].sub(main['Avg. Deposit Rate']).div(main['Std. Deposit Rate'])
main['R_hat'].describe()

In [37]:
# Switch: set to 1 to keep only the banks that appear on every date of the sample.
balanced_panel = 0

if balanced_panel == 1:

    # Number of distinct dates in the dataset
    total_dates = main['Date'].nunique()
    print('Number of dates:', total_dates)

    # Number of distinct dates on which each bank appears
    bank_date_counts = main.groupby('IDRSSD')['Date'].nunique()
    in_every_date = bank_date_counts == total_dates
    print('Banks that show up in all dates:', in_every_date.sum())

    # Keep the banks present on all dates (presence only -- deposits are not checked here)
    banks = bank_date_counts[in_every_date].index
    main = main.loc[main['IDRSSD'].isin(banks)]



## Aggregate Data

In [None]:
# Load monetary policy shock data:
mp_shocks = pd.read_csv('brw-shock-series.csv')

# Drop the columns that have 'Unnamed' in their name, then keep the first 4 remaining columns:
mp_shocks = mp_shocks.loc[:, ~mp_shocks.columns.str.contains('^Unnamed')]
mp_shocks = mp_shocks.iloc[:, 0:4].copy()

# Strip the literal ' (updated)' suffix from the column names. regex=False is explicit because
# the parentheses would be read as a regex group on pandas versions where regex defaults to True,
# leaving the names unchanged.
mp_shocks.columns = mp_shocks.columns.str.replace(' (updated)', '', regex=False)

# Replace the 'm' in the month labels with '-' so they can be parsed with the '%Y-%m' format:
mp_shocks['month'] = mp_shocks['month'].str.replace('m', '-', regex=False)

# Keep the month and the monthly shock, dropping rows where either is missing:
mp_shocks = mp_shocks[['month', 'BRW_monthly']].dropna()
mp_shocks['month'] = pd.to_datetime(mp_shocks['month'], format='%Y-%m')

# Quarterly shock = SUM of the monthly shocks within each calendar quarter:
mp_shocks['quarter'] = mp_shocks['month'].dt.to_period('Q')
mp_shocks['BRW_quarterly'] = mp_shocks.groupby('quarter')['BRW_monthly'].transform('sum')

# Keep one row per quarter. Sorting first guarantees the row kept is the earliest month of the
# quarter regardless of the row order in the file.
mp_shocks = mp_shocks.sort_values('month').drop_duplicates(subset='quarter', keep='first')

# NOTE(review): 'Date' is the day before the first month kept for each quarter, i.e. the last
# day of the PREVIOUS quarter -- confirm this is the intended alignment with the call-report dates.
mp_shocks['Date'] = mp_shocks['month'] - pd.DateOffset(days = 1)

mp_shocks = mp_shocks[['Date', 'BRW_quarterly']].rename(columns={'BRW_quarterly': 'MP Shocks'})

# 'Positive FF Shock' is 1 when the quarterly shock is strictly positive and 0 otherwise:
mp_shocks['Positive FF Shock'] = 0
mp_shocks.loc[mp_shocks['MP Shocks'] > 0, 'Positive FF Shock'] = 1

In [None]:
# Load the Excess Bond Premium data.
# NOTE(review): `ebp` is not referenced by any other cell in the visible part of the notebook.
ebp = pd.read_csv('ebp_csv.csv')

In [None]:
# Load the macro series stored in 'fredgraph.csv' and build the macro state indicators
# ('High Inflation', 'FF Hike', 'Recession') used later to split the sample.
aggregates = pd.read_csv('fredgraph.csv')

# rename the date, nominal GDP, CPI and GDP-deflator columns to readable names
aggregates.rename(columns = {'DATE':'Date', 
                             'NGDPSAXDCUSQ': 'Nominal GDP', 
                             'CPIAUCSL_NBD19840101': 'CPI',
                             'GDPDEF': 'Deflator'}, inplace = True)

# drop the first row:
aggregates = aggregates.iloc[1:,:]

# entries equal to '.' are placeholders for missing values; substitute them with NaN:
aggregates = aggregates.replace('.', np.nan)

# make 'Deflator', 'Nominal GDP', 'FEDFUNDS' and 'CPI' floats:
aggregates['Deflator'] = aggregates['Deflator'].astype(float)
aggregates['Nominal GDP'] = aggregates['Nominal GDP'].astype(float)
aggregates['FEDFUNDS'] = aggregates['FEDFUNDS'].astype(float)
aggregates['CPI'] = aggregates['CPI'].astype(float)

# make 'Date' a datetime object:
aggregates['Date'] = pd.to_datetime(aggregates['Date'])

# create 'Real GDP' variable, then drop every row that still has a missing value.
# NOTE(review): nominal GDP is deflated with 'CPI', not with the 'Deflator' column -- confirm intended.
aggregates['Real GDP'] = aggregates['Nominal GDP'] / aggregates['CPI'] * 100
aggregates = aggregates.dropna()

# express the FF rate as a fraction instead of percent, and move every date back by one day.
# NOTE(review): presumably the file dates are period starts, so this maps each observation to the
# last day of the PREVIOUS period -- confirm the alignment with the call-report dates.
aggregates['FEDFUNDS'] = aggregates['FEDFUNDS'] / 100
aggregates['Date'] = aggregates['Date'] - pd.DateOffset(days = 1)

# create 'Inflation' as the period-over-period growth rate of the CPI:
aggregates['Inflation'] = aggregates['CPI'].pct_change(fill_method=None)

# create a variable named 'High Inflation' as 1 if the inflation is above its sample average, and 0 otherwise:
aggregates['High Inflation'] = 0
aggregates.loc[aggregates['Inflation'] > aggregates['Inflation'].mean(), 'High Inflation'] = 1

# create a variable named 'FF Hike' as 1 if the FF rate is above its value in the previous row, and 0 otherwise:
aggregates['FF Hike'] = 0
aggregates.loc[aggregates['FEDFUNDS'] > aggregates['FEDFUNDS'].shift(1), 'FF Hike'] = 1

# smoothing parameter of the HP filter
param = 1600

# De-trend log real GDP using an HP filter (hpfilter returns (cycle, trend); [0] keeps the cycle):
aggregates['De-trended Real GDP'] = sm.tsa.filters.hpfilter(np.log(aggregates['Real GDP']), lamb=param)[0]

# create a variable named 'Recession' that is 1 if the De-trended Real GDP is below 0, and 0 otherwise:
aggregates['Recession'] = 0
aggregates.loc[aggregates['De-trended Real GDP'] < 0, 'Recession'] = 1

In [None]:
# Load the mergers-and-acquisitions ("transformations") file and give its columns readable names:
transformations = pd.read_csv('CSV_TRANSFORMATIONS.csv').rename(
    columns={
        '#ID_RSSD_PREDECESSOR': 'Predecessor ID',
        'ID_RSSD_SUCCESSOR': 'Successor ID',
        'DT_TRANS': 'Transaction Date',
        'TRNSFM_CD': 'Transaction Code',
    }
)

# Parse the YYYYMMDD transaction dates, then keep only the four renamed columns:
transformations['Transaction Date'] = pd.to_datetime(transformations['Transaction Date'], format='%Y%m%d')
transformations = transformations[['Predecessor ID', 'Successor ID', 'Transaction Date', 'Transaction Code']]

In [None]:
# Attach the macro aggregates and the monetary-policy shocks to the bank panel by 'Date'.
# Later cells read 'CPI', 'FEDFUNDS', 'High Inflation', 'FF Hike', 'Recession' and
# 'Positive FF Shock' from `main`, so these merges must run on a fresh kernel.
# Each merge is skipped when its columns are already present, so re-running the cell does not
# create duplicated '_x' / '_y' columns.
if 'CPI' not in main.columns:
    main = pd.merge(main, aggregates, how = 'left', on = 'Date')
if 'MP Shocks' not in main.columns:
    main = pd.merge(main, mp_shocks, how = 'left', on = 'Date')
main = main.sort_values(by = ['IDRSSD', 'Date'])

## Start Analysis

In [None]:
def compute_growth(df, transformations, window=1):
    """
    Compute real deposit and loan log-growth per bank, blanking observations around
    mergers, acquisitions and other transformations.

    Args:
    - df (pd.DataFrame): Bank-level data with the columns 'IDRSSD', 'Date', 'Total Deposits',
      'Total Loans' and 'CPI'. The input frame is not modified.
    - transformations (pd.DataFrame): One row per transformation, with the columns
      'Predecessor ID', 'Successor ID' and 'Transaction Date'.
    - window (int): Number of quarters (3-month steps) before and after a transaction date in
      which the growth rates of the banks involved are set to NaN. Both ends are inclusive.

    Returns:
    - pd.DataFrame: A copy of 'df' sorted by ['IDRSSD', 'Date'] with the added columns
      'Real Deposits', 'Real Loans', 'Deposit_Growth' and 'Loan_Growth'.
    """
    # Sort by bank ID and date so that the within-bank difference is taken between consecutive reports
    df = df.sort_values(by=['IDRSSD', 'Date'])
    df['Real Deposits'] = df['Total Deposits'] / df['CPI'] * 100
    df['Real Loans'] = df['Total Loans'] / df['CPI'] * 100

    # Log difference (log growth rate) within each bank
    df['Deposit_Growth'] = np.log(df['Real Deposits']).groupby(df['IDRSSD']).diff()
    df['Loan_Growth'] = np.log(df['Real Loans']).groupby(df['IDRSSD']).diff()

    # One (bank, transaction date) event per side of every transformation. Events with a missing
    # ID or date, and events for banks that are not in `df`, cannot match any row and are dropped.
    events = pd.concat(
        [transformations[[id_column, 'Transaction Date']].rename(columns={id_column: 'IDRSSD'})
         for id_column in ('Predecessor ID', 'Successor ID')],
        ignore_index=True,
    ).dropna(subset=['IDRSSD', 'Transaction Date'])
    events = events[events['IDRSSD'].isin(df['IDRSSD'].unique())].drop_duplicates()

    if not events.empty:
        # Date range around each transaction
        offset = pd.DateOffset(months=3 * window)
        events = events.assign(_start=events['Transaction Date'] - offset,
                               _end=events['Transaction Date'] + offset)

        # Pair every observation with the events of its own bank in a single join instead of
        # rescanning the whole panel once per transformation. Row positions are carried along
        # so the result does not depend on the index labels of `df`.
        observations = pd.DataFrame({'IDRSSD': df['IDRSSD'].to_numpy(),
                                     'Date': df['Date'].to_numpy(),
                                     '_pos': np.arange(len(df))})
        pairs = observations.merge(events, on='IDRSSD', how='inner')
        in_window = pairs['Date'].between(pairs['_start'], pairs['_end'])

        affected = np.zeros(len(df), dtype=bool)
        affected[pairs.loc[in_window, '_pos'].to_numpy()] = True
        df.loc[affected, ['Deposit_Growth', 'Loan_Growth']] = np.nan

    return df


def binned_scatter(x, y, q, label):
    """
    Draw a binned scatter on the current matplotlib axes.

    `x` is cut into (up to) `q` quantile bins; one point is plotted per bin at
    (mean of x in the bin, mean of y in the bin).

    Args:
    - x (pd.Series): Variable used to form the quantile bins (horizontal axis).
    - y (pd.Series): Variable averaged within each bin (vertical axis); matched to `x` by index.
    - q (int): Number of quantiles requested; bins with duplicated edges are dropped.
    - label (str): Legend label of the plotted series.

    Returns:
    - None. Nothing is drawn when `x` has no non-missing values.
    """
    # Nothing to bin (e.g. an empty sub-sample): skip instead of failing inside qcut
    if x.notna().sum() == 0:
        return

    # Assign every observation to a quantile bin of x; missing x values get no bin
    x_binned = pd.qcut(x, q=q, duplicates='drop')

    # One pass per variable: average within the bins that actually contain observations
    bin_centers = x.groupby(x_binned, observed=True).mean()
    binned_means = y.groupby(x_binned, observed=True).mean().reindex(bin_centers.index)

    # Plot the binned means
    plt.scatter(bin_centers, binned_means, label=label, alpha=0.7, s=50)


In [40]:
# Working frame for the analysis. `.copy()` makes it independent of `main`, so the columns added
# to `df` in later cells do not trigger SettingWithCopyWarning.
# NOTE(review): 'Deposit Spread' is not created by any cell in the visible part of the notebook --
# confirm where it is defined, otherwise this selection raises a KeyError. The macro columns
# require the macro/shock data to have been merged into `main`.
df = main[[ 'IDRSSD', 'Date', 'Financial Institution Name',             # Dates and IDS 
            'Total Deposits', 'Deposit Share', 'Total Assets',          # Bank specific quantity variables
            'Total Loans',
            'Deposit Rate', 'Deposit Spread',                           # Bank specific rate variable
            'Cash', 'Securities AC',                                    # Bank specific asset variables
            'FEDFUNDS', 'High Inflation', 'FF Hike', 'Recession',
            'CPI', 'Positive FF Shock'                                  # Macro variables
            ]].copy()

In [None]:
# 'Securities Share': securities as a fraction of total assets
df['Securities Share'] = df['Securities AC'].div(df['Total Assets'])

In [43]:
# Securities share on 2022-12-31 of the bank with IDRSSD 802866 (first matching row):
is_reference_bank = (df['IDRSSD'] == 802866) & (df['Date'] == '2022-12-31')
svb_securities = df.loc[is_reference_bank, 'Securities Share'].values[0]

In [44]:
# IDs of the banks whose 'Securities Share' on 2022-12-31 is above `svb_securities`.
# Both conditions are evaluated on the full frame, so the mask has the same index as `df`
# and does not rely on implicit alignment of a mask built from a filtered sub-frame.
right_tail_banks = df.loc[
    (df['Date'] == '2022-12-31') & (df['Securities Share'] > svb_securities),
    'IDRSSD',
].unique()

In [53]:
# Name, assets and deposits on 2022-12-31 of the banks listed in `right_tail_banks`:
right_tail_df = df.loc[
    (df['Date'] == '2022-12-31') & df['IDRSSD'].isin(right_tail_banks),
    ['Financial Institution Name', 'Total Assets', 'Total Deposits'],
]

In [None]:
# Summary statistics over all banks on 2022-12-31, for comparison with the right-tail subset:
df[(df['Date'] == '2022-12-31')][['Financial Institution Name', 'Total Assets', 'Total Deposits']].describe()

In [None]:
# Summary statistics of the banks in `right_tail_df` (securities share above `svb_securities`):
right_tail_df.describe()

In [None]:
# Distribution of the securities share across banks on 2022-12-31,
# with a vertical line at the value of the bank with IDRSSD=802866:
on_year_end = df['Date'] == '2022-12-31'
sns.histplot(df.loc[on_year_end, 'Securities Share'], bins=20, kde=True)
plt.axvline(x=df.loc[(df['IDRSSD'] == 802866) & on_year_end, 'Securities Share'].values[0],
            color='red', linestyle='--', linewidth=2)
plt.title('Distribution of Securities Share on 12/31/2022')
plt.show()



In [None]:
# Correlation between the two alternative tightening indicators, 'FF Hike' and 'Positive FF Shock'
# (computed over all bank-date rows of `df`, so each date is weighted by its number of banks):
df[['FF Hike', 'Positive FF Shock']].corr()

In [95]:
# Add the growth columns; growth is blanked within one quarter (window=1) around each
# transformation involving the bank. Note that this rebinds `df` to the returned frame.
df = compute_growth(df, transformations, window=1)

In [None]:
# Summary statistics and the 1st / 99th percentiles of 'Loan_Growth' (used below as outlier cut-offs):
print(df['Loan_Growth'].describe())
print(df['Loan_Growth'].quantile([0.01, 0.99]))

In [None]:
# Summary statistics and the 1st / 99th percentiles of 'Deposit_Growth' (used below as outlier cut-offs):
print(df['Deposit_Growth'].describe())
print(df['Deposit_Growth'].quantile([0.01, 0.99]))

In [98]:
# Set the outliers of each growth series to NaN: values below the 1st percentile or above the
# 99th percentile (i.e. the bottom and top 1% of the distribution).
for growth_column in ('Deposit_Growth', 'Loan_Growth'):
    lower, upper = df[growth_column].quantile([0.01, 0.99])
    is_outlier = (df[growth_column] < lower) | (df[growth_column] > upper)
    df[growth_column] = df[growth_column].mask(is_outlier, np.nan)



In [None]:
# 'Securities Growth': within-bank percentage change of 'Securities AC' between consecutive rows
df['Securities Growth'] = df.groupby('IDRSSD')['Securities AC'].pct_change(fill_method=None)
print(df['Securities Growth'].describe())

# Values in the bottom or top 1% of the distribution are set to NaN:
lower, upper = df['Securities Growth'].quantile([0.01, 0.99])
is_outlier = (df['Securities Growth'] < lower) | (df['Securities Growth'] > upper)
df['Securities Growth'] = df['Securities Growth'].mask(is_outlier)
print(df['Securities Growth'].describe())

In [None]:
# 'Real Cash' deflates 'Cash' by the CPI; 'Cash Growth' is its within-bank percentage change
# between consecutive rows
df['Real Cash'] = df['Cash'] / df['CPI'] * 100
df['Cash Growth'] = df.groupby('IDRSSD')['Real Cash'].pct_change(fill_method=None)
print(df['Cash Growth'].describe())

# Values of 'Cash Growth' in the bottom or top 1% of the distribution are set to NaN:
lower, upper = df['Cash Growth'].quantile([0.01, 0.99])
is_outlier = (df['Cash Growth'] < lower) | (df['Cash Growth'] > upper)
df['Cash Growth'] = df['Cash Growth'].mask(is_outlier)
print(df['Cash Growth'].describe())

In [101]:
def _growth_histogram(df, expansion_condition, contraction_condition, column, name, shock_label,
                      bins, text_x, text_ys, xlim=None):
    """Overlay the expansion and contraction densities of `column`, mark both means and print mean/std."""
    expansion = df.loc[expansion_condition, column]
    contraction = df.loc[contraction_condition, column]

    plt.figure(figsize=(12, 6))
    # 'C0' / 'C1' are the first two colours of the active colour cycle, i.e. the colours the two
    # histograms are drawn with, so each mean line matches its own histogram.
    sns.histplot(expansion, label='Expansion', bins=bins, stat='density', alpha=0.6)
    plt.axvline(expansion.mean(), linestyle='--', linewidth=2, color='C0')
    sns.histplot(contraction, label='Contraction', bins=bins, alpha=0.6, stat='density')
    plt.axvline(contraction.mean(), linestyle='--', linewidth=2, color='C1')
    plt.title(f'{name} (High Inflation, {shock_label})')
    plt.xlabel(name)
    plt.ylabel('Density')
    plt.legend()

    # Mean and standard deviation of both distributions, written at (text_x, y) in data coordinates
    annotations = [
        ('Mean Expansion:             ', expansion.mean()),
        ('Std. Expansion:             ', expansion.std()),
        ('Mean Contraction:           ', contraction.mean()),
        ('Std. Contraction:           ', contraction.std()),
    ]
    for text_y, (caption, value) in zip(text_ys, annotations):
        plt.text(text_x, text_y, caption + str(round(value, 4)))

    plt.grid(True, which='both', linestyle='--', lw=0.5, alpha=0.5, color='lightgrey')
    if xlim is not None:
        plt.xlim(*xlim)
    plt.show()


def _binned_pair(df, expansion_condition, contraction_condition, x_column, y_column,
                 x_name, y_name, num_quantiles, xlim=None, ylim=None):
    """Binned scatter of `y_column` against `x_column`, one series for expansions and one for contractions."""
    plt.figure(figsize=(12, 6))
    for condition, label in ((expansion_condition, 'Expansion'), (contraction_condition, 'Contraction')):
        binned_scatter(
            df.loc[condition, x_column],
            df.loc[condition, y_column],
            q=num_quantiles,
            label=label,
        )

    # Plot settings
    plt.title(f'Binned {x_name} vs. {y_name} (Quantile Bins)')
    plt.ylabel(y_name)
    plt.xlabel(x_name)
    if ylim is not None:
        plt.ylim(*ylim)
    if xlim is not None:
        plt.xlim(*xlim)
    plt.legend()
    plt.grid(True, which='both', linestyle='--', lw=0.5, alpha=0.5, color='lightgrey')
    plt.show()


def cross_sectional_plots(  df,                     # Dataframe with bank-level data 
                            criteria,               # Criteria to define the interest rate shock
                            num_quantiles=100):     # Number of quantiles for the binned scatter plot
    """
    Compare the cross-section of bank growth rates in expansions vs. contractions, restricted to
    high-inflation periods in which interest rates tighten.

    Four histograms (loan, deposit, securities and cash growth) and four binned scatter plots
    are shown.

    Args:
    - df (pd.DataFrame): Bank-level data with the columns 'Recession', 'High Inflation',
      'FF Hike', 'Positive FF Shock', 'Loan_Growth', 'Deposit_Growth', 'Securities Growth'
      and 'Cash Growth'.
    - criteria (str): How tightening is defined:
        'ff_hike'  -> rows with 'FF Hike' == 1 (a simple increase in the FFR);
        'mp_shock' -> rows with 'Positive FF Shock' == 1 (a positive monetary-policy shock).
    - num_quantiles (int): Number of quantile bins used in the binned scatter plots.

    Returns:
    - None. For an unknown `criteria` the message 'Criteria not found' is printed and nothing is plotted.
    """
    # criteria -> (indicator column in df, wording used in the figure titles)
    shock_specs = {
        'ff_hike': ('FF Hike', 'FF Hike'),
        'mp_shock': ('Positive FF Shock', 'Positive MP Shock'),
    }
    if criteria not in shock_specs:
        print('Criteria not found')
        return None
    shock_column, shock_label = shock_specs[criteria]

    # Expansion vs. contraction, both restricted to high inflation with the tightening indicator on
    tightening = (df[shock_column] == 1) & (df['High Inflation'] == 1)
    expansion_condition = (df['Recession'] == 0) & tightening
    contraction_condition = (df['Recession'] == 1) & tightening

    # --------------------------------- Histograms --------------------------------- #
    narrow_bins = np.linspace(-0.25, 0.25, 150)
    cash_bins = np.linspace(df['Cash Growth'].min(), 2, 150)

    # (column, display name, bins, x of the annotations, y of the 4 annotation lines, x-limits)
    histogram_specs = [
        ('Loan_Growth', 'Loan Growth', narrow_bins, 0.1, (7, 6.5, 6, 5.5), (-0.13, 0.22)),
        ('Deposit_Growth', 'Deposits Growth', narrow_bins, 0.1, (7, 6.5, 6, 5.5), (-0.13, 0.22)),
        ('Securities Growth', 'Securities Growth', narrow_bins, 0.1, (7.5, 6.5, 5.5, 4.5), (-0.25, 0.25)),
        ('Cash Growth', 'Cash Growth', cash_bins, 1, (1.4, 1.3, 1.2, 1.1), None),
    ]
    for column, name, bins, text_x, text_ys, xlim in histogram_specs:
        _growth_histogram(df, expansion_condition, contraction_condition, column, name,
                          shock_label, bins, text_x, text_ys, xlim=xlim)

    # --------------------------------- Scatter Plots --------------------------------- #
    # (x column, y column, x display name, y display name, x-limits, y-limits).
    # In the first plot loan growth is on the horizontal axis and securities growth on the
    # vertical axis, and the axis labels say so.
    scatter_specs = [
        ('Loan_Growth', 'Securities Growth', 'Loan Growth', 'Securities Growth', None, (-0.05, 0.12)),
        ('Deposit_Growth', 'Loan_Growth', 'Deposits Growth', 'Loan Growth', (-0.1, 0.2), (-0.01, 0.05)),
        ('Deposit_Growth', 'Cash Growth', 'Deposits Growth', 'Cash Growth', None, None),
        ('Deposit_Growth', 'Securities Growth', 'Deposits Growth', 'Securities Growth', None, None),
    ]
    for x_column, y_column, x_name, y_name, xlim, ylim in scatter_specs:
        _binned_pair(df, expansion_condition, contraction_condition, x_column, y_column,
                     x_name, y_name, num_quantiles, xlim=xlim, ylim=ylim)

In [None]:
# Cross-sectional plots with tightening defined as an increase in the FF rate ('FF Hike' == 1):
cross_sectional_plots(df, 'ff_hike', num_quantiles=50)

In [None]:
# Same plots with tightening defined as a positive monetary-policy shock ('Positive FF Shock' == 1):
cross_sectional_plots(df, 'mp_shock', num_quantiles=50)