# Call Reports Analysis

##### Housekeeping and loading data

In [99]:
# Load packages
import os
import statsmodels.api as sm
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [100]:
# Input (clean data) and output (figures) folders.  The defaults are the
# original author's local directories; set BANKING_DATA_DIR and/or
# BANKING_OUTPUT_DIR to run the notebook on another machine without editing it.
path = os.environ.get(
    'BANKING_DATA_DIR',
    'C:/Users/angel/Documents/Economics/Research/Banking Project/data/clean')
path_output = os.environ.get(
    'BANKING_OUTPUT_DIR',
    'C:/Users/angel/Documents/Economics/Research/Banking Project/data/output')

# Make the clean-data folder the working directory so the CSV files below can
# be read by bare file name.
os.chdir(path)

In [101]:
# Load the FEDFUNDS series, parsing 'DATE' as a timestamp, and rename that
# column to 'Date' so it shares its key name with the call-report data.
fed_funds = (
    pd.read_csv('FEDFUNDS.csv', parse_dates=['DATE'])
      .rename(columns={'DATE': 'Date'})
)

# Rescale the rate from percent to a decimal fraction (e.g. 5.25 -> 0.0525).
fed_funds['FEDFUNDS'] = fed_funds['FEDFUNDS'].div(100)

In [102]:
# Move every fed_funds timestamp back by one calendar day, so a first-of-month
# date becomes the last day of the previous month.
# NOTE: this cell is not idempotent - re-running it shifts the dates again.
fed_funds['Date'] = fed_funds['Date'] - pd.Timedelta(days=1)

In [103]:
# Read the call-report extract (variables of interest only), parsing 'Date' as
# a timestamp.  low_memory=False makes pandas infer each column's dtype from
# the whole file instead of chunk by chunk; the saved output of this cell looks
# like a pandas warning echoing this line (presumably a mixed-dtype
# DtypeWarning - confirm).
main = pd.read_csv('call_reports.csv', parse_dates=['Date'], low_memory=False)

  main = pd.read_csv('call_reports.csv', parse_dates = ['Date'])


In [104]:
# Discard every column whose name begins with 'Unnamed'.
is_unnamed = main.columns.str.startswith('Unnamed')
main = main.loc[:, ~is_unnamed]

In [105]:
# Attach the fed funds rate to every call-report row with the same 'Date'
# (left join: rows without a match keep NaN), then order by bank and date.
main = (
    main.merge(fed_funds, on='Date', how='left')
        .sort_values(by=['IDRSSD', 'Date'])
)

## Deposits

##### Making some tests

In [113]:
# RCFD2200 = RCON2200 + RCFN2200, counting a missing value in either column as
# zero (so the result is 0, not NaN, when both are missing).
main['RCFD2200'] = main['RCON2200'].fillna(0).add(main['RCFN2200'].fillna(0))

In [114]:
# Spot check: bank 480228 on 2010-12-31, deposit columns only.
main.loc[(main['IDRSSD'] == 480228) & (main['Date'] == '2010-12-31'),
         ['Date', 'Financial Institution Name', 'RCON2200', 'RCFN2200', 'RCFD2200']]

Unnamed: 0,Date,Financial Institution Name,RCON2200,RCFN2200,RCFD2200
550955,2010-12-31,"BANK OF AMERICA, NATIONAL ASSOCIATION",837555815,200614834.0,1038171000.0


In [115]:
# Spot check: bank 802866 for all years before 2007, deposit columns only.
main.loc[(main['IDRSSD'] == 802866) & (main['Date'].dt.year < 2007),
         ['Date', 'Financial Institution Name', 'RCON2200', 'RCFN2200', 'RCFD2200']]

Unnamed: 0,Date,Financial Institution Name,RCON2200,RCFN2200,RCFD2200
5691,2001-03-31,SILICON VALLEY BANK,4065675,,4065675.0
168580,2001-06-30,SILICON VALLEY BANK,3666568,,3666568.0
325696,2001-09-30,SILICON VALLEY BANK,3477299,,3477299.0
481687,2001-12-31,SILICON VALLEY BANK,3395268,,3395268.0
14324,2002-03-31,SILICON VALLEY BANK,3195018,,3195018.0
177164,2002-06-30,SILICON VALLEY BANK,3011471,,3011471.0
334255,2002-09-30,SILICON VALLEY BANK,3109589,,3109589.0
490196,2002-12-31,SILICON VALLEY BANK,3445638,,3445638.0
22775,2003-03-31,SILICON VALLEY BANK,3257976,,3257976.0
185584,2003-06-30,SILICON VALLEY BANK,3494809,,3494809.0


In [109]:
# Spot check: RCFD2170 history for bank 802866.
main.loc[main['IDRSSD'] == 802866,
         ['Date', 'Financial Institution Name', 'RCFD2170']]

Unnamed: 0,Date,Financial Institution Name,RCFD2170
5691,2001-03-31,SILICON VALLEY BANK,
168580,2001-06-30,SILICON VALLEY BANK,
325696,2001-09-30,SILICON VALLEY BANK,
481687,2001-12-31,SILICON VALLEY BANK,
14324,2002-03-31,SILICON VALLEY BANK,
...,...,...,...
619638,2021-12-31,SILICON VALLEY BANK,208581000.0
151831,2022-03-31,SILICON VALLEY BANK,217804000.0
313638,2022-06-30,SILICON VALLEY BANK,211824000.0
469734,2022-09-30,SILICON VALLEY BANK,210244000.0


In [122]:
# Spot check: RCFD2170 history for bank 480228 before 2021-09-30.
main.loc[(main['IDRSSD'] == 480228) & (main['Date'] < '2021-09-30'),
         ['Date', 'Financial Institution Name', 'RCFD2170']]

Unnamed: 0,Date,Financial Institution Name,RCFD2170
3413,2001-03-31,"BANK OF AMERICA, NATIONAL ASSOCIATION",5.535090e+08
166332,2001-06-30,"BANK OF AMERICA, NATIONAL ASSOCIATION",5.638440e+08
323465,2001-09-30,"BANK OF AMERICA, NATIONAL ASSOCIATION",5.750200e+08
479485,2001-12-31,"BANK OF AMERICA, NATIONAL ASSOCIATION",5.516490e+08
12143,2002-03-31,"BANK OF AMERICA, NATIONAL ASSOCIATION",5.406100e+08
...,...,...,...
302409,2020-06-30,"BANK OF AMERICA, NATIONAL ASSOCIATION",2.161656e+09
458573,2020-09-30,"BANK OF AMERICA, NATIONAL ASSOCIATION",2.157008e+09
613403,2020-12-31,"BANK OF AMERICA, NATIONAL ASSOCIATION",2.258832e+09
145633,2021-03-31,"BANK OF AMERICA, NATIONAL ASSOCIATION",2.316773e+09


In [124]:
# Spot check: RCONB993 and RCFDB995 for bank 480228 before 2021-09-30.
main.loc[(main['IDRSSD'] == 480228) & (main['Date'] < '2021-09-30'),
         ['Date', 'Financial Institution Name', 'RCONB993', 'RCFDB995']]

Unnamed: 0,Date,Financial Institution Name,RCONB993,RCFDB995
3413,2001-03-31,"BANK OF AMERICA, NATIONAL ASSOCIATION",,
166332,2001-06-30,"BANK OF AMERICA, NATIONAL ASSOCIATION",,
323465,2001-09-30,"BANK OF AMERICA, NATIONAL ASSOCIATION",,
479485,2001-12-31,"BANK OF AMERICA, NATIONAL ASSOCIATION",,
12143,2002-03-31,"BANK OF AMERICA, NATIONAL ASSOCIATION",3881000.0,19443000.0
...,...,...,...,...
302409,2020-06-30,"BANK OF AMERICA, NATIONAL ASSOCIATION",0.0,36450000.0
458573,2020-09-30,"BANK OF AMERICA, NATIONAL ASSOCIATION",0.0,36745000.0
613403,2020-12-31,"BANK OF AMERICA, NATIONAL ASSOCIATION",0.0,35406000.0
145633,2021-03-31,"BANK OF AMERICA, NATIONAL ASSOCIATION",0.0,23557000.0


##### Basic Definitions

In [None]:
# The time-deposit reporting codes differ before and after 2017-01-01, so the
# column set is chosen row by row from the date.
pre_2017 = main['Date'] < '2017-01-01'

# Deposit expenditure: the two time-deposit expense items of the relevant
# regime plus RIAD4508 and RIAD0093.
td_expense = np.where(pre_2017,
                      main['RIADA517'] + main['RIADA518'],
                      main['RIADHK03'] + main['RIADHK04'])
main['Deposit Expenditure'] = td_expense + main['RIAD4508'] + main['RIAD0093']

# Per-period expenditure: difference within each (bank, year); the first
# observation of each group has no predecessor and keeps the reported value.
# NOTE(review): 'Year' is not created anywhere in the visible notebook -
# presumably it ships with call_reports.csv; confirm.
reported_expense = main['Deposit Expenditure']
main['Actual Expenditure'] = (
    main.groupby(['IDRSSD', 'Year'])['Deposit Expenditure']
        .diff()
        .fillna(reported_expense)
)

# Total deposits: RCON3485 + RCONB563 plus the two time-deposit balances of
# the relevant regime.
non_time_deposits = main['RCON3485'] + main['RCONB563']
main['Total Deposit'] = np.where(
    pre_2017,
    non_time_deposits + main['RCONA514'] + main['RCONA529'],
    non_time_deposits + main['RCONHK16'] + main['RCONHK17'],
)

# Period-on-period change in each bank's deposits.
main['Deposit Variation'] = main.groupby(['IDRSSD'])['Total Deposit'].diff()

# Aggregate deposits: sum over all banks present on a date, repeated on every
# row of that date.
main['Aggregate Deposits'] = main.groupby('Date')['Total Deposit'].transform('sum')

# Change (level and percent) of the aggregate, taken along each bank's own
# sequence of rows.
aggregate_by_bank = main.groupby('IDRSSD')['Aggregate Deposits']
main['Aggregate Deposits Variation'] = aggregate_by_bank.diff()
main['Pct. Aggregate Deposits Variation'] = aggregate_by_bank.pct_change()

# Bank's share of all deposits on the date ('Aggregate Deposits' is exactly
# the per-date sum of 'Total Deposit' computed above).
main['Deposit Share'] = main['Total Deposit'] / main['Aggregate Deposits']

# s_{it} - s_{it-1}: change in the bank's deposit share.
main['Deposit Share Variation'] = main.groupby('IDRSSD')['Deposit Share'].diff()

In [None]:
# Give the savings and transaction balance columns readable names.
main = main.rename(columns={'RCONB563': 'Savings Accounts',
                            'RCON3485': 'Transaction Accounts'})

# Time deposits: (balance before 2017, balance from 2017, expense before 2017,
# expense from 2017) for each size class.
pre_2017 = main['Date'] < '2017-01-01'
td_sources = {
    'Small TD': ('RCONA529', 'RCONHK16', 'RIADA518', 'RIADHK03'),
    'Large TD': ('RCONA514', 'RCONHK17', 'RIADA517', 'RIADHK04'),
}
for label, (bal_old, bal_new, exp_old, exp_new) in td_sources.items():
    main[label] = np.where(pre_2017, main[bal_old], main[bal_new])
    main[f'{label} Exp'] = np.where(pre_2017, main[exp_old], main[exp_new])
    # Per-period expense: difference within (bank, year); the first row of
    # each group keeps the reported value.
    main[f'Actual {label} Exp'] = (
        main.groupby(['IDRSSD', 'Year'])[f'{label} Exp']
            .diff()
            .fillna(main[f'{label} Exp'])
    )
    main[f'{label} Rate'] = main[f'Actual {label} Exp'] / main[label]
    main[f'{label} Share'] = main[label] / main['Total Deposit']

# Savings: per-period RIAD0093 over the savings balance, and savings share.
savings_expense = (
    main.groupby(['IDRSSD', 'Year'])['RIAD0093']
        .diff()
        .fillna(main['RIAD0093'])
)
main['Savings Rate'] = savings_expense / main['Savings Accounts']
main['Savings Share'] = main['Savings Accounts'] / main['Total Deposit']

# Transaction accounts: share only.
main['Transaction Share'] = main['Transaction Accounts'] / main['Total Deposit']

In [None]:
# Switch: 1 keeps only banks observed with positive deposits on every date.
balanced_panel = 1

if balanced_panel == 1:

    # Step 1: number of distinct dates in the dataset.
    total_dates = main['Date'].nunique()
    print('Number of dates:', total_dates)

    # Step 2: for each bank, number of distinct dates with positive deposits.
    bank_date_counts = main[main['Total Deposit'] > 0].groupby('IDRSSD')['Date'].nunique()
    in_every_date = bank_date_counts == total_dates
    print('Banks that show up in all dates:', in_every_date.sum())

    # Step 3: keep only the banks present on all dates.  `.copy()` makes the
    # result an independent frame, so the column assignments in later cells do
    # not trigger SettingWithCopyWarning on a filtered view.
    banks = bank_date_counts[in_every_date].index
    main = main[main['IDRSSD'].isin(banks)].copy()


In [None]:
# Deposit rate: per-period deposit expenditure over total deposits.
main['Deposit Rate'] = main['Actual Expenditure'] / main['Total Deposit']

# Deposit-weighted average rate per date, repeated on every row of the date.
# Rows with a missing rate or weight are left out of both the numerator and
# the denominator, so one bank with missing data no longer turns the whole
# date into NaN (np.average propagates NaN and raises when weights sum to 0).
usable = main['Deposit Rate'].notna() & main['Total Deposit'].notna()
weighted_rates = (main['Deposit Rate'] * main['Total Deposit']).where(usable)
weights = main['Total Deposit'].where(usable)
main['Weighted Deposit Rate'] = (
    weighted_rates.groupby(main['Date']).transform('sum')
    / weights.groupby(main['Date']).transform('sum')
)

In [None]:
# Cross-sectional mean and standard deviation of the deposit rate on each
# date, and the standardized rate R_hat = (rate - mean) / std.
rate_by_date = main.groupby('Date')['Deposit Rate']
main['Avg. Deposit Rate'] = rate_by_date.transform('mean')
main['Std. Deposit Rate'] = rate_by_date.transform('std')
main['R_hat'] = (main['Deposit Rate'] - main['Avg. Deposit Rate']).div(main['Std. Deposit Rate'])

In [None]:
# Data on repos
# Income and expenditure as reported
main['Income Repo']         = main['RIAD4020']
main['Expenditure Repo']    = main['RIAD4180']

# Quantities
main['Repo Loans']          = main['RCFD3365']
main['Repo Debt']           = main['RCFD3353']

# Per-period flows: difference within each (bank, year); the first row of each
# group has no predecessor and keeps the reported value.
main['Actual Expenditure Repo'] = main.groupby(['IDRSSD', 'Year'])['Expenditure Repo'].diff().fillna(main['Expenditure Repo'])
# Fixed: the income flow previously differenced 'Expenditure Repo', mixing the
# two series.
main['Actual Income Repo'] = main.groupby(['IDRSSD', 'Year'])['Income Repo'].diff().fillna(main['Income Repo'])

In [None]:
# Maturity of deposits: each maturity bucket is multiplied by a weight in years
# and the buckets are summed; the bucket columns differ before and after
# 2017-01-01.
# NOTE(review): the last bucket is weighted 3 before 2017 but 5 from 2017 on -
# confirm this difference is intended.
pre_2017 = main['Date'] < '2017-01-01'

small_td_old = 0.25*main['RCONA579'] + 15/12*main['RCONA580'] + 2*main['RCONA581'] + 3*main['RCONA582']
small_td_new = 0.25*main['RCONHK07'] + 15/12*main['RCONHK08'] + 2*main['RCONHK09'] + 5*main['RCONHK10']
main['Small TD Maturity'] = np.where(pre_2017, small_td_old, small_td_new)

large_td_old = 0.25*main['RCONA584'] + 15/12*main['RCONA585'] + 2*main['RCONA586'] + 3*main['RCONA587']
large_td_new = 0.25*main['RCONHK12'] + 15/12*main['RCONHK13'] + 2*main['RCONHK14'] + 5*main['RCONHK15']
main['Large TD Maturity'] = np.where(pre_2017, large_td_old, large_td_new)

# Balance-weighted maturity: transaction and savings balances carry weight 0
# and RCON3200 carries weight 5.
# NOTE(review): RCON3200 enters the numerator but not the denominator - confirm.
zero_maturity_balance = main['Transaction Accounts'] + main['Savings Accounts']
weighted_balances = (0*zero_maturity_balance + 5*main['RCON3200']
                     + main['Small TD Maturity'] + main['Large TD Maturity'])
total_balance = zero_maturity_balance + main['Small TD'] + main['Large TD']
main['Deposit Maturity'] = weighted_balances / total_balance

# MISSING THE "(...) and Fed funds purchased"                         

In [None]:
# Working frame for the rate overview: identifiers, bank-level and aggregate
# quantities, and bank-level and aggregate rates.
overview_columns = (
    ['IDRSSD', 'Date']                                              # IDRSSD and Date
    + ['Total Deposit', 'Deposit Share', 'Deposit Variation']       # Bank specific quantity variables
    + ['Deposit Rate', 'R_hat']                                     # Bank specific rate variables
    + ['Aggregate Deposits', 'Aggregate Deposits Variation']        # Aggregate quantity variables
    + ['FEDFUNDS', 'Weighted Deposit Rate']                         # Aggregate rate variables
)
df = main[overview_columns]

In [None]:
# Time series of the deposit-weighted rate against the fed funds rate.
# Both columns hold one value per date repeated on every bank row (a per-date
# transform and a merge on 'Date'), so plot a single row per date: seaborn
# would otherwise aggregate and bootstrap a confidence band over thousands of
# identical values for each date.
rates_by_date = df.drop_duplicates(subset='Date')

sns.lineplot(data = rates_by_date, x = 'Date', y = 'Weighted Deposit Rate',
             label = 'Weighted Deposit Rate', color = 'blue', linewidth = 2, linestyle = '-.')
sns.lineplot(data = rates_by_date, x = 'Date', y = 'FEDFUNDS',
             label = 'FedFunds', color = 'black', linestyle = '-', linewidth = 2)
plt.xlabel('Date')
plt.ylabel('Rate')
plt.title('Weighted Deposit Rate and FEDFUNDS')
plt.grid(color = 'lightgray', linestyle = '--', linewidth = 0.5, alpha = 0.5)
plt.legend(loc='upper center')
plt.show()

In [None]:
# Until here, dataset matches with Lucas.
#df2[(df2['Date']>'2023-01-01') & (df2['Date']<'2024-01-01')]['Deposit Rate'].describe()

##### Distribution of deposit rates

In [None]:
def _middle_90(group):
    """Rows of one date whose 'Total Deposit' lies between the 5th and 95th percentile (inclusive)."""
    deposits = group['Total Deposit']
    return group[(deposits >= deposits.quantile(0.05)) & (deposits <= deposits.quantile(0.95))]


def _top_5(group):
    """Rows of one date whose 'Total Deposit' is at or above the 95th percentile."""
    deposits = group['Total Deposit']
    return group[deposits >= deposits.quantile(0.95)]


# Despite its name, df_bottom is the middle 90% of the size distribution.
df_bottom = df.groupby('Date').apply(_middle_90).reset_index(drop=True)
df_top = df.groupby('Date').apply(_top_5).reset_index(drop=True)


In [None]:
# Kernel density of the standardized deposit rate (R_hat) on two dates, drawn
# once per sample: all banks, the middle 90% by deposits, and the top 5%.
# Axis limits are fixed so the three figures are directly comparable.
kde_samples = [
    (df, 'All Deposits'),
    (df_bottom, 'Mid 90% of Total Deposits'),
    (df_top, 'Top 5% of Total Deposits'),
]
kde_dates = [('2001-03-31', 'black'), ('2022-03-31', 'lightblue')]

for sample, figure_title in kde_samples:
    for date, colour in kde_dates:
        sns.kdeplot(sample[sample['Date'] == date]['R_hat'],
                    label=date, color=colour, fill=True, alpha=0.5)
    plt.xlim(-3.1, 3.1)
    plt.ylim(0, 0.75)
    plt.legend()
    plt.title(figure_title)
    plt.show()

##### What type of funding do banks use?

In [None]:
# Working frame for the funding-mix analysis.
funding_columns = (
    ['IDRSSD', 'Date', 'Financial Institution Name']                               # Dates and IDs
    + ['Total Deposit', 'Small TD', 'Large TD',
       'Savings Accounts', 'Transaction Accounts']                                 # Deposit type variables
    + ['Deposit Share', 'Deposit Variation']                                       # Bank specific quantity variables
    + ['Small TD Share', 'Large TD Share', 'Savings Share', 'Transaction Share']   # Deposit type shares
    + ['Deposit Rate', 'R_hat']                                                    # Bank specific rate variables
    + ['Savings Rate', 'Large TD Rate', 'Small TD Rate']                           # Deposit type rates
    + ['Aggregate Deposits', 'Aggregate Deposits Variation']                       # Aggregate quantity variables
    + ['FEDFUNDS', 'Weighted Deposit Rate']                                        # Aggregate rate variables
)
df = main[funding_columns]

In [None]:
# Names of the 10 banks with the largest 'Total Deposit' on the most recent date.
latest_rows = df[df['Date'] == df['Date'].max()]
top_10_today = latest_rows.nlargest(10, 'Total Deposit')['Financial Institution Name']

In [None]:
# For every date keep only the 10 banks with the largest 'Total Deposit'.
df_top10 = (
    df.groupby(['Date'])
      .apply(lambda rows: rows.nlargest(10, 'Total Deposit'))
      .reset_index(drop=True)
)

# Each bank's share of the combined deposits of that date's top 10.
top10_total = df_top10.groupby('Date')['Total Deposit'].transform('sum')
df_top10['Share in Top 10'] = df_top10['Total Deposit'] / top10_total

In [None]:
# Simple (unweighted) mean of each deposit-type share across the top 10 banks,
# one row per date.
share_columns = ['Small TD Share', 'Large TD Share', 'Savings Share', 'Transaction Share']
df_top10_unweighted_shares = df_top10.groupby('Date')[share_columns].mean().reset_index()

# Plot the unweighted average shares over time.
shares_over_time = df_top10_unweighted_shares.set_index('Date')
shares_over_time.plot(markersize=2, linewidth=2,
                      color=['black', '#66B2FF', '#09BA56', '#FFC400'])
plt.xlabel('Date')
plt.ylabel('Share')
plt.ylim(0, 1)
plt.title('Average Share of each type of Deposit (Top 10 Banks)')
plt.grid(color = 'lightgray', linestyle = '--', linewidth = 0.5)
plt.show()


In [None]:
# Mean balance of each deposit type across the top 10 banks, one row per date.
balance_columns = ['Small TD', 'Large TD', 'Savings Accounts', 'Transaction Accounts']
df_top10_unweighted = df_top10.groupby('Date')[balance_columns].mean().reset_index()

# Rescale the balances by 1e6 for the stacked bar plot in the next cell.
df_top10_unweighted[balance_columns] = df_top10_unweighted[balance_columns] / 1e6

In [None]:
# Stacked bar plot of the average balance per deposit type across the top 10
# banks (df_top10_unweighted holds per-date means, not totals).
df_top10_unweighted.set_index('Date').plot(kind='bar', stacked=True, color=['black', '#66B2FF', '#09BA56', '#FFC400'])
# Show only every 12th date on the x-axis, formatted as 'YYYY-MM-DD':
plt.xticks(np.arange(0, len(df_top10_unweighted), 12), df_top10_unweighted['Date'].dt.strftime('%Y-%m-%d')[::12], rotation=45)
plt.xlabel('Date')
# The balances were divided by 1e6 upstream.  Amounts in the file appear to be
# in thousands of USD (e.g. RCFD2170 of about 2.3e9 for Bank of America in the
# output above), which makes the plotted unit billions - TODO confirm.
plt.ylabel('Average Deposits per Bank (billions of USD)')
plt.title('Average Deposits of the Top 10 Banks')
plt.grid(color = 'lightgray', linestyle = '--', linewidth = 0.1)
plt.show()

In [None]:
# Scale each deposit-type share by the bank's weight within the top 10;
# summing these columns per date then gives the weighted average share.
for deposit_type in ('Small TD', 'Large TD', 'Savings', 'Transaction'):
    df_top10[f'Weighted {deposit_type} Share'] = (
        df_top10[f'{deposit_type} Share'] * df_top10['Share in Top 10']
    )

In [None]:
# Sum the weight-scaled shares within each date to obtain the weighted average
# share of every deposit type.
weighted_columns = ['Weighted Small TD Share', 'Weighted Large TD Share',
                    'Weighted Savings Share', 'Weighted Transaction Share']
df_top10_weighted = df_top10.groupby('Date')[weighted_columns].sum().reset_index()

# Plot the weighted average shares over time.
weighted_over_time = df_top10_weighted.set_index('Date')
weighted_over_time.plot(markersize=2, linewidth=3,
                        color=['black', '#66B2FF', '#09BA56', '#FFC400'])
plt.xlabel('Date')
plt.ylabel('Share')
plt.ylim(0, 1)
plt.title('Weighted Average Share of each type of Deposit (Top 10 Banks)')
plt.grid(color = 'lightgray', linestyle = '--', linewidth = 0.5)
plt.show()

##### Where is this dispersion coming from?

In [None]:
# df2: banks holding positive balances of savings, small and large time
# deposits, with the savings rate standardized within each date.
holds_all_types = df['Savings Accounts'].gt(0) & df['Small TD'].gt(0) & df['Large TD'].gt(0)
df2 = df[holds_all_types].copy()

savings_rate_by_date = df2.groupby('Date')['Savings Rate']
df2['Avg. Savings Rate'] = savings_rate_by_date.transform('mean')
df2['Std. Savings Rate'] = savings_rate_by_date.transform('std')
df2['Savings_hat'] = (df2['Savings Rate'] - df2['Avg. Savings Rate']).div(df2['Std. Savings Rate'])

In [None]:
def _trim_small_td_tails(group):
    """Rows of one date whose 'Small TD' lies between the 1st and 99th percentile (inclusive)."""
    small_td = group['Small TD']
    return group[small_td.between(small_td.quantile(0.01), small_td.quantile(0.99))]


# df3: same base sample as df2 (positive savings, small TD and large TD), with
# the extreme 1% tails of 'Small TD' removed within each date.
holds_all_types = df['Savings Accounts'].gt(0) & df['Small TD'].gt(0) & df['Large TD'].gt(0)
df3 = df[holds_all_types].copy()
df3 = df3.groupby('Date').apply(_trim_small_td_tails).reset_index(drop=True)

# Standardize the small-TD rate within each date.
small_td_rate_by_date = df3.groupby('Date')['Small TD Rate']
df3['Avg. Small TD Rate'] = small_td_rate_by_date.transform('mean')
df3['Std. Small TD Rate'] = small_td_rate_by_date.transform('std')
df3['Small_TD_hat'] = (df3['Small TD Rate'] - df3['Avg. Small TD Rate']).div(df3['Std. Small TD Rate'])

In [None]:
# Per-date size filters on 'Total Deposit': the middle 90% (5th to 95th
# percentile, inclusive) and the top 5% (at or above the 95th percentile).
keep_mid_90 = lambda g: g[g['Total Deposit'].between(g['Total Deposit'].quantile(0.05),
                                                     g['Total Deposit'].quantile(0.95))]
keep_top_5 = lambda g: g[g['Total Deposit'] >= g['Total Deposit'].quantile(0.95)]

# Despite the '_bottom' suffix these frames hold the middle 90% of banks.
df2_bottom = df2.groupby('Date').apply(keep_mid_90).reset_index(drop=True)
df2_top = df2.groupby('Date').apply(keep_top_5).reset_index(drop=True)

df3_bottom = df3.groupby('Date').apply(keep_mid_90).reset_index(drop=True)
df3_top = df3.groupby('Date').apply(keep_top_5).reset_index(drop=True)

In [None]:
# Kernel density of the standardized savings rate for the top 5% of banks on
# two dates.  The date mask is built from df2_top itself; it was previously
# built from df_top, a different frame whose rows do not line up with
# df2_top's.
for date, colour in (('2001-03-31', 'black'), ('2022-03-31', 'lightblue')):
    sns.kdeplot(df2_top[df2_top['Date'] == date]['Savings_hat'],
                label=date, color=colour, fill=True, alpha=0.5)
plt.xlim(-3.1, 3.1)
plt.legend()
plt.title('Savings rates - Top 5% of Total Deposits')
plt.savefig(path_output + '/savings_top5.svg')
plt.show()

In [None]:
# Kernel density of the standardized small-TD rate for the top 5% of banks on
# two dates; the x-axis is left on automatic limits.
for date, colour in (('2001-03-31', 'black'), ('2022-03-31', 'lightblue')):
    on_date = df3_top[df3_top['Date'] == date]
    sns.kdeplot(on_date['Small_TD_hat'], label=date, color=colour, fill=True, alpha=0.5)
#plt.xlim(-0.02, 0.02)
plt.legend()
plt.title('Small TD rates - Top 5% of Total Deposits')
plt.savefig(path_output + '/smallTD_top5.svg')
plt.show()

##### Tracking deposits

In [None]:
# Working frame for the deposit-tracking analysis.  `.copy()` makes it
# independent of `main`: the following cells add columns to `df`, which would
# otherwise raise SettingWithCopyWarning on a selection of another frame.
df = main[[ 'IDRSSD', 'Date', 'Financial Institution Name',                           # Dates and IDS 
            'Total Deposit', 'Deposit Share', 
            'Deposit Variation', 'Deposit Share Variation',                           # Bank specific quantity variables
            'Deposit Rate', 'R_hat',                                                  # Bank specific rate variable
            'Savings Rate',                                                           # Deposit type rates
            'Aggregate Deposits', 'Aggregate Deposits Variation',                     # Aggregate quantity variables 
            'FEDFUNDS', 'Weighted Deposit Rate'                                       # Aggregate rate variables
            ]].copy()

In [None]:
# Drop the NaN values in the 'variation' variables:
#df = df.dropna(subset = ['Deposit Variation', 'Aggregate Deposits Variation'])

# Growth rate: log('Total Deposit') in t minus log('Total Deposit') in t-1,
# differenced within each bank.  An ungrouped .diff() would subtract the last
# observation of one bank from the first observation of the next.  `assign`
# returns a new frame, so `df` is no longer a selection of `main`.
log_deposits = np.log(df['Total Deposit'])
df = df.assign(**{'Growth Rate': log_deposits.groupby(df['IDRSSD']).diff()})

In [None]:
# IDs of the `how_many` largest banks by 'Total Deposit' on 2019-03-31; later
# cells use them to separate big banks from the rest.
how_many = 50
march_2019 = df[df['Date'] == '2019-03-31']
top_how_many = march_2019.nlargest(how_many, 'Total Deposit')['IDRSSD']

In [None]:
# 'Time': integer index of each date (0 for the earliest, 1 for the next, ...).
df['Time'] = df.groupby('Date').ngroup()

# Fit log(Aggregate Deposits) on a constant and a linear time trend by OLS and
# keep the residuals as the de-trended series.
log_aggregate = np.log(df['Aggregate Deposits'])
trend_regressors = sm.add_constant(df['Time'])
trend_fit = sm.OLS(log_aggregate, trend_regressors).fit()
df['Detrended Agg. Deposits'] = trend_fit.resid

In [None]:
# All three series take a single value per date (a date-level residual, a
# merge on 'Date', and a per-date transform), so plot one row per date:
# seaborn would otherwise aggregate and bootstrap a confidence band over every
# bank row of each date.
series_by_date = df.drop_duplicates(subset='Date')

# Plot the de-trended series:
sns.lineplot(data = series_by_date, x = 'Date', y = 'Detrended Agg. Deposits', linewidth = 2,
             color = 'lightgrey', label = 'De-trended Log(Aggregate Deposits)')
# add the FEDFUNDS rate:
sns.lineplot(data = series_by_date, x = 'Date', y = 'FEDFUNDS', linewidth = 2,
             color = 'black', label = 'FEDFUNDS', linestyle=':')
# add the weighted deposit rate:
sns.lineplot(data = series_by_date, x = 'Date', y = 'Weighted Deposit Rate', linewidth = 2,
             color = 'blue', linestyle='-.', label = 'Weighted Deposit Rate')
# add a horizontal line at 0:
plt.axhline(y=0, color='red', linestyle='--')
plt.xlabel('Date')
plt.title('De-trended Log(Aggregate Deposits)')
plt.grid(color = 'lightgray', linestyle = '--', linewidth = 0.5, alpha = 0.5)
plt.legend(loc='lower left')
plt.show()

In [None]:
# Histograms of R_hat (x100), split by the sign of 'Detrended Agg. Deposits',
# for three samples: the top banks, all other banks, and every bank.
# Each entry: (sample, bins for negative periods, bins for positive periods,
# x-axis label, title).
# NOTE(review): the last two figures use 2000 bins for the negative group but
# 100 for the positive group - confirm this asymmetry is intended.
is_top_bank = df['IDRSSD'].isin(top_how_many)
histogram_specs = [
    (df[is_top_bank], 150, 150, 'Normalized Deposit Rates', f'Deposit Rates (Top {how_many} Banks)'),
    (df[~is_top_bank], 2000, 100, 'Normalized Deposit Rate', 'Deposit Rates (Other Banks)'),
    (df, 2000, 100, 'Normalized Deposit Rate', 'Deposit Rates (All Banks)'),
]

for sample, bins_negative, bins_positive, x_label, figure_title in histogram_specs:
    detrended = sample['Detrended Agg. Deposits']
    sns.histplot(sample[detrended < 0]['R_hat']*100,
                 bins = bins_negative, color = 'red', stat = 'density')
    sns.histplot(sample[detrended > 0]['R_hat']*100,
                 bins = bins_positive, color = 'lightblue', stat = 'density')
    plt.xlabel(x_label)
    plt.ylabel('Density')
    plt.title(figure_title)
    #plt.xlim(-1, 1)
    plt.legend(['Negative Detrended Agg. Deposits', 'Positive Detrended Agg. Deposits'], loc='upper left')
    plt.show()

In [None]:
# Change in FEDFUNDS from one date to the next, computed on a one-row-per-date
# series and mapped back to the panel.  Differencing the panel column directly
# would subtract the last row of one bank from the first row of the next.
fedfunds_by_date = (
    df.drop_duplicates(subset='Date')
      .set_index('Date')['FEDFUNDS']
      .sort_index()
)
fedfunds_change = df['Date'].map(fedfunds_by_date.diff())

# 'FF Hike': 1 if FEDFUNDS did not fall relative to the previous date, else 0
# (the first date, which has no predecessor, gets 0).
# NOTE(review): an unchanged rate counts as a hike (>= 0) - confirm.
df['FF Hike'] = (fedfunds_change >= 0).astype(int)

# 'FF Cut': 1 if FEDFUNDS fell relative to the previous date, else 0.
df['FF Cut'] = (fedfunds_change < 0).astype(int)

# Mean of 'FEDFUNDS' over all rows of the panel:
mean_fedfunds = df['FEDFUNDS'].mean()

# 'FF High': 1 if FEDFUNDS is above that mean, else 0.
df['FF High'] = (df['FEDFUNDS'] > mean_fedfunds).astype(int)

In [None]:
# Cross-sectional standard deviation of 'Deposit Rate' on each date, repeated
# on every row of that date (recomputed here on the current working frame):
df['Std. Deposit Rate'] = df.groupby('Date')['Deposit Rate'].transform('std')

# Left axis: time series of the standard deviation, for dates before 2023-12-31.
sns.lineplot(data = df[df['Date']<'2023-12-31'], x = 'Date', y = 'Std. Deposit Rate', linewidth = 2, color = 'green')
# Colour the left y-axis ticks and tick labels green to match the line:
plt.yticks(color = 'green')
plt.tick_params(axis='y', colors='green')
# Colour the left y-axis label green as well:
plt.ylabel('Std. of Deposit Rate', color = 'green')

# Right axis: FEDFUNDS on a twin y-axis sharing the same x-axis.
ax2 = plt.twinx()
sns.lineplot(data = df[df['Date']<'2023-12-31'], x = 'Date', y = 'FEDFUNDS', linewidth = 3, color = 'black', linestyle=':', ax=ax2)
# Colour the right y-axis label, ticks and tick labels black:
ax2.yaxis.label.set_color('black')
ax2.tick_params(axis='y', colors='black')

# The remaining pyplot calls act on the current axes; the figure is written to
# the output folder as SVG before being shown.
plt.xlabel('Date')
plt.title('Standard Deviation of Deposit Rate and FEDFUNDS')
plt.grid(color = 'lightgray', linestyle = '--', linewidth = 0.5, alpha = 0.5)
plt.savefig(path_output+'/std_deposit_rate_fedfunds.svg', format='svg')
plt.show()


In [None]:
# R_hat (x100) of the big banks, split by whether 'FF Hike' is 1 or 0.
big_banks = df[df['IDRSSD'].isin(top_how_many)]
r_hat_hike = big_banks[big_banks['FF Hike'] == 1]['R_hat']*100
r_hat_no_hike = big_banks[big_banks['FF Hike'] == 0]['R_hat']*100

# Figure 1: overlaid density histograms.
sns.histplot(r_hat_hike, bins = 100, color = 'red', stat = 'density', kde=False)
sns.histplot(r_hat_no_hike, bins = 100, color = 'lightblue', stat = 'density', kde=False)
plt.xlabel('Normalized Deposit Rate')
plt.ylabel('Density')
plt.title('Deposit Rates (Big Banks)')
#plt.xlim(-1, 1)
plt.legend(['FF Hike', 'No FF Hike'], loc='upper left')
plt.show()

# Figure 2: kernel densities only, saved to the output folder as SVG.
sns.kdeplot(r_hat_hike, color = 'red', fill=True, alpha=0.5)
sns.kdeplot(r_hat_no_hike, color = 'lightblue', fill=True, alpha=0.5)
#plt.xlim(-1, 1)
plt.legend(['FF Hike', 'No FF Hike'])
plt.title('Normalized Deposit Rates (Big Banks)')
plt.savefig(path_output+'/normalized_deposit_rates_big_banks.svg', format='svg')
plt.show()

In [None]:
# R_hat (x100) of the big banks, split by whether 'FF High' is 1 (FEDFUNDS
# above its sample mean) or 0.
big_banks = df[df['IDRSSD'].isin(top_how_many)]
r_hat_high_ffr = big_banks[big_banks['FF High'] == 1]['R_hat']*100
r_hat_low_ffr = big_banks[big_banks['FF High'] == 0]['R_hat']*100

# Figure 1: overlaid density histograms.
sns.histplot(r_hat_high_ffr, bins = 100, color = 'red', stat = 'density', kde=False)
sns.histplot(r_hat_low_ffr, bins = 100, color = 'lightblue', stat = 'density', kde=False)
plt.xlabel('Normalized Deposit Rate')
plt.ylabel('Density')
plt.title('Deposit Rates (Big Banks)')
#plt.xlim(-1, 1)
plt.legend(['High FFR', 'Low FFR'], loc='upper left')
plt.show()

# Figure 2: kernel densities only, saved to the output folder as SVG.
sns.kdeplot(r_hat_high_ffr, color = 'red', fill=True, alpha=0.5)
sns.kdeplot(r_hat_low_ffr, color = 'lightblue', fill=True, alpha=0.5)
#plt.xlim(-1, 1)
plt.legend(['High FFR', 'Low FFR'])
plt.title('Normalized Deposit Rates (Big Banks)')
plt.savefig(path_output+'/normalized_deposit_rates_big_banks_FFR.svg', format='svg')
plt.show()

In [None]:
# Levene tests for equal variance of R_hat among the big banks, comparing rows
# where a 0/1 flag is 1 against rows where it is 0.
from scipy.stats import levene

big_banks = df[df['IDRSSD'].isin(top_how_many)]

# Each entry: (flag column, conclusion printed when equal variance is rejected).
levene_cases = [
    ('FF Hike', 'The variance of the Deposit Rate is different when the FEDFUNDS rate is hiked.'),
    ('FF High', 'The variance of the Deposit Rate is different when the FEDFUNDS rate is above avg.'),
]

for flag, conclusion in levene_cases:
    # Missing R_hat values are dropped first: with NaNs in the input the
    # p-value comes back NaN and the test silently reports nothing.
    flagged = big_banks[big_banks[flag] == 1]['R_hat'].dropna()
    unflagged = big_banks[big_banks[flag] == 0]['R_hat'].dropna()
    stat, pval = levene(flagged, unflagged)

    # Always show the p-value so that a non-rejection is visible too.
    print('The p-value of the Levene test is:', pval)
    if pval<0.05:
        print('The null of equal variance is rejected.')
        print(conclusion)
    else:
        print(f"The null of equal variance is not rejected ('{flag}' = 1 vs 0).")


##### Spread Beta

In [None]:
# Working frame for the spread-beta regressions: keep only the columns they need.
spread_beta_columns = [
    'IDRSSD', 'Date', 'Financial Institution Name',   # bank identifiers and dates
    'Deposit Rate',                                   # bank-level deposit rate
    'FEDFUNDS',                                       # aggregate rate
]
df = main[spread_beta_columns]

In [None]:
# Bank-by-bank OLS of the change in 'Deposit Rate' on the current and four lagged
# changes in 'FEDFUNDS'. The fitted coefficients are collected in `results`
# (one pandas Series per bank, including the bank's 'IDRSSD').

# Step 1: Sort by bank and date so that diffs and lags follow each bank's own timeline
df = df.sort_values(['IDRSSD', 'Date'])

# Step 2: Calculate the change in 'Deposit Rate' and 'FEDFUNDS' within each bank.
# Differencing 'FEDFUNDS' on the stacked panel (without grouping) would subtract the
# previous bank's last observation from the next bank's first one.
df['Delta Deposit Rate'] = df.groupby('IDRSSD')['Deposit Rate'].diff()
df['Delta FEDFUNDS'] = df.groupby('IDRSSD')['FEDFUNDS'].diff()

# Step 3: Create lagged values of 'Delta FEDFUNDS' for up to 4 lags, again within bank,
# so a bank's first rows never borrow lags from the bank sorted just before it.
for lag in range(1, 5):
    df[f'Delta FEDFUNDS_lag{lag}'] = df.groupby('IDRSSD')['Delta FEDFUNDS'].shift(lag)

# Drop rows with NaN values created by differencing and lagging
# (this also drops rows with a missing value in any other column of df)
df = df.dropna().reset_index(drop=True)

# Step 4: Run the regression for each bank and store the coefficients
regressors = ['Delta FEDFUNDS'] + [f'Delta FEDFUNDS_lag{lag}' for lag in range(1, 5)]
results = []

for bank_id, bank_data in df.groupby('IDRSSD'):
    # An intercept plus five slopes cannot be estimated meaningfully unless the bank
    # has more observations than parameters; skip such banks instead of storing a
    # degenerate fit.
    if len(bank_data) <= len(regressors) + 1:
        continue

    # Set up the regression model
    X = sm.add_constant(bank_data[regressors])  # Add intercept (constant term)
    y = bank_data['Delta Deposit Rate']

    # Fit the model
    model = sm.OLS(y, X).fit()

    # Store the coefficients with the bank ID (copy, so the fitted model's params stay untouched)
    coeffs = model.params.copy()
    coeffs['IDRSSD'] = bank_id
    results.append(coeffs)

In [None]:
# Convert the per-bank coefficient Series in `results` to a DataFrame (one row per bank)
results_df = pd.DataFrame(results)

# Sum the coefficients on 'Delta FEDFUNDS' and its four lags into a column named 'Spread Beta'.
# skipna=False: if any coefficient is missing, the beta is NaN rather than a partial sum
# (the default would turn an all-NaN row into a spurious 0).
beta_terms = ['Delta FEDFUNDS', 'Delta FEDFUNDS_lag1', 'Delta FEDFUNDS_lag2', 'Delta FEDFUNDS_lag3', 'Delta FEDFUNDS_lag4']
results_df['Spread Beta'] = results_df[beta_terms].sum(axis=1, skipna=False)

# Merge results_df[['IDRSSD', 'Spread Beta']] with df on 'IDRSSD' (inner join: banks
# without an estimate are dropped). Any 'Spread Beta' column left in df by a previous
# run of this cell is removed first, so re-running does not create
# 'Spread Beta_x' / 'Spread Beta_y' duplicates.
df = df.drop(columns='Spread Beta', errors='ignore').merge(results_df[['IDRSSD', 'Spread Beta']], on='IDRSSD')

In [None]:
# Summary statistics of 'Spread Beta', one value per bank, keeping only betas between
# the 10th and 90th percentiles of the 'Spread Beta' column of df:
spread_beta_lo = df['Spread Beta'].quantile(0.1)
spread_beta_hi = df['Spread Beta'].quantile(0.9)
spread_beta_central = df.loc[df['Spread Beta'].between(spread_beta_lo, spread_beta_hi), ['IDRSSD', 'Spread Beta']]
spread_beta_central.drop_duplicates().sort_values('Spread Beta')['Spread Beta'].describe()

##### Deposit Beta

In [None]:
# Working frame for the deposit-beta regressions: keep only the columns they need.
deposit_beta_columns = [
    'IDRSSD', 'Date', 'Financial Institution Name',   # bank identifiers and dates
    'Total Deposit',                                  # bank-specific quantity variable
    'FEDFUNDS',                                       # aggregate rate
]
df = main[deposit_beta_columns]

In [None]:
# Bank-by-bank OLS of the log change in 'Total Deposit' on the current and four lagged
# changes in 'FEDFUNDS'. The summed slope coefficients are stored as 'Deposit Beta',
# merged back into df, and summarised at the end of the cell.

# Step 1: Sort by bank and date so that diffs and lags follow each bank's own timeline
df = df.sort_values(['IDRSSD', 'Date'])

# Step 2: Calculate the change in 'FEDFUNDS' within each bank. Differencing on the
# stacked panel (without grouping) would subtract the previous bank's last observation
# from the next bank's first one.
df['Delta FEDFUNDS'] = df.groupby('IDRSSD')['FEDFUNDS'].diff()

# Compute the log changes of 'Total Deposit'. Non-positive deposits are set to NaN
# before taking logs: log(0) is -inf, which dropna() below would not remove and which
# would then reach the regression.
positive_deposits = df['Total Deposit'].where(df['Total Deposit'] > 0)
df['Log Total Deposit'] = np.log(positive_deposits)
df['Delta Log Total Deposit'] = df.groupby('IDRSSD')['Log Total Deposit'].diff()

# Step 3: Create lagged values of 'Delta FEDFUNDS' for up to 4 lags, again within bank,
# so a bank's first rows never borrow lags from the bank sorted just before it.
for lag in range(1, 5):
    df[f'Delta FEDFUNDS_lag{lag}'] = df.groupby('IDRSSD')['Delta FEDFUNDS'].shift(lag)

# Drop rows with NaN values created by differencing and lagging
# (this also drops rows with a missing value in any other column of df)
df = df.dropna().reset_index(drop=True)

# Step 4: Run the regression for each bank and store the coefficients
regressors = ['Delta FEDFUNDS'] + [f'Delta FEDFUNDS_lag{lag}' for lag in range(1, 5)]
results = []

for bank_id, bank_data in df.groupby('IDRSSD'):
    # An intercept plus five slopes cannot be estimated meaningfully unless the bank
    # has more observations than parameters; skip such banks instead of storing a
    # degenerate fit.
    if len(bank_data) <= len(regressors) + 1:
        continue

    # Set up the regression model
    X = sm.add_constant(bank_data[regressors])  # Add intercept (constant term)
    y = bank_data['Delta Log Total Deposit']

    # Fit the model
    model = sm.OLS(y, X).fit()

    # Store the coefficients with the bank ID (copy, so the fitted model's params stay untouched)
    coeffs = model.params.copy()
    coeffs['IDRSSD'] = bank_id
    results.append(coeffs)

# Convert the results to a DataFrame (one row per bank)
results_df = pd.DataFrame(results)

# Sum the coefficients on 'Delta FEDFUNDS' and its lags into a column named 'Deposit Beta'.
# skipna=False: a missing coefficient yields a NaN beta rather than a partial sum.
results_df['Deposit Beta'] = results_df[regressors].sum(axis=1, skipna=False)

# Merge results_df[['IDRSSD', 'Deposit Beta']] with df on 'IDRSSD'
# (inner join: banks without an estimate are dropped from df)
df = df.merge(results_df[['IDRSSD', 'Deposit Beta']], on='IDRSSD')

# Show summary statistics with one 'Deposit Beta' per bank, keeping betas between the
# 10th and 90th percentiles of the 'Deposit Beta' column of df:
deposit_beta_lo = df['Deposit Beta'].quantile(0.1)
deposit_beta_hi = df['Deposit Beta'].quantile(0.9)
df.loc[df['Deposit Beta'].between(deposit_beta_lo, deposit_beta_hi),
       ['IDRSSD', 'Deposit Beta']].drop_duplicates().sort_values('Deposit Beta')['Deposit Beta'].describe()

In [None]:
# Density histogram of 'Deposit Beta' over the rows of df whose beta lies between the
# 10th and 90th percentiles of the column.
deposit_betas = df['Deposit Beta']
central_betas = deposit_betas[deposit_betas.between(deposit_betas.quantile(0.1), deposit_betas.quantile(0.9))]

sns.histplot(central_betas, bins = 50, color = 'black', stat = 'density')
plt.grid(color = 'lightgray', linestyle = '--', linewidth = 0.5, alpha = 0.5)
plt.title('Histogram of Deposit Beta')
plt.xlabel('Deposit Beta')
plt.ylabel('Density')
plt.xlim(-4, 2)
plt.show()

##### Maturity of Deposits

In [None]:
# Working frame for the deposit-maturity analysis: keep only the columns it needs.
maturity_columns = [
    'IDRSSD', 'Date',
    'Total Deposit', 'Deposit Rate', 'Deposit Maturity',
    'FEDFUNDS', 'Weighted Deposit Rate',
]
df = main[maturity_columns]

In [None]:
# Trim 'Deposit Maturity' at its 1% and 99% quantiles (both bounds inclusive):
maturity_lo = df['Deposit Maturity'].quantile(0.01)
maturity_hi = df['Deposit Maturity'].quantile(0.99)
df = df[df['Deposit Maturity'].between(maturity_lo, maturity_hi)]

In [None]:
# Average 'Deposit Maturity' per date for two groups of banks: the 50 banks with the
# largest 'Total Deposit' on 2019-03-31, and every other bank in df.
march_2019 = df[df['Date'] == '2019-03-31']
top_50 = march_2019.nlargest(50, 'Total Deposit')['IDRSSD']

# First line: banks in top_50; second line: banks NOT in top_50 (order matches the legend)
in_top_50 = df['IDRSSD'].isin(top_50)
for bank_group in (df[in_top_50], df[~in_top_50]):
    bank_group.groupby('Date')['Deposit Maturity'].mean().plot(linewidth=3)

plt.xlabel('Date')
plt.ylabel('Maturity')
plt.title('Maturity of Deposits')
plt.legend(['Top 50 Banks', 'Small Banks'])
plt.grid(color = 'lightgray', linestyle = '--', linewidth = 0.5, alpha = 0.5)
plt.show()