#### Housekeeping

In [17]:
# Load packages
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [18]:
# Directory holding the cleaned input CSVs read by the cells below.
# The default is the original author's local folder; set the BANKING_DATA_DIR
# environment variable to run the notebook on another machine without editing
# this cell.
path = os.environ.get(
    'BANKING_DATA_DIR',
    'C:/Users/angel/Documents/Economics/Research/Banking Project/data/clean',
)

# Make it the working directory so that the bare file names passed to
# pd.read_csv below resolve against it:
os.chdir(path)

In [19]:
# Fed Funds Rate series: read the CSV with 'DATE' parsed as datetimes and
# expose that column under the name 'Date'.
fed_funds = (
    pd.read_csv('FEDFUNDS.csv', parse_dates = ['DATE'])
      .rename(columns = {'DATE': 'Date'})
)

# Scale the rate down by a factor of 100.
# NOTE(review): presumably the file quotes the rate in percent, so this yields
# a decimal fraction (e.g. 5.25 -> 0.0525) — confirm against the data source.
fed_funds['FEDFUNDS'] = fed_funds['FEDFUNDS'].div(100)

# Move every observation back by one calendar day. For first-of-month
# timestamps this lands on the last day of the previous month
# (assumes the file is stamped on the 1st of each month — TODO confirm).
fed_funds['Date'] = fed_funds['Date'] - pd.DateOffset(days = 1)

In [20]:
# Load CSV_TRANSFORMATIONS.csv with 'D_DT_TRANS' parsed as datetimes, then
# give the three columns used downstream shorter names:
#   D_DT_TRANS            -> Date
#   #ID_RSSD_PREDECESSOR  -> IDRSSD_PRE
#   ID_RSSD_SUCCESSOR     -> IDRSSD_SUC
mergers = (
    pd.read_csv('CSV_TRANSFORMATIONS.csv', parse_dates = ['D_DT_TRANS'])
      .rename(columns = {
          'D_DT_TRANS': 'Date',
          '#ID_RSSD_PREDECESSOR': 'IDRSSD_PRE',
          'ID_RSSD_SUCCESSOR': 'IDRSSD_SUC',
      })
)

In [21]:
# Call reports: read the CSV with 'Date' parsed as datetimes, then keep only
# the columns whose name does not start with 'Unnamed'.
cr = (
    pd.read_csv('call_reports.csv', parse_dates = ['Date'], low_memory = False)
      .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
)

### Balance Sheet (Schedule RC)

In [None]:
# Readable aliases for single raw call-report columns.
cr['Total Assets'] = cr.loc[:, 'RCFD2170']
# NOTE(review): this column is named 'Total Deposits' (plural), while the
# derived-variable cell further down reads 'Total Deposit' (singular) —
# confirm which name is intended / whether the CSV already ships the latter.
cr['Total Deposits'] = cr.loc[:, 'RCON2200']

# Items built as the sum of two raw columns (a missing value in either
# component makes the sum missing).
cr['Loans'] = cr['RCFD5369'].add(cr['RCFDB529'])
cr['Fed Funds Purchased'] = cr['RCONB993'].add(cr['RCFDB995'])

In [None]:
# Variables derived from the raw variables

def _ytd_to_period(frame, column):
    """Return per-period amounts for a column that accumulates within a year.

    Within each ('IDRSSD', 'Year') group the row-to-row difference of
    `column` is taken. Wherever that difference is missing (e.g. the first row
    of each group, which has no predecessor) the reported value of `column`
    is used instead.

    NOTE(review): presumably the income/expense items are reported
    year-to-date, which is why they are differenced within the year — confirm.
    The difference follows the current row order of `frame`, so rows are
    assumed to be sorted by date within each bank.
    """
    return frame.groupby(['IDRSSD', 'Year'])[column].diff().fillna(frame[column])


# Per-period deposit expenditure of each bank (IDRSSD):
cr['Actual Expenditure'] = _ytd_to_period(cr, 'Deposit Expenditure')

# Aggregate Deposits: sum of 'Total Deposit' over all banks on a given date.
cr['Aggregate Deposits'] = cr.groupby('Date')['Total Deposit'].transform('sum')

# Row-to-row variation of 'Aggregate Deposits' along each bank's rows
# (absolute and relative). NOTE(review): diff()/pct_change() follow row
# order, so cr is assumed to be sorted by 'Date' within each IDRSSD — confirm.
cr['Aggregate Deposits Variation'] = cr.groupby('IDRSSD')['Aggregate Deposits'].diff()
cr['Pct. Aggregate Deposits Variation'] = cr.groupby('IDRSSD')['Aggregate Deposits'].pct_change()

# Share of a bank's deposits in the total deposits of all banks on that date
# (reuses the per-date sum computed above instead of recomputing it):
cr['Deposit Share'] = cr['Total Deposit'] / cr['Aggregate Deposits']

# Calculate s_{it} - s_{it-1} (Deposit Share Variation)
cr['Deposit Share Variation'] = cr.groupby('IDRSSD')['Deposit Share'].diff()

# Small time deposits: per-period expense, implied rate, share of deposits
cr['Actual Small TD Exp'] = _ytd_to_period(cr, 'Small TD Exp')
cr['Small TD Rate'] = cr['Actual Small TD Exp'] / cr['Small TD']
cr['Small TD Share'] = cr['Small TD'] / cr['Total Deposit']

# Large time deposits: per-period expense, implied rate, share of deposits
cr['Actual Large TD Exp'] = _ytd_to_period(cr, 'Large TD Exp')
cr['Large TD Rate'] = cr['Actual Large TD Exp'] / cr['Large TD']
cr['Large TD Share'] = cr['Large TD'] / cr['Total Deposit']

# Savings
cr['Savings Share'] = cr['Savings Accounts'] / cr['Total Deposit']

# Transaction
cr['Transaction Share'] = cr['Transaction Accounts'] / cr['Total Deposit']

# Repo: per-period expenditure and income. The income series is differenced
# on 'Income Repo' itself; it was previously built from the differences of
# 'Expenditure Repo', which mixed the two series.
cr['Actual Expenditure Repo'] = _ytd_to_period(cr, 'Expenditure Repo')
cr['Actual Income Repo'] = _ytd_to_period(cr, 'Income Repo')


In [None]:
# Compute deposit rates:
#cr['Deposit Rate'] = cr['Actual Expenditure'] / cr['Total Deposit']

# Create an extra column in cr with the avg. deposit rate per Date:
#cr['Avg. Deposit Rate'] = cr.groupby('Date')['Deposit Rate'].transform('mean')
#cr['Std. Deposit Rate'] = cr.groupby('Date')['Deposit Rate'].transform('std')
#cr['R_hat'] = (cr['Deposit Rate'] - cr['Avg. Deposit Rate']) / cr['Std. Deposit Rate']

# Average the deposit rate by 'Date', weighting by 'Total Deposit':
#cr['Weighted Deposit Rate'] = cr.groupby('Date')['Deposit Rate'].transform(lambda x: np.average(x, weights = cr.loc[x.index, 'Total Deposit']))

#### Assets

### Loans

In [None]:
# Interest income on loans: sum of the two raw items RIAD4010 and RIAD4065.
cr['Interest Income Loans'] = cr['RIAD4010'].add(cr['RIAD4065'])

# Per-period version: difference the series within each (IDRSSD, Year) group;
# where the difference is missing (e.g. a group's first row) fall back to the
# reported value itself.
cr['Actual Interest Income on Loans'] = (
    cr.groupby(['IDRSSD', 'Year'])['Interest Income Loans']
      .diff()
      .fillna(cr['Interest Income Loans'])
)


### Securities

In [24]:
# Readable aliases for the raw securities columns:
#   RIADB488 -> 'Interest Income in Securities'
#   RCFDB558 -> 'Securities'
cr['Interest Income in Securities'] = cr.loc[:, 'RIADB488']
cr['Securities'] = cr.loc[:, 'RCFDB558']