In [6]:
import os

# Report the directory the kernel is running in.
working_dir = os.getcwd()
print("Current working directory:", working_dir)

Current working directory: /Users/ching-lung/deposit-prediction


## Data Preprocessing

This notebook is used for data pre-processing. The main goal is to merge the macroeconomic predictors with the target responses: interest-bearing deposits, non-interest-bearing deposits, and time deposits.

In [8]:
# imported packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [10]:
# Load the four stress-test scenario CSV files into one DataFrame each.
# The directory is stated once so the project location only has to be edited
# in a single place.
# NOTE(review): this is an absolute path on one machine; consider deriving it
# from the project root (the working directory printed above) instead.
RAW_DATA_DIR = "/Users/ching-lung/deposit-prediction/raw_data"

db  = pd.read_csv(f"{RAW_DATA_DIR}/stresstest_baseline.csv")          # baseline
dsa = pd.read_csv(f"{RAW_DATA_DIR}/stresstest_severely_adverse.csv")  # severely adverse
dea = pd.read_csv(f"{RAW_DATA_DIR}/stresstest_exploratory_A.csv")     # exploratory A
deb = pd.read_csv(f"{RAW_DATA_DIR}/stresstest_exploratory_B.csv")     # exploratory B

# Preview the first rows of the baseline scenario
db.head(3)

Unnamed: 0,Scenario Name,Date,Real GDP growth,Nominal GDP growth,Real disposable income growth,Nominal disposable income growth,Unemployment rate,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,10-year Treasury yield,BBB corporate yield,Mortgage rate,Prime rate,Dow Jones Total Stock Market Index (Level),House Price Index (Level),Commercial Real Estate Price Index (Level),Market Volatility Index (Level)
0,Supervisory Baseline,2024 Q1,1.0,3.2,2.5,4.6,3.9,2.4,5.3,4.2,4.1,5.8,6.5,8.4,47787.5,311.6,350.2,24.6
1,Supervisory Baseline,2024 Q2,0.7,2.9,1.8,4.0,4.1,2.3,5.0,4.0,4.0,5.7,6.1,8.1,47787.5,312.8,351.5,26.0
2,Supervisory Baseline,2024 Q3,0.9,3.1,1.8,4.0,4.2,2.4,4.6,3.9,3.9,5.7,5.8,7.7,47787.5,313.9,352.8,26.8


## Baseline

In [44]:
# Remove the space inside the quarter labels, e.g. "2024 Q1" -> "2024Q1".
db['Date'] = db['Date'].str.replace(' ', '')

# Starting levels written to the first row of the scenario (2024Q1 in the
# output shown below). Every later row is derived from these by compounding
# the matching "<name> growth" column.
# NOTE(review): the earlier inline notes gave the unit as billions of dollars
# but also cited "1976 Q1" and figures that did not match these values; the
# source and vintage of the numbers should be confirmed and recorded here.
initial_gdp = {
    'Nominal GDP': 28620,
    'Real GDP': 23050,
    'Nominal disposable income': 20800,
    'Real disposable income': 17600,
}

# Names of the derived level columns, in the order they are appended.
gdp_columns = list(initial_gdp)

# Fail early with a readable message instead of a KeyError/IndexError mid-loop.
missing_growth = [f'{name} growth' for name in gdp_columns
                  if f'{name} growth' not in db.columns]
assert not missing_growth, f"growth columns missing from scenario: {missing_growth}"
assert len(db) > 0, "scenario frame is empty"

# Build each level series in one positional pass and assign the column once.
# Row 0 holds the starting level itself, so the growth rate in row 0 is not
# applied. Each step is rounded to 2 decimals, so every row compounds from the
# already-rounded previous level.
# NOTE(review): the growth column is applied as a per-row (per-quarter) rate.
# If the scenario file reports annualized percent changes, the factor should
# be (1 + g / 100) ** 0.25 instead -- confirm against the data definitions.
for level_name, start_level in initial_gdp.items():
    growth_pct = db[f'{level_name} growth'].to_numpy(dtype='float64')
    levels = np.empty(len(db), dtype='float64')
    levels[0] = start_level
    for row in range(1, len(levels)):
        levels[row] = round(levels[row - 1] * (1 + growth_pct[row] / 100), 2)
    db[level_name] = levels

# NOTE(review): the file name is spelled "basline"; it is kept unchanged here
# because other code may read this exact path.
db.to_csv("/Users/ching-lung/deposit-prediction/cleaned_data/stress_basline.csv", index=False)

# Display the final DataFrame
db.head(3)

Unnamed: 0,Scenario Name,Date,Real GDP growth,Nominal GDP growth,Real disposable income growth,Nominal disposable income growth,Unemployment rate,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,...,Mortgage rate,Prime rate,Dow Jones Total Stock Market Index (Level),House Price Index (Level),Commercial Real Estate Price Index (Level),Market Volatility Index (Level),Nominal GDP,Real GDP,Nominal disposable income,Real disposable income
0,Supervisory Baseline,2024Q1,1.0,3.2,2.5,4.6,3.9,2.4,5.3,4.2,...,6.5,8.4,47787.5,311.6,350.2,24.6,28620.0,23050.0,20800.0,17600.0
1,Supervisory Baseline,2024Q2,0.7,2.9,1.8,4.0,4.1,2.3,5.0,4.0,...,6.1,8.1,47787.5,312.8,351.5,26.0,29449.98,23211.35,21632.0,17916.8
2,Supervisory Baseline,2024Q3,0.9,3.1,1.8,4.0,4.2,2.4,4.6,3.9,...,5.8,7.7,47787.5,313.9,352.8,26.8,30362.93,23420.25,22497.28,18239.3


## Severely Adverse

In [46]:
# Remove the space inside the quarter labels, e.g. "2024 Q1" -> "2024Q1".
dsa['Date'] = dsa['Date'].str.replace(' ', '')

# Starting levels written to the first row of the scenario (2024Q1 in the
# output shown below). Every later row is derived from these by compounding
# the matching "<name> growth" column.
# NOTE(review): the earlier inline notes gave the unit as billions of dollars
# but also cited "1976 Q1" and figures that did not match these values; the
# source and vintage of the numbers should be confirmed and recorded here.
initial_gdp = {
    'Nominal GDP': 28620,
    'Real GDP': 23050,
    'Nominal disposable income': 20800,
    'Real disposable income': 17600,
}

# Names of the derived level columns, in the order they are appended.
gdp_columns = list(initial_gdp)

# Fail early with a readable message instead of a KeyError/IndexError mid-loop.
missing_growth = [f'{name} growth' for name in gdp_columns
                  if f'{name} growth' not in dsa.columns]
assert not missing_growth, f"growth columns missing from scenario: {missing_growth}"
assert len(dsa) > 0, "scenario frame is empty"

# Build each level series in one positional pass and assign the column once.
# Row 0 holds the starting level itself, so the growth rate in row 0 is not
# applied. Each step is rounded to 2 decimals, so every row compounds from the
# already-rounded previous level.
# NOTE(review): the growth column is applied as a per-row (per-quarter) rate.
# If the scenario file reports annualized percent changes, the factor should
# be (1 + g / 100) ** 0.25 instead -- confirm against the data definitions.
for level_name, start_level in initial_gdp.items():
    growth_pct = dsa[f'{level_name} growth'].to_numpy(dtype='float64')
    levels = np.empty(len(dsa), dtype='float64')
    levels[0] = start_level
    for row in range(1, len(levels)):
        levels[row] = round(levels[row - 1] * (1 + growth_pct[row] / 100), 2)
    dsa[level_name] = levels

dsa.to_csv("/Users/ching-lung/deposit-prediction/cleaned_data/stress_severely_adverse.csv", index=False)

# Display the final DataFrame
dsa.head(3)

Unnamed: 0,Scenario Name,Date,Real GDP growth,Nominal GDP growth,Real disposable income growth,Nominal disposable income growth,Unemployment rate,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,...,Mortgage rate,Prime rate,Dow Jones Total Stock Market Index (Level),House Price Index (Level),Commercial Real Estate Price Index (Level),Market Volatility Index (Level),Nominal GDP,Real GDP,Nominal disposable income,Real disposable income
0,Supervisory Severely Adverse,2024Q1,-11.6,-9.9,-7.8,-6.0,5.6,2.3,2.1,0.4,...,4.0,5.1,26130.6,261.4,338.5,65.0,28620.0,23050.0,20800.0,17600.0
1,Supervisory Severely Adverse,2024Q2,-6.7,-5.7,-4.0,-2.8,6.8,1.5,0.2,0.3,...,3.7,3.2,22761.8,241.1,328.0,70.0,26988.66,21505.65,20217.6,16896.0
2,Supervisory Severely Adverse,2024Q3,-8.0,-7.1,-4.2,-3.2,8.1,1.3,0.1,0.4,...,3.8,3.1,21799.3,225.4,314.0,61.4,25072.47,19785.2,19570.64,16186.37


## Exploratory Condition A

In [48]:
# Remove the space inside the quarter labels, e.g. "2024 Q1" -> "2024Q1".
dea['Date'] = dea['Date'].str.replace(' ', '')

# Starting levels written to the first row of the scenario (2024Q1 in the
# output shown below). Every later row is derived from these by compounding
# the matching "<name> growth" column.
# NOTE(review): the earlier inline notes gave the unit as billions of dollars
# but also cited "1976 Q1" and figures that did not match these values; the
# source and vintage of the numbers should be confirmed and recorded here.
initial_gdp = {
    'Nominal GDP': 28620,
    'Real GDP': 23050,
    'Nominal disposable income': 20800,
    'Real disposable income': 17600,
}

# Names of the derived level columns, in the order they are appended.
gdp_columns = list(initial_gdp)

# Fail early with a readable message instead of a KeyError/IndexError mid-loop.
missing_growth = [f'{name} growth' for name in gdp_columns
                  if f'{name} growth' not in dea.columns]
assert not missing_growth, f"growth columns missing from scenario: {missing_growth}"
assert len(dea) > 0, "scenario frame is empty"

# Build each level series in one positional pass and assign the column once.
# Row 0 holds the starting level itself, so the growth rate in row 0 is not
# applied. Each step is rounded to 2 decimals, so every row compounds from the
# already-rounded previous level.
# NOTE(review): the growth column is applied as a per-row (per-quarter) rate.
# If the scenario file reports annualized percent changes, the factor should
# be (1 + g / 100) ** 0.25 instead -- confirm against the data definitions.
for level_name, start_level in initial_gdp.items():
    growth_pct = dea[f'{level_name} growth'].to_numpy(dtype='float64')
    levels = np.empty(len(dea), dtype='float64')
    levels[0] = start_level
    for row in range(1, len(levels)):
        levels[row] = round(levels[row - 1] * (1 + growth_pct[row] / 100), 2)
    dea[level_name] = levels

dea.to_csv("/Users/ching-lung/deposit-prediction/cleaned_data/stress_exploratory_A.csv", index=False)

# Display the final DataFrame
dea.head(3)

Unnamed: 0,Scenario Name,Date,Real GDP growth,Nominal GDP growth,Real disposable income growth,Nominal disposable income growth,Unemployment rate,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,...,Mortgage rate,Prime rate,Dow Jones Total Stock Market Index (Level),House Price Index (Level),Commercial Real Estate Price Index (Level),Market Volatility Index (Level),Nominal GDP,Real GDP,Nominal disposable income,Real disposable income
0,Exploratory Conditions A,2024Q1,-1.3,-0.4,2.2,4.9,4.2,3.0,6.2,5.5,...,7.8,9.2,36067.7,296.7,339.6,45.0,28620.0,23050.0,20800.0,17600.0
1,Exploratory Conditions A,2024Q2,-2.1,1.3,-0.5,3.8,4.7,4.5,6.5,5.0,...,7.5,9.5,34244.7,284.2,330.3,50.0,28992.06,22565.95,21590.4,17512.0
2,Exploratory Conditions A,2024Q3,-3.6,0.7,-1.7,3.2,5.5,5.3,6.8,4.7,...,7.3,9.8,33723.8,271.9,318.6,45.4,29195.0,21753.58,22281.29,17214.3


## Exploratory Condition B

In [50]:
# Remove the space inside the quarter labels, e.g. "2024 Q1" -> "2024Q1".
deb['Date'] = deb['Date'].str.replace(' ', '')

# Starting levels written to the first row of the scenario (2024Q1 in the
# output shown below). Every later row is derived from these by compounding
# the matching "<name> growth" column.
# NOTE(review): the earlier inline notes gave the unit as billions of dollars
# but also cited "1976 Q1" and figures that did not match these values; the
# source and vintage of the numbers should be confirmed and recorded here.
initial_gdp = {
    'Nominal GDP': 28620,
    'Real GDP': 23050,
    'Nominal disposable income': 20800,
    'Real disposable income': 17600,
}

# Names of the derived level columns, in the order they are appended.
gdp_columns = list(initial_gdp)

# Fail early with a readable message instead of a KeyError/IndexError mid-loop.
missing_growth = [f'{name} growth' for name in gdp_columns
                  if f'{name} growth' not in deb.columns]
assert not missing_growth, f"growth columns missing from scenario: {missing_growth}"
assert len(deb) > 0, "scenario frame is empty"

# Build each level series in one positional pass and assign the column once.
# Row 0 holds the starting level itself, so the growth rate in row 0 is not
# applied. Each step is rounded to 2 decimals, so every row compounds from the
# already-rounded previous level.
# NOTE(review): the growth column is applied as a per-row (per-quarter) rate.
# If the scenario file reports annualized percent changes, the factor should
# be (1 + g / 100) ** 0.25 instead -- confirm against the data definitions.
for level_name, start_level in initial_gdp.items():
    growth_pct = deb[f'{level_name} growth'].to_numpy(dtype='float64')
    levels = np.empty(len(deb), dtype='float64')
    levels[0] = start_level
    for row in range(1, len(levels)):
        levels[row] = round(levels[row - 1] * (1 + growth_pct[row] / 100), 2)
    deb[level_name] = levels

deb.to_csv("/Users/ching-lung/deposit-prediction/cleaned_data/stress_exploratory_B.csv", index=False)

# Display the final DataFrame
deb.head(3)

Unnamed: 0,Scenario Name,Date,Real GDP growth,Nominal GDP growth,Real disposable income growth,Nominal disposable income growth,Unemployment rate,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,...,Mortgage rate,Prime rate,Dow Jones Total Stock Market Index (Level),House Price Index (Level),Commercial Real Estate Price Index (Level),Market Volatility Index (Level),Nominal GDP,Real GDP,Nominal disposable income,Real disposable income
0,Exploratory Conditions B,2024Q1,-3.8,-2.4,1.0,4.1,4.5,3.4,5.9,4.9,...,7.9,8.9,26123.4,268.1,337.8,65.0,28620.0,23050.0,20800.0,17600.0
1,Exploratory Conditions B,2024Q2,-1.8,2.1,-0.5,4.4,5.0,5.1,6.2,5.3,...,8.1,9.2,22753.4,251.8,326.6,70.0,29221.02,22635.1,21715.2,17512.0
2,Exploratory Conditions B,2024Q3,-13.2,-8.6,-8.1,-2.9,7.1,6.0,5.9,4.9,...,8.0,8.9,21790.6,235.5,312.6,61.4,26708.01,19647.27,21085.46,16093.53
