In [1]:
########################################################################
# Required for Jupyter Notebooks to find the src directory.
import sys
sys.path.append('../')
########################################################################

import pandas as pd

from src.data_loading.loaders import DataRetriever
from src.metrics.volatility import Volatility

## Load Data from Data Directory

Read the transformed data from `../data/transformed_data.xlsx`.

In [2]:
# Build a loader for the transformed workbook and read it into `data`.
excel_loader = DataRetriever.create(path_or_url='../data/transformed_data.xlsx')
data = excel_loader.load_data()

# Print the dimensions first; the preview of the first rows is the cell's
# displayed output because it is the last expression.
print(data.shape)
data.head()

(312, 33)


Unnamed: 0,date,real_gdp,gov_prod,_merge,real_gdpp,real_gdp_log,real_gdpp_log,real_gdp_log_diff,real_gdpp_log_diff,real_gdp_hp_cycle,...,real_gdp_log_bk_cycle,real_gdp_log_bk_trend,real_gdp_log_lin_cycle,real_gdp_log_lin_trend,real_gdpp_log_hp_cycle,real_gdpp_log_hp_trend,real_gdpp_log_bk_cycle,real_gdpp_log_bk_trend,real_gdpp_log_lin_cycle,real_gdpp_log_lin_trend
0,1947-01-01,2182.681,560.515,both,1622.166,7.688309,7.391518,,,68.06473,...,,,-0.12525,7.813559,0.000377,7.391141,,,0.006187,7.385331
1,1947-04-01,2176.892,560.091,both,1616.801,7.685653,7.388205,-0.002656,-0.003313,37.021396,...,,,-0.135578,7.821231,-0.006498,7.394703,,,-0.005628,7.393833
2,1947-07-01,2172.432,560.034,both,1612.398,7.683603,7.385478,-0.002051,-0.002727,7.264522,...,,,-0.1453,7.828903,-0.012787,7.398265,,,-0.016858,7.402336
3,1947-10-01,2206.452,555.556,both,1650.896,7.699141,7.409073,0.015539,0.023596,15.879428,...,,,-0.137433,7.836575,0.00725,7.401824,,,-0.001765,7.410838
4,1948-01-01,2239.682,563.895,both,1675.787,7.714089,7.424038,0.014948,0.014965,23.525896,...,,,-0.130157,7.844246,0.018671,7.405367,,,0.004697,7.419341


## Volatility Metrics

In [3]:
# Wrap the loaded data in the project's Volatility helper. The cells below
# call its standard_deviation and coefficient_of_variation methods, passing
# column names from `data`.
metrics = Volatility(dataframe=data)

### Standard Deviation

In [4]:
# Maps each series label to its (GDP column, GDPP column) pair.
# Insertion order is the column order of the results table built from it.
std_cols = {
    label: (gdp_col, gdpp_col)
    for label, gdp_col, gdpp_col in [
        ('Levels', 'real_gdp', 'real_gdpp'),
        ('Log Transformed', 'real_gdp_log', 'real_gdpp_log'),
        ('Log Differences', 'real_gdp_log_diff', 'real_gdpp_log_diff'),
        ('HP Filtered', 'real_gdp_hp_cycle', 'real_gdpp_hp_cycle'),
        ('BK Filtered', 'real_gdp_bk_cycle', 'real_gdpp_bk_cycle'),
        ('Linear Detrended', 'real_gdp_lin_cycle', 'real_gdpp_lin_cycle'),
        ('Log HP Filtered', 'real_gdp_log_hp_cycle', 'real_gdpp_log_hp_cycle'),
        ('Log BK Filtered', 'real_gdp_log_bk_cycle', 'real_gdpp_log_bk_cycle'),
        ('Log Linear Detrended', 'real_gdp_log_lin_cycle', 'real_gdpp_log_lin_cycle'),
    ]
}

In [5]:
# Results table with one row each for GDP, GDPP and their difference
# (GDP minus GDPP); one column per series is added in the loop below.
std_results = pd.DataFrame(index=['GDP', 'GDPP', 'Diff'])

# Fill one column per series label with the two standard deviations and
# their difference.
for series in std_cols:
    gdp, gdpp = std_cols[series]
    std_gdp, std_gdpp = (
        metrics.standard_deviation(column=name) for name in (gdp, gdpp)
    )
    diff = std_gdp - std_gdpp
    std_results[series] = [std_gdp, std_gdpp, diff]

std_results

# To export the table as markdown instead, run:
# std_results.to_markdown(floatfmt=".3f")

Unnamed: 0,Levels,Log Transformed,Log Differences,HP Filtered,BK Filtered,Linear Detrended,Log HP Filtered,Log BK Filtered,Log Linear Detrended
GDP,6212.930385,0.696144,0.011152,167.370734,147.711596,1187.931541,0.016334,0.014947,0.07514
GDPP,5291.84675,0.771394,0.015757,182.649695,162.991481,1171.34402,0.025009,0.023749,0.081994
Diff,921.083635,-0.07525,-0.004606,-15.278961,-15.279884,16.587521,-0.008675,-0.008802,-0.006855


### Coefficient of Variation

In [6]:
# Maps each series label to its (GDP column, GDPP column) pair for the
# coefficient-of-variation section.
cov_cols = {
    label: (gdp_col, gdpp_col)
    for label, gdp_col, gdpp_col in [
        ('Levels', 'real_gdp', 'real_gdpp'),
        ('Log Transformed', 'real_gdp_log', 'real_gdpp_log'),
        ('Log Differences', 'real_gdp_log_diff', 'real_gdpp_log_diff'),
        ('HP Filtered', 'real_gdp_hp_cycle', 'real_gdpp_hp_cycle'),
        ('BK Filtered', 'real_gdp_bk_cycle', 'real_gdpp_bk_cycle'),
        ('Linear Detrended', 'real_gdp_lin_cycle', 'real_gdpp_lin_cycle'),
        ('Log HP Filtered', 'real_gdp_log_hp_cycle', 'real_gdpp_log_hp_cycle'),
        ('Log BK Filtered', 'real_gdp_log_bk_cycle', 'real_gdpp_log_bk_cycle'),
        ('Log Linear Detrended', 'real_gdp_log_lin_cycle', 'real_gdpp_log_lin_cycle'),
    ]
}

In [7]:
# Results table with one row each for GDP, GDPP and their difference
# (GDP minus GDPP); one column per series is added in the loop below.
cov_results = pd.DataFrame(index=['GDP', 'GDPP', 'Diff'])

# Calculate the coefficient of variation of GDP and GDPP for each series.
# Iterate over cov_cols (this section's own mapping) rather than std_cols,
# so the cell does not silently depend on the standard deviation section.
for series, (gdp, gdpp) in cov_cols.items():

    # If the series is a filtered series, use the reference column to
    # calculate the coefficient of variation. This avoids division by
    # very small numbers.
    # NOTE(review): presumably the reference column supplies the
    # denominator (mean) -- confirm against Volatility.coefficient_of_variation.
    if series in ['HP Filtered', 'BK Filtered', 'Linear Detrended']:
        # Cycles of the level series are referenced to the level series.
        cov_gdp = metrics.coefficient_of_variation(column=gdp, reference_col='real_gdp')
        cov_gdpp = metrics.coefficient_of_variation(column=gdpp, reference_col='real_gdpp')
    elif series in ['Log HP Filtered', 'Log BK Filtered', 'Log Linear Detrended']:
        # Cycles of the log series are referenced to the log series.
        cov_gdp = metrics.coefficient_of_variation(column=gdp, reference_col='real_gdp_log')
        cov_gdpp = metrics.coefficient_of_variation(column=gdpp, reference_col='real_gdpp_log')
    else:
        # Unfiltered series: no reference column is passed.
        cov_gdp = metrics.coefficient_of_variation(column=gdp)
        cov_gdpp = metrics.coefficient_of_variation(column=gdpp)

    diff = cov_gdp - cov_gdpp
    cov_results[series] = [cov_gdp, cov_gdpp, diff]

cov_results

# Convert the results to a markdown table.
# cov_results.to_markdown(floatfmt=".3f")

Unnamed: 0,Levels,Log Transformed,Log Differences,HP Filtered,BK Filtered,Linear Detrended,Log HP Filtered,Log BK Filtered,Log Linear Detrended
GDP,0.61226,0.077293,1.458458,0.016494,0.014556,0.117066,0.001814,0.00166,0.008343
GDPP,0.67071,0.08859,1.969046,0.02315,0.020658,0.148461,0.002872,0.002727,0.009417
Diff,-0.058451,-0.011296,-0.510588,-0.006656,-0.006102,-0.031395,-0.001059,-0.001068,-0.001074
