In [None]:
#### THIS NOTEBOOK RUNS MRI PROJECT FUNCTIONALITY

In [1]:
# Append the relative path '../..' to the module search path so the imports from HH_Modules below can be resolved
# NOTE(review): the path is relative, so it presumably depends on the notebook's working directory - confirm
import sys 
sys.path.append('../..')

# Project helpers: business date index tools (hh_dates) and the MRI build / standardize / aggregate steps (hh_mri)
from HH_Modules.hh_dates import hh_create_bus_dates
from HH_Modules.hh_dates import hh_drop_nyse_closures
from HH_Modules.hh_mri import hh_build_mri_from_model
from HH_Modules.hh_mri import hh_standartize_mri_data
from HH_Modules.hh_mri import hh_aggregate_mri_data

# Numeric / tabular libraries and the date types used by the parameters cell
import numpy as np
import pandas as pd
from datetime import date
from datetime import timedelta

In [2]:
# NOTEBOOK-WIDE CONSTANTS AND PARAMETERS

# Standard date format used when dates are converted to strings
date_format = '%Y-%m-%d'
# First and last dates of the interval the MRI is built for
first_date_stamp = date(1998, 1, 1)
last_date_stamp = date(2018, 12, 31)

# Source xlsx file and the name of the model to read from it
path_mri_data_xlsx = 'Data_Files/Source_Files/mri_data.xlsx'
mri_model_name = 'Model 01'

# HDF5 file (and the object key inside it) with structured source data for the selected date interval
path_mri_data_hdf = 'Data_Files/Source_Files/mri_data.h5'
object_mri_data_hdf = 'source_data'

# Date used to check the filling of early values, as a date object and as a formatted string
standart_date_stamp = date(2001, 1, 1)
standart_date = format(standart_date_stamp, date_format)

# Rolling window limits for the z-score calculation, expressed in business days
roll_window_min = 252                       # one business year
roll_window_max = 100 * roll_window_min     # one hundred business years
# Lower and upper bounds for z-score winsorizing
winsor_bound_bottom, winsor_bound_top = -5, 5

# Upper limit of the moving average window used in the MRI calculation
MRI_moving_average_window_max = 5

# HDF5 file with group matrices built from the z-scored means of the standardized, winsorized,
# weighted z-score matrices of each group's assets
path_mri_standart_hdf = 'Data_Files/Source_Files/mri_group_z_matrix.h5'

# HDF5 file with asset-level results and the object keys stored in it
path_mri_assets_hdf = 'Data_Files/Source_Files/mri_released_assets.h5'
object_selected_data_hdf = 'selected_data'
object_standartized_data_hdf = 'standartized_data'

# HDF5 file with group-level results and the object keys stored in it
path_mri_groups_hdf = 'Data_Files/Source_Files/mri_released_groups.h5'
object_diag_grouped_hdf = 'diag_grouped_data'
object_perc_grouped_hdf = 'percentile_grouped_data'

# HDF5 file with MRI-level results and the object keys stored in it
path_mri_index_hdf = 'Data_Files/Source_Files/mri_released_index.h5'
object_diag_mri_hdf = 'diag_MRI_data'
object_raw_perc_mri_hdf = 'raw_perc_MRI_data'
object_ma_perc_mri_hdf = 'ma_perc_MRI_data'

In [3]:
# Create the business dates index for the interval from first_date_stamp to last_date_stamp with a daily step
date_index = hh_create_bus_dates('date', first_date_stamp, last_date_stamp, 'day')
# Drop NYSE closure dates from the business dates index
date_index = hh_drop_nyse_closures(date_index)

# Switch for the source data refresh:
#   True  - regenerate the HDF5 file from the source xlsx file
#   False - take the existing HDF5 file as is
update_hdf_from_xlsx = False

# Read the model profile and the source data limited to date_index.
# The call yields three tables: the asset part of the model, the MRI part of the model and the selected source data.
df_model_asset, df_model_MRI, df_selected_data = hh_build_mri_from_model(
    path_mri_data_xlsx,
    mri_model_name,
    path_mri_data_hdf,
    object_mri_data_hdf,
    date_index,
    update_hdf_from_xlsx,
)

hh_create_bus_dates: Business dates index for period from 1998-01-01 to 2018-12-31 with day interval successfully generated
hh_drop_nyse_closures: NYSE closure dates successfully dropped from date index
hh_build_mri_from_model: Model profile successfully read
hh_build_mri_from_model: Group border rows successfully dropped
hh_build_mri_from_model: Group sum weights control successfully performed
hh_build_mri_from_model: Model asset part extracted
hh_build_mri_from_model: Model MRI part extracted
hh_build_mri_from_model: HDF5 file taken as is because of update refusing
hh_build_mri_from_model: Limited data from HDF5 file Data_Files/Source_Files/mri_data.h5 extracted successfully
hh_build_mri_from_model: Missed border date rows in limited data table added
hh_missing_data_manager: np.Nan substitution with option previous performed successfully
hh_missing_data_manager: Overall count of actual np.Nan values in data table is 7252
hh_build_mri_from_model: Missed data in limited data table fill

In [4]:
# OPTIONAL CELL: IT CAN BE SKIPPED TO AVOID HEAVY CALCULATIONS WHEN THE MODEL AND SOURCE DATASETS ARE UNCHANGED -
# DATA FOR FURTHER CALCULATIONS WILL THEN BE TAKEN FROM THE HDF5 FILES!

# Standardizing the dataset. The call returns three collections:
#   - standardized winsorized z-scores for all assets
#   - diagonals of the groups' z matrices for all groups
#   - percentiled groups' z matrices for all groups
df_standartized_assets, df_diag_mean_z_groups, df_percentiled_groups = hh_standartize_mri_data(
    df_model_asset,
    df_selected_data,
    standart_date,
    roll_window_min,
    roll_window_max,
    winsor_bound_bottom,
    winsor_bound_top,
    path_mri_standart_hdf,
)


def _save_table_to_hdf(table, path, key):
    """Move the index of `table` into columns and store the result under `key` in the HDF5 file `path`.

    The file is opened in append mode and the object is written in the 'fixed' format.
    """
    table.reset_index().to_hdf(path, key = key, mode = 'a', format = 'fixed')


# Asset-level results: cast to float and store, so heavy calculations can be skipped while model and data stay constant
df_selected_data = df_selected_data.astype(float)
_save_table_to_hdf(df_selected_data, path_mri_assets_hdf, object_selected_data_hdf)
df_standartized_assets = df_standartized_assets.astype(float)
# Index level 0 is moved to the columns before saving
_save_table_to_hdf(df_standartized_assets.unstack(level = 0), path_mri_assets_hdf, object_standartized_data_hdf)

# Group-level results: same treatment, stored in the groups HDF5 file
df_diag_mean_z_groups = df_diag_mean_z_groups.astype(float)
_save_table_to_hdf(df_diag_mean_z_groups.unstack(level = 0), path_mri_groups_hdf, object_diag_grouped_hdf)
df_percentiled_groups = df_percentiled_groups.astype(float)
_save_table_to_hdf(df_percentiled_groups.unstack(level = 0), path_mri_groups_hdf, object_perc_grouped_hdf)

hh_standartize_mri_data: basic asset for group EQ determined succesfully: iv_us
hh_standartize_mri_data: basic asset for group FI determined succesfully: oas_hy
hh_standartize_mri_data: basic asset for group FX determined succesfully: fx_gbp
hh_standartize_mri_data: group EQ standartizing started
hh_standartize_mri_data: asset iv_us in group EQ standartized successfully
hh_standartize_mri_data: asset iv_eu in group EQ standartized successfully
hh_standartize_mri_data: asset iv_uk in group EQ standartized successfully
hh_standartize_mri_data: asset iv_jp in group EQ standartized successfully
hh_standartize_mri_data: asset iv_rvx in group EQ standartized successfully
hh_standartize_mri_data: asset iv_eem in group EQ standartized successfully
hh_standartize_mri_data: weighted mean matrix for group EQ builded successfully
hh_standartize_mri_data: z-score matrix for group EQ weighted mean matrix builded successfully
hh_standartize_mri_data: z-score matrix for group EQ saved to HDF5 file Dat

In [5]:
# OPTIONAL CELL: IT CAN BE SKIPPED TO AVOID HEAVY CALCULATIONS WHEN THE MODEL AND SOURCE DATASETS ARE UNCHANGED -
# DATA FOR FURTHER PLOTTING WILL THEN BE TAKEN FROM THE HDF5 FILES!

# Aggregating group-level data to the MRI level. The call returns three series:
#   - diagonal of the weighted mean z-score matrix built from the groups' z-score matrices
#   - weighted mean of the percentiled z matrices of the groups
#   - moving average of that weighted mean
ser_diag_MRI, ser_raw_perc_mean, ser_ma_perc_mean = hh_aggregate_mri_data(
    df_model_MRI,
    path_mri_standart_hdf,
    path_mri_groups_hdf,
    object_perc_grouped_hdf,
    MRI_moving_average_window_max,
)

# Every MRI-level result is written to the same file with the same options: append mode, 'fixed' format
_hdf_write_options = {'mode': 'a', 'format': 'fixed'}

# MRI-level results: cast to float and store, so heavy calculations can be skipped while model and data stay constant
ser_diag_MRI = ser_diag_MRI.astype(float)
ser_diag_MRI.to_hdf(path_mri_index_hdf, key = object_diag_mri_hdf, **_hdf_write_options)
ser_raw_perc_mean = ser_raw_perc_mean.astype(float)
ser_raw_perc_mean.to_hdf(path_mri_index_hdf, key = object_raw_perc_mri_hdf, **_hdf_write_options)
ser_ma_perc_mean = ser_ma_perc_mean.astype(float)
ser_ma_perc_mean.to_hdf(path_mri_index_hdf, key = object_ma_perc_mri_hdf, **_hdf_write_options)

hh_aggregate_mri_data: group EQ z matrix data extracted successfully
hh_aggregate_mri_data: group FI z matrix data extracted successfully
hh_aggregate_mri_data: group FX z matrix data extracted successfully
hh_aggregate_mri_data: weighted mean matrix for MRI builded successfully
hh_aggregate_mri_data: z-score matrix for MRI weighted mean matrix builded successfully
hh_aggregate_mri_data: group EQ percentiled data vector extracted successfully
hh_aggregate_mri_data: group FI percentiled data vector extracted successfully
hh_aggregate_mri_data: group FX percentiled data vector extracted successfully
hh_aggregate_mri_data: weighted data vector from percentiled group matrices for MRI and moving average for this vector builded successfully
