In [1]:
# THIS NOTEBOOK SHOWS EXAMPLES OF APPLYING CUSTOM (HH MODULES) AND STANDARD PYTHON FUNCTIONS FOR THE PURPOSES OF THE MRI PROJECT

In [2]:
# Extending the Python module search path so that the HH_Modules package can be imported
import sys 
sys.path.append('../..')

In [3]:
# Importing functions from the hh_dates, hh_ts and hh_mri HH Modules
from HH_Modules.hh_dates import hh_create_bus_dates
from HH_Modules.hh_dates import hh_drop_nyse_closures
from HH_Modules.hh_ts import hh_missing_data_manager
from HH_Modules.hh_ts import hh_rolling_percentile
from HH_Modules.hh_ts import hh_rolling_simple_MA
from HH_Modules.hh_ts import hh_rolling_z_score
from HH_Modules.hh_mri import hh_build_mri_from_model
from HH_Modules.hh_mri import hh_standartize_mri_data

In [4]:
# Importing standard modules and date-special modules
import numpy as np
import pandas as pd
from datetime import date
from datetime import timedelta

In [5]:
# Examples of using the hh_dates function hh_create_bus_dates
date_format = '%Y-%m-%d'
# NOTE(review): presumably the MatLab serial date number of null_date_stamp (1900-01-01) - confirm
null_date_ML_number = 693962

# Boundary dates as date objects ...
null_date_stamp, first_date_stamp, last_date_stamp, standart_date_stamp = (
    date(1900, 1, 1),
    date(1998, 1, 1),
    date(2018, 12, 31),
    date(2001, 1, 1),
)
# ... and the same dates formatted as strings
null_date, first_date, last_date, standart_date = (
    stamp.strftime(date_format)
    for stamp in (null_date_stamp, first_date_stamp, last_date_stamp, standart_date_stamp)
)

# Business date indexes built from string boundaries, one per interval (calls run in the listed order)
(date_index_days_str,
 date_index_weeks_str,
 date_index_monthes_str,
 date_index_quarters_str,
 date_index_years_str) = (
    hh_create_bus_dates('string', first_date, last_date, interval)
    for interval in ('day', 'week', 'month', 'quarter', 'year')
)

# The same indexes built from date object boundaries
(date_index_days_date,
 date_index_weeks_date,
 date_index_monthes_date,
 date_index_quarters_date,
 date_index_years_date) = (
    hh_create_bus_dates('date', first_date_stamp, last_date_stamp, interval)
    for interval in ('day', 'week', 'month', 'quarter', 'year')
)

# Showing the boundaries and a mix of string-based and date-based results
for print_label, print_value in [
    ('First date: ', first_date),
    ('Last date: ', last_date),
    ('Business days (string type): ', date_index_days_str),
    ('Business weeks ends (date type): ', date_index_weeks_date),
    ('Business monthes ends (string type): ', date_index_monthes_str),
    ('Business quarters ends (date type): ', date_index_quarters_date),
    ('Business years ends (string type): ', date_index_years_str),
]:
    print(print_label, print_value)

hh_create_bus_dates: Business dates index for period from 1998-01-01 to 2018-12-31 with day interval successfully generated
hh_create_bus_dates: Business dates index for period from 1998-01-01 to 2018-12-31 with week interval successfully generated
hh_create_bus_dates: Business dates index for period from 1998-01-01 to 2018-12-31 with month interval successfully generated
hh_create_bus_dates: Business dates index for period from 1998-01-01 to 2018-12-31 with quarter interval successfully generated
hh_create_bus_dates: Business dates index for period from 1998-01-01 to 2018-12-31 with year interval successfully generated
hh_create_bus_dates: Business dates index for period from 1998-01-01 to 2018-12-31 with day interval successfully generated
hh_create_bus_dates: Business dates index for period from 1998-01-01 to 2018-12-31 with week interval successfully generated
hh_create_bus_dates: Business dates index for period from 1998-01-01 to 2018-12-31 with month interval successfully generat

In [6]:
# Generating Timeseries for comparing MatLab and Python hh_create_bus_dates functionality
# Number of calendar days between the null date and the first date of the period
calendar_delta = first_date_stamp - null_date_stamp

date_index_calendar_days = pd.date_range(start = first_date, end = last_date, freq = 'D')

# One consecutive ML number per calendar day, starting from the number of the first date
ml_number_of_first_date = null_date_ML_number + calendar_delta.days
ser_all_days = pd.Series(
    np.arange(date_index_calendar_days.size) + ml_number_of_first_date,
    index = date_index_calendar_days.date,
    name = 'ML Numbers',
)

# Keeping business days only
ser_bus_days = ser_all_days[date_index_days_str]
# NOTE(review): the name is assigned on the index object in place; the recorded output of the later
# df_substitution_test cell also shows 'Business Dates', which suggests this index object is shared
# with date_index_days_str - keep the in-place assignment unless that is confirmed to be unwanted
ser_bus_days.index.name = 'Business Dates'
(ser_bus_days.head(), ser_bus_days.tail())

(Business Dates
 1998-01-01    729756
 1998-01-02    729757
 1998-01-05    729760
 1998-01-06    729761
 1998-01-07    729762
 Freq: B, Name: ML Numbers, dtype: int32, Business Dates
 2018-12-25    737419
 2018-12-26    737420
 2018-12-27    737421
 2018-12-28    737422
 2018-12-31    737425
 Freq: B, Name: ML Numbers, dtype: int32)

In [7]:
# Generating xlsx file for comparing MatLab and Python hh_create_bus_dates functionality on example of business days daily
# The context manager saves and closes the workbook on exit (also when to_excel raises);
# ExcelWriter.save() is no longer available in pandas 2.0+
with pd.ExcelWriter('Data_Files/Test_Files/bus_dts.xlsx') as xls_writer:
    ser_bus_days.to_excel(xls_writer, sheet_name = 'daily')

In [8]:
# Examples of using hh_dates function hh_drop_nyse_closures
# NOTE(review): calendar_delta is recomputed here but not used anywhere in this cell
calendar_delta = first_date_stamp - null_date_stamp

# Business days index with NYSE closure dates removed
date_nyse_index_days = hh_drop_nyse_closures(date_index_days_str)
# Placeholder series (all zeros): only its date index matters for the MatLab comparison
ser_bus_nyse_days = pd.Series(0, index = date_nyse_index_days.date, name = 'ML Numbers')
# NOTE(review): 'Wihtout' is a typo for 'Without'; it is a runtime string that also ends up in the exported xlsx header
ser_bus_nyse_days.index.name = 'Business Dates Wihtout NYSE Closures'
ser_bus_nyse_days.head(), ser_bus_nyse_days.tail()

hh_drop_nyse_closures: NYSE closure dates successfully dropped from date index


(Business Dates Wihtout NYSE Closures
 1998-01-02    0
 1998-01-05    0
 1998-01-06    0
 1998-01-07    0
 1998-01-08    0
 Name: ML Numbers, dtype: int64, Business Dates Wihtout NYSE Closures
 2018-12-24    0
 2018-12-26    0
 2018-12-27    0
 2018-12-28    0
 2018-12-31    0
 Name: ML Numbers, dtype: int64)

In [9]:
# Generating xlsx file for comparing MatLab and Python hh_drop_nyse_closures functionality on example of business days daily
# The context manager saves and closes the workbook on exit (also when to_excel raises);
# ExcelWriter.save() is no longer available in pandas 2.0+
with pd.ExcelWriter('Data_Files/Test_Files/bus_nyse_dts.xlsx') as xls_writer:
    ser_bus_nyse_days.to_excel(xls_writer, sheet_name = 'daily')
# identified one discrepancy: 2018-12-05 - NYSE closure day for Python library and valid business day for MatLab function

In [10]:
# Examples of using hh_ts function hh_missing_data_manager: building a small test table with missing values
# The table is created as float from the start: every column receives at least one NaN below,
# so the final dtypes are float anyway, and this avoids implicit int -> float upcasting on assignment
df_substitution_test = pd.DataFrame(np.arange(15, dtype = float).reshape(5, 3), index = date_index_days_str[:5], columns = ['Atos', 'Portos', 'Aramis'])
# Repeated and overridden values to make mean and median substitution distinguishable
df_substitution_test.iloc[:2, 0] = 30
df_substitution_test.iloc[4, 2] = 10
# Missing values: a single cell, a whole row and a part of the last row
# (np.nan is the supported spelling; the np.NaN alias is not available in NumPy 2.0+)
df_substitution_test.iloc[1, 2] = np.nan
df_substitution_test.iloc[2, :] = np.nan
df_substitution_test.iloc[4, :2] = np.nan
df_substitution_test

Unnamed: 0_level_0,Atos,Portos,Aramis
Business Dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1998-01-01,30.0,1.0,2.0
1998-01-02,30.0,4.0,
1998-01-05,,,
1998-01-06,9.0,10.0,11.0
1998-01-07,,,10.0


In [11]:
# Option 'clear': per the recorded output, only the rows without any NaN (1998-01-01 and 1998-01-06) are kept
df_substituted_clear = hh_missing_data_manager(df_substitution_test, manage_option = 'clear')
df_substituted_clear

hh_missing_data_manager: np.Nan substitution with option clear performed successfully
hh_missing_data_manager: Overall count of actual np.Nan values in data table is 0


Unnamed: 0_level_0,Atos,Portos,Aramis
Business Dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1998-01-01,30.0,1.0,2.0
1998-01-06,9.0,10.0,11.0


In [12]:
# Option 'mean': per the recorded output, each NaN is replaced with the mean of the known values of its column
df_substituted_mean = hh_missing_data_manager(df_substitution_test, manage_option = 'mean')
df_substituted_mean

hh_missing_data_manager: np.Nan substitution with option mean performed successfully
hh_missing_data_manager: Overall count of actual np.Nan values in data table is 0


Unnamed: 0_level_0,Atos,Portos,Aramis
Business Dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1998-01-01,30.0,1.0,2.0
1998-01-02,30.0,4.0,7.666667
1998-01-05,23.0,5.0,7.666667
1998-01-06,9.0,10.0,11.0
1998-01-07,23.0,5.0,10.0


In [13]:
# Option 'median': per the recorded output, each NaN is replaced with the median of the known values of its column
df_substituted_median = hh_missing_data_manager(df_substitution_test, manage_option = 'median')
df_substituted_median

hh_missing_data_manager: np.Nan substitution with option median performed successfully
hh_missing_data_manager: Overall count of actual np.Nan values in data table is 0


Unnamed: 0_level_0,Atos,Portos,Aramis
Business Dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1998-01-01,30.0,1.0,2.0
1998-01-02,30.0,4.0,10.0
1998-01-05,30.0,4.0,10.0
1998-01-06,9.0,10.0,11.0
1998-01-07,30.0,4.0,10.0


In [14]:
# Option 'previous': per the recorded output, each NaN is replaced with the last known value above it in the same column
df_substituted_previous = hh_missing_data_manager(df_substitution_test, manage_option = 'previous')
df_substituted_previous

hh_missing_data_manager: np.Nan substitution with option previous performed successfully
hh_missing_data_manager: Overall count of actual np.Nan values in data table is 0


Unnamed: 0_level_0,Atos,Portos,Aramis
Business Dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1998-01-01,30.0,1.0,2.0
1998-01-02,30.0,4.0,2.0
1998-01-05,30.0,4.0,2.0
1998-01-06,9.0,10.0,11.0
1998-01-07,9.0,10.0,10.0


In [15]:
# Option 'previous' with prev_lag = 1: per the recorded output, only the first NaN after a known value is filled,
# so the second consecutive NaN in column Aramis (1998-01-05) stays missing
# NOTE(review): presumably prev_lag limits the number of consecutive NaN values to fill - confirm against hh_ts
df_substituted_previous_lag = hh_missing_data_manager(df_substitution_test, manage_option = 'previous', prev_lag = 1)
df_substituted_previous_lag

hh_missing_data_manager: np.Nan substitution with option previous performed successfully
hh_missing_data_manager: Overall count of actual np.Nan values in data table is 1


Unnamed: 0_level_0,Atos,Portos,Aramis
Business Dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1998-01-01,30.0,1.0,2.0
1998-01-02,30.0,4.0,2.0
1998-01-05,30.0,4.0,
1998-01-06,9.0,10.0,11.0
1998-01-07,9.0,10.0,10.0


In [16]:
# Example of using hh_mri function hh_build_mri_from_model
path_mri_data_xlsx = 'Data_Files/Source_Files/mri_data.xlsx'
mri_model_name = 'Model 01'
path_mri_data_hdf = 'Data_Files/Source_Files/mri_data.h5'
object_mri_data_hdf = 'source_data'
# Switch to True to regenerate the h5 file from xlsx; with False the existing h5 file is taken as is
update_mri_data_hdf = False
df_model_asset, df_model_MRI, df_selected_data = hh_build_mri_from_model(
    path_mri_data_xlsx,
    mri_model_name,
    path_mri_data_hdf,
    object_mri_data_hdf,
    date_nyse_index_days,
    update_mri_data_hdf,
)

hh_build_mri_from_model: Model profile successfully read
hh_build_mri_from_model: Group border rows successfully dropped
hh_build_mri_from_model: Group sum weights control successfully performed
hh_build_mri_from_model: Model asset part extracted
hh_build_mri_from_model: Model MRI part extracted
hh_build_mri_from_model: HDF5 file taken as is because of update refusing
hh_build_mri_from_model: Limited data from HDF5 file Data_Files/Source_Files/mri_data.h5 extracted successfully
hh_build_mri_from_model: Missed border date rows in limited data table added
hh_missing_data_manager: np.Nan substitution with option previous performed successfully
hh_missing_data_manager: Overall count of actual np.Nan values in data table is 7252
hh_build_mri_from_model: Missed data in limited data table filled successfully


In [17]:
# Previewing the first and the last rows of the data table returned by hh_build_mri_from_model
df_selected_data.head(), df_selected_data.tail()

(            iv_us  iv_eu  iv_uk  iv_jp  iv_rvx  iv_eem  oas_em  oas_hy  \
 Date                                                                     
 1998-01-02  23.42    NaN    NaN    NaN     NaN     NaN     NaN    2.88   
 1998-01-05  24.36    NaN    NaN    NaN     NaN     NaN     NaN    2.88   
 1998-01-06  25.66    NaN    NaN    NaN     NaN     NaN     NaN    2.88   
 1998-01-07  25.07    NaN    NaN    NaN     NaN     NaN     NaN    2.88   
 1998-01-08  26.01    NaN    NaN    NaN     NaN     NaN     NaN    2.88   
 
             fx_gbp  fx_eur  fx_jpy  fx_chf  
 Date                                        
 1998-01-02    9.85     NaN  13.500   11.05  
 1998-01-05   10.05     NaN  14.350   11.45  
 1998-01-06   10.40     NaN  14.575   12.05  
 1998-01-07   10.45     NaN  14.500   12.05  
 1998-01-08   10.45     NaN  14.600   11.75  ,
             iv_us    iv_eu   iv_uk  iv_jp  iv_rvx  iv_eem  oas_em  oas_hy  \
 Date                                                                   

In [18]:
# Generating xlsx file for comparing MatLab and Python hh_build_mri_from_model functionality
# The context manager saves and closes the workbook on exit (also when to_excel raises);
# ExcelWriter.save() is no longer available in pandas 2.0+
with pd.ExcelWriter('Data_Files/Test_Files/extracted_and_filled.xlsx') as xls_writer:
    df_selected_data.to_excel(xls_writer, sheet_name = 'Python')
# No discrepancies identified except earlier 2018-12-05 question - NYSE closure for Python library and valid business day for MatLab function

In [19]:
# Asset part of the model profile returned by hh_build_mri_from_model: one row per asset with its group and factor weight
df_model_asset

Unnamed: 0,Asset Group,Asset Group Description,Asset Code,Asset Tab Name,Asset Desc,Processing Routine,Factor Weights
0,EQ,MRI Equity IVol Grp,iv_us,VIX,CBOE SPX VOLATILITY INDX,x,0.166667
1,EQ,MRI Equity IVol Grp,iv_eu,V2X,VSTOXX Index,x,0.166667
2,EQ,MRI Equity IVol Grp,iv_uk,VFTSE,FTSE100 Volatility Index,x,0.166667
3,EQ,MRI Equity IVol Grp,iv_jp,VNKY,NIKKEI Volatility Index,x,0.166667
4,EQ,MRI Equity IVol Grp,iv_rvx,RVX,CBOE RSL2000 VOLATILTY I,x,0.166667
5,EQ,MRI Equity IVol Grp,iv_eem,VXEEM,CBOE EM ETF Volatility,x,0.166667
6,FI,MRI Fixed Income Grp,oas_em,EMUSOAS,Bloomberg Barclays EM USD Aggr,x,0.5
7,FI,MRI Fixed Income Grp,oas_hy,LF98OAS,Bloomberg Barclays US Corporat,x,0.5
8,FX,MRI FOREX IVol Grp,fx_gbp,GBPUSD,GBP-USD OPT VOL 3M,x,0.25
9,FX,MRI FOREX IVol Grp,fx_eur,EURUSD,EUR-USD OPT VOL 3M,x,0.25


In [20]:
# MRI part of the model profile returned by hh_build_mri_from_model: one row per asset group with its factor weight
df_model_MRI

Unnamed: 0,Asset Group,Asset Group Description,Asset Code,Asset Tab Name,Asset Desc,Processing Routine,Factor Weights
0,MRI,Market Risk Indicator,EQ,,MRI Equity IVol Grp,x,0.333333
1,MRI,Market Risk Indicator,FI,,MRI Fixed Income Grp,x,0.333333
2,MRI,Market Risk Indicator,FX,,MRI FOREX IVol Grp,x,0.333333


In [21]:
# Example of using hh_ts function hh_rolling_percentile
# NOTE(review): the two positional numbers are presumably the minimal and maximal rolling window lengths - confirm against hh_ts
pctile_wnd_long = (252, 252000)
pctile_wnd_short = (30, 60)

# Ranks with min_interpretation passed explicitly as 'any'
ser_ranks_less_any = hh_rolling_percentile(
    df_selected_data['iv_eu'], *pctile_wnd_long, min_interpretation = 'any', manage_option = 'less')
ser_ranks_less_equal_any = hh_rolling_percentile(
    df_selected_data['iv_eu'], *pctile_wnd_long, min_interpretation = 'any', manage_option = 'less_equal')
ser_ranks_mean_any = hh_rolling_percentile(
    df_selected_data['iv_eu'], *pctile_wnd_long, min_interpretation = 'any', manage_option = 'mean')
ser_ranks_wnd_any = hh_rolling_percentile(
    df_selected_data['iv_us'], *pctile_wnd_short, min_interpretation = 'any', manage_option = 'mean')

# Ranks with min_interpretation left at its default (reported as not_NaN in the function log)
ser_ranks_less_notNaN = hh_rolling_percentile(
    df_selected_data['iv_eu'], *pctile_wnd_long, manage_option = 'less')
ser_ranks_less_equal_notNaN = hh_rolling_percentile(
    df_selected_data['iv_eu'], *pctile_wnd_long, manage_option = 'less_equal')
ser_ranks_mean_notNaN = hh_rolling_percentile(
    df_selected_data['iv_eu'], *pctile_wnd_long, manage_option = 'mean')
ser_ranks_wnd_notNaN = hh_rolling_percentile(
    df_selected_data['iv_us'], *pctile_wnd_short, manage_option = 'mean')

hh_rolling_percentile: Percentile rank calculation with min_interpretation any and option less performed successfully
hh_rolling_percentile: Percentile rank calculation with min_interpretation any and option less_equal performed successfully
hh_rolling_percentile: Percentile rank calculation with min_interpretation any and option mean performed successfully
hh_rolling_percentile: Percentile rank calculation with min_interpretation any and option mean performed successfully
hh_rolling_percentile: Percentile rank calculation with min_interpretation not_NaN and option less performed successfully
hh_rolling_percentile: Percentile rank calculation with min_interpretation not_NaN and option less_equal performed successfully
hh_rolling_percentile: Percentile rank calculation with min_interpretation not_NaN and option mean performed successfully
hh_rolling_percentile: Percentile rank calculation with min_interpretation not_NaN and option mean performed successfully


In [22]:
# Generating xlsx file for comparing MatLab and Python hh_rolling_percentile functionality
# Sheet name -> series to export (sheets are written in this order)
pctile_sheets = {
    'Less any': ser_ranks_less_any,
    'Less_Equal any': ser_ranks_less_equal_any,
    'Mean any': ser_ranks_mean_any,
    'Short_Window any': ser_ranks_wnd_any,
    'Less not NaN': ser_ranks_less_notNaN,
    'Less_Equal not NaN': ser_ranks_less_equal_notNaN,
    'Mean not NaN': ser_ranks_mean_notNaN,
    'Short_Window not NaN': ser_ranks_wnd_notNaN,
}
# The context manager saves and closes the workbook on exit (also when to_excel raises);
# ExcelWriter.save() is no longer available in pandas 2.0+
with pd.ExcelWriter('Data_Files/Test_Files/python_pctiles.xlsx') as xls_writer:
    for pctile_sheet_name, ser_pctile in pctile_sheets.items():
        ser_pctile.to_excel(xls_writer, sheet_name = pctile_sheet_name)

In [23]:
# Example of using hh_ts function hh_rolling_simple_MA on log-transformed series
# NOTE(review): the two positional numbers are presumably the minimal and maximal rolling window lengths - confirm against hh_ts
sma_wnd_long = (252, 252000)
sma_wnd_short = (30, 60)

# Moving averages with min_interpretation passed explicitly as 'any'
ser_SMA_long_any = hh_rolling_simple_MA(
    np.log(df_selected_data['iv_eu']), *sma_wnd_long, min_interpretation = 'any')
ser_SMA_short_any = hh_rolling_simple_MA(
    np.log(df_selected_data['iv_us']), *sma_wnd_short, min_interpretation = 'any', factor_period = 'day')

# Moving averages with min_interpretation left at its default (reported as not_NaN in the function log)
ser_SMA_long_notNaN = hh_rolling_simple_MA(
    np.log(df_selected_data['iv_eu']), *sma_wnd_long)
ser_SMA_short_notNaN = hh_rolling_simple_MA(
    np.log(df_selected_data['iv_us']), *sma_wnd_short, factor_period = 'day')

hh_rolling_simple_MA: Moving average calculation with min_interpretation any performed successfully
hh_rolling_simple_MA: Moving average calculation with min_interpretation any performed successfully
hh_rolling_simple_MA: Moving average calculation with min_interpretation not_NaN performed successfully
hh_rolling_simple_MA: Moving average calculation with min_interpretation not_NaN performed successfully


In [24]:
# Generating xlsx file for comparing MatLab and Python hh_rolling_simple_MA functionality
# The context manager saves and closes the workbook on exit (also when to_excel raises);
# ExcelWriter.save() is no longer available in pandas 2.0+
with pd.ExcelWriter('Data_Files/Test_Files/python_SMA.xlsx') as xls_writer:
    ser_SMA_long_any.to_excel(xls_writer, sheet_name = 'SMA long any')
    ser_SMA_short_any.to_excel(xls_writer, sheet_name = 'SMA short any')
    ser_SMA_long_notNaN.to_excel(xls_writer, sheet_name = 'SMA long not NaN')
    ser_SMA_short_notNaN.to_excel(xls_writer, sheet_name = 'SMA short not NaN')

In [25]:
# Example of using hh_ts function hh_rolling_z_score on the log-transformed iv_us series
# The function returns two objects: the Z Score series table and the Z Matrix
df_z_score, df_z_matrix = hh_rolling_z_score(
    np.log(df_selected_data['iv_us']),
    min_wnd = 252,
    max_wnd = 25200,
    winsor_option = 'value',
    winsor_bottom = -5,
    winsor_top = 5,
    fill_option = 'backfill',
)

hh_rolling_z_score: Mean, Std and Z Score series calculated successfully
hh_rolling_z_score: Z Matrix values calculated successfully
hh_rolling_z_score: Rolling winsorized Z Score series calculated successfully
hh_rolling_z_score: Calculating Z Score data with winsor_option value and fill_option backfill performed successfully


In [26]:
# Generating xlsx file for comparing MatLab and Python hh_rolling_z_score functionality
# The context manager saves and closes the workbook on exit (also when to_excel raises);
# ExcelWriter.save() is no longer available in pandas 2.0+
with pd.ExcelWriter('Data_Files/Test_Files/python_Z_score.xlsx') as xls_writer:
    df_z_score.to_excel(xls_writer, sheet_name = 'Z Score series')
    # Only a hand-picked sample of Z Matrix rows is exported
    df_z_matrix.iloc[[0, 251, 252, 253, 501, 502, 503, 504, 5281, 5282], :].to_excel(xls_writer, sheet_name = 'Z Matrix')

In [27]:
###################################################################################################################################

In [28]:
# Parameters passed to hh_standartize_mri_data
# Rolling window bounds, in business days
roll_window_min = 252                    # One business year
roll_window_max = roll_window_min * 100  # Hundred business years
# Winsorizing bounds
winsor_bound_bottom, winsor_bound_top = -5, 5

In [29]:
# Example of using hh_mri function hh_standartize_mri_data with the model asset table, the selected data,
# the standartizing date and the rolling window / winsorizing parameters defined above;
# per the recorded log it processes assets group by group and calls hh_rolling_z_score for each asset
df_result = hh_standartize_mri_data(df_model_asset, df_selected_data, standart_date, roll_window_min, roll_window_max, winsor_bound_bottom, winsor_bound_top)

hh_standartize_mri_data: group EQ started standartizing
hh_standartize_mri_data: asset iv_us in group EQ started standartizing
hh_rolling_z_score: Mean, Std and Z Score series calculated successfully
hh_rolling_z_score: Z Matrix values calculated successfully
hh_rolling_z_score: Rolling winsorized Z Score series calculated successfully
hh_rolling_z_score: Calculating Z Score data with winsor_option value and fill_option backfill performed successfully
hh_standartize_mri_data: asset iv_us in group EQ standartized successfully
hh_standartize_mri_data: asset iv_eu in group EQ started standartizing
hh_rolling_z_score: Mean, Std and Z Score series calculated successfully
hh_rolling_z_score: Z Matrix values calculated successfully
hh_rolling_z_score: Rolling winsorized Z Score series calculated successfully
hh_rolling_z_score: Calculating Z Score data with winsor_option value and fill_option backfill performed successfully
hh_standartize_mri_data: asset iv_eu in group EQ standartized success

In [31]:
# Generating xlsx file for comparing MatLab and Python hh_standartize_mri_data functionality
# NOTE(review): the recorded output of this cell is a NameError for 'panel_group', a name that is not used
# in this cell - the output looks stale, re-run the notebook from a fresh kernel to confirm
# The context manager saves and closes the workbook on exit (also when to_excel raises);
# ExcelWriter.save() is no longer available in pandas 2.0+
with pd.ExcelWriter('Data_Files/Test_Files/python_group_mean.xlsx') as xls_writer:
    # Only a hand-picked sample of rows is exported
    df_result.iloc[[0, 251, 252, 253, 501, 502, 503, 504, 5281, 5282], :].to_excel(xls_writer, sheet_name = 'Mean Matrix')

NameError: name 'panel_group' is not defined

In [None]:
# Scratch experiment: weighted mean of two tables that ignores missing values.
# For every cell the weighted values that are present are summed and divided by the sum of the
# weights of the tables that actually have a value there; cells missing in all tables stay NaN.

# Test tables are created as float from the start, because NaN values are written into them below
df_test_one = pd.DataFrame(np.arange(6, dtype = float).reshape(3, 2))
df_test_two = pd.DataFrame(np.arange(6, dtype = float).reshape(3, 2) * 10)
# np.nan is the supported spelling; the np.NaN alias is not available in NumPy 2.0+
df_test_one.iloc[1, :] = np.nan
df_test_two.iloc[1, 1] = np.nan
df_test_two.iloc[2, 0] = np.nan
# Weights of the first and the second table
arr_test = [0.5, 0.5]
df_test_ones = pd.DataFrame(np.ones(6).reshape(3,2) * 10)
# Weighted values of each table
df_test_one_w = df_test_one * arr_test[0]
df_test_two_w = df_test_two * arr_test[1]
# Per-cell sum of the weights of the tables that have a value in that cell
df_test_wages = pd.DataFrame(np.zeros(df_test_one.shape))
df_test_wages = df_test_wages + df_test_one.notna() * arr_test[0]
df_test_wages = df_test_wages + df_test_two.notna() * arr_test[1]
# Stacking the weighted tables: level 0 of the index is the table key, level 1 is the original row label
df_concat = pd.concat([df_test_one_w, df_test_two_w], keys = ['one', 'two'])
print(df_test_one)
print(df_test_two)
print(df_test_one_w)
print(df_test_two_w)
# Per-cell sum over the tables, NaN values are skipped;
# groupby(level = 1).sum() replaces sum(level = 1), which is not available in pandas 2.0+
df_test_final = df_concat.groupby(level = 1).sum()
# Normalizing by the available weights; cells with zero total weight become NaN
df_test_final = df_test_final / df_test_wages.where(df_test_wages > 0)
print(df_test_final)

In [None]:
# Scratch check: array of zeros with the same shape as the stacked table df_concat
np.zeros(df_concat.shape)

In [None]:
# Scratch check: per-cell sum over the stacked tables, grouped by the original row label (index level 1);
# groupby(level = 1).sum() replaces sum(level = 1), which is not available in pandas 2.0+
df_concat.groupby(level = 1).sum()

In [None]:
# Scratch check: per-cell sum of the weights of the tables that have a value in that cell
df_test_wages