In [None]:
# THIS NOTEBOOK SHOWS EXAMPLES OF APPLYING CUSTOM (HH MODULES) AND STANDARD PYTHON FUNCTIONS FOR THE PURPOSES OF THE MRI PROJECT

In [None]:
# Expanding visibility zone for Python engine to make HH Modules seen.
# The path is relative, so it is resolved against the kernel's working directory.
import sys

# Guarded so that re-running this cell does not pile up duplicate entries in sys.path
if '../..' not in sys.path:
    sys.path.append('../..')

In [None]:
# Importing functions from the hh_dates, hh_ts and hh_mri HH Modules
from HH_Modules.hh_dates import hh_create_bus_dates
from HH_Modules.hh_dates import hh_drop_nyse_closures
from HH_Modules.hh_ts import hh_missing_data_manager
from HH_Modules.hh_ts import hh_rolling_percentile
from HH_Modules.hh_ts import hh_rolling_simple_MA
from HH_Modules.hh_ts import hh_rolling_z_score
from HH_Modules.hh_mri import hh_build_mri_from_model
from HH_Modules.hh_mri import hh_standartize_mri_data

In [None]:
# Importing standard modules and date-special modules
import numpy as np
import pandas as pd
from datetime import date
from datetime import timedelta

In [None]:
# Examples of using hh_dates function hh_create_bus_dates

# String format used for dates, and the serial number paired with the null date
# (presumably the MatLab date number of 1900-01-01 -- TODO confirm)
date_format = '%Y-%m-%d'
null_date_ML_number = 693962

# Reference dates as datetime.date objects
null_date_stamp, first_date_stamp = date(1900, 1, 1), date(1998, 1, 1)
last_date_stamp, standart_date_stamp = date(2018, 12, 31), date(2001, 1, 1)

# The same reference dates rendered as strings in date_format
null_date, first_date, last_date, standart_date = (
    stamp.strftime(date_format)
    for stamp in (null_date_stamp, first_date_stamp, last_date_stamp, standart_date_stamp)
)

# One index per period, requested in 'string' mode with string boundaries
(date_index_days_str,
 date_index_weeks_str,
 date_index_monthes_str,
 date_index_quarters_str,
 date_index_years_str) = (
    hh_create_bus_dates('string', first_date, last_date, period)
    for period in ('day', 'week', 'month', 'quarter', 'year')
)

# One index per period, requested in 'date' mode with datetime.date boundaries
(date_index_days_date,
 date_index_weeks_date,
 date_index_monthes_date,
 date_index_quarters_date,
 date_index_years_date) = (
    hh_create_bus_dates('date', first_date_stamp, last_date_stamp, period)
    for period in ('day', 'week', 'month', 'quarter', 'year')
)

# Show the boundaries and one index per period, alternating between the two modes
print('First date: ', first_date)
print('Last date: ', last_date)
print('Business days (string type): ', date_index_days_str)
print('Business weeks ends (date type): ', date_index_weeks_date)
print('Business monthes ends (string type): ', date_index_monthes_str)
print('Business quarters ends (date type): ', date_index_quarters_date)
print('Business years ends (string type): ', date_index_years_str)

In [None]:
# Generating Timeseries for comparing MatLab and Python hh_create_bus_dates functionality

# Number of calendar days between the null date and the first date of the sample
calendar_delta = first_date_stamp - null_date_stamp

# Every calendar day of the sample
date_index_calendar_days = pd.date_range(start = first_date, end = last_date, freq = 'D')

# Consecutive numbers starting at (null date number + offset of the first date), indexed by datetime.date objects
ser_all_days = pd.Series(
    data = null_date_ML_number + calendar_delta.days + np.arange(date_index_calendar_days.size),
    index = date_index_calendar_days.date,
    name = 'ML Numbers',
)

# Keep only the entries selected by the business days index
ser_bus_days = ser_all_days[date_index_days_str]
ser_bus_days.index.name = 'Business Dates'
(ser_bus_days.head(), ser_bus_days.tail())

In [None]:
# Generating xlsx file for comparing MatLab and Python hh_create_bus_dates functionality on example of business days daily
# The context manager saves and closes the workbook on exit, even if writing fails
# (ExcelWriter.save() is no longer available starting with pandas 2.0)
with pd.ExcelWriter('Data_Files/Test_Files/bus_dts.xlsx') as xls_writer:
    ser_bus_days.to_excel(xls_writer, sheet_name = 'daily')

In [None]:
# Examples of using hh_dates function hh_drop_nyse_closures
# NOTE(review): calendar_delta is recomputed here but is not used anywhere in this cell
calendar_delta = first_date_stamp - null_date_stamp

# Business days index passed through hh_drop_nyse_closures
date_nyse_index_days = hh_drop_nyse_closures(date_index_days_str)

# Placeholder series (all zeros) carrying the filtered dates as a named index
ser_bus_nyse_days = pd.Series(
    0,
    index = pd.Index(date_nyse_index_days.date, name = 'Business Dates Wihtout NYSE Closures'),
    name = 'ML Numbers',
)
(ser_bus_nyse_days.head(), ser_bus_nyse_days.tail())

In [None]:
# Generating xlsx file for comparing MatLab and Python hh_drop_nyse_closures functionality on example of business days daily
# The context manager saves and closes the workbook on exit, even if writing fails
# (ExcelWriter.save() is no longer available starting with pandas 2.0)
with pd.ExcelWriter('Data_Files/Test_Files/bus_nyse_dts.xlsx') as xls_writer:
    ser_bus_nyse_days.to_excel(xls_writer, sheet_name = 'daily')
# Identified one discrepancy: 2018-12-05 - NYSE closure day for Python library and valid business day for MatLab function
# NOTE(review): NYSE appears to have been closed on 2018-12-05 (national day of mourning), so the Python result
# looks like the correct one -- confirm against the MatLab calendar

In [None]:
# Examples of using hh_ts function hh_missing_data_manager
# Test frame: 5 dates x 3 columns with a deliberate pattern of repeated values and gaps.
# The frame is created as float from the start: NaN can only live in float columns, and writing NaN into
# integer columns relies on an implicit upcast that newer pandas versions warn about.
df_substitution_test = pd.DataFrame(np.arange(15, dtype = float).reshape(5, 3),
                                    index = date_index_days_str[:5], columns = ['Atos', 'Portos', 'Aramis'])
# Repeated / overridden values
df_substitution_test.iloc[:2, 0] = 30
df_substitution_test.iloc[4, 2] = 10
# Gaps: a single cell, a whole row, and the first two cells of the last row
# (np.nan is used because the np.NaN alias was removed in NumPy 2.0)
df_substitution_test.iloc[1, 2] = np.nan
df_substitution_test.iloc[2, :] = np.nan
df_substitution_test.iloc[4, :2] = np.nan
df_substitution_test

In [None]:
# Run hh_missing_data_manager on the test frame with manage_option = 'clear' and display the result
# NOTE(review): the effect of each manage_option is defined inside HH_Modules.hh_ts and is not visible here
df_substituted_clear = hh_missing_data_manager(df_substitution_test, manage_option = 'clear')
df_substituted_clear

In [None]:
# Run hh_missing_data_manager on the test frame with manage_option = 'mean' and display the result
# NOTE(review): presumably gaps are replaced by a mean value -- confirm the exact rule in HH_Modules.hh_ts
df_substituted_mean = hh_missing_data_manager(df_substitution_test, manage_option = 'mean')
df_substituted_mean

In [None]:
# Run hh_missing_data_manager on the test frame with manage_option = 'median' and display the result
# NOTE(review): presumably gaps are replaced by a median value -- confirm the exact rule in HH_Modules.hh_ts
df_substituted_median = hh_missing_data_manager(df_substitution_test, manage_option = 'median')
df_substituted_median

In [None]:
# Run hh_missing_data_manager on the test frame with manage_option = 'previous' (prev_lag left at its default)
# NOTE(review): presumably gaps are filled from earlier observations -- confirm the exact rule in HH_Modules.hh_ts
df_substituted_previous = hh_missing_data_manager(df_substitution_test, manage_option = 'previous')
df_substituted_previous

In [None]:
# Same 'previous' option as above, this time with an explicit prev_lag = 1
# NOTE(review): presumably prev_lag limits how far back a previous value may be taken from -- confirm in HH_Modules.hh_ts
df_substituted_previous_lag = hh_missing_data_manager(df_substitution_test, manage_option = 'previous', prev_lag = 1)
df_substituted_previous_lag

In [None]:
# Example of using hh_mri function hh_build_mri_from_model

# Source workbook and the model to take from it
path_mri_data_xlsx = 'Data_Files/Source_Files/mri_data.xlsx'
mri_model_name = 'Model 01'

# HDF file and the object inside it
path_mri_data_hdf = 'Data_Files/Source_Files/mri_data.h5'
object_mri_data_hdf = 'source_data'

# The last positional argument is False here; pass True instead to regenerate the h5 file from xlsx
df_model_asset, df_model_MRI, df_selected_data = hh_build_mri_from_model(
    path_mri_data_xlsx,
    mri_model_name,
    path_mri_data_hdf,
    object_mri_data_hdf,
    date_nyse_index_days,
    False,
)

In [None]:
# Preview the first and the last rows of df_selected_data (third output of hh_build_mri_from_model)
df_selected_data.head(), df_selected_data.tail()

In [None]:
# Generating xlsx file for comparing MatLab and Python hh_build_mri_from_model functionality
# The context manager saves and closes the workbook on exit, even if writing fails
# (ExcelWriter.save() is no longer available starting with pandas 2.0)
with pd.ExcelWriter('Data_Files/Test_Files/extracted_and_filled.xlsx') as xls_writer:
    df_selected_data.to_excel(xls_writer, sheet_name = 'Python')
# No discrepancies identified except earlier 2018-12-05 question - NYSE closure for Python library and valid business day for MatLab function

In [None]:
# Display df_model_asset (first output of hh_build_mri_from_model)
df_model_asset

In [None]:
# Display df_model_MRI (second output of hh_build_mri_from_model)
df_model_MRI

In [None]:
# Example of using hh_ts function hh_rolling_percentile

# Long window (252 / 252000) on 'iv_eu' with min_interpretation = 'any', one run per manage_option
ser_ranks_less_any, ser_ranks_less_equal_any, ser_ranks_mean_any = (
    hh_rolling_percentile(df_selected_data['iv_eu'], 252, 252000, min_interpretation = 'any', manage_option = option)
    for option in ('less', 'less_equal', 'mean')
)
# Short window (30 / 60) on 'iv_us' with min_interpretation = 'any'
ser_ranks_wnd_any = hh_rolling_percentile(
    df_selected_data['iv_us'], 30, 60, min_interpretation = 'any', manage_option = 'mean'
)

# The same four runs with min_interpretation left at its default
ser_ranks_less_notNaN, ser_ranks_less_equal_notNaN, ser_ranks_mean_notNaN = (
    hh_rolling_percentile(df_selected_data['iv_eu'], 252, 252000, manage_option = option)
    for option in ('less', 'less_equal', 'mean')
)
ser_ranks_wnd_notNaN = hh_rolling_percentile(
    df_selected_data['iv_us'], 30, 60, manage_option = 'mean'
)

In [None]:
# Generating xlsx file for comparing MatLab and Python hh_rolling_percentile functionality
# One sheet per percentile series; the context manager saves and closes the workbook on exit, even if writing fails
# (ExcelWriter.save() is no longer available starting with pandas 2.0)
with pd.ExcelWriter('Data_Files/Test_Files/python_pctiles.xlsx') as xls_writer:
    ser_ranks_less_any.to_excel(xls_writer, sheet_name = 'Less any')
    ser_ranks_less_equal_any.to_excel(xls_writer, sheet_name = 'Less_Equal any')
    ser_ranks_mean_any.to_excel(xls_writer, sheet_name = 'Mean any')
    ser_ranks_wnd_any.to_excel(xls_writer, sheet_name = 'Short_Window any')
    ser_ranks_less_notNaN.to_excel(xls_writer, sheet_name = 'Less not NaN')
    ser_ranks_less_equal_notNaN.to_excel(xls_writer, sheet_name = 'Less_Equal not NaN')
    ser_ranks_mean_notNaN.to_excel(xls_writer, sheet_name = 'Mean not NaN')
    ser_ranks_wnd_notNaN.to_excel(xls_writer, sheet_name = 'Short_Window not NaN')

In [None]:
# Example of using hh_ts function hh_rolling_simple_MA (applied to the natural log of each series)

# Runs with min_interpretation = 'any': long window on 'iv_eu', short window on 'iv_us'
ser_SMA_long_any, ser_SMA_short_any = (
    hh_rolling_simple_MA(np.log(df_selected_data['iv_eu']), 252, 252000, min_interpretation = 'any'),
    hh_rolling_simple_MA(np.log(df_selected_data['iv_us']), 30, 60, min_interpretation = 'any', factor_period = 'day'),
)

# The same two runs with min_interpretation left at its default
ser_SMA_long_notNaN, ser_SMA_short_notNaN = (
    hh_rolling_simple_MA(np.log(df_selected_data['iv_eu']), 252, 252000),
    hh_rolling_simple_MA(np.log(df_selected_data['iv_us']), 30, 60, factor_period = 'day'),
)

In [None]:
# Generating xlsx file for comparing MatLab and Python hh_rolling_simple_MA functionality
# One sheet per moving average series; the context manager saves and closes the workbook on exit, even if writing fails
# (ExcelWriter.save() is no longer available starting with pandas 2.0)
with pd.ExcelWriter('Data_Files/Test_Files/python_SMA.xlsx') as xls_writer:
    ser_SMA_long_any.to_excel(xls_writer, sheet_name = 'SMA long any')
    ser_SMA_short_any.to_excel(xls_writer, sheet_name = 'SMA short any')
    ser_SMA_long_notNaN.to_excel(xls_writer, sheet_name = 'SMA long not NaN')
    ser_SMA_short_notNaN.to_excel(xls_writer, sheet_name = 'SMA short not NaN')

In [None]:
# Example of using hh_ts function hh_rolling_z_score (applied to the natural log of 'iv_eu')
# The call yields two objects: the z score series and the z matrix
df_z_score, df_z_matrix = hh_rolling_z_score(
    np.log(df_selected_data['iv_eu']),
    min_wnd = 252,
    max_wnd = 25200,
    winsor_option = 'value',
    winsor_bottom = -5,
    winsor_top = 5,
    fill_option = 'backfill',
)

In [None]:
# Generating xlsx file for comparing MatLab and Python hh_rolling_z_score functionality
# The context manager saves and closes the workbook on exit, even if writing fails
# (ExcelWriter.save() is no longer available starting with pandas 2.0)
with pd.ExcelWriter('Data_Files/Test_Files/python_Z_score.xlsx') as xls_writer:
    df_z_score.to_excel(xls_writer, sheet_name = 'Z Score series')
    # Only a hand-picked set of row positions of the z matrix is exported
    # NOTE(review): the positions are hard-coded, so df_z_matrix needs at least 5283 rows -- confirm for other samples
    df_z_matrix.iloc[[0, 251, 252, 253, 501, 502, 503, 504, 5281, 5282], :].to_excel(xls_writer, sheet_name = 'Z Matrix')

In [None]:
###################################################################################################################################

In [None]:
# Rolling window bounds
roll_window_min = 252 # One business year
roll_window_max = 100 * roll_window_min # Hundred business years
# Winsorization bounds (bottom, top)
winsor_bound_bottom, winsor_bound_top = -5, 5

In [None]:
# Example of using hh_mri function hh_standartize_mri_data with the model assets, the selected data,
# the standartization date and the rolling window / winsorization bounds
df_test = hh_standartize_mri_data(
    df_model_asset,
    df_selected_data,
    standart_date,
    roll_window_min,
    roll_window_max,
    winsor_bound_bottom,
    winsor_bound_top,
)