In [1]:
### Importing standard modules and date-special modules:
import numpy as np
import pandas as pd

In [2]:
### EXTRACTING UNIVERSE DATA FROM GENERAL MS EXCEL SOURCE
def get_market_membership_from_excel():
    """
    Load the monthly market-membership universe from the source Excel workbook.

    Reads the 'monthly_data' sheet, keeps the date, region and country columns,
    maps the numeric region codes 50 / 57 / 504 to 'DM' / 'EM' / 'FM' and drops
    every row whose market is not one of these three labels.

    Returns:
        pd.Series: market labels named 'Market', indexed by a sorted
        ('Code', 'Date') MultiIndex.
    """
    ### Defining constants:
    path_msci_data = 'Data_Files/Source_Files/sample_data_gaps.xlsx'
    tab_monthly = 'monthly_data'
    arr_markets_needed = ['DM', 'FM', 'EM']
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM'}
    arr_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                     '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null']
    ### Extracting universe data (sheet rows 0 and 2 are skipped, the first remaining row is the header):
    df_universe = pd.read_excel(io = path_msci_data, sheet_name = tab_monthly, skiprows = [0, 2], header = 0,
                                na_values = arr_na_values)
    df_universe = df_universe.loc[:, ['dates', 'region', 'ctry']]
    df_universe.columns = ['Date', 'Market', 'Code']
    ### Selecting the column by name instead of squeeze(), which would collapse a one-row sheet to a scalar:
    ser_universe = df_universe.set_index(['Code', 'Date'])['Market'].sort_index(level = [0, 1])
    ### Translating region codes to market labels and keeping only the needed markets:
    ser_universe = ser_universe.replace(dict_markets)
    ser_market_membership = ser_universe[ser_universe.isin(arr_markets_needed)]

    return ser_market_membership

### EXTRACTING RETURNS DATA FROM GENERAL MS EXCEL SOURCE
def _build_index_and_returns_by_country(ser_raw_returns, index_dates):
    """
    Rebuild per-country return indices and daily returns on a common calendar.

    For every code on level 0 of `ser_raw_returns` the cumulative product of
    (1 + return) is taken, its first observation is set to 1, the result is
    reindexed to `index_dates` with forward filling, and returns are recomputed
    as the day-over-day change of that filled index.

    Parameters:
        ser_raw_returns (pd.Series): returns indexed by a sorted (code, date) MultiIndex.
        index_dates (pd.DatetimeIndex): target calendar.

    Returns:
        list: [recomputed returns, return index], both unnamed series indexed by
        a sorted ('Code', 'Date') MultiIndex.
    """
    dict_realized_ret = {}
    dict_index_ret = {}
    for iter_country in ser_raw_returns.index.get_level_values(0).unique():
        ### Cumulative return index of the country, indexed by date only:
        ser_ret_index_iter = (1 + ser_raw_returns.xs(iter_country, level = 0)).cumprod()
        ### Rebasing the first observation directly on the country series: a chained
        ### assignment through the full series is silently lost under pandas Copy-on-Write
        ser_ret_index_iter.iloc[0] = 1
        ### Moving to the target calendar, then filling gaps left by missing source values:
        ser_ret_index_iter = ser_ret_index_iter.reindex(index_dates, method = 'ffill').ffill()
        dict_index_ret[iter_country] = ser_ret_index_iter
        dict_realized_ret[iter_country] = ser_ret_index_iter / ser_ret_index_iter.shift(1) - 1
    arr_results = []
    for dict_iter in (dict_realized_ret, dict_index_ret):
        ser_iter = pd.concat(dict_iter)
        ser_iter.index.names = ['Code', 'Date']
        ### Outputs are left unnamed:
        ser_iter.name = None
        arr_results.append(ser_iter.sort_index(level = [0, 1]))

    return arr_results

def get_universe_returns_from_excel():
    """
    Load daily USD and local-currency returns from the source Excel workbook
    and rebuild them on a business-day calendar from 1992-01-01 to 2018-12-31.

    Returns:
        list: [ser_realized_ret_USD, ser_realized_ret_LOC, ser_index_ret_USD,
        ser_index_ret_LOC], each a pd.Series indexed by a sorted
        ('Code', 'Date') MultiIndex.
    """
    ### Constants declaring:
    path_msci_data = 'Data_Files/Source_Files/sample_data_gaps.xlsx'
    tab_daily = 'daily_returns'
    index_dates = pd.date_range('1992-01-01', '2018-12-31', freq = 'B')
    arr_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                     '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null']
    ### Extracting returns data (sheet rows 0 and 2 are skipped, the first remaining row is the header):
    df_returns = pd.read_excel(io = path_msci_data, sheet_name = tab_daily, skiprows = [0, 2], header = 0,
                               na_values = arr_na_values)
    df_returns = df_returns.loc[:, ['dates', 'ctry', 'retusd', 'retloc']]
    df_returns.columns = ['Date', 'Code', 'Ret_USD', 'Ret_LOC']
    df_returns = df_returns.set_index(['Code', 'Date']).sort_index(level = [0, 1])
    ### Reindexation and forward filling procedure, identical for USD and LOC returns:
    [ser_realized_ret_USD, ser_index_ret_USD] = _build_index_and_returns_by_country(df_returns['Ret_USD'], index_dates)
    [ser_realized_ret_LOC, ser_index_ret_LOC] = _build_index_and_returns_by_country(df_returns['Ret_LOC'], index_dates)

    return [ser_realized_ret_USD, ser_realized_ret_LOC, ser_index_ret_USD, ser_index_ret_LOC]

### EXTRACTING IMPLIED VOLATILITY DATA AND VRP FACTOR DATA FROM GENERAL MS EXCEL SOURCE
def _reindex_and_ffill_by_country(ser_source, index_dates):
    """
    Reindex every country's observations to `index_dates` and forward fill them.

    Parameters:
        ser_source (pd.Series): values indexed by a sorted (code, date) MultiIndex.
        index_dates (pd.DatetimeIndex): target calendar.

    Returns:
        pd.Series: forward-filled values indexed by a sorted ('Code', 'Date') MultiIndex.
    """
    dict_filled = {}
    for iter_country in ser_source.index.get_level_values(0).unique():
        ser_iter = ser_source.xs(iter_country, level = 0).reindex(index_dates, method = 'ffill')
        ### Second pass fills dates whose nearest earlier source observation was itself missing:
        dict_filled[iter_country] = ser_iter.ffill()
    ser_filled = pd.concat(dict_filled)
    ser_filled.index.names = ['Code', 'Date']

    return ser_filled.sort_index(level = [0, 1])

def get_universe_ivol_from_excel():
    """
    Load 3-month implied volatility and VRP data from the source Excel workbook
    and forward fill them on a business-day calendar from 1992-01-01 to 2018-12-31.

    Returns:
        list: [ser_ivol3m, ser_vrp3m], each a pd.Series indexed by a sorted
        ('Code', 'Date') MultiIndex.
    """
    ### Constants declaring:
    path_msci_data = 'Data_Files/Source_Files/sample_data_gaps.xlsx'
    tab_ivol = 'ivol_data'
    index_dates = pd.date_range('1992-01-01', '2018-12-31', freq = 'B')
    arr_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                     '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null']
    ### Extracting ivol data (sheet rows 0 and 2 are skipped, the first remaining row is the header):
    df_ivol = pd.read_excel(io = path_msci_data, sheet_name = tab_ivol, skiprows = [0, 2], header = 0,
                            na_values = arr_na_values)
    df_ivol = df_ivol.loc[:, ['dates', 'ctry', 'ivol3m', 'vrp3m']]
    df_ivol.columns = ['Date', 'Code', 'IVol_3m', 'VRP_3m']
    df_ivol = df_ivol.set_index(['Code', 'Date']).sort_index(level = [0, 1])
    ### Reindexation and forward filling procedure for implied volatility variables:
    ser_ivol3m = _reindex_and_ffill_by_country(df_ivol['IVol_3m'], index_dates)
    ser_vrp3m = _reindex_and_ffill_by_country(df_ivol['VRP_3m'], index_dates)

    return [ser_ivol3m, ser_vrp3m]

### EXTRACTING MRI INDEX FROM HDF5 SOURCE
def get_gri_from_hdf():
    """
    Read the four GRI objects stored in the released-index HDF5 file.

    Returns:
        list: the objects stored under 'gri_vector_key', 'gri_start_key',
        'gri_level_perc_key' and 'gri_momentum_perc_key', in that order.
    """
    ### Source file and its storage keys, listed in output order
    ### (the last two keys were added for the beta factors calculation):
    hdf_location = 'Data_Files/Source_Files/gri_released_index.h5'
    hdf_key_sequence = ('gri_vector_key', 'gri_start_key', 'gri_level_perc_key', 'gri_momentum_perc_key')
    ### One read per key:
    return [pd.read_hdf(hdf_location, each_key) for each_key in hdf_key_sequence]

### EXTRACTING SOURCE DATA FROM MS EXCEL FILES AND SAVING TO HDF FILES
import pandas as pd
import numpy as np
import tables
### Extracting data from xlsx files (the GRI series are read from their own hdf5 file):
ser_market_membership = get_market_membership_from_excel()
[ser_realized_ret_USD, ser_realized_ret_LOC, ser_index_ret_USD, ser_index_ret_LOC] = get_universe_returns_from_excel()
[ser_ivol3m, ser_vrp3m] = get_universe_ivol_from_excel()
[ser_gri_released, ser_gri_start_col, ser_gri_level_perc, ser_gri_momentum_perc] = get_gri_from_hdf() ### CHANGED FOR BETA FACTORS CALCULATION
ser_gri_released.name = 'GRI'
ser_gri_start_col.name = 'GRI'
### Declaring constants:
path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
market_membership_key = 'market_membership_key'
realized_ret_USD_key = 'realized_ret_USD_key'
realized_ret_LOC_key = 'realized_ret_LOC_key'
index_ret_USD_key = 'index_ret_USD_key'
index_ret_LOC_key = 'index_ret_LOC_key'
ivol3m_key = 'ivol3m_key'
vrp3m_key = 'vrp3m_key'
gri_released_key = 'gri_released_key'
gri_start_key = 'gri_start_key'
gri_level_perc_key = 'gri_level_perc_key' ### ADDED FOR BETA FACTORS CALCULATION
gri_momentum_perc_key = 'gri_momentum_perc_key' ### ADDED FOR BETA FACTORS CALCULATION
### Close files that have been previously opened
### NOTE(review): `tables.file._open_files` is a private PyTables attribute - confirm it exists in the installed version
tables.file._open_files.close_all()
### Saving data to hdf5 fixed formatted files, in this order:
arr_series_to_save = [
    (ser_market_membership, market_membership_key),
    (ser_realized_ret_USD, realized_ret_USD_key),
    (ser_realized_ret_LOC, realized_ret_LOC_key),
    (ser_index_ret_USD, index_ret_USD_key),
    (ser_index_ret_LOC, index_ret_LOC_key),
    (ser_ivol3m, ivol3m_key),
    (ser_vrp3m, vrp3m_key),
    (ser_gri_released, gri_released_key),
    (ser_gri_start_col, gri_start_key),
    (ser_gri_level_perc, gri_level_perc_key), ### ADDED FOR BETA FACTORS CALCULATION
    (ser_gri_momentum_perc, gri_momentum_perc_key), ### ADDED FOR BETA FACTORS CALCULATION
]
for iter_position, (iter_series, iter_key) in enumerate(arr_series_to_save):
    ### The first write uses mode 'w' (recreates the file), every later write appends to it:
    iter_mode = 'w' if (iter_position == 0) else 'a'
    ### `key` is passed by keyword: positional use of it in to_hdf is deprecated in pandas
    iter_series.to_hdf(path_market_risk_source_hdf, key = iter_key, mode = iter_mode, format = 'fixed')

In [2]:
### LOADING PREVIOUSLY SAVED DATA FROM THE HDF5 FILE
### Source file locations (only the market risk file is read in this cell):
path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
path_gri_index_hdf = 'Data_Files/Source_Files/gri_released_index.h5'
### Storage keys of the saved series:
market_membership_key = 'market_membership_key'
realized_ret_USD_key = 'realized_ret_USD_key'
realized_ret_LOC_key = 'realized_ret_LOC_key'
index_ret_USD_key = 'index_ret_USD_key'
index_ret_LOC_key = 'index_ret_LOC_key'
ivol3m_key = 'ivol3m_key'
vrp3m_key = 'vrp3m_key'
gri_released_key = 'gri_released_key'
gri_start_key = 'gri_start_key'
gri_level_perc_key = 'gri_level_perc_key' ### ADDED FOR BETA FACTORS CALCULATION
gri_momentum_perc_key = 'gri_momentum_perc_key' ### ADDED FOR BETA FACTORS CALCULATION
### Reading every stored object back, one key at a time, in the order of the names on the left:
(
    ser_market_membership,
    ser_realized_ret_USD,
    ser_realized_ret_LOC,
    ser_index_ret_USD,
    ser_index_ret_LOC,
    ser_ivol3m,
    ser_vrp3m,
    ser_gri_released,
    ser_gri_start_col,
    ser_gri_level_perc, ### ADDED FOR BETA FACTORS CALCULATION
    ser_gri_momentum_perc, ### ADDED FOR BETA FACTORS CALCULATION
) = [
    pd.read_hdf(path_market_risk_source_hdf, each_key)
    for each_key in (
        market_membership_key,
        realized_ret_USD_key,
        realized_ret_LOC_key,
        index_ret_USD_key,
        index_ret_LOC_key,
        ivol3m_key,
        vrp3m_key,
        gri_released_key,
        gri_start_key,
        gri_level_perc_key,
        gri_momentum_perc_key,
    )
]

In [3]:
### EXTRACTING UNIVERSE DATA FOR A PARTICULAR DATE
def get_date_membership(iter_date):
    """
    Return the market membership valid for a particular date.

    The date is moved back to the latest business month end (left unchanged when
    it already is one) and the module-level `ser_market_membership` is filtered
    to that month end.

    Parameters:
        iter_date: date to look up; anything accepted by pd.to_datetime.

    Returns:
        pd.Series: membership values indexed by a MultiIndex of the original first
        level and a 'Date' level holding the month-end date. A Series is returned
        even when only one member matches.

    Raises:
        KeyError: when the month-end date is absent from `ser_market_membership`.
    """
    ### Preparing data for universe filtering:
    iter_month_end = pd.offsets.BusinessMonthEnd().rollback(pd.to_datetime(iter_date))
    ### Filtering and replacing iter_date to end-of-month date:
    ser_iter_membership = ser_market_membership.xs(iter_month_end, level = 1)
    df_iter_membership = ser_iter_membership.to_frame().assign(Date = iter_month_end).set_index('Date', append = True)
    ### Taking the single column explicitly: squeeze() would return a scalar for a one-member universe
    ser_iter_membership = df_iter_membership.iloc[:, 0]

    return ser_iter_membership

In [4]:
### DEFINING ASYMMETRY SERIES BUILDER
def get_asymmetry_series(ser_market_membership, ser_returns):
    """
    Compute the return asymmetry measure for every member of the universe.

    For each code the measure is the number of demeaned returns above one
    standard deviation minus the number below minus one standard deviation,
    divided by the number of observations used (at most the last 520
    non-missing ones). Codes with 130 or fewer non-missing returns, or with no
    returns at all, get NaN.

    Parameters:
        ser_market_membership (pd.Series): membership indexed by (code, date);
            the first date on level 1 labels the whole result.
        ser_returns (pd.Series): returns indexed by (code, date).

    Returns:
        pd.Series: asymmetry values indexed by (code, 'Date'), one row per entry
        of the membership index. A Series is returned even for a single member.
    """
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    ### Defining asymmetry calculator:
    def get_asymmetry_value(ser_iter_returns):
        """Return the asymmetry measure of one code's returns, or NaN when history is too short."""
        if (ser_iter_returns.count() <= num_year_work_days // 2):
            return np.nan
        ser_iter_returns = ser_iter_returns.dropna().iloc[- num_year_work_days * 2 : ]
        ser_iter_returns = ser_iter_returns - ser_iter_returns.mean()
        ### Threshold computed once and reused for both tails:
        num_iter_std = ser_iter_returns.std()
        num_upper_tail = (ser_iter_returns > num_iter_std).sum()
        num_lower_tail = (ser_iter_returns < -num_iter_std).sum()
        return (num_upper_tail - num_lower_tail) / ser_iter_returns.count()
    ### Defining constants:
    iter_date = ser_market_membership.index.get_level_values(1)[0]
    index_member_codes = ser_market_membership.index.get_level_values(0)
    ### Main loop performing:
    ### Boolean mask instead of .loc[codes, :], which raises KeyError on pandas >= 2.0 when a member has no returns
    ser_member_returns = ser_returns[ser_returns.index.get_level_values(0).isin(index_member_codes)]
    ser_asymmetry = ser_member_returns.groupby(level = 0).apply(get_asymmetry_value)
    ### Members without any returns appear here as NaN:
    ser_asymmetry = ser_asymmetry.reindex(index_member_codes)
    df_asymmetry = ser_asymmetry.to_frame().assign(Date = iter_date).set_index('Date', append = True)
    ### Taking the single column explicitly: squeeze() would return a scalar for a one-member universe
    ser_asymmetry = df_asymmetry.iloc[:, 0]
    ### Results output:
    return ser_asymmetry

In [5]:
### GET ASYMMETRY TAIL FACTOR
def get_asymmetry_tail_factor(iter_date):
    """
    Compute the asymmetry tail factor for the universe valid at `iter_date`.

    Local-currency returns from the module-level `ser_realized_ret_LOC` are
    restricted to the 1300 business days (5 years of 260 days) ending at
    `iter_date`, zero returns are treated as missing, and the negated result of
    `get_asymmetry_series` is returned. `ser_realized_ret_LOC` is not modified.

    Parameters:
        iter_date: end date of the return window, also passed to `get_date_membership`.

    Returns:
        pd.Series: negated asymmetry values as produced by `get_asymmetry_series`.
    """
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    ### Data preparing (common for all factors):
    index_iter_date = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_iter_membership = get_date_membership(iter_date)
    ### Boolean mask instead of .loc[:, dates]: window dates absent from the returns series
    ### would raise KeyError on pandas >= 2.0
    arr_in_window = ser_realized_ret_LOC.index.get_level_values(1).isin(index_iter_date)
    ### Removing effectively missing return observations (on a new object, not in place):
    ser_iter_returns = ser_realized_ret_LOC[arr_in_window].replace(0, np.nan)
    ### Factor calculation:
    ser_iter_factor = - get_asymmetry_series(ser_iter_membership, ser_iter_returns)
    ### Results output:
    return ser_iter_factor

In [6]:
### LOOPER FOR ASYMMETRY TAIL FACTOR
### Alternative date selections kept for reference:
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
### Unique membership dates ordered by date, restricted to the single date at position 155:
index_membership_dates = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
date_range_test = index_membership_dates[155 : 156]
arr_asymmetry_tail_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_asymmetry_tail_factor.append(get_asymmetry_tail_factor(iter_date))
    ### Reporting after every tenth processed date:
    if (iter_counter % 10 == 0):
        print('Progress printout', iter_counter, '/', iter_date)

### Stacking the per-date factor series into one series:
ser_asymmetry_tail_factor = pd.concat(arr_asymmetry_tail_factor)

In [38]:
### ASYMMETRY TAIL FACTOR TESTING:
### Spot checks for two countries on a single date:
print('ser_asymmetry_tail_factor - AR 29-Dec-2006:', ser_asymmetry_tail_factor.loc['AR' , '2006-12-29'])
print('ser_asymmetry_tail_factor - US 29-Dec-2006:', ser_asymmetry_tail_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean of the factor for every date (level 1 of the index), sorted by date:
ser_asymmetry_tail_factor_mean = ser_asymmetry_tail_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional means:
print('ser_asymmetry_tail_factor - cross-sectional mean min:', ser_asymmetry_tail_factor_mean.min())
print('ser_asymmetry_tail_factor - cross-sectional mean mean:', ser_asymmetry_tail_factor_mean.mean())
print('ser_asymmetry_tail_factor - cross-sectional mean max:', ser_asymmetry_tail_factor_mean.max())
print('ser_asymmetry_tail_factor - cross-sectional mean stdev:', ser_asymmetry_tail_factor_mean.std())
### Number of dates with a non-missing cross-sectional mean (this line was previously labelled 'mean mean'):
print('ser_asymmetry_tail_factor - cross-sectional mean count:', ser_asymmetry_tail_factor_mean.count())

ser_asymmetry_tail_factor - AR 29-Dec-2006: -0.0
ser_asymmetry_tail_factor - US 29-Dec-2006: 0.019230769230769232
ser_asymmetry_tail_factor - cross-sectional mean min: -0.005933971323553761
ser_asymmetry_tail_factor - cross-sectional mean mean: 0.00047159164615741777
ser_asymmetry_tail_factor - cross-sectional mean max: 0.01357023341150325
ser_asymmetry_tail_factor - cross-sectional mean stdev: 0.00354223282922243
ser_asymmetry_tail_factor - cross-sectional mean mean: 234
