In [1]:
### Importing standard modules and date-special modules:
import numpy as np
import pandas as pd

In [1]:
### EXTRACTING UNIVERSE DATA FROM GENERAL MS EXCEL SOURCE
def get_market_membership_from_excel():
    """Load the monthly market-membership series from the source Excel workbook.

    Reads the 'monthly_data' sheet of 'Data_Files/Source_Files/sample_data_gaps.xlsx',
    keeps the 'dates', 'region' and 'ctry' columns, maps the numeric region codes
    50 / 57 / 504 to 'DM' / 'EM' / 'FM' and keeps only the rows whose market is one
    of these three labels.

    Returns:
        pd.Series named 'Market', indexed by a sorted ('Code', 'Date') MultiIndex.
    """
    ### Importing standard modules:
    import pandas as pd
    ### Defining constants:
    path_msci_data = 'Data_Files/Source_Files/sample_data_gaps.xlsx'
    tab_monthly = 'monthly_data'
    arr_markets_needed = ['DM', 'FM', 'EM']
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM'}
    list_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                      '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null']
    ### Extracting universe data (rows 0 and 2 of the sheet are skipped, so the header is taken from row 1):
    df_universe = pd.read_excel(io = path_msci_data, sheet_name = tab_monthly, skiprows = [0, 2], header = 0,
                                na_values = list_na_values)
    df_universe = df_universe.loc[:, ['dates', 'region', 'ctry']]
    df_universe.columns = ['Date', 'Market', 'Code']
    ### Selecting the column explicitly: squeeze() would collapse a single-row table to a scalar
    ser_universe = df_universe.set_index(['Code', 'Date'])['Market']
    ser_universe = ser_universe.sort_index(level = [0, 1]).replace(dict_markets)
    ### Keeping only the markets of interest (unmapped region codes are dropped here):
    ser_market_membership = ser_universe[ser_universe.isin(arr_markets_needed)]

    return ser_market_membership

### EXTRACTING RETURNS DATA FROM GENERAL MS EXCEL SOURCE
def get_universe_returns_from_excel():
    """Load daily USD / local-currency returns and rebuild them on a full business-day grid.

    Reads the 'daily_returns' sheet of 'Data_Files/Source_Files/sample_data_gaps.xlsx'.
    For every country a cumulative return index is built from the realized returns,
    re-based to 1 on the country's first observation, reindexed to every business day
    between 1992-01-01 and 2018-12-31 with forward filling, and then converted back to
    daily returns (so that filled gap days carry a zero return).

    Returns:
        list of four pd.Series, each indexed by a sorted ('Code', 'Date') MultiIndex:
        [realized USD returns, realized LOC returns, USD return index, LOC return index].
    """
    ### Importing standard modules and date-special modules:
    import pandas as pd
    from datetime import date
    ### Constants declaring:
    path_msci_data = 'Data_Files/Source_Files/sample_data_gaps.xlsx'
    tab_daily = 'daily_returns'
    date_first = date(1992, 1, 1)
    date_last = date(2018, 12, 31)
    index_dates = pd.date_range(date_first, date_last, freq = 'B')
    list_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                      '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null']

    def rebuild_on_business_days(ser_source_ret):
        """Return [realized returns, return index] for one currency, rebuilt country by country."""
        dict_realized_ret = {}
        dict_index_ret = {}
        for iter_country in ser_source_ret.index.get_level_values(0).unique():
            ser_ret_index = (1 + ser_source_ret[iter_country]).cumprod()
            ### Re-basing the index on a standalone series: the earlier chained form
            ### (series[country].iloc[0] = 1) was lost whenever pandas returned a copy.
            ### NOTE(review): only the first point is overwritten, so a non-missing first return still
            ### scales the later index values - confirm that first returns are expected to be missing.
            ser_ret_index.iloc[0] = 1
            ### reindex() fills dates absent from the source, the second ffill covers source dates with missing values:
            ser_ret_index = ser_ret_index.reindex(index_dates, method = 'ffill').ffill()
            dict_index_ret[iter_country] = ser_ret_index
            dict_realized_ret[iter_country] = (ser_ret_index / ser_ret_index.shift(1) - 1)
        list_rebuilt = []
        for dict_iter in (dict_realized_ret, dict_index_ret):
            ser_rebuilt = pd.concat(dict_iter)
            ser_rebuilt.index.names = ['Code', 'Date']
            list_rebuilt.append(ser_rebuilt.sort_index(level = [0, 1]))
        return list_rebuilt

    ### Extracting returns data (rows 0 and 2 of the sheet are skipped, so the header is taken from row 1):
    df_returns = pd.read_excel(io = path_msci_data, sheet_name = tab_daily, skiprows = [0, 2], header = 0,
                               na_values = list_na_values)
    df_returns = df_returns.loc[:, ['dates', 'ctry', 'retusd', 'retloc']]
    df_returns.columns = ['Date', 'Code', 'Ret_USD', 'Ret_LOC']
    df_returns = df_returns.set_index(['Code', 'Date']).sort_index(level = [0, 1])
    ### Reindexation and forward filling procedure for USD and LOC returns:
    [ser_realized_ret_USD, ser_index_ret_USD] = rebuild_on_business_days(df_returns['Ret_USD'])
    [ser_realized_ret_LOC, ser_index_ret_LOC] = rebuild_on_business_days(df_returns['Ret_LOC'])

    return [ser_realized_ret_USD, ser_realized_ret_LOC, ser_index_ret_USD, ser_index_ret_LOC]

### EXTRACTING IMPLIED VOLATILITY DATA AND VRP FACTOR DATA FROM GENERAL MS EXCEL SOURCE
def get_universe_ivol_from_excel():
    """Load 3-month implied volatility and VRP data and forward fill them on a business-day grid.

    Reads the 'ivol_data' sheet of 'Data_Files/Source_Files/sample_data_gaps.xlsx' and, for
    every country, reindexes both variables to every business day between 1992-01-01 and
    2018-12-31, carrying the last available value forward.

    Returns:
        list of two pd.Series, each indexed by a sorted ('Code', 'Date') MultiIndex:
        [implied volatility ('ivol3m' column), VRP ('vrp3m' column)].
    """
    ### Importing standard modules and date-special modules:
    import pandas as pd
    from datetime import date
    ### Constants declaring:
    path_msci_data = 'Data_Files/Source_Files/sample_data_gaps.xlsx'
    tab_ivol = 'ivol_data'
    date_first = date(1992, 1, 1)
    date_last = date(2018, 12, 31)
    index_dates = pd.date_range(date_first, date_last, freq = 'B')
    list_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                      '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null']

    def ffill_on_business_days(ser_source):
        """Reindex each country's series to index_dates and forward fill the gaps."""
        dict_filled = {}
        for iter_country in ser_source.index.get_level_values(0).unique():
            ### reindex() fills dates absent from the source, the second ffill covers source dates with missing values:
            dict_filled[iter_country] = ser_source[iter_country].reindex(index_dates, method = 'ffill').ffill()
        ser_filled = pd.concat(dict_filled)
        ser_filled.index.names = ['Code', 'Date']
        return ser_filled.sort_index(level = [0, 1])

    ### Extracting ivol data (rows 0 and 2 of the sheet are skipped, so the header is taken from row 1):
    df_ivol = pd.read_excel(io = path_msci_data, sheet_name = tab_ivol, skiprows = [0, 2], header = 0,
                            na_values = list_na_values)
    df_ivol = df_ivol.loc[:, ['dates', 'ctry', 'ivol3m', 'vrp3m']]
    df_ivol.columns = ['Date', 'Code', 'IVol_3m', 'VRP_3m']
    df_ivol = df_ivol.set_index(['Code', 'Date']).sort_index(level = [0, 1])
    ### Reindexation and forward filling procedure for implied volatility variables:
    ser_ivol3m = ffill_on_business_days(df_ivol['IVol_3m'])
    ser_vrp3m = ffill_on_business_days(df_ivol['VRP_3m'])

    return [ser_ivol3m, ser_vrp3m]

### EXTRACTING GRI INDEX FROM HDF5 SOURCE
def get_gri_from_hdf():
    """Read the four GRI objects stored in 'Data_Files/Source_Files/gri_released_index.h5'.

    Returns:
        list of the objects stored under the keys 'gri_vector_key', 'gri_start_key',
        'gri_level_perc_key' and 'gri_momentum_perc_key', in that order
        (the last two were added for the beta factors calculation).
    """
    ### Importing standard modules:
    import pandas as pd
    ### Constants declaring:
    path_gri_index_hdf = 'Data_Files/Source_Files/gri_released_index.h5'
    tuple_gri_keys = ('gri_vector_key',
                      'gri_start_key',
                      'gri_level_perc_key',       ### ADDED FOR BETA FACTORS CALCULATION
                      'gri_momentum_perc_key')    ### ADDED FOR BETA FACTORS CALCULATION
    ### Extracting GRI (one read per key, results keep the order of the keys):
    return [pd.read_hdf(path_gri_index_hdf, iter_key) for iter_key in tuple_gri_keys]

### EXTRACTING SOURCE DATA FROM MS EXCEL FILES AND SAVING TO HDF FILES
import pandas as pd
import numpy as np
### Extracting data from xlsx files and from the GRI hdf5 file:
ser_market_membership = get_market_membership_from_excel()
ser_realized_ret_USD, ser_realized_ret_LOC, ser_index_ret_USD, ser_index_ret_LOC = get_universe_returns_from_excel()
ser_ivol3m, ser_vrp3m = get_universe_ivol_from_excel()
ser_gri_released, ser_gri_start_col, ser_gri_level_perc, ser_gri_momentum_perc = get_gri_from_hdf() ### CHANGED FOR BETA FACTORS CALCULATION
for iter_gri_series in (ser_gri_released, ser_gri_start_col):
    iter_gri_series.name = 'GRI'
### Declaring constants:
path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
market_membership_key = 'market_membership_key'
realized_ret_USD_key = 'realized_ret_USD_key'
realized_ret_LOC_key = 'realized_ret_LOC_key'
index_ret_USD_key = 'index_ret_USD_key'
index_ret_LOC_key = 'index_ret_LOC_key'
ivol3m_key = 'ivol3m_key'
vrp3m_key = 'vrp3m_key'
gri_released_key = 'gri_released_key'
gri_start_key = 'gri_start_key'
gri_level_perc_key = 'gri_level_perc_key' ### ADDED FOR BETA FACTORS CALCULATION
gri_momentum_perc_key = 'gri_momentum_perc_key' ### ADDED FOR BETA FACTORS CALCULATION
### Closing files that have been previously opened (uses a private PyTables registry):
import tables
tables.file._open_files.close_all()
### Saving data to hdf5 fixed formatted files.
### The first write uses mode 'w' and so recreates the file, every later write appends a new key:
list_hdf_items = [(market_membership_key, ser_market_membership),
                  (realized_ret_USD_key, ser_realized_ret_USD),
                  (realized_ret_LOC_key, ser_realized_ret_LOC),
                  (index_ret_USD_key, ser_index_ret_USD),
                  (index_ret_LOC_key, ser_index_ret_LOC),
                  (ivol3m_key, ser_ivol3m),
                  (vrp3m_key, ser_vrp3m),
                  (gri_released_key, ser_gri_released),
                  (gri_start_key, ser_gri_start_col),
                  (gri_level_perc_key, ser_gri_level_perc),         ### ADDED FOR BETA FACTORS CALCULATION
                  (gri_momentum_perc_key, ser_gri_momentum_perc)]   ### ADDED FOR BETA FACTORS CALCULATION
for iter_number, (iter_key, iter_series) in enumerate(list_hdf_items):
    iter_series.to_hdf(path_market_risk_source_hdf, key = iter_key, mode = ('w' if iter_number == 0 else 'a'), format = 'fixed')

In [2]:
### EXTRACTING PREVIOUSLY SAVED DATA FROM HDF5 FILE
### Declaring constants:
path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
market_membership_key = 'market_membership_key'
realized_ret_USD_key = 'realized_ret_USD_key'
realized_ret_LOC_key = 'realized_ret_LOC_key'
index_ret_USD_key = 'index_ret_USD_key'
index_ret_LOC_key = 'index_ret_LOC_key'
ivol3m_key = 'ivol3m_key'
vrp3m_key = 'vrp3m_key'
### Path of the original GRI file (kept for reference, nothing is read from it in this cell):
path_gri_index_hdf = 'Data_Files/Source_Files/gri_released_index.h5'
gri_released_key = 'gri_released_key'
gri_start_key = 'gri_start_key'
gri_level_perc_key = 'gri_level_perc_key' ### ADDED FOR BETA FACTORS CALCULATION
gri_momentum_perc_key = 'gri_momentum_perc_key' ### ADDED FOR BETA FACTORS CALCULATION
### Loading every stored series from the hdf5 fixed formatted file (one read per key, in the order of the keys):
(ser_market_membership,
 ser_realized_ret_USD,
 ser_realized_ret_LOC,
 ser_index_ret_USD,
 ser_index_ret_LOC,
 ser_ivol3m,
 ser_vrp3m,
 ser_gri_released,
 ser_gri_start_col,
 ser_gri_level_perc,        ### ADDED FOR BETA FACTORS CALCULATION
 ser_gri_momentum_perc      ### ADDED FOR BETA FACTORS CALCULATION
) = [pd.read_hdf(path_market_risk_source_hdf, iter_key)
     for iter_key in (market_membership_key,
                      realized_ret_USD_key,
                      realized_ret_LOC_key,
                      index_ret_USD_key,
                      index_ret_LOC_key,
                      ivol3m_key,
                      vrp3m_key,
                      gri_released_key,
                      gri_start_key,
                      gri_level_perc_key,
                      gri_momentum_perc_key)]

In [10]:
### EXTRACTING UNIVERSE DATA FOR A PARTICULAR DATE
def get_date_membership(iter_date):
    """Return the market membership that applies to iter_date.

    Membership is looked up in the module-level series ser_market_membership at a
    business month end: iter_date itself when it is a business month end, otherwise the
    previous business month end. The date label of the selected rows is then replaced
    by iter_date.

    Args:
        iter_date: date for which membership is requested
            (presumably a pd.Timestamp - verify against callers).

    Returns:
        pd.Series slice of ser_market_membership whose date level equals iter_date.
    """
    ### Importing standard modules:
    import pandas as pd
    ### Detecting whether the date is a business month end (a zero offset leaves such a date unchanged):
    flag_month_end = (pd.to_datetime(iter_date) == pd.to_datetime(iter_date - pd.offsets.BusinessMonthEnd(0)))
    ### Choosing the month end to take membership from:
    iter_month_end = iter_date if flag_month_end else pd.to_datetime(iter_date - pd.offsets.BusinessMonthEnd(1))
    ### Slicing the in-memory membership series and relabelling the month end as the requested date:
    ser_month_end_membership = ser_market_membership.loc[:, iter_month_end : iter_month_end]

    return ser_month_end_membership.rename(index = {iter_month_end : iter_date})

In [11]:
### DEFINING EXPONENTIAL WEIGHTS GENERATOR
def get_exp_weights(window_years = 5, halflife_months = 3):
    """Generate exponentially decaying weights for a rolling window of working days.

    The window holds 260 * window_years daily slots. The half-life is converted to
    working days as round(260 / 12 * halflife_months). Weights grow from the oldest
    slot to the newest one, whose weight is exactly 1.

    Args:
        window_years: length of the window in years.
        halflife_months: number of months after which a weight halves.

    Returns:
        pd.Series named 'Weight' with a default integer index, oldest slot first.
    """
    ### Importing standard modules:
    import math
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    num_year_months = 12
    ### Half-life expressed in whole working days:
    num_halflife_days = round((num_year_work_days / num_year_months * halflife_months))
    ### Daily decay factor (compounded over the half-life it equals 0.5):
    num_decay_factor = math.exp(math.log(0.5) / num_halflife_days)
    ### Age of every window slot in days, descending, so that the last slot has age 0:
    arr_slot_ages = np.arange(num_year_work_days * window_years, 0, -1) - 1
    ### Creating weights series:
    return pd.Series(np.exp(math.log(num_decay_factor) * arr_slot_ages), name = 'Weight')

In [12]:
### DEFINING WEIGHTS TO SERIES BINDER
def bind_exp_weights(ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_condition = pd.Series(np.nan)):
    """Build a weight series aligned with the index of ser_returns.

    Args:
        ser_returns: observations the weights are bound to (only its size and index are used).
        weighting_kind: one of
            'equal'     - every observation gets weight 1;
            'expo'      - the last ser_returns.size exponential weights, assigned by position,
                          so the most recent observation gets the largest weight;
            'expo_cond' - the same exponential weights, assigned by the absolute distance between
                          each condition value and the latest condition value: the furthest
                          observation gets the smallest weight.
        window_years, halflife_months: passed to get_exp_weights.
        ser_condition: conditioning variable, used by 'expo_cond' only; it is not modified.

    Returns:
        pd.Series named 'Weight'; for 'expo_cond' it is indexed like ser_condition (sorted).

    Raises:
        ValueError: if weighting_kind is not one of the supported kinds
            (previously this ended in an UnboundLocalError).
    """
    ### Importing standard modules:
    import pandas as pd
    ### Validating the weighting kind before any work is done:
    if weighting_kind not in ('equal', 'expo', 'expo_cond'):
        raise ValueError("Unknown weighting_kind: " + repr(weighting_kind))
    ### An empty window gets empty weights (a [-0 : ] slice would otherwise return the whole weights vector):
    if (ser_returns.size == 0):
        return pd.Series(dtype = float, index = ser_returns.index, name = 'Weight')
    ### Creating weights series:
    if (weighting_kind == 'equal'):
        return pd.Series(1, index = ser_returns.index, name = 'Weight')
    arr_weights = get_exp_weights(window_years, halflife_months)[- ser_returns.size : ].values
    if (weighting_kind == 'expo'):
        return pd.Series(arr_weights, index = ser_returns.index, name = 'Weight')
    ### Conditional weights: distance of every condition value from the latest one, largest distance first.
    ### NOTE(review): missing distances are sorted last and so receive the largest weights - confirm this is intended.
    ser_distance = abs(ser_condition - ser_condition.iloc[-1]).sort_values(ascending = False)
    ser_weights = pd.Series(arr_weights, ser_distance.index).sort_index()
    ser_weights.name = 'Weight'

    return ser_weights

In [13]:
### DEFINING EXPONENTIAL VOLATILITY CALCULATOR
def get_expvol_value(ser_returns, ser_weights):
    """Compute the weighted root mean square of the returns.

    Missing returns are dropped, the weights are restricted to the remaining labels and
    re-normalised to sum to 1, and sqrt(sum(w * r^2)) is returned. The returns are not
    demeaned here.

    Args:
        ser_returns: return observations (may contain NaN).
        ser_weights: weights indexed like ser_returns.

    Returns:
        float volatility estimate, or NaN when no return has a matching weight.
    """
    ### Importing standard modules:
    import numpy as np
    ### Aligning valid returns with the weights:
    ser_valid_returns = ser_returns.dropna()
    index_rolling = ser_valid_returns.index.intersection(ser_weights.index)
    ### No usable observation: the volatility is undefined (previously 0.0 was returned):
    if index_rolling.empty:
        return np.nan
    ### Exponential volatility calculating:
    expvol_y = ser_valid_returns.loc[index_rolling]
    expvol_w = ser_weights.loc[index_rolling]
    expvol_w = expvol_w / expvol_w.sum()

    return np.sqrt(expvol_w.dot(expvol_y * expvol_y))

In [14]:
### DEFINING EXPONENTIAL VOLATILITY SERIES BUILDER
def get_expvol_series(ser_market_membership, ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_condition = pd.Series(np.nan)):
    """Compute annualised weighted volatility for every country/date point of the membership.

    For each (country, date) in ser_market_membership the log returns of the trailing
    260 * window_years business days are demeaned, weighted through bind_exp_weights and
    passed to get_expvol_value; the result is scaled by sqrt(260). Points with 130 or
    fewer valid returns in the window, and countries absent from ser_returns, stay NaN.

    Args:
        ser_market_membership: series indexed by (country, date); only its index is used.
        ser_returns: simple returns indexed by (country, date).
        weighting_kind, window_years, halflife_months: passed to bind_exp_weights.
        ser_condition: optional conditioning variable indexed by date (for 'expo_cond').
            It is forward filled on a copy, so the caller's series is left untouched.

    Returns:
        pd.Series of volatilities indexed like ser_market_membership.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    ### Flattening MSCI changes by logarithm:
    ser_returns = np.log(1 + ser_returns)
    ### Forward filling a copy: an in-place fill would alter the caller's series and the shared default argument
    ser_condition = ser_condition.ffill()
    flag_condition = (ser_condition.count() > 0)
    index_return_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_expvol = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        ### Skipping countries without any returns data:
        if (iter_country not in index_return_countries):
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting returns data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            date_window_start = iter_date - pd.offsets.BusinessDay(num_year_work_days * window_years - 1)
            ser_iter_returns = ser_country_returns.loc[date_window_start : iter_date]
            ser_iter_returns = ser_iter_returns - ser_iter_returns.mean()
            ### Weights are only built for windows that hold enough valid observations:
            if (ser_iter_returns.count() <= num_year_work_days // 2):
                continue
            if flag_condition:
                ser_iter_condition = ser_condition[ser_iter_returns.index]
            else:
                ser_iter_condition = pd.Series(np.nan)
            ### Weights are bound to the full window (gaps included), so they stay aligned by position:
            ser_iter_weights = bind_exp_weights(ser_iter_returns, weighting_kind, window_years, halflife_months, ser_iter_condition)
            expvol_result = get_expvol_value(ser_iter_returns.dropna(), ser_iter_weights) * np.sqrt(num_year_work_days)
            ser_expvol.loc[iter_country, iter_date] = expvol_result

    return ser_expvol

In [15]:
### DEFINING BETA CALCULATOR
def get_beta_value(ser_returns, ser_weights, ser_factors):
    """Estimate the slope of a weighted least squares regression of returns on a factor.

    Missing returns are dropped, the three inputs are restricted to the labels shared by
    the returns and the factor, and returns are regressed on a constant plus the factor
    with statsmodels WLS.

    Args:
        ser_returns: dependent variable (may contain NaN).
        ser_weights: regression weights; must contain every label shared by returns and factor.
        ser_factors: explanatory variable.

    Returns:
        The second fitted parameter, i.e. the coefficient of the factor when a constant
        column has been prepended.
    """
    ### Importing stats modules:
    import statsmodels.api as sm
    ### Index intersection:
    ser_valid_returns = ser_returns.dropna()
    index_rolling = ser_valid_returns.index.intersection(ser_factors.index)
    ### Regression performing:
    wls_y = ser_valid_returns.loc[index_rolling].values
    wls_x = sm.add_constant(ser_factors.loc[index_rolling].values)
    wls_w = ser_weights.loc[index_rolling].values
    wls_regression = sm.WLS(wls_y, wls_x, weights = wls_w).fit()
    ### Position 0 holds the intercept, position 1 the factor coefficient:
    return wls_regression.params[1]

In [16]:
### DEFINING BETA SERIES BUILDER
def get_beta_series(ser_market_membership, ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_factors = pd.Series(np.nan)):
    """Compute a rolling beta to ser_factors for every country/date point of the membership.

    For each (country, date) in ser_market_membership the trailing 260 * window_years
    business days of returns are taken. When more than 260 valid returns exist both before
    and after alignment with the factor, the demeaned log returns are regressed on the
    demeaned factor by get_beta_value. All other points stay NaN.

    Args:
        ser_market_membership: series indexed by (country, date); only its index is used.
        ser_returns: simple returns indexed by (country, date).
        weighting_kind, window_years, halflife_months: passed to bind_exp_weights
            (no condition series is passed, so 'expo_cond' is not usable here).
        ser_factors: factor series indexed by date.

    Returns:
        pd.Series of betas indexed like ser_market_membership.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    index_return_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_beta = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        ### Skipping countries without any returns data:
        if (iter_country not in index_return_countries):
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting returns data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            date_window_start = iter_date - pd.offsets.BusinessDay(num_year_work_days * window_years - 1)
            ser_iter_returns = ser_country_returns.loc[date_window_start : iter_date]
            if (ser_iter_returns.count() <= num_year_work_days):
                continue
            ### Weights are bound to the full window (gaps included), so they stay aligned by position:
            ser_iter_weights = bind_exp_weights(ser_iter_returns, weighting_kind, window_years, halflife_months)
            ser_iter_returns = ser_iter_returns.dropna()
            ser_iter_factors = ser_factors.loc[date_window_start : iter_date].dropna()
            ### Keeping only the dates available in both returns and factor:
            index_iter = ser_iter_returns.index.intersection(ser_iter_factors.index)
            ser_iter_returns = ser_iter_returns[index_iter]
            ser_iter_factors = ser_iter_factors[index_iter]
            ser_iter_weights = ser_iter_weights[index_iter]
            if (ser_iter_returns.count() > num_year_work_days):
                ser_iter_returns = np.log(1 + ser_iter_returns)
                ser_iter_returns = ser_iter_returns - ser_iter_returns.mean()
                ser_iter_factors = ser_iter_factors - ser_iter_factors.mean()
                ser_beta.loc[iter_country, iter_date] = get_beta_value(ser_iter_returns, ser_iter_weights, ser_iter_factors)

    return ser_beta

In [17]:
### DEFINING SKEWNESS CALCULATOR
def get_skewness_value(ser_returns):
    """Compute the bias-corrected sample skewness of the non-missing returns.

    Args:
        ser_returns: return observations (may contain NaN).

    Returns:
        Result of scipy.stats.skew(..., bias = False) on the valid observations,
        or NaN when there is none.
    """
    ### Importing standard modules:
    import numpy as np
    import scipy.stats as sc
    ### Dropping missing observations:
    ser_valid_returns = ser_returns.dropna()
    ### Nothing left to measure:
    if (ser_valid_returns.count() == 0):
        return np.nan
    ### Skewness calculating:
    return sc.skew(ser_valid_returns, bias = False)

In [18]:
### DEFINING SKEWNESS SERIES BUILDER
def get_skewness_series(ser_market_membership, ser_returns, window_years = 2):
    """Compute rolling skewness for every country/date point of the membership.

    For each (country, date) in ser_market_membership the trailing 260 * window_years
    business days of returns are taken. When more than 130 of them are valid, the
    skewness of the most recent 520 valid returns is computed by get_skewness_value.
    All other points, and countries absent from ser_returns, stay NaN.

    Args:
        ser_market_membership: series indexed by (country, date); only its index is used.
        ser_returns: returns indexed by (country, date).
        window_years: length of the look-back window used to collect observations.

    Returns:
        pd.Series of skewness values indexed like ser_market_membership.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    ### The skewness sample is capped at two years of valid observations, whatever window_years is:
    num_skewness_obs = num_year_work_days * 2
    index_return_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_skewness = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        ### Skipping countries without any returns data:
        if (iter_country not in index_return_countries):
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting returns data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            date_window_start = iter_date - pd.offsets.BusinessDay(num_year_work_days * window_years - 1)
            ser_iter_returns = ser_country_returns.loc[date_window_start : iter_date]
            if (ser_iter_returns.count() > num_year_work_days // 2):
                ### dropna() returns a new series, so the source returns are never modified:
                ser_iter_returns = ser_iter_returns.dropna().iloc[- num_skewness_obs : ]
                ser_skewness.loc[iter_country, iter_date] = get_skewness_value(ser_iter_returns)

    return ser_skewness

In [19]:
### DEFINING READY-TO-GO FACTOR, i.e. VRP, SERIES BUILDER (no modifications to raw data); however,
### it checks the availability of rolling data to replicate some redundant logic in the research code
def get_market_series(ser_market_membership, ser_returns, window_years = 5):
    """Pick the raw data value at every country/date point that has enough history.

    For each (country, date) in ser_market_membership the value of ser_returns at that
    date is copied when the trailing 260 * window_years business days hold more than 65
    valid observations. Points without enough history, without an observation on the
    date itself, or for countries absent from ser_returns stay NaN.

    Args:
        ser_market_membership: series indexed by (country, date); only its index is used.
        ser_returns: data series indexed by (country, date); despite the name it can hold
            any ready-to-use variable.
        window_years: length of the look-back window used for the availability check.

    Returns:
        pd.Series indexed like ser_market_membership.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    index_return_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_market = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        ### Skipping countries without any data:
        if (iter_country not in index_return_countries):
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            date_window_start = iter_date - pd.offsets.BusinessDay(num_year_work_days * window_years - 1)
            ser_iter_window = ser_country_returns.loc[date_window_start : iter_date]
            ### count() ignores missing values; the date itself must be present to avoid a KeyError:
            if (ser_iter_window.count() > num_year_work_days // 4) and (iter_date in ser_iter_window.index):
                ser_market.loc[iter_country, iter_date] = ser_iter_window.loc[iter_date]

    return ser_market

In [20]:
### DEFINING WEIGHTED AVERAGE CALCULATOR
def get_average_value(ser_returns, ser_weights):
    """Compute the weighted average of the non-missing observations.

    Missing observations are dropped and the weights are restricted to the remaining
    labels; the result is sum(w * x) / sum(w).

    Args:
        ser_returns: observations (may contain NaN).
        ser_weights: weights indexed like ser_returns.

    Returns:
        float weighted average, or NaN when no observation has a matching weight.
    """
    ### Importing standard modules:
    import numpy as np
    ### Aligning valid observations with the weights:
    ser_valid_returns = ser_returns.dropna()
    index_rolling = ser_valid_returns.index.intersection(ser_weights.index)
    ### No usable observation: the average is undefined (previously a 0 / 0 division):
    if index_rolling.empty:
        return np.nan
    ### Weighted average calculating:
    average_x = ser_valid_returns.loc[index_rolling]
    average_w = ser_weights.loc[index_rolling]

    return average_x.dot(average_w) / average_w.sum()

In [21]:
### DEFINING WEIGHTED AVERAGE SERIES BUILDER
def get_average_series(ser_market_membership, ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_condition = pd.Series(np.nan)):
    """Compute the weighted average daily change for every country/date point of the membership.

    For each (country, date) in ser_market_membership the trailing 260 * window_years
    business days of ser_returns are taken and turned into day-over-day differences.
    Missing and zero differences are discarded; when more than 65 remain, their weighted
    average is computed by get_average_value with weights from bind_exp_weights.
    All other points, and countries absent from ser_returns, stay NaN.

    Args:
        ser_market_membership: series indexed by (country, date); only its index is used.
        ser_returns: level series indexed by (country, date); despite the name, it is
            differenced here.
        weighting_kind, window_years, halflife_months: passed to bind_exp_weights.
        ser_condition: optional conditioning variable indexed by date (for 'expo_cond').
            It is forward filled on a copy, so the caller's series is left untouched.

    Returns:
        pd.Series of averages indexed like ser_market_membership, sorted by index.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    ### Forward filling a copy: an in-place fill would alter the caller's series and the shared default argument
    ser_condition = ser_condition.ffill()
    flag_condition = (ser_condition.count() > 0)
    index_return_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_average = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        ### Skipping countries without any data:
        if (iter_country not in index_return_countries):
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            date_window_start = iter_date - pd.offsets.BusinessDay(num_year_work_days * window_years - 1)
            ser_iter_window = ser_country_returns.loc[date_window_start : iter_date]
            ### Day-over-day changes, without the missing and the zero ones:
            ser_iter_changes = ser_iter_window.diff()
            ser_iter_changes = ser_iter_changes[ser_iter_changes.notna() & (ser_iter_changes != 0)]
            ### Weights are only built for windows that hold enough valid observations:
            if (ser_iter_changes.count() <= num_year_work_days // 4):
                continue
            if flag_condition:
                ser_iter_condition = ser_condition[ser_iter_window.index]
            else:
                ser_iter_condition = pd.Series(np.nan)
            ### Weights are bound to the full window (gaps included), so they stay aligned by position:
            ser_iter_weights = bind_exp_weights(ser_iter_window, weighting_kind, window_years, halflife_months, ser_iter_condition)
            ser_average.loc[iter_country, iter_date] = get_average_value(ser_iter_changes, ser_iter_weights)

    return ser_average.sort_index(level = [0, 1])

In [22]:
### GET SHORT TERM EVENT RISK FACTOR
def get_short_term_event_risk_factor(iter_date):
    """Build the short-term event risk factor for iter_date.

    The factor is the negative of the exponentially weighted volatility (1-month
    half-life, 5-year window) of local-currency returns, computed by get_expvol_series
    for the membership returned by get_date_membership. Zero returns are treated as
    missing. Reads the module-level series ser_realized_ret_LOC.

    Args:
        iter_date: last date of the rolling window.

    Returns:
        pd.Series of factor values indexed like the membership of iter_date.
    """
    ### Importing standard modules (common for all factors):
    import numpy as np
    import pandas as pd
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    ### Data preparing (common for all factors):
    index_window = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_membership = get_date_membership(iter_date)
    ### All countries are selected here (no country filter), zero returns are treated as missing observations:
    ser_window_returns = ser_realized_ret_LOC.loc[:, index_window].replace(0, np.nan)
    ### Factor calculation:
    ser_expvol1m = get_expvol_series(ser_membership, ser_window_returns, 'expo', window_years = window_years, halflife_months = 1)

    return - ser_expvol1m

In [23]:
### GET LOW VOLATILITY ANOMALY FACTOR
def get_low_vol_factor(iter_date):
    """Build the low volatility anomaly factor for iter_date.

    The exponentially weighted volatility (24-month half-life, 5-year window) of
    local-currency returns is computed by get_expvol_series for the membership returned by
    get_date_membership. The factor is the inverse variance of each country divided by the
    sum of inverse variances over all countries of the same date. Zero returns are treated
    as missing. Reads the module-level series ser_realized_ret_LOC.

    Args:
        iter_date: last date of the rolling window.

    Returns:
        pd.Series of factor values indexed like the membership of iter_date, sorted by index.
    """
    ### Importing standard modules (common for all factors):
    import numpy as np
    import pandas as pd
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    ### Data preparing (common for all factors):
    index_iter_date = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_iter_membership = get_date_membership(iter_date)
    ### All countries are selected here (no country filter), zero returns are treated as missing observations:
    ser_iter_returns = ser_realized_ret_LOC.loc[:, index_iter_date].replace(0, np.nan)
    ### Factor calculation:
    ser_expvol24m = get_expvol_series(ser_iter_membership, ser_iter_returns, 'expo', window_years = window_years, halflife_months = 24)
    ser_lowvol_base = (1 / (ser_expvol24m * ser_expvol24m)).replace([np.inf, -np.inf], np.nan)
    ### Rescaling by the cross-sectional total of each date (the second index level).
    ### This step is not necessary if the factor gets standardized downstream on the full universe.
    ser_date_totals = ser_lowvol_base.groupby(level = 1).transform('sum')
    ser_iter_factor = (ser_lowvol_base / ser_date_totals).sort_index()

    return ser_iter_factor

In [24]:
### GET VOLATILITY SURPRISE FACTOR
def get_vol_surprise_factor(iter_date):
    """Build the volatility surprise factor for iter_date.

    Two exponentially weighted volatilities (1-month half-life, 5-year window) of
    local-currency returns are computed by get_expvol_series: one with time-based weights
    ('expo') and one with weights conditioned on the GRI series ('expo_cond'). The factor is
    minus the logarithm of their ratio. Zero returns are treated as missing. Reads the
    module-level series ser_realized_ret_LOC, ser_gri_start_col and ser_gri_released.

    Args:
        iter_date: last date of the rolling window.

    Returns:
        pd.Series of factor values indexed like the membership of iter_date.
    """
    ### Importing standard modules and date-special modules (common for all factors):
    import numpy as np
    import pandas as pd
    from datetime import date
    ### Global parameters declaring:
    date_start = date(1993, 12, 31)
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    ### Data preparing (common for all factors):
    index_window = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_membership = get_date_membership(iter_date)
    ### Zero returns are treated as missing observations:
    ser_window_returns = ser_realized_ret_LOC.loc[:, index_window].replace(0, np.nan)
    ### GRI values before date_start come from the start column, later ones from the released series.
    ### NOTE(review): presumably ser_gri_start_col[date_start] is the history known at date_start - confirm.
    ser_gri_history = ser_gri_start_col[date_start].loc[ : date_start - pd.offsets.BusinessDay()]
    ser_gri_added = pd.concat([ser_gri_history, ser_gri_released])
    ser_window_condition = ser_gri_added[index_window]
    ### Factor calculation:
    ser_expvol1m = get_expvol_series(ser_membership, ser_window_returns, 'expo', window_years = window_years, halflife_months = 1)
    ser_expvol1m_cond = get_expvol_series(ser_membership, ser_window_returns, 'expo_cond', window_years = window_years,
                                          halflife_months = 1, ser_condition = ser_window_condition)

    return -np.log(ser_expvol1m / ser_expvol1m_cond)

In [25]:
### GET TAIL RISK FACTOR
def get_tail_risk_factor(iter_date):
    """Build the tail risk factor for iter_date.

    The factor is the negative of the skewness of local-currency returns computed by
    get_skewness_series (5-year look-back window) for the membership returned by
    get_date_membership. Zero returns are treated as missing. Reads the module-level
    series ser_realized_ret_LOC.

    Args:
        iter_date: last date of the rolling window.

    Returns:
        pd.Series of factor values indexed like the membership of iter_date.
    """
    ### Importing standard modules (common for all factors):
    import numpy as np
    import pandas as pd
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    ### Data preparing (common for all factors):
    index_window = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_membership = get_date_membership(iter_date)
    ### Zero returns are treated as missing observations:
    ser_window_returns = ser_realized_ret_LOC.loc[:, index_window].replace(0, np.nan)
    ### Factor calculation:
    ser_skewness = get_skewness_series(ser_membership, ser_window_returns, window_years = window_years)

    return - ser_skewness

In [26]:
### GET VRP FACTOR
def get_vrp_factor(iter_date):
    """VRP factor for a single date, produced by get_market_series.

    iter_date is the last business day of the lookback window. Membership comes from
    get_date_membership(iter_date), the VRP data from the global ser_vrp3m.
    """
    import pandas as pd
    ### Lookback window: 5 years of 260 business days each, ending at iter_date
    years_back = 5
    days_per_year = 260
    window_dates = pd.date_range(end = iter_date, periods = days_per_year * years_back, freq = 'B')
    members = get_date_membership(iter_date)
    window_vrp = ser_vrp3m.loc[:, window_dates]
    ### The factor itself does not need a 5-year rolling dataset; the window is kept
    ### to reconcile with some data checks performed in research code
    return get_market_series(members, window_vrp, window_years = years_back)

In [27]:
### GET IMPLIED VOLATILITY SHORT TERM MOMENTUM FACTOR
def get_ivol_mom_1m_factor(iter_date):
    """Implied volatility momentum factor with a 1-month half-life for a single date.

    iter_date is the last business day of the lookback window. Membership comes from
    get_date_membership(iter_date), implied volatility from the global ser_ivol3m.
    Returns the result of get_average_series called in 'expo' mode with halflife_months = 1.
    """
    import pandas as pd
    ### Lookback window: 5 years of 260 business days each, ending at iter_date
    years_back = 5
    days_per_year = 260
    window_dates = pd.date_range(end = iter_date, periods = days_per_year * years_back, freq = 'B')
    members = get_date_membership(iter_date)
    window_ivol = ser_ivol3m.loc[:, window_dates]
    ### Factor calculation:
    return get_average_series(members, window_ivol, 'expo', window_years = years_back, halflife_months = 1)

In [28]:
### GET IMPLIED VOLATILITY LONG TERM MOMENTUM FACTOR
def get_ivol_mom_12m_factor(iter_date):
    """Implied volatility momentum factor with a 12-month half-life for a single date.

    iter_date is the last business day of the lookback window. Membership comes from
    get_date_membership(iter_date), implied volatility from the global ser_ivol3m.
    Returns the result of get_average_series called in 'expo' mode with halflife_months = 12.
    """
    import pandas as pd
    ### Lookback window: 5 years of 260 business days each, ending at iter_date
    years_back = 5
    days_per_year = 260
    window_dates = pd.date_range(end = iter_date, periods = days_per_year * years_back, freq = 'B')
    members = get_date_membership(iter_date)
    window_ivol = ser_ivol3m.loc[:, window_dates]
    ### Factor calculation:
    return get_average_series(members, window_ivol, 'expo', window_years = years_back, halflife_months = 12)

In [29]:
### ADDED FOR BETA FACTORS CALCULATION
    
### GET BETA FACTOR
def get_beta_factor(iter_date):
    """Beta factor for a single date, produced by get_beta_series.

    iter_date is the last business day of the lookback window. Returns are taken from the
    global ser_monthly_ret_USD; the regressor passed as ser_factors is the change of the
    global ser_gri_released over 21 business days, restricted to the lookback window.
    Returns the result of get_beta_series called in 'equal' mode with halflife_months = 24.
    """
    import numpy as np
    import pandas as pd
    from datetime import date
    ### Global parameters declaring:
    date_start = date(1993, 12, 31)
    ### Lookback window: 5 years of 260 business days each, ending at iter_date;
    ### the extended window adds 21 business days in front so the 21-day change exists on the first window date
    years_back = 5
    days_per_year = 260
    days_per_month = 21
    days_in_window = days_per_year * years_back
    window_dates = pd.date_range(end = iter_date, periods = days_in_window, freq = 'B')
    window_dates_plus = pd.date_range(end = iter_date, periods = days_in_window + days_per_month, freq = 'B')
    members = get_date_membership(iter_date)
    ### Impossible to add country filter here; exact zero returns are treated as effectively missing observations
    window_returns = ser_monthly_ret_USD.loc[:, window_dates].replace(0, np.nan)
    ### NOTE(review): ser_gri_added is built but never used below - the GRI window is read
    ### from ser_gri_released instead. Confirm which of the two series was intended.
    ser_gri_added = pd.concat([ser_gri_start_col[date_start].loc[ : date_start - pd.offsets.BusinessDay()], ser_gri_released])
    ### Change of the GRI over one month of business days, cut back to the lookback window:
    gri_extended = ser_gri_released[window_dates_plus]
    gri_change = gri_extended - gri_extended.shift(days_per_month)
    gri_change = gri_change[window_dates]
    ### Factor calculation:
    return get_beta_series(members, window_returns, 'equal', window_years = years_back, halflife_months = 24, ser_factors = gri_change)

In [None]:
### LOOPER FOR SHORT TERM EVENT RISK FACTOR
### The factor is calculated for every date present in the market membership series;
### shorter alternative ranges for quick testing are kept commented out:
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 160]
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
arr_short_term_event_risk_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_short_term_event_risk_factor.append(get_short_term_event_risk_factor(iter_date))
    ### Progress is reported after every tenth processed date:
    if not (iter_counter % 10):
        print('Progress printout', iter_counter, '/', iter_date)
ser_short_term_event_risk_factor = pd.concat(arr_short_term_event_risk_factor)

In [29]:
### SHORT TERM EVENT RISK FACTOR TESTING:
### Spot checks for two countries on a fixed date:
print('ser_short_term_event_risk_factor - AR 29-Dec-2006:', ser_short_term_event_risk_factor.loc['AR' , '2006-12-29'])
print('ser_short_term_event_risk_factor - US 29-Dec-2006:', ser_short_term_event_risk_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (index level 1); groupby returns the dates already sorted:
ser_short_term_event_risk_factor_mean = ser_short_term_event_risk_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_short_term_event_risk_factor - cross-sectional mean min:', ser_short_term_event_risk_factor_mean.min())
print('ser_short_term_event_risk_factor - cross-sectional mean mean:', ser_short_term_event_risk_factor_mean.mean())
print('ser_short_term_event_risk_factor - cross-sectional mean max:', ser_short_term_event_risk_factor_mean.max())
print('ser_short_term_event_risk_factor - cross-sectional mean stdev:', ser_short_term_event_risk_factor_mean.std())
### Number of dates with a defined cross-sectional mean:
print('ser_short_term_event_risk_factor - cross-sectional mean count:', ser_short_term_event_risk_factor_mean.count())

ser_short_term_event_risk_factor - AR 29-Dec-2006: -0.21489979042709245
ser_short_term_event_risk_factor - US 29-Dec-2006: -0.07838774324887841
ser_short_term_event_risk_factor - cross-sectional mean min: -0.6654721997843369
ser_short_term_event_risk_factor - cross-sectional mean mean: -0.213007981528966
ser_short_term_event_risk_factor - cross-sectional mean max: -0.11613865884310284
ser_short_term_event_risk_factor - cross-sectional mean stdev: 0.07230243438728112
ser_short_term_event_risk_factor - cross-sectional mean mean: 234


In [None]:
### LOOPER FOR LOW VOLATILITY ANOMALY FACTOR
### The factor is calculated for every date present in the market membership series;
### shorter alternative ranges for quick testing are kept commented out:
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 160]
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
arr_low_vol_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_low_vol_factor.append(get_low_vol_factor(iter_date))
    ### Progress is reported after every tenth processed date:
    if not (iter_counter % 10):
        print('Progress printout', iter_counter, '/', iter_date)

ser_low_vol_factor = pd.concat(arr_low_vol_factor)

In [30]:
### LOW VOLATILITY ANOMALY FACTOR TESTING:
### Spot checks for two countries on a fixed date:
print('ser_low_vol_factor - AR 29-Dec-2006:', ser_low_vol_factor.loc['AR' , '2006-12-29'])
print('ser_low_vol_factor - US 29-Dec-2006:', ser_low_vol_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (index level 1); groupby returns the dates already sorted:
ser_low_vol_factor_mean = ser_low_vol_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_low_vol_factor - cross-sectional mean min:', ser_low_vol_factor_mean.min())
print('ser_low_vol_factor - cross-sectional mean mean:', ser_low_vol_factor_mean.mean())
print('ser_low_vol_factor - cross-sectional mean max:', ser_low_vol_factor_mean.max())
print('ser_low_vol_factor - cross-sectional mean stdev:', ser_low_vol_factor_mean.std())
### Number of dates with a defined cross-sectional mean:
print('ser_low_vol_factor - cross-sectional mean count:', ser_low_vol_factor_mean.count())

ser_low_vol_factor - AR 29-Dec-2006: 0.007339857068326828
ser_low_vol_factor - US 29-Dec-2006: 0.03595279045940818
ser_low_vol_factor - cross-sectional mean min: 0.02040816326530611
ser_low_vol_factor - cross-sectional mean mean: 0.02255744652417695
ser_low_vol_factor - cross-sectional mean max: 0.04545454545454546
ser_low_vol_factor - cross-sectional mean stdev: 0.006440980492665572
ser_low_vol_factor - cross-sectional mean mean: 234


In [None]:
### LOOPER FOR VOLATILITY SURPRISE FACTOR
### The factor is calculated for every date present in the market membership series;
### shorter alternative ranges for quick testing are kept commented out:
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 156]
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
arr_vol_surprise_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_vol_surprise_factor.append(get_vol_surprise_factor(iter_date))
    ### Progress is reported after every tenth processed date:
    if not (iter_counter % 10):
        print('Progress printout', iter_counter, '/', iter_date)

ser_vol_surprise_factor = pd.concat(arr_vol_surprise_factor)

In [53]:
### VOLATILITY SURPRISE FACTOR TESTING:
### Spot checks for two countries on a fixed date:
print('ser_vol_surprise_factor - AR 29-Dec-2006:', ser_vol_surprise_factor.loc['AR' , '2006-12-29'])
print('ser_vol_surprise_factor - US 29-Dec-2006:', ser_vol_surprise_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (index level 1); groupby returns the dates already sorted:
ser_vol_surprise_factor_mean = ser_vol_surprise_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_vol_surprise_factor - cross-sectional mean min:', ser_vol_surprise_factor_mean.min())
print('ser_vol_surprise_factor - cross-sectional mean mean:', ser_vol_surprise_factor_mean.mean())
print('ser_vol_surprise_factor - cross-sectional mean max:', ser_vol_surprise_factor_mean.max())
print('ser_vol_surprise_factor - cross-sectional mean stdev:', ser_vol_surprise_factor_mean.std())
### Number of dates with a defined cross-sectional mean:
print('ser_vol_surprise_factor - cross-sectional mean count:', ser_vol_surprise_factor_mean.count())

ser_vol_surprise_factor - AR 29-Dec-2006: -0.019338598670127435
ser_vol_surprise_factor - US 29-Dec-2006: 0.04195253755895069
ser_vol_surprise_factor - cross-sectional mean min: -0.04373786858119822
ser_vol_surprise_factor - cross-sectional mean mean: -0.04373786858119822
ser_vol_surprise_factor - cross-sectional mean max: -0.04373786858119822
ser_vol_surprise_factor - cross-sectional mean stdev: nan
ser_vol_surprise_factor - cross-sectional mean mean: 1


In [None]:
### LOOPER FOR TAIL RISK FACTOR
### The factor is calculated for every date present in the market membership series;
### shorter alternative ranges for quick testing are kept commented out:
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 156]
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
arr_tail_risk_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_tail_risk_factor.append(get_tail_risk_factor(iter_date))
    ### Progress is reported after every tenth processed date:
    if not (iter_counter % 10):
        print('Progress printout', iter_counter, '/', iter_date)

ser_tail_risk_factor = pd.concat(arr_tail_risk_factor)

In [42]:
### TAIL RISK FACTOR TESTING:
### Spot checks for two countries on a fixed date:
print('ser_tail_risk_factor - AR 29-Dec-2006:', ser_tail_risk_factor.loc['AR' , '2006-12-29'])
print('ser_tail_risk_factor - US 29-Dec-2006:', ser_tail_risk_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (index level 1); groupby returns the dates already sorted:
ser_tail_risk_factor_mean = ser_tail_risk_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_tail_risk_factor - cross-sectional mean min:', ser_tail_risk_factor_mean.min())
print('ser_tail_risk_factor - cross-sectional mean mean:', ser_tail_risk_factor_mean.mean())
print('ser_tail_risk_factor - cross-sectional mean max:', ser_tail_risk_factor_mean.max())
print('ser_tail_risk_factor - cross-sectional mean stdev:', ser_tail_risk_factor_mean.std())
### Number of dates with a defined cross-sectional mean:
print('ser_tail_risk_factor - cross-sectional mean count:', ser_tail_risk_factor_mean.count())

ser_tail_risk_factor - AR 29-Dec-2006: -0.01861014783183245
ser_tail_risk_factor - US 29-Dec-2006: -0.060239949254639255
ser_tail_risk_factor - cross-sectional mean min: -0.3221148968296049
ser_tail_risk_factor - cross-sectional mean mean: 0.0909717780143243
ser_tail_risk_factor - cross-sectional mean max: 0.5167322540605308
ser_tail_risk_factor - cross-sectional mean stdev: 0.16315908350625816
ser_tail_risk_factor - cross-sectional mean mean: 234


In [None]:
### LOOPER FOR VRP FACTOR
### The factor is calculated for every date present in the market membership series;
### shorter alternative ranges for quick testing are kept commented out:
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 160]
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
arr_vrp_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_vrp_factor.append(get_vrp_factor(iter_date))
    ### Progress is reported after every tenth processed date:
    if not (iter_counter % 10):
        print('Progress printout', iter_counter, '/', iter_date)

ser_vrp_factor = pd.concat(arr_vrp_factor)

In [25]:
### VRP FACTOR TESTING:
### Spot checks for two countries on a fixed date:
print('ser_vrp_factor - AR 29-Dec-2006:', ser_vrp_factor.loc['AR' , '2006-12-29'])
print('ser_vrp_factor - US 29-Dec-2006:', ser_vrp_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (index level 1); groupby returns the dates already sorted:
ser_vrp_factor_mean = ser_vrp_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_vrp_factor - cross-sectional mean min:', ser_vrp_factor_mean.min())
print('ser_vrp_factor - cross-sectional mean mean:', ser_vrp_factor_mean.mean())
print('ser_vrp_factor - cross-sectional mean max:', ser_vrp_factor_mean.max())
print('ser_vrp_factor - cross-sectional mean stdev:', ser_vrp_factor_mean.std())
### Number of dates with a defined cross-sectional mean:
print('ser_vrp_factor - cross-sectional mean count:', ser_vrp_factor_mean.count())

ser_vrp_factor - AR 29-Dec-2006: 0.00163
ser_vrp_factor - US 29-Dec-2006: -0.004396
ser_vrp_factor - cross-sectional mean min: -0.023387673469387766
ser_vrp_factor - cross-sectional mean mean: 0.002256157878726162
ser_vrp_factor - cross-sectional mean max: 0.036017938775510204
ser_vrp_factor - cross-sectional mean stdev: 0.00794579999880406
ser_vrp_factor - cross-sectional mean mean: 237


In [None]:
### LOOPER FOR IVOL MOMENTUM 1M FACTOR
### The factor is calculated for every date present in the market membership series;
### shorter alternative ranges for quick testing are kept commented out:
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 160]
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
arr_ivol_mom_1m_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_ivol_mom_1m_factor.append(get_ivol_mom_1m_factor(iter_date))
    ### Progress is reported after every tenth processed date:
    if not (iter_counter % 10):
        print('Progress printout', iter_counter, '/', iter_date)

ser_ivol_mom_1m_factor = pd.concat(arr_ivol_mom_1m_factor)

In [27]:
### IVOL MOMENTUM 1M FACTOR TESTING:
### Spot checks for two countries on a fixed date:
print('ser_ivol_mom_1m_factor - AR 29-Dec-2006:', ser_ivol_mom_1m_factor.loc['AR' , '2006-12-29'])
print('ser_ivol_mom_1m_factor - US 29-Dec-2006:', ser_ivol_mom_1m_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (index level 1); groupby returns the dates already sorted:
ser_ivol_mom_1m_factor_mean = ser_ivol_mom_1m_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_ivol_mom_1m_factor - cross-sectional mean min:', ser_ivol_mom_1m_factor_mean.min())
print('ser_ivol_mom_1m_factor - cross-sectional mean mean:', ser_ivol_mom_1m_factor_mean.mean())
print('ser_ivol_mom_1m_factor - cross-sectional mean max:', ser_ivol_mom_1m_factor_mean.max())
print('ser_ivol_mom_1m_factor - cross-sectional mean stdev:', ser_ivol_mom_1m_factor_mean.std())
### Number of dates with a defined cross-sectional mean:
print('ser_ivol_mom_1m_factor - cross-sectional mean count:', ser_ivol_mom_1m_factor_mean.count())

ser_ivol_mom_1m_factor - AR 29-Dec-2006: -5.728833933486413e-05
ser_ivol_mom_1m_factor - US 29-Dec-2006: -0.0001643642124898838
ser_ivol_mom_1m_factor - cross-sectional mean min: -0.0007719988881806511
ser_ivol_mom_1m_factor - cross-sectional mean mean: 6.00820444231159e-05
ser_ivol_mom_1m_factor - cross-sectional mean max: 0.0011461061607235408
ser_ivol_mom_1m_factor - cross-sectional mean stdev: 0.0002536959457065397
ser_ivol_mom_1m_factor - cross-sectional mean mean: 237


In [None]:
### LOOPER FOR IVOL MOMENTUM 12M FACTOR
### The factor is calculated for every date present in the market membership series;
### shorter alternative ranges for quick testing are kept commented out:
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 160]
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
arr_ivol_mom_12m_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_ivol_mom_12m_factor.append(get_ivol_mom_12m_factor(iter_date))
    ### Progress is reported after every tenth processed date:
    if not (iter_counter % 10):
        print('Progress printout', iter_counter, '/', iter_date)

ser_ivol_mom_12m_factor = pd.concat(arr_ivol_mom_12m_factor)

In [29]:
### IVOL MOMENTUM 12M FACTOR TESTING:
### Spot checks for two countries on a fixed date:
print('ser_ivol_mom_12m_factor - AR 29-Dec-2006:', ser_ivol_mom_12m_factor.loc['AR' , '2006-12-29'])
print('ser_ivol_mom_12m_factor - US 29-Dec-2006:', ser_ivol_mom_12m_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (index level 1); groupby returns the dates already sorted:
ser_ivol_mom_12m_factor_mean = ser_ivol_mom_12m_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_ivol_mom_12m_factor - cross-sectional mean min:', ser_ivol_mom_12m_factor_mean.min())
print('ser_ivol_mom_12m_factor - cross-sectional mean mean:', ser_ivol_mom_12m_factor_mean.mean())
print('ser_ivol_mom_12m_factor - cross-sectional mean max:', ser_ivol_mom_12m_factor_mean.max())
print('ser_ivol_mom_12m_factor - cross-sectional mean stdev:', ser_ivol_mom_12m_factor_mean.std())
### Number of dates with a defined cross-sectional mean:
print('ser_ivol_mom_12m_factor - cross-sectional mean count:', ser_ivol_mom_12m_factor_mean.count())

ser_ivol_mom_12m_factor - AR 29-Dec-2006: 2.4071504646865075e-05
ser_ivol_mom_12m_factor - US 29-Dec-2006: -1.281313465985619e-05
ser_ivol_mom_12m_factor - cross-sectional mean min: -8.265832098596306e-05
ser_ivol_mom_12m_factor - cross-sectional mean mean: 5.48849915627575e-06
ser_ivol_mom_12m_factor - cross-sectional mean max: 9.861384692768506e-05
ser_ivol_mom_12m_factor - cross-sectional mean stdev: 2.4997621734106227e-05
ser_ivol_mom_12m_factor - cross-sectional mean mean: 237


In [84]:
### LOOPER FOR BETA FACTOR
def get_returns_from_index(ser_index, ma_wnd, day_period):
    """Turn per-country index levels into smoothed period returns.

    ser_index is a Series whose first index level identifies the country.
    For each country the levels are smoothed with a simple rolling mean over ma_wnd
    observations, then the return over day_period observations is taken as
    smoothed / smoothed.shift(day_period) - 1.
    Returns a Series keyed by country on the first index level; the leading
    observations of each country are NaN until both windows are filled.
    """
    import pandas as pd
    ### Moving average of the index levels, country by country:
    ser_smoothed = pd.concat({
        iter_code : ser_index[iter_code].rolling(window = ma_wnd, win_type = None).mean()
        for iter_code in ser_index.index.get_level_values(0).unique()
    })
    ### Period returns of the smoothed levels, country by country:
    dict_returns = {}
    for iter_code in ser_smoothed.index.get_level_values(0).unique():
        ser_level = ser_smoothed[iter_code]
        dict_returns[iter_code] = ser_level / ser_level.shift(day_period) - 1

    return pd.concat(dict_returns)


### Storage location and keys of the source data:
path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
index_ret_USD_key = 'index_ret_USD_key'
monthly_ret_key = 'monthly_ret_key'
market_membership_key = 'market_membership_key'
### Monthly returns are derived from the USD index levels (5-day moving average, 21-day period) and saved back:
ser_index_ret_USD = pd.read_hdf(path_market_risk_source_hdf, index_ret_USD_key)
ser_monthly_ret_USD = get_returns_from_index(ser_index_ret_USD, ma_wnd = 5, day_period = 21)
ser_monthly_ret_USD.to_hdf(path_market_risk_source_hdf, monthly_ret_key, mode = 'a', format = 'table')
### The factor is calculated for every date present in the market membership series;
### a shorter alternative range for quick testing is kept commented out:
ser_market_membership = pd.read_hdf(path_market_risk_source_hdf, market_membership_key)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 156]
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
arr_beta_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_beta_factor.append(get_beta_factor(iter_date))
    ### Progress is reported after every tenth processed date:
    if not (iter_counter % 10):
        print('Progress printout', iter_counter, '/', iter_date)

ser_beta_factor = pd.concat(arr_beta_factor)

In [77]:
### BETA FACTOR TESTING:
### Spot checks for two countries on a fixed date:
print('ser_beta_factor - AR 29-Dec-2006:', ser_beta_factor.loc['AR' , '2006-12-29'])
print('ser_beta_factor - US 29-Dec-2006:', ser_beta_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (index level 1); groupby returns the dates already sorted:
ser_beta_factor_mean = ser_beta_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_beta_factor - cross-sectional mean min:', ser_beta_factor_mean.min())
print('ser_beta_factor - cross-sectional mean mean:', ser_beta_factor_mean.mean())
print('ser_beta_factor - cross-sectional mean max:', ser_beta_factor_mean.max())
print('ser_beta_factor - cross-sectional mean stdev:', ser_beta_factor_mean.std())
### Number of dates with a defined cross-sectional mean:
print('ser_beta_factor - cross-sectional mean count:', ser_beta_factor_mean.count())

ser_beta_factor - AR 29-Dec-2006: -0.03799388234480591
ser_beta_factor - US 29-Dec-2006: -0.058970323931999996
ser_beta_factor - cross-sectional mean min: -0.13326304502959702
ser_beta_factor - cross-sectional mean mean: -0.08160695129341232
ser_beta_factor - cross-sectional mean max: -0.015540779141930442
ser_beta_factor - cross-sectional mean stdev: 0.023424263557680902
ser_beta_factor - cross-sectional mean mean: 227


In [None]:
### LOOPER FOR BETA LEVEL PERCENTILE FACTOR
def get_returns_from_index(ser_index, ma_wnd, day_period):
    """Turn per-country index levels into smoothed period returns.

    ser_index is a Series whose first index level identifies the country.
    For each country the levels are smoothed with a simple rolling mean over ma_wnd
    observations, then the return over day_period observations is taken as
    smoothed / smoothed.shift(day_period) - 1.
    Returns a Series keyed by country on the first index level; the leading
    observations of each country are NaN until both windows are filled.
    """
    import pandas as pd
    ### Moving average of the index levels, country by country:
    ser_smoothed = pd.concat({
        iter_code : ser_index[iter_code].rolling(window = ma_wnd, win_type = None).mean()
        for iter_code in ser_index.index.get_level_values(0).unique()
    })
    ### Period returns of the smoothed levels, country by country:
    dict_returns = {}
    for iter_code in ser_smoothed.index.get_level_values(0).unique():
        ser_level = ser_smoothed[iter_code]
        dict_returns[iter_code] = ser_level / ser_level.shift(day_period) - 1

    return pd.concat(dict_returns)


### Storage location and keys of the source data:
path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
index_ret_USD_key = 'index_ret_USD_key'
monthly_ret_key = 'monthly_ret_key'
gri_level_perc_key = 'gri_level_perc_key'
### Monthly returns are derived from the USD index levels (5-day moving average, 21-day period) and saved back:
ser_index_ret_USD = pd.read_hdf(path_market_risk_source_hdf, index_ret_USD_key)
ser_monthly_ret_USD = get_returns_from_index(ser_index_ret_USD, ma_wnd = 5, day_period = 21)
ser_monthly_ret_USD.to_hdf(path_market_risk_source_hdf, monthly_ret_key, mode = 'a', format = 'table')
ser_perc_gri = pd.read_hdf(path_market_risk_source_hdf, gri_level_perc_key)
### For testing purposes:
#ser_perc_gri = round(ser_perc_gri, 2)
market_membership_key = 'market_membership_key'
ser_market_membership = pd.read_hdf(path_market_risk_source_hdf, market_membership_key)
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 156]
arr_beta_factor = []
iter_counter = 0
### Switching thresholds: the sign flips to -1 above tumbler_to_minus and back to +1 below tumbler_to_plus
tumbler_to_minus = 0.60
tumbler_to_plus = 0.40
### Sign defining:
### The sign on a date depends only on the percentiles up to that date, so the whole
### sign history is built once here instead of being rebuilt for every iter_date.
### Each observation takes the sign of the preceding observation unless a threshold is crossed
### (a NaN percentile fails both comparisons and keeps the sign); the first observation is always +1.
ser_perc_gri_hist = ser_perc_gri[ : date_range_test.max()]
arr_beta_signs = []
iter_sign = 1
for iter_num, iter_perc in enumerate(ser_perc_gri_hist.values):
    if (iter_num > 0):
        if (iter_sign == 1):
            if (iter_perc > tumbler_to_minus):
                iter_sign = -1
        elif (iter_perc < tumbler_to_plus):
            iter_sign = 1
    arr_beta_signs.append(iter_sign)
ser_beta_signs = pd.Series(arr_beta_signs, index = ser_perc_gri_hist.index, dtype = 'float64')
### Signed beta factor for every date of the membership series:
for iter_date in date_range_test:
    arr_beta_factor.append(get_beta_factor(iter_date) * ser_beta_signs[iter_date])

    iter_counter = iter_counter + 1
    ### Progress is reported after every tenth processed date:
    if (iter_counter % 10 == 0):
        print('Progress printout', iter_counter, '/', iter_date)

ser_beta_factor = pd.concat(arr_beta_factor)

In [48]:
### BETA LEVEL PERCENTILE FACTOR TESTING:
### Spot checks for two countries on a fixed date:
print('ser_beta_level_perc_factor - AR 29-Dec-2006:', ser_beta_factor.loc['AR' , '2006-12-29'])
print('ser_beta_level_perc_factor - US 29-Dec-2006:', ser_beta_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (index level 1); groupby returns the dates already sorted:
ser_beta_factor_mean = ser_beta_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_beta_level_perc_factor - cross-sectional mean min:', ser_beta_factor_mean.min())
print('ser_beta_level_perc_factor - cross-sectional mean mean:', ser_beta_factor_mean.mean())
print('ser_beta_level_perc_factor - cross-sectional mean max:', ser_beta_factor_mean.max())
print('ser_beta_level_perc_factor - cross-sectional mean stdev:', ser_beta_factor_mean.std())
### Number of dates with a defined cross-sectional mean:
print('ser_beta_level_perc_factor - cross-sectional mean count:', ser_beta_factor_mean.count())

ser_beta_level_perc_factor - AR 29-Dec-2006: -0.03799388234480591
ser_beta_level_perc_factor - US 29-Dec-2006: -0.058970323931999996
ser_beta_level_perc_factor - cross-sectional mean min: -0.13326304502959702
ser_beta_level_perc_factor - cross-sectional mean mean: -0.0120127690149135
ser_beta_level_perc_factor - cross-sectional mean max: 0.12980528767580887
ser_beta_level_perc_factor - cross-sectional mean stdev: 0.08421943453343116
ser_beta_level_perc_factor - cross-sectional mean mean: 227


In [49]:
### Keeping a reference to the sign series left by the beta level percentile looper,
### because the beta momentum percentile looper rebinds ser_beta_signs.
### NOTE(review): relies on this cell being run between the two loopers.
ser_beta_signs_level = ser_beta_signs

In [None]:
### LOOPER FOR BETA MOMENTUM PERCENTILE FACTOR
def get_returns_from_index(ser_index, ma_wnd, day_period):
    """Turn per-country index levels into smoothed period returns.

    ser_index is a Series whose first index level identifies the country.
    For each country the levels are smoothed with a simple rolling mean over ma_wnd
    observations, then the return over day_period observations is taken as
    smoothed / smoothed.shift(day_period) - 1.
    Returns a Series keyed by country on the first index level; the leading
    observations of each country are NaN until both windows are filled.
    """
    import pandas as pd
    ### Moving average of the index levels, country by country:
    ser_smoothed = pd.concat({
        iter_code : ser_index[iter_code].rolling(window = ma_wnd, win_type = None).mean()
        for iter_code in ser_index.index.get_level_values(0).unique()
    })
    ### Period returns of the smoothed levels, country by country:
    dict_returns = {}
    for iter_code in ser_smoothed.index.get_level_values(0).unique():
        ser_level = ser_smoothed[iter_code]
        dict_returns[iter_code] = ser_level / ser_level.shift(day_period) - 1

    return pd.concat(dict_returns)


### Storage location and keys of the source data:
path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
index_ret_USD_key = 'index_ret_USD_key'
monthly_ret_key = 'monthly_ret_key'
gri_momentum_perc_key = 'gri_momentum_perc_key'
### Monthly returns are derived from the USD index levels (5-day moving average, 21-day period) and saved back:
ser_index_ret_USD = pd.read_hdf(path_market_risk_source_hdf, index_ret_USD_key)
ser_monthly_ret_USD = get_returns_from_index(ser_index_ret_USD, ma_wnd = 5, day_period = 21)
ser_monthly_ret_USD.to_hdf(path_market_risk_source_hdf, monthly_ret_key, mode = 'a', format = 'table')
ser_perc_gri = pd.read_hdf(path_market_risk_source_hdf, gri_momentum_perc_key)
### For testing purposes:
#ser_perc_gri = round(ser_perc_gri, 2)
market_membership_key = 'market_membership_key'
ser_market_membership = pd.read_hdf(path_market_risk_source_hdf, market_membership_key)
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 156]
arr_beta_factor = []
iter_counter = 0
### Switching thresholds: the sign flips to -1 above tumbler_to_minus and back to +1 below tumbler_to_plus
tumbler_to_minus = 0.60
tumbler_to_plus = 0.40
### Sign defining:
### The sign on a date depends only on the percentiles up to that date, so the whole
### sign history is built once here instead of being rebuilt for every iter_date.
### Each observation takes the sign of the preceding observation unless a threshold is crossed
### (a NaN percentile fails both comparisons and keeps the sign); the first observation is always +1.
ser_perc_gri_hist = ser_perc_gri[ : date_range_test.max()]
arr_beta_signs = []
iter_sign = 1
for iter_num, iter_perc in enumerate(ser_perc_gri_hist.values):
    if (iter_num > 0):
        if (iter_sign == 1):
            if (iter_perc > tumbler_to_minus):
                iter_sign = -1
        elif (iter_perc < tumbler_to_plus):
            iter_sign = 1
    arr_beta_signs.append(iter_sign)
ser_beta_signs = pd.Series(arr_beta_signs, index = ser_perc_gri_hist.index, dtype = 'float64')
### Signed beta factor for every date of the membership series:
for iter_date in date_range_test:
    arr_beta_factor.append(get_beta_factor(iter_date) * ser_beta_signs[iter_date])

    iter_counter = iter_counter + 1
    ### Progress is reported after every tenth processed date:
    if (iter_counter % 10 == 0):
        print('Progress printout', iter_counter, '/', iter_date)

ser_beta_factor = pd.concat(arr_beta_factor)

In [51]:
### BETA MOMENTUM PERCENTILE FACTOR TESTING:
### Spot checks of the factor for two countries on a single date:
print('ser_beta_momentum_perc_factor - AR 29-Dec-2006:', ser_beta_factor.loc['AR' , '2006-12-29'])
print('ser_beta_momentum_perc_factor - US 29-Dec-2006:', ser_beta_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (second index level); groupby returns the dates in ascending order:
ser_beta_factor_mean = ser_beta_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_beta_momentum_perc_factor - cross-sectional mean min:', ser_beta_factor_mean.min())
print('ser_beta_momentum_perc_factor - cross-sectional mean mean:', ser_beta_factor_mean.mean())
print('ser_beta_momentum_perc_factor - cross-sectional mean max:', ser_beta_factor_mean.max())
print('ser_beta_momentum_perc_factor - cross-sectional mean stdev:', ser_beta_factor_mean.std())
### Number of dates with a defined cross-sectional mean (the label used to read 'mean' although count() is printed):
print('ser_beta_momentum_perc_factor - cross-sectional mean count:', ser_beta_factor_mean.count())

ser_beta_momentum_perc_factor - AR 29-Dec-2006: -0.03799388234480591
ser_beta_momentum_perc_factor - US 29-Dec-2006: -0.058970323931999996
ser_beta_momentum_perc_factor - cross-sectional mean min: -0.13326304502959702
ser_beta_momentum_perc_factor - cross-sectional mean mean: -0.020065067085754482
ser_beta_momentum_perc_factor - cross-sectional mean max: 0.12980528767580887
ser_beta_momentum_perc_factor - cross-sectional mean stdev: 0.0826648046856053
ser_beta_momentum_perc_factor - cross-sectional mean mean: 227


In [52]:
### Keeping the sign series left by the beta momentum percentile factor looper under a momentum-specific name
### (plain rebinding, no copy is made):
ser_beta_signs_momentum = ser_beta_signs

In [None]:
### PERCENTILE FACTORS TESTING

In [53]:
### Restricting both sign series to the unique dates of the market membership data:
index_membership_dates = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
ser_beta_signs_level = ser_beta_signs_level[index_membership_dates]
ser_beta_signs_momentum = ser_beta_signs_momentum[index_membership_dates]

In [54]:
### Saving both sign series to a single MS Excel file, one sheet for each series:
with pd.ExcelWriter('Data_Files/Test_Files/sign_testing_runner.xlsx') as sign_writer:
    for iter_sheet, iter_signs in (('Level - Runner', ser_beta_signs_level), ('Momentum - Runner', ser_beta_signs_momentum)):
        iter_signs.to_excel(sign_writer, sheet_name = iter_sheet)

In [None]:
### Loading both GRI percentile series from the HDF source (added for beta factors calculation):
ser_gri_level_perc = pd.read_hdf(path_market_risk_source_hdf, key = gri_level_perc_key)
ser_gri_momentum_perc = pd.read_hdf(path_market_risk_source_hdf, key = gri_momentum_perc_key)
### Unique dates of the market membership data:
index_membership_sorted = ser_market_membership.sort_index(level = 1).index
date_range_test = index_membership_sorted.get_level_values(1).unique()

In [12]:
### Unique dates of the market membership data:
index_membership_sorted = ser_market_membership.sort_index(level = 1).index
date_range_test = index_membership_sorted.get_level_values(1).unique()
### Saving both percentile series on these dates to a single MS Excel file, one sheet for each series:
with pd.ExcelWriter('Data_Files/Test_Files/perc_testing_runner.xlsx') as perc_writer:
    for iter_sheet, iter_perc in (('Level - Runner', ser_gri_level_perc), ('Momentum - Runner', ser_gri_momentum_perc)):
        iter_perc[date_range_test].to_excel(perc_writer, sheet_name = iter_sheet)
