In [1]:
#### THIS NOTEBOOK IS PREPARING FACTOR DATA VECTORS FOR MARKET RISK THEME (GLOBAL COUNTRY MODEL)

In [2]:
#### STANDARD MODULES INITIALISATION
### Importing standard modules and date-special modules:
import numpy as np
import pandas as pd
import math
from datetime import date

In [3]:
#### EXTRACTING DATA FROM MATLAB-STYLED XLSX FILE

In [4]:
### Constants declaring:
# Relative path to the source workbook; the extraction cells below read their sheets from this file.
path_msci_data = 'Data_Files/Source_Files/sample_data.xlsx'
# Sheet names passed as `sheet_name` to pd.read_excel by the extraction cells.
tab_monthly = 'monthly_data'
tab_daily = 'daily_returns'
tab_ivol = 'ivol_data'
# NOTE(review): tab_map is not referenced in the visible part of the notebook - confirm it is used further down.
tab_map = 'country_map'

In [5]:
### Extracting universe data:
# Sheet rows 0 and 2 are skipped, so the first remaining row supplies the column headers.
df_universe = pd.read_excel(io = path_msci_data, sheet_name = tab_monthly, skiprows = [0, 2], header = 0)
# Keep the three needed columns under notebook-wide names and key the table by (Code, Date).
df_universe = (
    df_universe[['dates', 'region', 'ctry']]
    .rename(columns = {'dates' : 'Date', 'region' : 'Market', 'ctry' : 'Code'})
    .set_index(['Code', 'Date'])
)
# The single remaining column collapses to a Series; numeric region codes become market labels.
ser_universe = (
    df_universe.squeeze()
    .sort_index(level = [0, 1])
    .replace({50 : 'DM', 57 : 'EM', 504 : 'FM'})
)

In [6]:
### Filtering market universe:
# Only rows whose market label is one of the listed groups stay in the membership series.
arr_markets_needed = ['DM', 'FM', 'EM']
mask_market_needed = ser_universe.isin(arr_markets_needed)
ser_market_membership = ser_universe.loc[mask_market_needed]
# (Code, Date) points of the filtered universe.
index_market = ser_market_membership.index

In [7]:
### Extracting returns data:
# Sheet rows 0 and 2 are skipped, so the first remaining row supplies the column headers.
df_returns = pd.read_excel(io = path_msci_data, sheet_name = tab_daily, skiprows = [0, 2], header = 0)
df_returns = df_returns.loc[:, ['dates', 'ctry', 'retusd', 'retloc']]
df_returns.columns = ['Date', 'Code', 'Ret USD', 'Ret LOC']
df_returns.set_index(['Code', 'Date'], inplace = True)
df_returns.sort_index(level = [0, 1], inplace = True)
ser_realized_ret_USD = df_returns['Ret USD']
ser_realized_ret_LOC = df_returns['Ret LOC']
### Appending returns on reference date vector:
date_first = date(1992, 1, 1)
date_last = date(2018, 12, 31)
index_dates = pd.date_range(date_first, date_last, freq = 'B')
# np.nan is used instead of the np.NaN alias, which no longer exists in NumPy 2.0.
ser_ret_index_USD = pd.Series(np.nan, index = ser_realized_ret_USD.index)
for iter_country in ser_realized_ret_USD.index.get_level_values(0).unique():
    # Cumulative return index of the country.
    ser_ret_index_USD[iter_country] = (1 + ser_realized_ret_USD[iter_country]).cumprod()
    # NOTE(review): the next two statements act on the object returned by ser_ret_index_USD[iter_country];
    # whether they reach ser_ret_index_USD itself depends on pandas view/copy semantics - confirm the intent.
    ser_ret_index_USD[iter_country].ffill(inplace = True)
    ser_ret_index_USD[iter_country].iloc[0] = 1
    # NOTE(review): reindex() returns a new object and the result is not stored, so the series is NOT
    # moved onto index_dates here. Left unchanged because storing it would alter every downstream factor value.
    ser_ret_index_USD[iter_country].reindex(index_dates, method = 'ffill')
    # Returns are rebuilt from the ratio of consecutive index levels.
    ser_realized_ret_USD[iter_country] = (ser_ret_index_USD[iter_country] / ser_ret_index_USD[iter_country].shift(1) - 1)
ser_ret_index_LOC = pd.Series(np.nan, index = ser_realized_ret_LOC.index)
for iter_country in ser_realized_ret_LOC.index.get_level_values(0).unique():
    # Same procedure for local-currency returns; the review notes above apply here as well.
    ser_ret_index_LOC[iter_country] = (1 + ser_realized_ret_LOC[iter_country]).cumprod()
    ser_ret_index_LOC[iter_country].ffill(inplace = True)
    ser_ret_index_LOC[iter_country].iloc[0] = 1
    ser_ret_index_LOC[iter_country].reindex(index_dates, method = 'ffill')
    ser_realized_ret_LOC[iter_country] = (ser_ret_index_LOC[iter_country] / ser_ret_index_LOC[iter_country].shift(1) - 1)

In [8]:
### Extracting ivol data:
# Sheet rows 0 and 2 are skipped, so the first remaining row supplies the column headers.
df_ivol = pd.read_excel(io = path_msci_data, sheet_name = tab_ivol, skiprows = [0, 2], header = 0)
df_ivol = df_ivol.loc[:, ['dates', 'ctry', 'ivol3m', 'vrp3m']]
df_ivol.columns = ['Date', 'Code', 'IVol 3m', 'VRP 3m']
df_ivol.set_index(['Code', 'Date'], inplace = True)
df_ivol.sort_index(level = [0, 1], inplace = True)
ser_ivol3m = df_ivol['IVol 3m']
ser_vrp3m = df_ivol['VRP 3m']
### Appending ivol data on reference date vector:
# date_first / date_last / index_dates are re-declared with the same values as in the returns cell.
date_first = date(1992, 1, 1)
date_last = date(2018, 12, 31)
index_dates = pd.date_range(date_first, date_last, freq = 'B')
# NOTE(review): in both loops below reindex() returns a new object that is never stored, so the loops
# leave ser_ivol3m and ser_vrp3m unchanged - confirm whether forward-filling onto index_dates was intended.
for iter_country in ser_ivol3m.index.get_level_values(0).unique():
    ser_ivol3m[iter_country].reindex(index_dates, method = 'ffill')    
#    ser_ivol3m[iter_country] = ser_ivol3m[iter_country] - ser_ivol3m[iter_country].shift(1)
for iter_country in ser_vrp3m.index.get_level_values(0).unique():    
    ser_vrp3m[iter_country].reindex(index_dates, method = 'ffill')    

In [9]:
### EXTRACTING MRI INDEX
# Relative path to the HDF5 store and the key of the object read from it.
path_mri_index_hdf = 'Data_Files/Source_Files/mri_released_index.h5'
object_released_mri_hdf = 'released_MRI_data'
# Later passed as `ser_condition` to the 'expo_cond' weighting of get_expvol_series.
ser_mri_released = pd.read_hdf(path_mri_index_hdf, object_released_mri_hdf)

In [10]:
#### DEFINING EXPONENTIAL WEIGHTS GENERATOR
def get_exp_weights(window_years = 5, halflife_months = 3):
    """
    Generate exponentially decaying weights for a rolling window of business days.

    Parameters:
        window_years: window length in years; one year counts as 260 business days.
        halflife_months: half-life of the decay in months; converted to days as
            round(260 / 12 * halflife_months).

    Returns:
        pd.Series named 'Weight' with a default integer index and 260 * window_years values,
        ordered from the oldest day (smallest weight) to the most recent day (weight 1.0).
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    import math
    ### Defining constants:
    work_days_per_year = 260
    months_per_year = 12
    ### Half-life in business days and the matching one-day decay factor:
    halflife_days = round((work_days_per_year / months_per_year * halflife_months))
    daily_decay = math.exp(math.log(0.5) / halflife_days)
    ### Age of every window day, from (window length - 1) for the oldest down to 0 for the latest:
    arr_age_days = np.arange(work_days_per_year * window_years, 0, -1) - 1
    ### Weight of a day is the one-day decay factor raised to the power of its age:
    return pd.Series(np.exp(math.log(daily_decay) * arr_age_days), name = 'Weight')

In [11]:
#### DEFINING WEIGHTS TO SERIES BINDER
def bind_exp_weights(ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_condition = None):
    """
    Build a weights series sharing the index of ser_returns.

    Parameters:
        ser_returns: series whose index (and size) the weights are bound to.
        weighting_kind: 'equal' - every observation gets weight 1;
            'expo' - the last ser_returns.size values of get_exp_weights(), in index order;
            'expo_cond' - the same values, handed out by closeness of ser_condition to its last value:
            the observation with the largest absolute distance gets the smallest weight.
        window_years, halflife_months: forwarded to get_exp_weights().
        ser_condition: conditioning series, required for 'expo_cond' only; expected to hold exactly
            ser_returns.size observations. It is not modified.

    Returns:
        pd.Series of weights (named 'Weight' for the exponential kinds).

    Raises:
        ValueError: unknown weighting_kind, missing ser_condition for 'expo_cond', or more
            observations than the weights window holds.
    """
    ### Importing standard modules:
    import pandas as pd
    ### Equal weighting needs no decay window:
    if (weighting_kind == 'equal'):
        return pd.Series(1, index = ser_returns.index)
    ### An unknown kind used to surface as an unbound local variable at the return statement:
    if weighting_kind not in ('expo', 'expo_cond'):
        raise ValueError("weighting_kind must be 'equal', 'expo' or 'expo_cond', got " + repr(weighting_kind))
    ### Taking the most recent part of the decay window:
    num_observations = ser_returns.size
    ser_window_weights = get_exp_weights(window_years, halflife_months)
    if (num_observations > len(ser_window_weights)):
        raise ValueError('ser_returns holds more observations than the weights window')
    ### An explicit start position is used because a [-0 : ] slice would return the whole window:
    arr_weights = ser_window_weights.values[len(ser_window_weights) - num_observations : ]
    if (weighting_kind == 'expo'):
        return pd.Series(arr_weights, index = ser_returns.index, name = 'Weight')
    ### Conditional weighting:
    ### Need to move conditional logic outside of the loop, leave only sorting inside of the function !!!
    if ser_condition is None:
        raise ValueError("ser_condition is required for weighting_kind = 'expo_cond'")
    ### Distances to the latest condition value, farthest first, so they pair with ascending weights:
    ser_distance = (ser_condition - ser_condition.iloc[-1]).abs().sort_values(ascending = False)
    ser_weights = pd.Series(arr_weights, index = ser_distance.index, name = 'Weight')

    return ser_weights.sort_index()

In [60]:
#### DEFINING EXPONENTIAL VOLATILITY CALCULATOR
def get_expvol_value(ser_returns, ser_weights):
    """
    Annualised weighted volatility of a returns series.

    Missing returns are dropped, returns and weights are matched on their common index labels,
    the weights are normalised to sum to one, and the result is
    sqrt(sum(w * r^2)) * sqrt(260). Returns are not de-meaned here.

    Parameters:
        ser_returns: returns series; NaN values are ignored.
        ser_weights: weights series; only labels shared with ser_returns are used.

    Returns:
        float volatility, or NaN when there is no overlapping observation or the matched weights
        sum to zero (previously an empty overlap silently produced 0.0).
    """
    ### Importing standard modules:
    import numpy as np
    ### Defining constants:
    num_year_work_days = 260
    ### Matching returns and weights:
    ser_returns = ser_returns.dropna()
    ### Need to have minimum data count parameter !!!
    index_rolling = ser_returns.index.intersection(ser_weights.index)
    if (len(index_rolling) == 0):
        return np.nan
    expvol_y = ser_returns.loc[index_rolling]
    expvol_w = ser_weights.loc[index_rolling]
    num_weights_sum = expvol_w.sum()
    if (num_weights_sum == 0):
        return np.nan
    ### Exponential volatility calculating:
    expvol_w = expvol_w / num_weights_sum
    expvol_result = np.sqrt(expvol_w.dot(expvol_y * expvol_y)) * np.sqrt(num_year_work_days)

    return expvol_result

In [17]:
#### DEFINING EXPONENTIAL VOLATILITY SERIES BUILDER
def get_expvol_series(ser_market_membership, ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_condition = None):
    """
    Build annualised weighted volatility for every (country, date) point of the membership series.

    For each point the log returns of the country over the 260 * window_years business days ending
    at the date are de-meaned, missing days are dropped, and - when more than 130 observations
    remain - the volatility sqrt(sum(w * r^2)) * sqrt(260) is stored, with w normalised to sum to one.

    Parameters:
        ser_market_membership: series with a (country, date) MultiIndex defining the output points.
        ser_returns: simple returns with a (country, date) MultiIndex; converted with log(1 + r) here.
        weighting_kind: 'equal' - equal weights; 'expo' - get_exp_weights() laid over the window;
            'expo_cond' - the same weights handed out by closeness of ser_condition to its value on
            the last available day (largest distance gets the smallest weight).
        window_years, halflife_months: window length and decay half-life for get_exp_weights().
        ser_condition: date-indexed conditioning series, required for 'expo_cond'. A forward-filled
            copy is used; the caller's object is no longer modified in place.

    Returns:
        pd.Series on ser_market_membership.index; NaN where the country has no returns or the
        window holds too few observations.

    Raises:
        ValueError: unknown weighting_kind, or 'expo_cond' without ser_condition.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    num_window_days = num_year_work_days * window_years
    ### Need to have minimum data count parameter !!!
    num_min_count = num_year_work_days // 2
    ### Validating options up front, so an unknown kind can not silently reuse stale weights:
    if weighting_kind not in ('equal', 'expo', 'expo_cond'):
        raise ValueError("weighting_kind must be 'equal', 'expo' or 'expo_cond', got " + repr(weighting_kind))
    if (weighting_kind == 'expo_cond'):
        if ser_condition is None:
            raise ValueError("ser_condition is required for weighting_kind = 'expo_cond'")
        ser_condition = ser_condition.ffill()
    ### The decay window is only needed for the exponential kinds:
    if (weighting_kind != 'equal'):
        arr_window_weights = get_exp_weights(window_years, halflife_months).values
    ### Flattening MSCI changes by logarithm
    ### Need to move flattening logic outside of the function !!!
    ser_returns = np.log(1 + ser_returns)
    index_return_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_expvol = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        if iter_country not in index_return_countries:
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting returns data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            index_iter_full = pd.date_range(end = iter_date, periods = num_window_days, freq = 'B')
            ### reindex() yields NaN for window days without data; plain [] lookup raises KeyError on them in current pandas:
            ser_iter_returns = ser_country_returns.reindex(index_iter_full)
            ser_iter_returns = (ser_iter_returns - ser_iter_returns.mean()).dropna()
            index_iter_ret = ser_iter_returns.index
            if not (ser_iter_returns.count() > num_min_count):
                continue
            if (weighting_kind == 'equal'):
                ser_iter_weights = pd.Series(1, index = index_iter_ret)
            else:
                ### Window weights stay attached to their calendar position even when some days are dropped:
                ser_iter_weights = pd.Series(arr_window_weights, index = index_iter_full).loc[index_iter_ret]
            if (weighting_kind == 'expo_cond'):
                ser_iter_condition = ser_condition.reindex(index_iter_ret)
                ser_iter_condition = (ser_iter_condition - ser_iter_condition.iloc[-1]).abs()
                ser_iter_condition = ser_iter_condition.sort_values(ascending = False)
                ### Ascending weights meet descending distances: the closest condition gets the largest weight.
                ser_iter_weights = pd.Series(ser_iter_weights.values, ser_iter_condition.index).sort_index()
            ### Exponential volatility calculating:
            expvol_w = ser_iter_weights / ser_iter_weights.sum()
            expvol_result = np.sqrt(expvol_w.dot(ser_iter_returns * ser_iter_returns)) * np.sqrt(num_year_work_days)
            ser_expvol.loc[iter_country, iter_date] = expvol_result

    return ser_expvol

In [None]:
# Manual single-point check of the conditional exponential volatility (country 'BE', 2000-10-31).
# NOTE(review): ser_returns holds the log returns but is not used below - the window on the
# ser_iter_returns line is cut from the raw ser_realized_ret_LOC; confirm which one was intended.
ser_returns = ser_realized_ret_LOC.copy()
ser_returns = np.log(1 + ser_returns)
ser_condition = ser_mri_released.copy()
iter_country = 'BE'
iter_date = pd.Timestamp('2000-10-31')
num_year_work_days = 260
window_years = 5
halflife_months = 3
# Window of num_year_work_days * window_years business days ending at iter_date (label-based slice).
ser_iter_returns = ser_realized_ret_LOC[iter_country].loc[iter_date - pd.offsets.BusinessDay(num_year_work_days * window_years - 1) : iter_date]
ser_iter_returns = ser_iter_returns - ser_iter_returns.mean() #### Need to transfer
# Absolute distance of the condition series from its value on the last window day, largest first.
ser_iter_condition = ser_condition[ser_iter_returns.index]
ser_iter_condition = abs(ser_iter_condition - ser_iter_condition.iloc[-1])
ser_iter_condition = ser_iter_condition.sort_values(ascending = False)
# Weights are taken from the tail of the decay window and paired positionally with the sorted
# distances, then put back into date order.
ser_iter_weights = get_exp_weights(window_years, halflife_months)[- ser_iter_returns.size : ]
ser_iter_weights = pd.Series(ser_iter_weights.values, ser_iter_condition.index)
ser_iter_weights.sort_index(inplace = True)
ser_iter_weights.name = 'Weight'
# NOTE(review): missing returns are dropped only after the weights were bound to the full window.
ser_iter_returns.dropna(inplace = True)

# Same computation as the body of get_expvol_value.
ser_iter_returns = ser_iter_returns.dropna()
index_rolling = ser_iter_returns.index.intersection(ser_iter_weights.index)           
expvol_y = ser_iter_returns[index_rolling]
expvol_w = ser_iter_weights[index_rolling]             
expvol_w = expvol_w / expvol_w.sum()
expvol_result = np.sqrt(expvol_w.dot(expvol_y * expvol_y)) * np.sqrt(num_year_work_days)
print('Test result:', expvol_result)
# Hard-coded reference value from an earlier run, printed for visual comparison.
print('Old result: 0.21947494831168715')

In [12]:
### EVENT RISK FACTOR STANDALONE CALCULATION
# Exponentially weighted volatility of local-currency returns: 5-year window, 1-month half-life.
ser_expvol1m = get_expvol_series(ser_market_membership, ser_realized_ret_LOC, weighting_kind = 'expo', window_years = 5, halflife_months = 1)

In [13]:
### EVENT RISK FACTOR TESTING:
# Spot checks for two countries on one date.
print('ser_expvol1m - AR 29-Dec-2006:', ser_expvol1m.loc['AR' , '2006-12-29'])
print('ser_expvol1m - US 29-Dec-2006:', ser_expvol1m.loc['US' , '2006-12-29'])
# Cross-sectional mean for every date; np.nan replaces the np.NaN alias removed in NumPy 2.0.
ser_expvol1m_mean = pd.Series(np.nan, index = ser_expvol1m.index.get_level_values(1).unique())
for iter_date in ser_expvol1m_mean.index:
    ser_expvol1m_mean[iter_date] = ser_expvol1m.loc[:, iter_date].mean()
ser_expvol1m_mean.sort_index(inplace = True)
# Summary statistics of the cross-sectional mean over time.
print('ser_expvol1m - cross-sectional mean min:', ser_expvol1m_mean.min())
print('ser_expvol1m - cross-sectional mean mean:', ser_expvol1m_mean.mean())
print('ser_expvol1m - cross-sectional mean max:', ser_expvol1m_mean.max())
print('ser_expvol1m - cross-sectional mean stdev:', ser_expvol1m_mean.std())
# NOTE: the label says 'mean mean' but the value is the number of non-NaN dates (count);
# the literal is kept so that the recorded output still matches.
print('ser_expvol1m - cross-sectional mean mean:', ser_expvol1m_mean.count())

ser_expvol1m - AR 29-Dec-2006: 0.20481093607134887
ser_expvol1m - US 29-Dec-2006: 0.07665226549320425
ser_expvol1m - cross-sectional mean min: 0.11338560985788693
ser_expvol1m - cross-sectional mean mean: 0.20651407109697165
ser_expvol1m - cross-sectional mean max: 0.6481642751441703
ser_expvol1m - cross-sectional mean stdev: 0.07026043107191231
ser_expvol1m - cross-sectional mean mean: 234


In [14]:
### LOWVOL FACTOR STANDALONE CALCULATION
# Exponentially weighted volatility with a 24-month half-life over a 5-year window.
ser_expvol24m = get_expvol_series(ser_market_membership, ser_realized_ret_LOC, weighting_kind = 'expo', window_years = 5, halflife_months = 24)
# Inverse variance of every country/date point.
ser_lowvol_base = 1 / (ser_expvol24m * ser_expvol24m)
# Date becomes the outer index level so that one cross-section can be addressed by its date.
ser_lowvol_base = ser_lowvol_base.swaplevel()
ser_lowvol_base.sort_index(inplace = True)
# np.nan replaces the np.NaN alias removed in NumPy 2.0.
ser_lowvol = pd.Series(np.nan, index = ser_lowvol_base.index)
for iter_date in ser_lowvol.index.get_level_values(0).unique():
    # Share of each country in the cross-sectional sum of inverse variances on that date.
    ser_lowvol[iter_date] = (ser_lowvol_base[iter_date] / ser_lowvol_base[iter_date].sum())
# Back to the (Code, Date) layout used by the other factors.
ser_lowvol = ser_lowvol.swaplevel()
ser_lowvol.sort_index(inplace = True)

In [15]:
### LOWVOL FACTOR TESTING:
# Spot checks for two countries on one date.
print('ser_lowvol - AR 29-Dec-2006:', ser_lowvol.loc['AR' , '2006-12-29'])
print('ser_lowvol - US 29-Dec-2006:', ser_lowvol.loc['US' , '2006-12-29'])
# Cross-sectional mean for every date; np.nan replaces the np.NaN alias removed in NumPy 2.0.
ser_lowvol_mean = pd.Series(np.nan, index = ser_lowvol.index.get_level_values(1).unique())
for iter_date in ser_lowvol_mean.index:
    ser_lowvol_mean[iter_date] = ser_lowvol.loc[:, iter_date].mean()
ser_lowvol_mean.sort_index(inplace = True)
# Summary statistics of the cross-sectional mean over time.
print('ser_lowvol - cross-sectional mean min:', ser_lowvol_mean.min())
print('ser_lowvol - cross-sectional mean mean:', ser_lowvol_mean.mean())
print('ser_lowvol - cross-sectional mean max:', ser_lowvol_mean.max())
print('ser_lowvol - cross-sectional mean stdev:', ser_lowvol_mean.std())
# NOTE: the label says 'mean mean' but the value is the number of non-NaN dates (count);
# the literal is kept so that the recorded output still matches.
print('ser_lowvol - cross-sectional mean mean:', ser_lowvol_mean.count())

ser_lowvol - AR 29-Dec-2006: 0.006992748913125383
ser_lowvol - US 29-Dec-2006: 0.034252552068587024
ser_lowvol - cross-sectional mean min: 0.02040816326530611
ser_lowvol - cross-sectional mean mean: 0.02253254854186519
ser_lowvol - cross-sectional mean max: 0.04545454545454546
ser_lowvol - cross-sectional mean stdev: 0.006446261259307378
ser_lowvol - cross-sectional mean mean: 234


In [18]:
### VOLATILITY SURPRISE FACTOR STANDALONE CALCULATION
# Same window and half-life as ser_expvol1m, but the weights are distributed by closeness of the
# MRI index to its latest value ('expo_cond').
ser_expvol1m_cond = get_expvol_series(ser_market_membership, ser_realized_ret_LOC, weighting_kind = 'expo_cond', window_years = 5, halflife_months = 1,
                                      ser_condition = ser_mri_released)
# Surprise is the negated log ratio of time-weighted to condition-weighted volatility.
ser_expvol1m_surp = -np.log(ser_expvol1m / ser_expvol1m_cond)

In [19]:
### VOLATILITY SURPRISE FACTOR TESTING:
# Spot checks of the conditional volatility for two countries on one date.
print('ser_expvol1m_cond - AR 29-Dec-2006:', ser_expvol1m_cond.loc['AR' , '2006-12-29'])
print('ser_expvol1m_cond - US 29-Dec-2006:', ser_expvol1m_cond.loc['US' , '2006-12-29'])
# Cross-sectional mean for every date; np.nan replaces the np.NaN alias removed in NumPy 2.0.
ser_expvol1m_cond_mean = pd.Series(np.nan, index = ser_expvol1m_cond.index.get_level_values(1).unique())
for iter_date in ser_expvol1m_cond_mean.index:
    ser_expvol1m_cond_mean[iter_date] = ser_expvol1m_cond.loc[:, iter_date].mean()
ser_expvol1m_cond_mean.sort_index(inplace = True)
print('ser_expvol1m_cond - cross-sectional mean min:', ser_expvol1m_cond_mean.min())
print('ser_expvol1m_cond - cross-sectional mean mean:', ser_expvol1m_cond_mean.mean())
print('ser_expvol1m_cond - cross-sectional mean max:', ser_expvol1m_cond_mean.max())
print('ser_expvol1m_cond - cross-sectional mean stdev:', ser_expvol1m_cond_mean.std())
# NOTE: the 'mean mean' labels on the count() lines print the number of non-NaN dates;
# the literals are kept so that the recorded output still matches.
print('ser_expvol1m_cond - cross-sectional mean mean:', ser_expvol1m_cond_mean.count())
# The same checks for the volatility surprise factor.
print('ser_expvol1m_surp - AR 29-Dec-2006:', ser_expvol1m_surp.loc['AR' , '2006-12-29'])
print('ser_expvol1m_surp - US 29-Dec-2006:', ser_expvol1m_surp.loc['US' , '2006-12-29'])
ser_expvol1m_surp_mean = pd.Series(np.nan, index = ser_expvol1m_surp.index.get_level_values(1).unique())
for iter_date in ser_expvol1m_surp_mean.index:
    ser_expvol1m_surp_mean[iter_date] = ser_expvol1m_surp.loc[:, iter_date].mean()
# Sorting the series built just above (the original sorted ser_expvol1m_mean here by mistake).
ser_expvol1m_surp_mean.sort_index(inplace = True)
print('ser_expvol1m_surp - cross-sectional mean min:', ser_expvol1m_surp_mean.min())
print('ser_expvol1m_surp - cross-sectional mean mean:', ser_expvol1m_surp_mean.mean())
print('ser_expvol1m_surp - cross-sectional mean max:', ser_expvol1m_surp_mean.max())
print('ser_expvol1m_surp - cross-sectional mean stdev:', ser_expvol1m_surp_mean.std())
print('ser_expvol1m_surp - cross-sectional mean mean:', ser_expvol1m_surp_mean.count())

ser_expvol1m_cond - AR 29-Dec-2006: 0.20111505918084296
ser_expvol1m_cond - US 29-Dec-2006: 0.08030428355472215
ser_expvol1m_cond - cross-sectional mean min: 0.1235471422932931
ser_expvol1m_cond - cross-sectional mean mean: 0.20781125053599298
ser_expvol1m_cond - cross-sectional mean max: 0.6554988121197827
ser_expvol1m_cond - cross-sectional mean stdev: 0.07721835002621177
ser_expvol1m_cond - cross-sectional mean mean: 234
ser_expvol1m_surp - AR 29-Dec-2006: -0.01821011250865118
ser_expvol1m_surp - US 29-Dec-2006: 0.04654380270481559
ser_expvol1m_surp - cross-sectional mean min: -0.4467730689545792
ser_expvol1m_surp - cross-sectional mean mean: 0.0060044612564703
ser_expvol1m_surp - cross-sectional mean max: 0.2573344798530834
ser_expvol1m_surp - cross-sectional mean stdev: 0.10663886264647311
ser_expvol1m_surp - cross-sectional mean mean: 234


In [28]:
# Display the conditional volatility of every country for one date (second index level).
ser_expvol1m_cond[:, '2000-10-31']

Code
AT    0.184245
AU    0.165522
BE    0.218800
CA    0.373403
CH    0.198943
DE    0.303769
DK    0.199148
ES    0.359159
FI    0.639754
FR    0.279475
GB    0.217396
HK    0.331460
IE    0.237979
IT    0.299907
JP    0.193879
NL    0.264758
NO    0.267469
NZ    0.240489
PT    0.228004
SE    0.365983
SG    0.328698
US    0.229374
dtype: float64

In [20]:
#### DEFINING SKEWNESS CALCULATOR
def get_skewness_value(ser_returns):
    """
    Bias-corrected sample skewness of a returns series.

    Parameters:
        ser_returns: returns series; NaN values are dropped before the calculation.

    Returns:
        scipy.stats.skew(..., bias = False) of the non-missing values, or NaN when 130 or fewer
        observations (half of a 260-day year) are available.
    """
    ### Importing standard modules:
    import numpy as np
    import scipy.stats as sc
    ### Defining constants:
    num_year_work_days = 260
    ### Need to have minimum data count parameter !!!
    num_min_count = num_year_work_days // 2
    ### Skewness calculating:
    ser_returns = ser_returns.dropna()
    if not (ser_returns.count() > num_min_count):
        ### np.nan replaces the np.NaN alias, which no longer exists in NumPy 2.0:
        return np.nan

    return sc.skew(ser_returns.values, bias = False)

In [21]:
#### DEFINING SKEWNESS SERIES BUILDER
def get_skewness_series(ser_market_membership, ser_returns, window_years = 2):
    """
    Build rolling skewness for every (country, date) point of the membership series.

    For each point the non-missing returns of the country between (date - (260 * window_years - 1)
    business days) and the date are passed to get_skewness_value() when more than 130 of them exist.

    Parameters:
        ser_market_membership: series with a (country, date) MultiIndex defining the output points.
        ser_returns: returns with a (country, date) MultiIndex, sorted so that label slicing by date works.
        window_years: window length in years of 260 business days.

    Returns:
        pd.Series on ser_market_membership.index; NaN where the country has no returns or the
        window holds too few observations.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    ### Need to have minimum data count parameter !!!
    num_min_count = num_year_work_days // 2
    ### Loop-invariant window offset and countries list:
    offset_window = pd.offsets.BusinessDay(num_year_work_days * window_years - 1)
    index_return_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing (np.nan replaces the np.NaN alias removed in NumPy 2.0):
    ser_skewness = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        if iter_country not in index_return_countries:
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting returns data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            ### Change loc to iloc !!!
            ser_iter_returns = ser_country_returns.loc[iter_date - offset_window : iter_date].dropna()
            if (ser_iter_returns.count() > num_min_count):
                ser_skewness.loc[iter_country, iter_date] = get_skewness_value(ser_iter_returns)

    return ser_skewness

In [22]:
### TAIL RISK FACTOR STANDALONE CALCULATION
# Rolling skewness of local-currency returns over a 2-year window.
ser_tailrisk = get_skewness_series(ser_market_membership, ser_realized_ret_LOC, window_years = 2)

In [23]:
### TAIL RISK FACTOR TESTING:
# Spot checks for two countries on one date.
print('ser_tailrisk - AR 29-Dec-2006:', ser_tailrisk.loc['AR' , '2006-12-29'])
print('ser_tailrisk - US 29-Dec-2006:', ser_tailrisk.loc['US' , '2006-12-29'])
# Cross-sectional mean for every date; np.nan replaces the np.NaN alias removed in NumPy 2.0.
ser_tailrisk_mean = pd.Series(np.nan, index = ser_tailrisk.index.get_level_values(1).unique())
for iter_date in ser_tailrisk_mean.index:
    ser_tailrisk_mean[iter_date] = ser_tailrisk.loc[:, iter_date].mean()
ser_tailrisk_mean.sort_index(inplace = True)
# Summary statistics of the cross-sectional mean over time.
print('ser_tailrisk - cross-sectional mean min:', ser_tailrisk_mean.min())
print('ser_tailrisk - cross-sectional mean mean:', ser_tailrisk_mean.mean())
print('ser_tailrisk - cross-sectional mean max:', ser_tailrisk_mean.max())
print('ser_tailrisk - cross-sectional mean stdev:', ser_tailrisk_mean.std())
# NOTE: the label says 'mean mean' but the value is the number of non-NaN dates (count);
# the literal is kept so that the recorded output still matches.
print('ser_tailrisk - cross-sectional mean mean:', ser_tailrisk_mean.count())

ser_tailrisk - AR 29-Dec-2006: 0.03928688949001212
ser_tailrisk - US 29-Dec-2006: 0.08036479847853283
ser_tailrisk - cross-sectional mean min: -0.49624621808319275
ser_tailrisk - cross-sectional mean mean: -0.08681784121651422
ser_tailrisk - cross-sectional mean max: 0.3488438785320589
ser_tailrisk - cross-sectional mean stdev: 0.1673956680922374
ser_tailrisk - cross-sectional mean mean: 234


In [24]:
#### VRP FACTOR SERIES BUILDER
def get_market_series(ser_market_membership, ser_returns, window_years = 5):
    """
    Pick the value of a data series at every (country, date) point that has enough history.

    A point receives the value of ser_returns at exactly that country and date when the window of
    260 * window_years business days ending at the date holds more than 65 non-missing observations.

    Parameters:
        ser_market_membership: series with a (country, date) MultiIndex defining the output points.
        ser_returns: data series with a (country, date) MultiIndex, sorted so that label slicing
            by date works.
        window_years: history window length in years of 260 business days.

    Returns:
        pd.Series on ser_market_membership.index; NaN where the country is absent, the history is
        too short, or the series has no entry on the date itself (this last case used to raise KeyError).
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    ### Need to have minimum data count parameter !!!
    num_min_count = num_year_work_days // 4
    ### Loop-invariant window offset and countries list:
    offset_window = pd.offsets.BusinessDay(num_year_work_days * window_years - 1)
    index_return_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing (np.nan replaces the np.NaN alias removed in NumPy 2.0):
    ser_market = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        if iter_country not in index_return_countries:
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting returns data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            ### Change loc to iloc !!!
            ser_iter_returns = ser_country_returns.loc[iter_date - offset_window : iter_date].dropna()
            if not (ser_iter_returns.count() > num_min_count):
                continue
            ### The date itself may be missing from the data even when the history is long enough:
            if iter_date in ser_country_returns.index:
                ser_market.loc[iter_country, iter_date] = ser_country_returns.loc[iter_date]

    return ser_market

In [25]:
### VRP FACTOR STANDALONE CALCULATION
# The commented line is an alternative that takes the raw values without the history-length filter.
#ser_vrp_factor = ser_vrp3m.reindex(ser_market_membership.index)
# VRP value at each membership point, kept only where the 5-year window has enough observations.
ser_vrp_factor = get_market_series(ser_market_membership, ser_vrp3m, window_years = 5)

In [26]:
### VRP FACTOR TESTING:
# Spot checks for two countries on one date.
print('ser_vrp_factor - AR 29-Dec-2006:', ser_vrp_factor.loc['AR' , '2006-12-29'])
print('ser_vrp_factor - US 29-Dec-2006:', ser_vrp_factor.loc['US' , '2006-12-29'])
# Cross-sectional mean for every date; np.nan replaces the np.NaN alias removed in NumPy 2.0.
ser_vrp_factor_mean = pd.Series(np.nan, index = ser_vrp_factor.index.get_level_values(1).unique())
for iter_date in ser_vrp_factor_mean.index:
    ser_vrp_factor_mean[iter_date] = ser_vrp_factor.loc[:, iter_date].mean()
ser_vrp_factor_mean.sort_index(inplace = True)
# Summary statistics of the cross-sectional mean over time.
print('ser_vrp_factor - cross-sectional mean min:', ser_vrp_factor_mean.min())
print('ser_vrp_factor - cross-sectional mean mean:', ser_vrp_factor_mean.mean())
print('ser_vrp_factor - cross-sectional mean max:', ser_vrp_factor_mean.max())
print('ser_vrp_factor - cross-sectional mean stdev:', ser_vrp_factor_mean.std())
# NOTE: the label says 'mean mean' but the value is the number of non-NaN dates (count);
# the literal is kept so that the recorded output still matches.
print('ser_vrp_factor - cross-sectional mean mean:', ser_vrp_factor_mean.count())

ser_vrp_factor - AR 29-Dec-2006: 0.00163
ser_vrp_factor - US 29-Dec-2006: -0.004396
ser_vrp_factor - cross-sectional mean min: -0.023387673469387766
ser_vrp_factor - cross-sectional mean mean: 0.002256157878726162
ser_vrp_factor - cross-sectional mean max: 0.036017938775510204
ser_vrp_factor - cross-sectional mean stdev: 0.00794579999880406
ser_vrp_factor - cross-sectional mean mean: 237


In [27]:
#### DEFINING WEIGHTED AVERAGE CALCULATOR
def get_average_value(ser_returns, ser_weights):
    """
    Weighted average of a data series.

    Parameters:
        ser_returns: data series; NaN values are dropped before the calculation.
        ser_weights: weights series; only labels shared with ser_returns are used.

    Returns:
        sum(x * w) / sum(w) over the common labels, or NaN when 65 or fewer non-missing
        observations (a quarter of a 260-day year) are available.
    """
    ### Importing standard modules:
    import numpy as np
    ### Defining constants:
    num_year_work_days = 260
    num_min_count = num_year_work_days // 4
    ### Rolling average calculating:
    ser_returns = ser_returns.dropna()
    if not (ser_returns.count() > num_min_count):
        ### np.nan replaces the np.NaN alias, which no longer exists in NumPy 2.0:
        return np.nan
    index_rolling = ser_returns.index.intersection(ser_weights.index)
    average_x = ser_returns.loc[index_rolling]
    average_w = ser_weights.loc[index_rolling]
    ### Series.sum() replaces the element-by-element builtin sum():
    average_result = average_x.dot(average_w) / average_w.sum()

    return average_result

In [23]:
#### DEFINING WEIGHTED AVERAGE SERIES BUILDER
def get_average_series(ser_market_membership, ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_condition = None):
    """
    Build the weighted average of daily changes for every (country, date) point of the membership series.

    For each point the data of the country over the 260 * window_years business days ending at the
    date is differenced day over day, missing and zero changes are removed, and - when more than
    65 changes remain - their weighted average sum(x * w) / sum(w) is stored.

    Parameters:
        ser_market_membership: series with a (country, date) MultiIndex defining the output points.
        ser_returns: level data with a (country, date) MultiIndex (differences are taken here).
        weighting_kind: 'equal' - equal weights; 'expo' - get_exp_weights() laid over the window;
            'expo_cond' - the same weights handed out by closeness of ser_condition to its value on
            the last available day (largest distance gets the smallest weight).
        window_years, halflife_months: window length and decay half-life for get_exp_weights().
        ser_condition: date-indexed conditioning series, required for 'expo_cond'. A forward-filled
            copy is used; the caller's object is no longer modified in place.

    Returns:
        pd.Series on ser_market_membership.index; NaN where the country has no data or the window
        holds too few changes.

    Raises:
        ValueError: unknown weighting_kind, or 'expo_cond' without ser_condition.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    num_window_days = num_year_work_days * window_years
    ### Need to have minimum data count parameter !!!
    num_min_count = num_year_work_days // 4
    ### Validating options up front, so an unknown kind can not silently reuse stale weights:
    if weighting_kind not in ('equal', 'expo', 'expo_cond'):
        raise ValueError("weighting_kind must be 'equal', 'expo' or 'expo_cond', got " + repr(weighting_kind))
    if (weighting_kind == 'expo_cond'):
        if ser_condition is None:
            raise ValueError("ser_condition is required for weighting_kind = 'expo_cond'")
        ser_condition = ser_condition.ffill()
    ### The decay window is only needed for the exponential kinds:
    if (weighting_kind != 'equal'):
        arr_window_weights = get_exp_weights(window_years, halflife_months).values
    index_return_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing (np.nan replaces the np.NaN alias removed in NumPy 2.0):
    ser_average = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        if iter_country not in index_return_countries:
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            index_iter_full = pd.date_range(end = iter_date, periods = num_window_days, freq = 'B')
            ### reindex() yields NaN for window days without data; plain [] lookup raises KeyError on them in current pandas:
            ser_iter_returns = ser_country_returns.reindex(index_iter_full)
            ser_iter_returns = (ser_iter_returns - ser_iter_returns.shift(1)).dropna()
            ### Unchanged days are excluded from the average:
            ser_iter_returns = ser_iter_returns[ser_iter_returns != 0]
            index_iter_ret = ser_iter_returns.index
            if not (ser_iter_returns.count() > num_min_count):
                continue
            if (weighting_kind == 'equal'):
                ser_iter_weights = pd.Series(1, index = index_iter_ret)
            else:
                ### Window weights stay attached to their calendar position even when some days are dropped:
                ser_iter_weights = pd.Series(arr_window_weights, index = index_iter_full).loc[index_iter_ret]
            if (weighting_kind == 'expo_cond'):
                ser_iter_condition = ser_condition.reindex(index_iter_ret)
                ser_iter_condition = (ser_iter_condition - ser_iter_condition.iloc[-1]).abs()
                ser_iter_condition = ser_iter_condition.sort_values(ascending = False)
                ### Ascending weights meet descending distances: the closest condition gets the largest weight.
                ser_iter_weights = pd.Series(ser_iter_weights.values, ser_iter_condition.index).sort_index()
            ### Exponential average calculating:
            average_result = ser_iter_returns.dot(ser_iter_weights) / ser_iter_weights.sum()
            ser_average.loc[iter_country, iter_date] = average_result

    return ser_average

In [24]:
### IMPLIED VOLATILITY MOMENTUM FACTOR STANDALONE CALCULATION
# Exponentially weighted average of daily implied volatility changes: 5-year window, 1-month half-life.
ser_ivolmom_1m = get_average_series(ser_market_membership, ser_ivol3m, weighting_kind = 'expo', window_years = 5, halflife_months = 1)

In [25]:
### IMPLIED VOLATILITY MOMENTUM FACTOR TESTING:
# Spot checks for two countries on one date.
print('ser_ivolmom_1m - AR 29-Dec-2006:', ser_ivolmom_1m.loc['AR' , '2006-12-29'])
print('ser_ivolmom_1m - US 29-Dec-2006:', ser_ivolmom_1m.loc['US' , '2006-12-29'])
# Cross-sectional mean for every date; np.nan replaces the np.NaN alias removed in NumPy 2.0.
ser_ivolmom_1m_mean = pd.Series(np.nan, index = ser_ivolmom_1m.index.get_level_values(1).unique())
for iter_date in ser_ivolmom_1m_mean.index:
    ser_ivolmom_1m_mean[iter_date] = ser_ivolmom_1m.loc[:, iter_date].mean()
ser_ivolmom_1m_mean.sort_index(inplace = True)
# Summary statistics of the cross-sectional mean over time.
print('ser_ivolmom_1m - cross-sectional mean min:', ser_ivolmom_1m_mean.min())
print('ser_ivolmom_1m - cross-sectional mean mean:', ser_ivolmom_1m_mean.mean())
print('ser_ivolmom_1m - cross-sectional mean max:', ser_ivolmom_1m_mean.max())
print('ser_ivolmom_1m - cross-sectional mean stdev:', ser_ivolmom_1m_mean.std())
# NOTE: the label says 'mean mean' but the value is the number of non-NaN dates (count);
# the literal is kept so that the recorded output still matches.
print('ser_ivolmom_1m - cross-sectional mean mean:', ser_ivolmom_1m_mean.count())

ser_ivolmom_1m - AR 29-Dec-2006: -5.728833933486413e-05
ser_ivolmom_1m - US 29-Dec-2006: -0.0001643642124898838
ser_ivolmom_1m - cross-sectional mean min: -0.0007719988881806511
ser_ivolmom_1m - cross-sectional mean mean: 6.00820444231159e-05
ser_ivolmom_1m - cross-sectional mean max: 0.0011461061607235408
ser_ivolmom_1m - cross-sectional mean stdev: 0.0002536959457065397
ser_ivolmom_1m - cross-sectional mean mean: 237


In [26]:
### IMPLIED VOLATILITY MOMENTUM FACTOR STANDALONE CALCULATION
# Exponentially weighted average of daily implied volatility changes: 5-year window, 12-month half-life.
ser_ivolmom_12m = get_average_series(ser_market_membership, ser_ivol3m, weighting_kind = 'expo', window_years = 5, halflife_months = 12)

In [27]:
### IMPLIED VOLATILITY MOMENTUM FACTOR TESTING:
# Spot checks for two countries on one date.
print('ser_ivolmom_12m - AR 29-Dec-2006:', ser_ivolmom_12m.loc['AR' , '2006-12-29'])
print('ser_ivolmom_12m - US 29-Dec-2006:', ser_ivolmom_12m.loc['US' , '2006-12-29'])
# Cross-sectional mean for every date; np.nan replaces the np.NaN alias removed in NumPy 2.0.
ser_ivolmom_12m_mean = pd.Series(np.nan, index = ser_ivolmom_12m.index.get_level_values(1).unique())
for iter_date in ser_ivolmom_12m_mean.index:
    ser_ivolmom_12m_mean[iter_date] = ser_ivolmom_12m.loc[:, iter_date].mean()
ser_ivolmom_12m_mean.sort_index(inplace = True)
# Summary statistics of the cross-sectional mean over time.
print('ser_ivolmom_12m - cross-sectional mean min:', ser_ivolmom_12m_mean.min())
print('ser_ivolmom_12m - cross-sectional mean mean:', ser_ivolmom_12m_mean.mean())
print('ser_ivolmom_12m - cross-sectional mean max:', ser_ivolmom_12m_mean.max())
print('ser_ivolmom_12m - cross-sectional mean stdev:', ser_ivolmom_12m_mean.std())
# NOTE: the label says 'mean mean' but the value is the number of non-NaN dates (count);
# the literal is kept so that the recorded output still matches.
print('ser_ivolmom_12m - cross-sectional mean mean:', ser_ivolmom_12m_mean.count())

ser_ivolmom_12m - AR 29-Dec-2006: 2.4071504646865075e-05
ser_ivolmom_12m - US 29-Dec-2006: -1.281313465985619e-05
ser_ivolmom_12m - cross-sectional mean min: -8.265832098596306e-05
ser_ivolmom_12m - cross-sectional mean mean: 5.48849915627575e-06
ser_ivolmom_12m - cross-sectional mean max: 9.861384692768506e-05
ser_ivolmom_12m - cross-sectional mean stdev: 2.4997621734106227e-05
ser_ivolmom_12m - cross-sectional mean mean: 237


In [83]:
### DEFINING MULTI-STEP STANDARDIZATION FUNCTION
def iter_standartize(ser_to_manage, arr_truncates = [2.5, 2.0], reuse_outliers = False, center_result = True):
    """
    Multi-step standardization (z-scoring) of a data vector with truncation at given boundaries.

    For each boundary in arr_truncates the working vector is z-scored with its own mean and
    sample standard deviation, and z-scores are truncated to [-boundary, +boundary].

    Parameters:
        ser_to_manage   - pd.Series to standardize; NaN values are dropped, the input is not modified;
        arr_truncates   - sequence of truncation boundaries, one per standardization step;
        reuse_outliers  - if False, values truncated at a step are frozen at +/- boundary of that step
                          and excluded from the following steps; if True, truncated values stay in
                          the working vector and take part in the following steps;
        center_result   - if True, the mean of the resulting vector is subtracted from it.

    Returns:
        list [ser_result, arr_mean, arr_std], where arr_mean / arr_std hold the mean and standard
        deviation used at each step.

    Note: when a step has an undefined standard deviation (e.g. a single observation), the z-scores
    of that step are NaN and are written to the result as NaN.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Arrays of iterations properties:
    arr_mean = []
    arr_std = []
    ### Workhorse and resulting data vectors initialising (dropna returns a new object, source stays untouched):
    ser_data_iter = ser_to_manage.dropna()
    ### Result vector is created as float zeros directly (no value-by-value replacing, no integer dtype):
    ser_data_full = pd.Series(0.0, index = ser_data_iter.index, name = ser_data_iter.name)
    ### Looping by boundaries array:
    for num_bound_iter in arr_truncates:
        ### Properties calculating and saving:
        num_mean_iter = ser_data_iter.mean()
        num_std_iter = ser_data_iter.std()
        arr_mean.append(num_mean_iter)
        arr_std.append(num_std_iter)
        ### Standartizing and truncating to [-boundary, +boundary]:
        ser_data_iter = (ser_data_iter - num_mean_iter) / num_std_iter
        ser_data_iter = ser_data_iter.clip(lower = -num_bound_iter, upper = num_bound_iter)
        if not (reuse_outliers):
            ### Values strictly inside the boundaries go to the next step:
            ser_is_inner = ser_data_iter.abs() < num_bound_iter
            ### Truncated (and NaN) values are saved to result by label and excluded from further calculations:
            ser_truncated = ser_data_iter[~ser_is_inner]
            if (len(ser_truncated) > 0):
                ser_data_full.loc[ser_truncated.index] = ser_truncated
            ser_data_iter = ser_data_iter[ser_is_inner]
    ### Aggregating result:
    if (reuse_outliers):
        ser_data_full = ser_data_iter
    elif (len(ser_data_iter) > 0):
        ser_data_full.loc[ser_data_iter.index] = ser_data_iter
    ### Centering result:
    if (center_result):
        ser_result = ser_data_full - ser_data_full.mean()
    else:
        ser_result = ser_data_full

    return [ser_result, arr_mean, arr_std]

In [94]:
### FACTOR SCORING GENERATOR
def get_scored_factors(dict_factors, ser_market_membership, score_grouping = 'within', 
                       score_boundaries = [2.5, 2.0], score_reuse_outliers = False, score_center_result = True):
    """
    Builds tables of raw and standardized (scored) factors.

    Parameters:
        dict_factors           - dictionary {factor name : pd.Series with two-level index}; levels are
                                 swapped inside, so the result is indexed by (Date, Code);
        ser_market_membership  - pd.Series of market labels with the same index layout as the factors;
                                 used only for 'within' grouping;
        score_grouping         - 'full': each date cross-section is scored as a whole;
                                 'within': each date cross-section is scored separately within each market;
        score_boundaries, score_reuse_outliers, score_center_result - passed to iter_standartize.

    Returns:
        list [df_factors_raw, df_factors_scored]; raw columns are named '<factor>_raw',
        scored columns are named '<factor>'.

    Raises:
        ValueError - for unknown score_grouping or for a factor having no values to score.
    """
    ### Importing standard modules:
    import pandas as pd
    ### Checking grouping kind before any work (otherwise scored series would stay undefined):
    if score_grouping not in ('full', 'within'):
        raise ValueError("score_grouping should be 'full' or 'within', got " + repr(score_grouping))
    ### Defining loop variants:
    dict_result_raw = {}
    raw_factor_suffix = '_raw'
    dict_result_scored = {}
    ### Preparing membership vector once for all factors (column name 'Market' is needed below):
    if (score_grouping == 'within'):
        ser_membership = ser_market_membership.swaplevel().sort_index(level = [0, 1]).rename('Market')
    ### Looping factors:
    for iter_factor in dict_factors:
        ### Reforming and naming series for future performing:
        ser_factor = dict_factors[iter_factor].swaplevel().sort_index(level = [0, 1])
        ser_factor.name = iter_factor
        dict_result_raw[iter_factor + raw_factor_suffix] = ser_factor
        ### Choosing data vector and index levels that define scoring groups:
        if (score_grouping == 'full'):
            ### Scoring for no grouping - one group for each date:
            ser_to_score = ser_factor
            arr_group_levels = [0]
        else:
            ### Scoring for markets grouping - one group for each date/market pair:
            df_to_score = pd.concat([ser_factor, ser_membership], axis = 1, join = 'inner')
            df_to_score.index.names = ['Date', 'Code']
            df_to_score.set_index('Market', append = True, inplace = True)
            ser_to_score = df_to_score[iter_factor]
            arr_group_levels = [0, 2]
        ### Scoring factor group by group:
        arr_ser_scored = []
        for _, ser_iter_factor in ser_to_score.groupby(level = arr_group_levels):
            ser_iter_factor = ser_iter_factor.dropna()
            if (ser_iter_factor.count() > 0):
                ser_iter_score = iter_standartize(ser_iter_factor, score_boundaries, score_reuse_outliers, score_center_result)[0]
                arr_ser_scored.append(ser_iter_score)
        if (len(arr_ser_scored) == 0):
            raise ValueError('No values to score for factor ' + repr(iter_factor))
        ser_factor_scored = pd.concat(arr_ser_scored, axis = 0)
        ### Market level is needed for grouping only:
        if (score_grouping == 'within'):
            ser_factor_scored = ser_factor_scored.reset_index('Market', drop = True)
        ser_factor_scored = ser_factor_scored.sort_index(level = [0, 1])
        ### Aggregating factors to dictionary:
        ser_factor_scored.index.names = ['Date', 'Code']
        dict_result_scored[iter_factor] = ser_factor_scored
        print(iter_factor, 'prepared')
    ### Collecting factor tables to dictionary:
    df_factors_raw = pd.concat(dict_result_raw, axis = 1, join = 'outer')
    df_factors_scored = pd.concat(dict_result_scored, axis = 1, join = 'outer')

    return [df_factors_raw, df_factors_scored]

In [95]:
### Collecting standalone factor vectors under their final names (two of them enter with inverted sign):
dict_factors = dict(
    short_term_event_risk = -ser_expvol1m,
    low_vol_anomaly = ser_lowvol,
    vol_surprise_event_risk = ser_expvol1m_surp,
    tail_risk = -ser_tailrisk,
    ivol_momentum_1m = ser_ivolmom_1m,
    ivol_momentum_12m = ser_ivolmom_12m,
    vrp = ser_vrp_factor,
)
### Building raw and scored factor tables, scoring within market groups:
df_factors_raw, df_factors_scored = get_scored_factors(
    dict_factors,
    ser_market_membership,
    score_grouping = 'within',
    score_boundaries = [2.5, 2.0],
)

short_term_event_risk prepared
low_vol_anomaly prepared
vol_surprise_event_risk prepared
tail_risk prepared
ivol_momentum_1m prepared
ivol_momentum_12m prepared




vrp prepared


In [97]:
### Inspecting raw factor values for a single point; rows of df_factors_raw are keyed by (Date, Code):
df_factors_raw.loc['2006-12-29', 'AR']

ivol_momentum_12m_raw         -0.000003
ivol_momentum_1m_raw          -0.000058
low_vol_anomaly_raw            0.006993
short_term_event_risk_raw     -0.204811
tail_risk_raw                 -0.039287
vol_surprise_event_risk_raw   -0.018210
vrp_raw                        0.001600
Name: (2006-12-29 00:00:00, AR), dtype: float64

In [10]:
### OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD 
def market_risk_factors(dict_factors, ser_market_membership, score_all = True, 
                        score_grouping = 'within', score_boundaries = [2.5, 2.0], score_reuse_outliers = False, score_center_result = True):
    """
    Legacy all-in-one builder: calculates factors from their specifications and optionally scores them.

    Parameters:
        dict_factors           - dictionary {factor name : list of positional source parameters};
                                 supported names: 'eventrisk', 'lowvol', 'volsurprise', 'tailrisk',
                                 'ivolmom1m', 'ivolmom12m' (the 'vrp' factor is not supported here);
        ser_market_membership  - pd.Series of market labels, its index defines country/date points;
        score_all              - if True, factors are standardized with iter_standartize;
        score_grouping         - 'full' (by date) or 'within' (by date within each market);
        score_boundaries, score_reuse_outliers, score_center_result - passed to iter_standartize.

    Returns:
        pd.DataFrame with one column per factor, indexed by (Date, Code).

    Raises:
        ValueError - for unknown factor name (earlier the previous factor vector was silently reused
                     under the new name) or for unknown score_grouping when scoring is requested.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    import scipy.stats as sc
    ### Checking grouping kind before any work:
    if (score_all) and (score_grouping not in ('full', 'within')):
        raise ValueError("score_grouping should be 'full' or 'within', got " + repr(score_grouping))
    ### Defining loop variants:
    dict_ser_factor = {}
    ### Defining constants:
    num_year_work_days = 260
    index_market = ser_market_membership.index
    ### Looping factors:
    for iter_factor in dict_factors:
        arr_ser_source = dict_factors[iter_factor].copy()
        if (iter_factor in ('eventrisk', 'lowvol')):
            ser_factor = (-1) * get_expvol_series(index_market, arr_ser_source[0], arr_ser_source[1], arr_ser_source[2], arr_ser_source[3])
        elif (iter_factor == 'volsurprise'):
            ### Minus logarithm of ratio of two volatility series (second one is built from elements 4 - 7):
            ser_factor = get_expvol_series(index_market, arr_ser_source[0], arr_ser_source[1], arr_ser_source[2], arr_ser_source[3])
            ser_factor = ser_factor.divide(get_expvol_series(index_market, arr_ser_source[0], arr_ser_source[4], arr_ser_source[5], arr_ser_source[6], arr_ser_source[7]))
            ser_factor = (-1) * np.log(ser_factor)
        elif (iter_factor == 'tailrisk'):
            ### First differences of source vector, country by country:
            ser_source = pd.Series(np.nan, index = arr_ser_source[0].index)
            for iter_country in arr_ser_source[0].index.get_level_values(0).unique():
                ser_source[iter_country] = arr_ser_source[0][iter_country] - arr_ser_source[0][iter_country].shift(1)
            ### Skewness of differences over window of arr_ser_source[1] years for each country/date point:
            ser_factor = pd.Series(np.nan, index = index_market)
            index_source_countries = ser_source.index.get_level_values(0).unique()
            for iter_country in index_market.get_level_values(0).unique():
                if (iter_country in index_source_countries):
                    for iter_date in index_market.get_level_values(1).unique():
                        ser_iter_source = ser_source.loc[iter_country, iter_date - pd.offsets.BusinessDay(num_year_work_days * arr_ser_source[1] - 1) : iter_date].dropna()
                        if (len(ser_iter_source) > 0):
                            ser_factor.loc[iter_country, iter_date] = sc.skew(ser_iter_source)
        elif (iter_factor in ('ivolmom1m', 'ivolmom12m')):
            ser_factor = get_average_series(index_market, arr_ser_source[0], arr_ser_source[1], arr_ser_source[2], arr_ser_source[3])
        else:
            ### Unknown name should not inherit vector of previously processed factor:
            raise ValueError('Unknown factor name: ' + repr(iter_factor))
        ### Reforming and Naming series for future performing:
        ser_factor = ser_factor.swaplevel().sort_index(level = [0, 1])
        ser_factor.name = iter_factor
        ### Scoring factor:
        if (score_all):
            arr_ser_scored = []
            ### Scoring for no grouping:
            if (score_grouping == 'full'):
                arr_dates = []
                for iter_date in ser_factor.index.get_level_values(0).unique():
                    ser_iter_factor = ser_factor.loc[iter_date].dropna()
                    if (ser_iter_factor.count() > 0):
                        ser_iter_score = iter_standartize(ser_iter_factor, score_boundaries, score_reuse_outliers, score_center_result)[0]
                        arr_ser_scored.append(ser_iter_score)
                        arr_dates.append(iter_date)
                ser_factor = pd.concat(arr_ser_scored, axis = 0, keys = arr_dates).sort_index(level = [0, 1])
            ### Scoring for markets grouping:
            if (score_grouping == 'within'):
                df_to_score = pd.concat([ser_factor, ser_market_membership.swaplevel().sort_index(level = [0, 1])], axis = 1, join = 'inner')
                df_to_score.index.names = ['Date', 'Code']
                df_to_score.set_index('Market', append = True, inplace = True)
                df_to_score.sort_index(level = [0, 1, 2], inplace = True)
                for iter_date in df_to_score.index.get_level_values(0).unique():
                    for iter_market in df_to_score.loc[iter_date, :, :].index.get_level_values(2).unique():
                        df_to_score_iter = df_to_score.loc[iter_date, :, iter_market]
                        ser_iter_factor = df_to_score_iter[iter_factor].dropna()
                        if (ser_iter_factor.count() > 0):
                            ser_iter_score = iter_standartize(ser_iter_factor, score_boundaries, score_reuse_outliers, score_center_result)[0]
                            ser_iter_score.reset_index('Market', drop = True, inplace = True)
                            arr_ser_scored.append(ser_iter_score)
                ser_factor = pd.concat(arr_ser_scored, axis = 0).sort_index(level = [0, 1])
        ### Aggregating factors to dictionary:
        ser_factor.index.names = ['Date', 'Code']
        dict_ser_factor[iter_factor] = ser_factor.copy()
        print(iter_factor, 'prepared')
    ### Collecting factor tables to dictionary:
    df_factors = pd.concat(list(dict_ser_factor.values()), axis = 1, join = 'outer')

    return df_factors

In [12]:
### OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD OLD 
### Legacy factor specification for market_risk_factors: each value is a positional list,
### element 0 is the source data vector, following elements are forwarded positionally
### to the builder chosen by the factor name.
### NOTE(review): elements 1 - 3 look like weighting kind / window in years / half-life in months -
### confirm against get_expvol_series and get_average_series signatures.
### For 'tailrisk' element 1 is the window length in years (multiplied by 260 business days).
### NOTE(review): both 'ivolmom' entries use the local returns vector as source - confirm it is intended.
dict_factors = {'eventrisk': [ser_realized_ret_LOC, 'expo', 5, 1], 
                'lowvol': [ser_realized_ret_LOC, 'expo', 5, 24], 
                'volsurprise': [ser_realized_ret_LOC, 'expo', 5, 1, 'expo_cond', 5, 1, ser_mri_released], 
                'tailrisk': [ser_realized_ret_LOC, 2], 
#                'VRP': [ser_fake_vrp], 
                'ivolmom1m': [ser_realized_ret_LOC, 'expo', 5, 1], 
                'ivolmom12m': [ser_realized_ret_LOC, 'expo', 5, 12]}


eventrisk prepared
lowvol prepared
volsurprise prepared
tailrisk prepared
ivolmom1m prepared
ivolmom12m prepared


In [None]:
#### DEFINING WEIGHTED AVERAGE SERIES BUILDER
def get_average_series_old(ser_market_membership, ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_condition = pd.Series(np.nan)):
    """
    Legacy builder of weighted average of first differences for each country/date point.

    Parameters:
        ser_market_membership  - pd.Series indexed by (country, date); its index defines points to fill;
        ser_returns            - pd.Series indexed by (country, date) with source data;
        weighting_kind, window_years, halflife_months - passed to bind_exp_weights; window_years also
                                 defines the lookback window (260 business days per year);
        ser_condition          - optional conditional vector passed to bind_exp_weights; it is
                                 forward-filled on a copy, the caller's object is not modified.

    Returns:
        pd.Series on ser_market_membership index, sorted; points with 65 or fewer non-zero
        differences in the window stay NaN.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    ### Forward filling condition vector without touching source object (or the shared default):
    ser_condition = ser_condition.ffill()
    flag_condition_given = (ser_condition.count() > 0)
    ### Countries having source data (calculated once):
    index_source_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_average = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        if (iter_country not in index_source_countries):
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting returns data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            ser_iter_returns = ser_country_returns.loc[iter_date - pd.offsets.BusinessDay(num_year_work_days * window_years - 1) : iter_date]
            ### Weights are bound to full window before differencing:
            if (ser_iter_returns.size > 0):
                if (flag_condition_given):
                    ser_iter_condition = ser_condition[ser_iter_returns.index]
                else:
                    ser_iter_condition = pd.Series(np.nan)
                ser_iter_weights = bind_exp_weights(ser_iter_returns, weighting_kind, window_years, halflife_months, ser_iter_condition)
            ### First differences without empty and zero values:
            ### TODO: need to shift by iloc
            ser_iter_returns = ser_iter_returns - ser_iter_returns.shift(1)
            ser_iter_returns = ser_iter_returns.dropna()
            ser_iter_returns = ser_iter_returns[ser_iter_returns != 0]
            ### TODO: need to have minimum data count parameter
            if (ser_iter_returns.count() > num_year_work_days // 4):
                average_result = get_average_value(ser_iter_returns, ser_iter_weights)
                ser_average.loc[iter_country, iter_date] = average_result
    ser_average.sort_index(level = [0, 1], inplace = True)

    return ser_average

In [None]:
#### DEFINING EXPONENTIAL VOLATILITY SERIES BUILDER
def get_expvol_series_old(ser_market_membership, ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_condition = pd.Series(np.nan)):
    """
    Legacy builder of weighted volatility for each country/date point.

    Parameters:
        ser_market_membership  - pd.Series indexed by (country, date); its index defines points to fill;
        ser_returns            - pd.Series indexed by (country, date) with returns; log(1 + return)
                                 is taken inside the function;
        weighting_kind, window_years, halflife_months - passed to bind_exp_weights; window_years also
                                 defines the lookback window (260 business days per year);
        ser_condition          - optional conditional vector passed to bind_exp_weights; it is
                                 forward-filled on a copy, the caller's object is not modified.

    Returns:
        pd.Series on ser_market_membership index; points with 130 or fewer observations
        in the window stay NaN.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    ### Flattening MSCI changes by logarithm
    ### TODO: need to move flattening logic outside of the function
    ser_returns = np.log(1 + ser_returns)
    ### Forward filling condition vector without touching source object (or the shared default):
    ser_condition = ser_condition.ffill()
    flag_condition_given = (ser_condition.count() > 0)
    ### Countries having source data (calculated once):
    index_source_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_expvol = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        if (iter_country not in index_source_countries):
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting returns data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            ser_iter_returns = ser_country_returns.loc[iter_date - pd.offsets.BusinessDay(num_year_work_days * window_years - 1) : iter_date]
            ### Demeaning returns inside the window:
            ser_iter_returns = ser_iter_returns - ser_iter_returns.mean()
            ### Weights are bound to full window before dropping empty values:
            if (ser_iter_returns.size > 0):
                if (flag_condition_given):
                    ser_iter_condition = ser_condition[ser_iter_returns.index]
                else:
                    ser_iter_condition = pd.Series(np.nan)
                ser_iter_weights = bind_exp_weights(ser_iter_returns, weighting_kind, window_years, halflife_months, ser_iter_condition)
            ### TODO: change loc to iloc
            ser_iter_returns = ser_iter_returns.dropna()
            ### TODO: need to have minimum data count parameter
            if (ser_iter_returns.count() > num_year_work_days // 2):
                expvol_result = get_expvol_value(ser_iter_returns, ser_iter_weights)
                ser_expvol.loc[iter_country, iter_date] = expvol_result

    return ser_expvol