In [1]:
### EXTRACTING UNIVERSE DATA FROM GENERAL MS EXCEL SOURCE
def get_market_membership_from_excel():
    """Read the monthly market membership (DM / EM / FM) of each country from the Excel source.

    The 'monthly_data' sheet is read, the columns 'dates', 'region' and 'ctry' are kept
    and renamed to 'Date', 'Market' and 'Code'. Numeric region codes are mapped to market
    labels and only the rows whose label is in the list of needed markets are returned.

    Returns:
        pd.Series named 'Market', indexed by a sorted ('Code', 'Date') MultiIndex.
    """
    ### Importing standard modules:
    import pandas as pd
    ### Defining constants:
    path_msci_data = 'Data_Files/Source_Files/sample_data_gaps.xlsx'
    tab_monthly = 'monthly_data'
    arr_markets_needed = ['DM', 'FM', 'EM']
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM'}
    arr_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                     '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null']
    ### Extracting universe data:
    df_universe = pd.read_excel(io = path_msci_data, sheet_name = tab_monthly, skiprows = [0, 2], header = 0,
                                na_values = arr_na_values)
    df_universe = df_universe.loc[:, ['dates', 'region', 'ctry']]
    df_universe = df_universe.rename(columns = {'dates' : 'Date', 'region' : 'Market', 'ctry' : 'Code'})
    ### Selecting the column explicitly (instead of squeeze) always yields a Series, even for a one-row sheet:
    ser_universe = df_universe.set_index(['Code', 'Date'])['Market']
    ser_universe = ser_universe.sort_index(level = [0, 1])
    ### Mapping region codes with the single declared dictionary (previously a duplicated literal):
    ser_universe = ser_universe.replace(dict_markets)
    ser_market_membership = ser_universe[ser_universe.isin(arr_markets_needed)]

    return ser_market_membership

### EXTRACTING RETURNS DATA FROM GENERAL MS EXCEL SOURCE
def get_universe_returns_from_excel():
    """Read daily USD and local-currency returns from the Excel source and align them to a business-day calendar.

    For every country a cumulative return index is built from the source returns, its first
    value is set to 1, the index is re-indexed onto the business-day calendar from 1992-01-01
    to 2018-12-31 with forward filling, and simple returns are recomputed from the filled index
    (so forward-filled days carry a zero return).

    Returns:
        list [ser_realized_ret_USD, ser_realized_ret_LOC]; each is a pd.Series indexed by a
        sorted ('Code', 'Date') MultiIndex.
    """
    ### Importing standard modules and date-special modules:
    import pandas as pd
    from datetime import date
    ### Constants declaring:
    path_msci_data = 'Data_Files/Source_Files/sample_data_gaps.xlsx'
    tab_daily = 'daily_returns'
    date_first = date(1992, 1, 1)
    date_last = date(2018, 12, 31)
    index_dates = pd.date_range(date_first, date_last, freq = 'B')
    arr_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                     '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null']

    def rebuild_on_calendar(ser_realized_ret):
        ### Re-indexes one returns series (all countries) onto the reference business-day calendar
        dict_realized_ret = {}
        for iter_country in ser_realized_ret.index.get_level_values(0).unique():
            ### Cumulative index is a fresh object, so the base value assignment below really takes effect
            ### (the former chained assignment could silently write to a temporary copy):
            ser_ret_index = (1 + ser_realized_ret[iter_country]).cumprod()
            ser_ret_index.iloc[0] = 1
            ### Filling calendar dates missing in the source, then values missing inside the source:
            ser_ret_index = ser_ret_index.reindex(index_dates, method = 'ffill').ffill()
            dict_realized_ret[iter_country] = ser_ret_index / ser_ret_index.shift(1) - 1
        ser_result = pd.concat(dict_realized_ret)
        ser_result.index.names = ['Code', 'Date']
        ### Result is left unnamed, as it was before, to keep the stored layout unchanged:
        ser_result.name = None
        return ser_result.sort_index(level = [0, 1])

    ### Extracting returns data:
    df_returns = pd.read_excel(io = path_msci_data, sheet_name = tab_daily, skiprows = [0, 2], header = 0,
                               na_values = arr_na_values)
    df_returns = df_returns.loc[:, ['dates', 'ctry', 'retusd', 'retloc']]
    df_returns = df_returns.rename(columns = {'dates' : 'Date', 'ctry' : 'Code', 'retusd' : 'Ret_USD', 'retloc' : 'Ret_LOC'})
    df_returns = df_returns.set_index(['Code', 'Date']).sort_index(level = [0, 1])
    ### Reindexation and forward filling procedure for USD and LOC returns:
    ser_realized_ret_USD = rebuild_on_calendar(df_returns['Ret_USD'])
    ser_realized_ret_LOC = rebuild_on_calendar(df_returns['Ret_LOC'])

    return [ser_realized_ret_USD, ser_realized_ret_LOC]

### EXTRACTING IMPLIED VOLATILITY DATA AND VRP FACTOR DATA FROM GENERAL MS EXCEL SOURCE
def get_universe_ivol_from_excel():
    """Read 3-month implied volatility and VRP data from the Excel source on a business-day calendar.

    The 'ivol_data' sheet is read, and each country's series is re-indexed onto the
    business-day calendar from 1992-01-01 to 2018-12-31 with forward filling.

    Returns:
        list [ser_ivol3m, ser_vrp3m]; each is a pd.Series indexed by a sorted
        ('Code', 'Date') MultiIndex.
    """
    ### Importing standard modules and date-special modules:
    import numpy as np
    import pandas as pd
    from datetime import date
    ### Constants declaring:
    path_msci_data = 'Data_Files/Source_Files/sample_data_gaps.xlsx'
    tab_ivol = 'ivol_data'
    index_dates = pd.date_range(date(1992, 1, 1), date(2018, 12, 31), freq = 'B')
    arr_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                     '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null']

    def spread_over_calendar(ser_source):
        ### Re-indexes every country's slice onto the reference date vector with forward filling
        dict_by_country = {}
        for iter_country in ser_source.index.get_level_values(0).unique():
            dict_by_country[iter_country] = ser_source[iter_country].reindex(index_dates, method = 'ffill')
        ser_spread = pd.concat(dict_by_country)
        ser_spread.index.names = ['Code', 'Date']
        ser_spread.sort_index(level = [0, 1], inplace = True)
        return ser_spread

    ### Extracting ivol data:
    df_ivol = pd.read_excel(io = path_msci_data, sheet_name = tab_ivol, skiprows = [0, 2], header = 0,
                            na_values = arr_na_values)
    df_ivol = df_ivol.loc[:, ['dates', 'ctry', 'ivol3m', 'vrp3m']]
    df_ivol.columns = ['Date', 'Code', 'IVol_3m', 'VRP_3m']
    df_ivol.set_index(['Code', 'Date'], inplace = True)
    df_ivol.sort_index(level = [0, 1], inplace = True)
    ### Appending data on the reference date vector:
    ser_ivol3m = spread_over_calendar(df_ivol['IVol_3m'])
    ser_vrp3m = spread_over_calendar(df_ivol['VRP_3m'])

    return [ser_ivol3m, ser_vrp3m]

### EXTRACTING MRI INDEX FROM HDF5 SOURCE
def get_universe_gri_from_hdf():
    """Load the released MRI index object stored under 'released_MRI_data' in the MRI HDF5 file.

    Returns:
        The object returned by pd.read_hdf for that key (used as a series by the caller).
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Reading the stored object straight from its HDF5 location:
    return pd.read_hdf('Data_Files/Source_Files/mri_released_index.h5', 'released_MRI_data')

### SOURCE DATA EXTRACTING FROM MS EXCEL FILES AND SAVING TO HDF FILES
import pandas as pd
import numpy as np
### Extracting data from xlsx files
ser_market_membership = get_market_membership_from_excel()
[ser_realized_ret_USD, ser_realized_ret_LOC] = get_universe_returns_from_excel()
[ser_ivol3m, ser_vrp3m] = get_universe_ivol_from_excel()
ser_gri_released = get_universe_gri_from_hdf()
ser_gri_released.name = 'GRI'
### Declaring constants:
path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
market_membership_key = 'market_membership_key'
realized_ret_USD_key = 'realized_ret_USD_key'
realized_ret_LOC_key = 'realized_ret_LOC_key'
ivol3m_key = 'ivol3m_key'
vrp3m_key = 'vrp3m_key'
gri_released_key = 'gri_released_key'
### Saving data to hdf5 table formatted files:
import tables
### NOTE(review): private PyTables API; closes any handles left open by earlier runs so that mode = 'w' can recreate the file
tables.file._open_files.close_all()
### The first write recreates the file (mode = 'w'), the following ones append new keys to it.
### The key is passed by keyword: positional use is deprecated in recent pandas versions.
ser_market_membership.to_hdf(path_market_risk_source_hdf, key = market_membership_key, mode = 'w', format = 'table')
ser_realized_ret_USD.to_hdf(path_market_risk_source_hdf, key = realized_ret_USD_key, mode = 'a', format = 'table')
ser_realized_ret_LOC.to_hdf(path_market_risk_source_hdf, key = realized_ret_LOC_key, mode = 'a', format = 'table')
ser_ivol3m.to_hdf(path_market_risk_source_hdf, key = ivol3m_key, mode = 'a', format = 'table')
ser_vrp3m.to_hdf(path_market_risk_source_hdf, key = vrp3m_key, mode = 'a', format = 'table')
ser_gri_released.to_hdf(path_market_risk_source_hdf, key = gri_released_key, mode = 'a', format = 'table')

In [2]:
### EXTRACTING UNIVERSE DATA FOR A PARTICULAR DATE
def get_date_membership(iter_date):
    """Return the market membership that is in force on iter_date.

    Membership is read from the HDF5 source for iter_date itself when it is a business
    month end, otherwise for the preceding business month end; the date label of the
    result is then replaced by iter_date.

    Relies on a module-level `pd` (pandas) being available.
    """
    ### Defining constants:
    path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
    market_membership_key = 'market_membership_key'
    ### Choosing the month end whose membership applies to the date:
    flag_is_month_end = (pd.to_datetime(iter_date) == pd.to_datetime(iter_date - pd.offsets.BusinessMonthEnd(0)))
    iter_month_end = iter_date if flag_is_month_end else pd.to_datetime(iter_date - pd.offsets.BusinessMonthEnd(1))
    ### The where clause refers to the local variable iter_month_end by its name:
    ser_iter_membership = pd.read_hdf(path_market_risk_source_hdf, market_membership_key, where = 'Date = iter_month_end')
    ser_iter_membership = ser_iter_membership.rename(index = {iter_month_end : iter_date})

    return ser_iter_membership

In [3]:
### DEFINING EXPONENTIAL WEIGHTS GENERATOR
def get_exp_weights(window_years = 5, halflife_months = 3):
    """Build a vector of exponentially decaying weights for a daily window.

    The window has 260 * window_years points. The last point has weight 1 and the weight
    halves every round(260 / 12 * halflife_months) points going back in time.

    Returns:
        pd.Series named 'Weight' with a default integer index, oldest point first.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    import math
    ### Defining constants:
    num_year_work_days = 260
    num_year_months = 12
    ### Age of each window point in days: oldest first, 0 for the most recent one:
    num_window_days = num_year_work_days * window_years
    arr_weight_days = np.arange(num_window_days, 0, -1) - 1
    ### One-day decay factor that corresponds to the requested half-life:
    num_halflife_days = round((num_year_work_days / num_year_months * halflife_months))
    num_period_factor = math.exp(math.log(0.5) / num_halflife_days)
    ### Creating weights series:
    return pd.Series(np.exp(math.log(num_period_factor) * arr_weight_days), name = 'Weight')

In [4]:
### DEFINING WEIGHTS TO SERIES BINDER
def bind_exp_weights(ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_condition = None):
    """Create a weights series aligned with the index of ser_returns.

    Parameters:
        ser_returns: series whose index the weights are bound to.
        weighting_kind: 'equal' - all weights are 1;
                        'expo' - the last ser_returns.size exponential weights in index order;
                        'expo_cond' - the same weights, assigned so that observations whose
                        condition value is closest to the last condition value get the largest weights.
        window_years, halflife_months: passed to get_exp_weights.
        ser_condition: conditioning series, required for 'expo_cond' only; it has to hold
                       exactly one value per element of ser_returns.

    Returns:
        pd.Series of weights.

    Raises:
        ValueError: for an unknown weighting_kind or a missing condition for 'expo_cond'.
    """
    ### Importing standard modules:
    import pandas as pd
    ### Rejecting unknown kinds explicitly (previously an UnboundLocalError at return):
    if weighting_kind not in ('equal', 'expo', 'expo_cond'):
        raise ValueError('Unknown weighting_kind: ' + repr(weighting_kind))
    ### Creating weights series:
    if (weighting_kind == 'equal'):
        return pd.Series(1, index = ser_returns.index)
    ### Empty window: a [-0 : ] slice would select the whole weights vector, so it is handled separately:
    if (ser_returns.size == 0):
        return pd.Series(index = ser_returns.index, dtype = float, name = 'Weight')
    ser_weights = get_exp_weights(window_years, halflife_months)[- ser_returns.size : ]
    if (weighting_kind == 'expo'):
        ser_weights.index = ser_returns.index
        return ser_weights
    ### Conditional weighting:
    if ser_condition is None:
        raise ValueError("ser_condition is required for weighting_kind = 'expo_cond'")
    ### Distance to the latest condition value, farthest first, so that the smallest weights go to the farthest points:
    ser_distance = abs(ser_condition - ser_condition.iloc[-1])
    ser_distance = ser_distance.sort_values(ascending = False)
    ser_weights = pd.Series(ser_weights.values, ser_distance.index)
    ser_weights = ser_weights.sort_index()
    ser_weights.name = 'Weight'

    return ser_weights

In [5]:
### DEFINING EXPONENTIAL VOLATILITY CALCULATOR
def get_expvol_value(ser_returns, ser_weights):
    """Compute weighted volatility: the square root of the weighted mean of squared returns.

    Missing returns are dropped, only index labels present in both series are used, and
    the weights are rescaled to sum to one over these labels.

    Returns:
        float; NaN when there is no common observation or the weights sum to zero.
    """
    ### Importing standard modules:
    import numpy as np
    ### Selecting observations present in both series:
    ser_returns = ser_returns.dropna()
    index_rolling = ser_returns.index.intersection(ser_weights.index)
    expvol_y = ser_returns.loc[index_rolling]
    expvol_w = ser_weights.loc[index_rolling]
    ### Nothing to average: returning NaN instead of a misleading zero or a division by zero:
    weights_total = expvol_w.sum()
    if ((index_rolling.size == 0) or (weights_total == 0)):
        return np.nan
    ### Exponential volatility calculating:
    expvol_w = expvol_w / weights_total

    return np.sqrt(expvol_w.dot(expvol_y * expvol_y))

In [6]:
#### DEFINING EXPONENTIAL VOLATILITY SERIES BUILDER
def get_expvol_series(ser_market_membership, ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_condition = None):
    """Compute annualised weighted volatility for every (country, date) point of the membership.

    For each point, log returns over the trailing window of 260 * window_years business
    days are demeaned, weighted by bind_exp_weights and passed to get_expvol_value; the
    result is multiplied by sqrt(260). Points with no more than 130 valid returns, and
    countries absent from ser_returns, stay NaN.

    Parameters:
        ser_market_membership: series indexed by (country, date).
        ser_returns: simple returns indexed by (country, date).
        weighting_kind, window_years, halflife_months: passed to bind_exp_weights.
        ser_condition: optional date-indexed conditioning series; it is forward filled
                       on a copy, the caller's object is not modified.

    Returns:
        pd.Series with the index of ser_market_membership.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    ### Flattening MSCI changes by logarithm:
    ser_returns = np.log(1 + ser_returns)
    ### Forward filling a copy of the condition (no in-place mutation of the argument):
    if ser_condition is None:
        ser_condition = pd.Series(np.nan)
    ser_condition = ser_condition.ffill()
    flag_condition = (ser_condition.count() > 0)
    ### Loop invariants:
    offset_window = pd.offsets.BusinessDay(num_year_work_days * window_years - 1)
    index_returns_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_expvol = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        if (iter_country not in index_returns_countries):
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting returns data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            ser_iter_returns = ser_country_returns.loc[iter_date - offset_window : iter_date]
            ser_iter_returns = ser_iter_returns - ser_iter_returns.mean()
            ### Weights are only needed when there are enough observations:
            if (ser_iter_returns.count() <= num_year_work_days // 2):
                continue
            if flag_condition:
                ser_iter_condition = ser_condition[ser_iter_returns.index]
            else:
                ser_iter_condition = pd.Series(np.nan)
            ### Weights are bound to the full window, missing returns are dropped afterwards:
            ser_iter_weights = bind_exp_weights(ser_iter_returns, weighting_kind, window_years, halflife_months, ser_iter_condition)
            expvol_result = get_expvol_value(ser_iter_returns.dropna(), ser_iter_weights) * np.sqrt(num_year_work_days)
            ser_expvol.loc[iter_country, iter_date] = expvol_result

    return ser_expvol

In [7]:
### DEFINING SKEWNESS CALCULATOR
def get_skewness_value(ser_returns):
    """Compute the bias-corrected sample skewness of a returns series.

    Missing values are dropped first.

    Returns:
        float; NaN when there are no more than 130 valid observations.
    """
    ### Importing standard modules:
    import numpy as np
    import scipy.stats as sc
    ### Defining constants:
    num_year_work_days = 260
    ### Skewness calculating:
    ser_returns = ser_returns.dropna()
    if (ser_returns.count() <= num_year_work_days // 2):
        return np.nan

    return sc.skew(ser_returns, bias = False)

In [8]:
### DEFINING SKEWNESS SERIES BUILDER
def get_skewness_series(ser_market_membership, ser_returns, window_years = 2):
    """Compute trailing-window skewness for every (country, date) point of the membership.

    The window covers 260 * window_years business days ending on the point's date.
    Points with no more than 130 valid returns, and countries absent from ser_returns,
    stay NaN.

    Returns:
        pd.Series with the index of ser_market_membership.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    ### Loop invariants:
    offset_window = pd.offsets.BusinessDay(num_year_work_days * window_years - 1)
    index_returns_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_skewness = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        if (iter_country not in index_returns_countries):
            continue
        ser_country_returns = ser_returns[iter_country]
        ### Extracting returns data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            ser_iter_returns = ser_country_returns.loc[iter_date - offset_window : iter_date].dropna()
            if (ser_iter_returns.count() > num_year_work_days // 2):
                ser_skewness.loc[iter_country, iter_date] = get_skewness_value(ser_iter_returns)

    return ser_skewness

In [9]:
### DEFINING READY TO GO FACTOR SERIES BUILDER
def get_market_series(ser_market_membership, ser_returns, window_years = 5):
    """Pick the value of a ready-made factor for every (country, date) point of the membership.

    The value of ser_returns at the point's own date is taken, provided that the trailing
    window of 260 * window_years business days holds more than 65 valid observations.
    Points failing this test, countries absent from ser_returns, and dates absent from
    the country's data stay NaN.

    Returns:
        pd.Series with the index of ser_market_membership.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    ### Loop invariants:
    offset_window = pd.offsets.BusinessDay(num_year_work_days * window_years - 1)
    index_returns_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_market = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        if (iter_country not in index_returns_countries):
            continue
        ser_country_values = ser_returns[iter_country]
        ### Extracting data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            ser_iter_values = ser_country_values.loc[iter_date - offset_window : iter_date].dropna()
            if (ser_iter_values.count() <= num_year_work_days // 4):
                continue
            ### A date missing from the data leaves NaN instead of raising KeyError:
            if (iter_date in ser_country_values.index):
                ser_market.loc[iter_country, iter_date] = ser_country_values.loc[iter_date]

    return ser_market

In [35]:
### DEFINING WEIGHTED AVERAGE CALCULATOR
def get_average_value(ser_returns, ser_weights):
    """Compute the weighted average of a series.

    Missing values are dropped and only index labels present in both series are used.

    Returns:
        float; NaN when there is no common observation or the weights sum to zero.
    """
    ### Importing standard modules:
    import numpy as np
    ### Selecting observations present in both series:
    ser_returns = ser_returns.dropna()
    index_rolling = ser_returns.index.intersection(ser_weights.index)
    average_x = ser_returns.loc[index_rolling]
    average_w = ser_weights.loc[index_rolling]
    ### Nothing to average: returning NaN explicitly instead of dividing zero by zero:
    weights_total = average_w.sum()
    if ((index_rolling.size == 0) or (weights_total == 0)):
        return np.nan
    ### Weighted average calculating:
    return average_x.dot(average_w) / weights_total

In [36]:
### DEFINING WEIGHTED AVERAGE SERIES BUILDER
def get_average_series(ser_market_membership, ser_returns, weighting_kind = 'equal', window_years = 5, halflife_months = 3, ser_condition = None):
    """Compute the weighted average of daily changes for every (country, date) point of the membership.

    For each point the trailing window of 260 * window_years business days of ser_returns
    is taken, first differences are computed, missing and zero changes are dropped, and
    the rest is averaged with weights from bind_exp_weights. Points with no more than 65
    remaining changes, and countries absent from ser_returns, stay NaN.

    Parameters:
        ser_market_membership: series indexed by (country, date).
        ser_returns: source values indexed by (country, date).
        weighting_kind, window_years, halflife_months: passed to bind_exp_weights.
        ser_condition: optional date-indexed conditioning series; it is forward filled
                       on a copy, the caller's object is not modified.

    Returns:
        pd.Series with the sorted index of ser_market_membership.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd
    ### Defining constants:
    num_year_work_days = 260
    ### Forward filling a copy of the condition (no in-place mutation of the argument):
    if ser_condition is None:
        ser_condition = pd.Series(np.nan)
    ser_condition = ser_condition.ffill()
    flag_condition = (ser_condition.count() > 0)
    ### Loop invariants:
    offset_window = pd.offsets.BusinessDay(num_year_work_days * window_years - 1)
    index_returns_countries = ser_returns.index.get_level_values(0).unique()
    ### Main loop performing:
    ser_average = pd.Series(np.nan, index = ser_market_membership.index)
    for iter_country in ser_market_membership.index.get_level_values(0).unique():
        if (iter_country not in index_returns_countries):
            continue
        ser_country_values = ser_returns[iter_country]
        ### Extracting data vector for each country/date point:
        for iter_date in ser_market_membership[iter_country].index.get_level_values(0).unique():
            ser_iter_values = ser_country_values.loc[iter_date - offset_window : iter_date]
            ### Daily changes without gaps and without zero moves:
            ser_iter_changes = ser_iter_values.diff().dropna()
            ser_iter_changes = ser_iter_changes[ser_iter_changes != 0]
            ### Weights are only needed when there are enough observations:
            if (ser_iter_changes.count() <= num_year_work_days // 4):
                continue
            if flag_condition:
                ser_iter_condition = ser_condition[ser_iter_values.index]
            else:
                ser_iter_condition = pd.Series(np.nan)
            ### Weights are bound to the full window of source values:
            ser_iter_weights = bind_exp_weights(ser_iter_values, weighting_kind, window_years, halflife_months, ser_iter_condition)
            ser_average.loc[iter_country, iter_date] = get_average_value(ser_iter_changes, ser_iter_weights)
    ser_average = ser_average.sort_index(level = [0, 1])

    return ser_average

In [12]:
### GET SHORT TERM EVENT RISK FACTOR
def get_short_term_event_risk_factor(iter_date):
    """Build the short term event risk factor for iter_date.

    The factor is the negated output of get_expvol_series, computed from local-currency
    returns with 'expo' weighting, a 5-year window and a 1-month half-life.
    """
    ### Importing standard modules and date-special modules (common for all factors):
    import numpy as np
    import pandas as pd
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    halflife_months = 1
    path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
    realized_ret_LOC_key = 'realized_ret_LOC_key'
    ### Data preparing (common for all factors):
    ser_iter_membership = get_date_membership(iter_date)
    ### The where clause refers to the local variable index_iter_date by its name (no country filter is applied):
    index_iter_date = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_iter_returns = pd.read_hdf(path_market_risk_source_hdf, realized_ret_LOC_key, where = 'Date in index_iter_date')
    ### Factor calculation:
    ser_expvol1m = get_expvol_series(ser_iter_membership, ser_iter_returns, 'expo', window_years, halflife_months)

    return - ser_expvol1m

In [13]:
### GET LOW VOLATILITY ANOMALY FACTOR
def get_low_vol_factor(iter_date):
    """Build the low volatility anomaly factor for iter_date.

    Inverse variance (1 / volatility squared) is computed from the output of
    get_expvol_series ('expo' weighting, 5-year window, 24-month half-life); infinite
    values become NaN, and the result is scaled to sum to one across countries on each date.

    Returns:
        pd.Series with a sorted index.
    """
    ### Importing standard modules and date-special modules (common for all factors):
    import numpy as np
    import pandas as pd
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
    realized_ret_LOC_key = 'realized_ret_LOC_key'
    ### Data preparing (common for all factors):
    index_iter_date = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_iter_membership = get_date_membership(iter_date)
    ### Impossible to add country filter here (the where clause refers to the local variable index_iter_date):
    ser_iter_returns = pd.read_hdf(path_market_risk_source_hdf, realized_ret_LOC_key, where = 'Date in index_iter_date')
    ### Factor calculation:
    ser_expvol24m = get_expvol_series(ser_iter_membership, ser_iter_returns, 'expo', window_years, 24)
    ser_lowvol_base = 1 / (ser_expvol24m * ser_expvol24m)
    ser_lowvol_base = ser_lowvol_base.replace([np.inf, -np.inf], np.nan)
    ### Cross-sectional normalisation: dividing by the sum over countries within each date (index level 1):
    ser_lowvol_total = ser_lowvol_base.groupby(level = 1).transform('sum')
    ser_iter_factor = (ser_lowvol_base / ser_lowvol_total).sort_index()

    return ser_iter_factor

In [24]:
### GET VOLATILITY SURPRISE FACTOR
def get_vol_surprise_factor(iter_date):
    """Build the volatility surprise factor for iter_date.

    The factor is minus the logarithm of the ratio of two get_expvol_series outputs
    (5-year window, 1-month half-life): 'expo' weighting over 'expo_cond' weighting,
    the latter conditioned on the released GRI series read from the HDF5 source.
    """
    ### Importing standard modules and date-special modules (common for all factors):
    import numpy as np
    import pandas as pd
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    halflife_months = 1
    path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
    realized_ret_LOC_key = 'realized_ret_LOC_key'
    gri_released_key = 'gri_released_key'
    ### Data preparing (both where clauses refer to the local variable index_iter_date by its name):
    index_iter_date = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_iter_membership = get_date_membership(iter_date)
    ser_iter_returns = pd.read_hdf(path_market_risk_source_hdf, realized_ret_LOC_key, where = 'Date in index_iter_date')
    ser_iter_condition = pd.read_hdf(path_market_risk_source_hdf, gri_released_key, where = 'index in index_iter_date')
    ### Factor calculation: unconditional volatility first, conditional volatility second:
    ser_expvol1m = get_expvol_series(ser_iter_membership, ser_iter_returns, 'expo', window_years, halflife_months)
    ser_expvol1m_cond = get_expvol_series(ser_iter_membership, ser_iter_returns, 'expo_cond', window_years, halflife_months, ser_iter_condition)

    return -np.log(ser_expvol1m / ser_expvol1m_cond)

In [15]:
### GET TAIL RISK FACTOR
def get_tail_risk_factor(iter_date):
    """Build the tail risk factor for iter_date.

    The factor is the negated output of get_skewness_series, computed from
    local-currency returns with a 2-year window.
    """
    ### Importing standard modules and date-special modules (common for all factors):
    import numpy as np
    import pandas as pd
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    skewness_window_years = 2
    path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
    realized_ret_LOC_key = 'realized_ret_LOC_key'
    ### Data preparing (the where clause refers to the local variable index_iter_date by its name):
    ser_iter_membership = get_date_membership(iter_date)
    index_iter_date = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_iter_returns = pd.read_hdf(path_market_risk_source_hdf, realized_ret_LOC_key, where = 'Date in index_iter_date')
    ### Factor calculation:
    ser_skewness = get_skewness_series(ser_iter_membership, ser_iter_returns, skewness_window_years)

    return - ser_skewness

In [25]:
### GET VRP FACTOR
def get_vrp_factor(iter_date):
    """Build the VRP factor for iter_date.

    The stored 3-month VRP data is read from the HDF5 source and passed through
    get_market_series with a 5-year window.
    """
    ### Importing standard modules and date-special modules (common for all factors):
    import numpy as np
    import pandas as pd
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
    vrp3m_key = 'vrp3m_key'
    ### Data preparing (the where clause refers to the local variable index_iter_date by its name):
    ser_iter_membership = get_date_membership(iter_date)
    index_iter_date = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_iter_vrp = pd.read_hdf(path_market_risk_source_hdf, vrp3m_key, where = 'Date in index_iter_date')
    ### Factor calculation:
    return get_market_series(ser_iter_membership, ser_iter_vrp, window_years)

In [26]:
### GET IMPLIED VOLATILITY SHORT TERM MOMENTUM FACTOR
def get_ivol_mom_1m_factor(iter_date):
    """Build the implied volatility momentum factor with a 1-month half-life for iter_date.

    The stored 3-month implied volatility data is read from the HDF5 source and passed
    through get_average_series with 'expo' weighting and a 5-year window.
    """
    ### Importing standard modules and date-special modules (common for all factors):
    import numpy as np
    import pandas as pd
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    halflife_months = 1
    path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
    ivol3m_key = 'ivol3m_key'
    ### Data preparing (the where clause refers to the local variable index_iter_date by its name):
    ser_iter_membership = get_date_membership(iter_date)
    index_iter_date = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_iter_ivol = pd.read_hdf(path_market_risk_source_hdf, ivol3m_key, where = 'Date in index_iter_date')
    ### Factor calculation:
    return get_average_series(ser_iter_membership, ser_iter_ivol, 'expo', window_years, halflife_months)

In [27]:
### GET IMPLIED VOLATILITY LONG TERM (12M) MOMENTUM FACTOR
def get_ivol_mom_12m_factor(iter_date):
    """Build the implied volatility momentum factor with a 12-month half-life for iter_date.

    The stored 3-month implied volatility data is read from the HDF5 source and passed
    through get_average_series with 'expo' weighting and a 5-year window.
    """
    ### Importing standard modules and date-special modules (common for all factors):
    import numpy as np
    import pandas as pd
    ### Constants declaring (common for all factors):
    num_year_work_days = 260
    window_years = 5
    halflife_months = 12
    path_market_risk_source_hdf = 'Data_Files/Source_Files/market_risk_source.h5'
    ivol3m_key = 'ivol3m_key'
    ### Data preparing (the where clause refers to the local variable index_iter_date by its name):
    ser_iter_membership = get_date_membership(iter_date)
    index_iter_date = pd.date_range(end = iter_date, periods = num_year_work_days * window_years, freq = 'B')
    ser_iter_ivol = pd.read_hdf(path_market_risk_source_hdf, ivol3m_key, where = 'Date in index_iter_date')
    ### Factor calculation:
    return get_average_series(ser_iter_membership, ser_iter_ivol, 'expo', window_years, halflife_months)

In [None]:
### LOOPER FOR SHORT TERM EVENT RISK FACTOR
### Factor is built for every distinct membership date, in date order.
### Shorter ranges for quick checks:
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 160]
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
arr_short_term_event_risk_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_short_term_event_risk_factor.append(get_short_term_event_risk_factor(iter_date))
    ### Reporting every tenth date:
    if (iter_counter % 10 == 0):
        print('Progress printout',iter_counter, '/', iter_date)
ser_short_term_event_risk_factor = pd.concat(arr_short_term_event_risk_factor)

In [20]:
### SHORT TERM EVENT RISK FACTOR TESTING:
### Spot checks for two countries on a single date:
print('ser_short_term_event_risk_factor - AR 29-Dec-2006:', ser_short_term_event_risk_factor.loc['AR' , '2006-12-29'])
print('ser_short_term_event_risk_factor - US 29-Dec-2006:', ser_short_term_event_risk_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (index level 1), sorted by date:
ser_short_term_event_risk_factor_mean = ser_short_term_event_risk_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_short_term_event_risk_factor - cross-sectional mean min:', ser_short_term_event_risk_factor_mean.min())
print('ser_short_term_event_risk_factor - cross-sectional mean mean:', ser_short_term_event_risk_factor_mean.mean())
print('ser_short_term_event_risk_factor - cross-sectional mean max:', ser_short_term_event_risk_factor_mean.max())
print('ser_short_term_event_risk_factor - cross-sectional mean stdev:', ser_short_term_event_risk_factor_mean.std())
print('ser_short_term_event_risk_factor - cross-sectional mean count:', ser_short_term_event_risk_factor_mean.count())

ser_short_term_event_risk_factor - AR 29-Dec-2006: -0.20481093607134887
ser_short_term_event_risk_factor - US 29-Dec-2006: -0.07665226549320425
ser_short_term_event_risk_factor - cross-sectional mean min: -0.6481452307554286
ser_short_term_event_risk_factor - cross-sectional mean mean: -0.20580347657372047
ser_short_term_event_risk_factor - cross-sectional mean max: -0.11338560985788693
ser_short_term_event_risk_factor - cross-sectional mean stdev: 0.07019715778814863
ser_short_term_event_risk_factor - cross-sectional mean mean: 234


In [None]:
### LOOPER FOR LOW VOLATILITY ANOMALY FACTOR
### Factor is built for every distinct membership date, in date order.
### Shorter ranges for quick checks:
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 160]
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
arr_low_vol_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_low_vol_factor.append(get_low_vol_factor(iter_date))
    ### Reporting every tenth date:
    if (iter_counter % 10 == 0):
        print('Progress printout',iter_counter, '/', iter_date)

ser_low_vol_factor = pd.concat(arr_low_vol_factor)

In [22]:
### LOW VOLATILITY ANOMALY FACTOR TESTING:
### Spot checks for two countries on a single date:
print('ser_low_vol_factor - AR 29-Dec-2006:', ser_low_vol_factor.loc['AR' , '2006-12-29'])
print('ser_low_vol_factor - US 29-Dec-2006:', ser_low_vol_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean for each date (index level 1), sorted by date:
ser_low_vol_factor_mean = ser_low_vol_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean over time:
print('ser_low_vol_factor - cross-sectional mean min:', ser_low_vol_factor_mean.min())
print('ser_low_vol_factor - cross-sectional mean mean:', ser_low_vol_factor_mean.mean())
print('ser_low_vol_factor - cross-sectional mean max:', ser_low_vol_factor_mean.max())
print('ser_low_vol_factor - cross-sectional mean stdev:', ser_low_vol_factor_mean.std())
print('ser_low_vol_factor - cross-sectional mean count:', ser_low_vol_factor_mean.count())

ser_low_vol_factor - AR 29-Dec-2006: 0.007339857068326828
ser_low_vol_factor - US 29-Dec-2006: 0.03595279045940818
ser_low_vol_factor - cross-sectional mean min: 0.02040816326530611
ser_low_vol_factor - cross-sectional mean mean: 0.02255744652417695
ser_low_vol_factor - cross-sectional mean max: 0.04545454545454546
ser_low_vol_factor - cross-sectional mean stdev: 0.006440980492665572
ser_low_vol_factor - cross-sectional mean mean: 234


In [None]:
### LOOPER FOR VOLATILITY SURPRISE FACTOR
### Factor is built for every distinct membership date, in date order.
### Shorter ranges for quick checks:
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 157]
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
arr_vol_surprise_factor = []
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_vol_surprise_factor.append(get_vol_surprise_factor(iter_date))
    ### Reporting every tenth date:
    if (iter_counter % 10 == 0):
        print('Progress printout',iter_counter, '/', iter_date)

ser_vol_surprise_factor = pd.concat(arr_vol_surprise_factor)

In [29]:
### VOLATILITY SURPRISE FACTOR TESTING:
### Spot checks of two single (country code, date) observations:
print('ser_vol_surprise_factor - AR 29-Dec-2006:', ser_vol_surprise_factor.loc['AR' , '2006-12-29'])
print('ser_vol_surprise_factor - US 29-Dec-2006:', ser_vol_surprise_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean of the factor for each date (index level 1), ordered by date.
### groupby replaces the per-date loop over a series pre-filled with np.NaN (that alias was removed in NumPy 2.0);
### group keys come back sorted, so no separate sort_index call is required.
ser_vol_surprise_factor_mean = ser_vol_surprise_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean time series:
print('ser_vol_surprise_factor - cross-sectional mean min:', ser_vol_surprise_factor_mean.min())
print('ser_vol_surprise_factor - cross-sectional mean mean:', ser_vol_surprise_factor_mean.mean())
print('ser_vol_surprise_factor - cross-sectional mean max:', ser_vol_surprise_factor_mean.max())
print('ser_vol_surprise_factor - cross-sectional mean stdev:', ser_vol_surprise_factor_mean.std())
### Number of dates having a non-NaN cross-sectional mean (label corrected: it duplicated the 'mean mean' label above):
print('ser_vol_surprise_factor - cross-sectional mean count:', ser_vol_surprise_factor_mean.count())

ser_vol_surprise_factor - AR 29-Dec-2006: -0.01821011250865118
ser_vol_surprise_factor - US 29-Dec-2006: 0.04654380270481559
ser_vol_surprise_factor - cross-sectional mean min: -0.4444349951854927
ser_vol_surprise_factor - cross-sectional mean mean: 0.008079627718946126
ser_vol_surprise_factor - cross-sectional mean max: 0.2751807419294589
ser_vol_surprise_factor - cross-sectional mean stdev: 0.11092389390929216
ser_vol_surprise_factor - cross-sectional mean mean: 234


In [None]:
### LOOPER FOR TAIL RISK FACTOR
### Dates to process: unique values of index level 1 of the market membership series, taken after sorting by that level.
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
### Shorter ranges for quick debugging runs (uncomment one of them to override the full range):
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 160]
arr_tail_risk_factor = []
### Counter of processed dates (remains zero when there is nothing to iterate over):
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_tail_risk_factor.append(get_tail_risk_factor(iter_date))
    ### Progress message after every tenth processed date:
    if (iter_counter % 10 == 0):
        print('Progress printout',iter_counter, '/', iter_date)
### Concatenating the per-date results:
ser_tail_risk_factor = pd.concat(arr_tail_risk_factor)

In [31]:
### TAIL RISK FACTOR TESTING:
### Spot checks of two single (country code, date) observations:
print('ser_tail_risk_factor - AR 29-Dec-2006:', ser_tail_risk_factor.loc['AR' , '2006-12-29'])
print('ser_tail_risk_factor - US 29-Dec-2006:', ser_tail_risk_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean of the factor for each date (index level 1), ordered by date.
### groupby replaces the per-date loop over a series pre-filled with np.NaN (that alias was removed in NumPy 2.0);
### group keys come back sorted, so no separate sort_index call is required.
ser_tail_risk_factor_mean = ser_tail_risk_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean time series:
print('ser_tail_risk_factor - cross-sectional mean min:', ser_tail_risk_factor_mean.min())
print('ser_tail_risk_factor - cross-sectional mean mean:', ser_tail_risk_factor_mean.mean())
print('ser_tail_risk_factor - cross-sectional mean max:', ser_tail_risk_factor_mean.max())
print('ser_tail_risk_factor - cross-sectional mean stdev:', ser_tail_risk_factor_mean.std())
### Number of dates having a non-NaN cross-sectional mean (label corrected: it duplicated the 'mean mean' label above):
print('ser_tail_risk_factor - cross-sectional mean count:', ser_tail_risk_factor_mean.count())

ser_tail_risk_factor - AR 29-Dec-2006: -0.03928688949001212
ser_tail_risk_factor - US 29-Dec-2006: -0.08036479847853283
ser_tail_risk_factor - cross-sectional mean min: -0.3488438785320589
ser_tail_risk_factor - cross-sectional mean mean: 0.08670576665299318
ser_tail_risk_factor - cross-sectional mean max: 0.49624621808319275
ser_tail_risk_factor - cross-sectional mean stdev: 0.16573629072456478
ser_tail_risk_factor - cross-sectional mean mean: 234


In [None]:
### LOOPER FOR VRP FACTOR
### Dates to process: unique values of index level 1 of the market membership series, taken after sorting by that level.
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
### Shorter ranges for quick debugging runs (uncomment one of them to override the full range):
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 160]
arr_vrp_factor = []
### Counter of processed dates (remains zero when there is nothing to iterate over):
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_vrp_factor.append(get_vrp_factor(iter_date))
    ### Progress message after every tenth processed date:
    if (iter_counter % 10 == 0):
        print('Progress printout',iter_counter, '/', iter_date)
### Concatenating the per-date results:
ser_vrp_factor = pd.concat(arr_vrp_factor)

In [33]:
### VRP FACTOR TESTING:
### Spot checks of two single (country code, date) observations:
print('ser_vrp_factor - AR 29-Dec-2006:', ser_vrp_factor.loc['AR' , '2006-12-29'])
print('ser_vrp_factor - US 29-Dec-2006:', ser_vrp_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean of the factor for each date (index level 1), ordered by date.
### groupby replaces the per-date loop over a series pre-filled with np.NaN (that alias was removed in NumPy 2.0);
### group keys come back sorted, so no separate sort_index call is required.
ser_vrp_factor_mean = ser_vrp_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean time series:
print('ser_vrp_factor - cross-sectional mean min:', ser_vrp_factor_mean.min())
print('ser_vrp_factor - cross-sectional mean mean:', ser_vrp_factor_mean.mean())
print('ser_vrp_factor - cross-sectional mean max:', ser_vrp_factor_mean.max())
print('ser_vrp_factor - cross-sectional mean stdev:', ser_vrp_factor_mean.std())
### Number of dates having a non-NaN cross-sectional mean (label corrected: it duplicated the 'mean mean' label above):
print('ser_vrp_factor - cross-sectional mean count:', ser_vrp_factor_mean.count())

ser_vrp_factor - AR 29-Dec-2006: 0.00163
ser_vrp_factor - US 29-Dec-2006: -0.004396
ser_vrp_factor - cross-sectional mean min: -0.023387673469387766
ser_vrp_factor - cross-sectional mean mean: 0.002256157878726162
ser_vrp_factor - cross-sectional mean max: 0.036017938775510204
ser_vrp_factor - cross-sectional mean stdev: 0.00794579999880406
ser_vrp_factor - cross-sectional mean mean: 237


In [None]:
### LOOPER FOR IVOL MOMENTUM 1M FACTOR
### Dates to process: unique values of index level 1 of the market membership series, taken after sorting by that level.
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
### Shorter ranges for quick debugging runs (uncomment one of them to override the full range):
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 160]
arr_ivol_mom_1m_factor = []
### Counter of processed dates (remains zero when there is nothing to iterate over):
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_ivol_mom_1m_factor.append(get_ivol_mom_1m_factor(iter_date))
    ### Progress message after every tenth processed date:
    if (iter_counter % 10 == 0):
        print('Progress printout',iter_counter, '/', iter_date)
### Concatenating the per-date results:
ser_ivol_mom_1m_factor = pd.concat(arr_ivol_mom_1m_factor)

In [39]:
### IVOL MOMENTUM 1M FACTOR TESTING:
### Spot checks of two single (country code, date) observations:
print('ser_ivol_mom_1m_factor - AR 29-Dec-2006:', ser_ivol_mom_1m_factor.loc['AR' , '2006-12-29'])
print('ser_ivol_mom_1m_factor - US 29-Dec-2006:', ser_ivol_mom_1m_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean of the factor for each date (index level 1), ordered by date.
### groupby replaces the per-date loop over a series pre-filled with np.NaN (that alias was removed in NumPy 2.0);
### group keys come back sorted, so no separate sort_index call is required.
ser_ivol_mom_1m_factor_mean = ser_ivol_mom_1m_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean time series:
print('ser_ivol_mom_1m_factor - cross-sectional mean min:', ser_ivol_mom_1m_factor_mean.min())
print('ser_ivol_mom_1m_factor - cross-sectional mean mean:', ser_ivol_mom_1m_factor_mean.mean())
print('ser_ivol_mom_1m_factor - cross-sectional mean max:', ser_ivol_mom_1m_factor_mean.max())
print('ser_ivol_mom_1m_factor - cross-sectional mean stdev:', ser_ivol_mom_1m_factor_mean.std())
### Number of dates having a non-NaN cross-sectional mean (label corrected: it duplicated the 'mean mean' label above):
print('ser_ivol_mom_1m_factor - cross-sectional mean count:', ser_ivol_mom_1m_factor_mean.count())

ser_ivol_mom_1m_factor - AR 29-Dec-2006: -5.728833933486413e-05
ser_ivol_mom_1m_factor - US 29-Dec-2006: -0.0001643642124898838
ser_ivol_mom_1m_factor - cross-sectional mean min: -0.0007719988881806511
ser_ivol_mom_1m_factor - cross-sectional mean mean: 6.00820444231159e-05
ser_ivol_mom_1m_factor - cross-sectional mean max: 0.0011461061607235408
ser_ivol_mom_1m_factor - cross-sectional mean stdev: 0.0002536959457065397
ser_ivol_mom_1m_factor - cross-sectional mean mean: 237


In [40]:
### LOOPER FOR IVOL MOMENTUM 12M FACTOR
### Dates to process: unique values of index level 1 of the market membership series, taken after sorting by that level.
date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()
### Shorter ranges for quick debugging runs (uncomment one of them to override the full range):
#date_range_test = pd.date_range(start = '2010-10-25', periods = 6)
#date_range_test = ser_market_membership.sort_index(level = 1).index.get_level_values(1).unique()[155 : 160]
arr_ivol_mom_12m_factor = []
### Counter of processed dates (remains zero when there is nothing to iterate over):
iter_counter = 0
for iter_counter, iter_date in enumerate(date_range_test, start = 1):
    arr_ivol_mom_12m_factor.append(get_ivol_mom_12m_factor(iter_date))
    ### Progress message after every tenth processed date:
    if (iter_counter % 10 == 0):
        print('Progress printout',iter_counter, '/', iter_date)
### Concatenating the per-date results:
ser_ivol_mom_12m_factor = pd.concat(arr_ivol_mom_12m_factor)

Progress printout 10 / 1994-10-31 00:00:00
Progress printout 20 / 1995-08-31 00:00:00
Progress printout 30 / 1996-06-28 00:00:00
Progress printout 40 / 1997-04-30 00:00:00
Progress printout 50 / 1998-02-27 00:00:00
Progress printout 60 / 1998-12-31 00:00:00
Progress printout 70 / 1999-10-29 00:00:00
Progress printout 80 / 2000-08-31 00:00:00
Progress printout 90 / 2001-06-29 00:00:00
Progress printout 100 / 2002-04-30 00:00:00
Progress printout 110 / 2003-02-28 00:00:00
Progress printout 120 / 2003-12-31 00:00:00
Progress printout 130 / 2004-10-29 00:00:00
Progress printout 140 / 2005-08-31 00:00:00
Progress printout 150 / 2006-06-30 00:00:00
Progress printout 160 / 2007-04-30 00:00:00
Progress printout 170 / 2008-02-29 00:00:00
Progress printout 180 / 2008-12-31 00:00:00
Progress printout 190 / 2009-10-30 00:00:00
Progress printout 200 / 2010-08-31 00:00:00
Progress printout 210 / 2011-06-30 00:00:00
Progress printout 220 / 2012-04-30 00:00:00
Progress printout 230 / 2013-02-28 00:00:

In [41]:
### IVOL MOMENTUM 12M FACTOR TESTING:
### Spot checks of two single (country code, date) observations:
print('ser_ivol_mom_12m_factor - AR 29-Dec-2006:', ser_ivol_mom_12m_factor.loc['AR' , '2006-12-29'])
print('ser_ivol_mom_12m_factor - US 29-Dec-2006:', ser_ivol_mom_12m_factor.loc['US' , '2006-12-29'])
### Cross-sectional mean of the factor for each date (index level 1), ordered by date.
### groupby replaces the per-date loop over a series pre-filled with np.NaN (that alias was removed in NumPy 2.0);
### group keys come back sorted, so no separate sort_index call is required.
ser_ivol_mom_12m_factor_mean = ser_ivol_mom_12m_factor.groupby(level = 1).mean()
### Summary statistics of the cross-sectional mean time series:
print('ser_ivol_mom_12m_factor - cross-sectional mean min:', ser_ivol_mom_12m_factor_mean.min())
print('ser_ivol_mom_12m_factor - cross-sectional mean mean:', ser_ivol_mom_12m_factor_mean.mean())
print('ser_ivol_mom_12m_factor - cross-sectional mean max:', ser_ivol_mom_12m_factor_mean.max())
print('ser_ivol_mom_12m_factor - cross-sectional mean stdev:', ser_ivol_mom_12m_factor_mean.std())
### Number of dates having a non-NaN cross-sectional mean (label corrected: it duplicated the 'mean mean' label above):
print('ser_ivol_mom_12m_factor - cross-sectional mean count:', ser_ivol_mom_12m_factor_mean.count())

ser_ivol_mom_12m_factor - AR 29-Dec-2006: 2.4071504646865075e-05
ser_ivol_mom_12m_factor - US 29-Dec-2006: -1.281313465985619e-05
ser_ivol_mom_12m_factor - cross-sectional mean min: -8.265832098596306e-05
ser_ivol_mom_12m_factor - cross-sectional mean mean: 5.48849915627575e-06
ser_ivol_mom_12m_factor - cross-sectional mean max: 9.861384692768506e-05
ser_ivol_mom_12m_factor - cross-sectional mean stdev: 2.4997621734106227e-05
ser_ivol_mom_12m_factor - cross-sectional mean mean: 237
