In [1]:
### EER FACTORS REVISION

In [2]:
### INITIALIZATION

import pandas as pd
import numpy as np
from datetime import date, datetime
import statsmodels.api as sm
from scipy import stats as ss
import math     
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
    
%load_ext line_profiler

In [3]:
### GENERAL DATA PREPARATION
### Notebook-wide configuration: source paths, HDF keys, standardization and factor parameters.
### Every name defined here is read by the cells below, so the names themselves must stay unchanged.

### Constants:
All = slice(None) # Shorthand for "every label of this level" inside MultiIndex .loc calls
### Universe path:
str_path_universe = 'Data_Files/Source_Files/acadian_universe.xlsx'
### Bloomberg structured data extraction parameters:
str_path_bb_hdf = 'Data_Files/Source_Files/Bloomberg_prepared.h5'
str_key_ret_daily = 'bb_ret_daily'
str_key_ret_monthly = 'bb_ret_monthly'
str_key_mmr = 'bb_mmr'
str_key_fx_country = 'bb_fx_country'
str_key_fx_currency = 'bb_fx_currency'
str_key_mcap = 'bb_mcap'
str_key_reer = 'bb_reer'
str_key_neer = 'bb_neer'
str_key_reer_sourced = 'bb_reer_sourced'
str_key_neer_sourced = 'bb_neer_sourced'
str_key_xcra = 'bb_xcra'
### NEER usage scheme (True: raw NEER series; False: REER/NEER mix built in the data preparing cell):
bool_NEER_raw = False
### Standardization parameters:
list_truncate = [2.5, 2.0] # Standardization boundaries, applied one after another
bool_within_market = True # Standardization way (True: inside each market group)
### Factors parameters:
str_date_start = '1996-08-01' # Start date for efficacy measures
str_date_end = '2020-08-31' # End date for efficacy measures
#str_date_end = '2020-06-30' # End date for efficacy measures
### Business month ends between the start and the end dates.
### The offset object is used instead of the 'BM' string alias: the alias is deprecated since pandas 2.2,
### while the object is accepted by old and new pandas versions alike.
idx_date_range = pd.date_range(str_date_start, str_date_end, freq = pd.offsets.BMonthEnd())
list_ison = ['DM', 'EM', 'FM']
list_filter = ['DM', 'EM', 'FM']
list_countries_to_exclude = ['VE'] # Countries not to play the game
flo_returns_similarity = 0.0025 # Selecting countries with currencies bound to the USD
flo_returns_completeness = 1 / 3 # Minimal share of non-missing returns for a country to be kept
int_concept_lag = 3 # Lag for GDP like concepts, months
int_concept_divider = 1000 # Divider to equalize concepts and GDP scales
### NOTE: the two bounds below are floats despite the int_ prefix; the names are kept because other cells use them.
int_concept_min = 0.0 # Minimal value to compare with log(1 + EXPORT/GDP)
int_concept_max = 0.3 # Maximal value to compare with log(1 + EXPORT/GDP)
int_eer_fill_limit = 260 * 50 # Days for forward fill NEER and REER inside country vectors ### For product version we need value = 66, days
int_factor_addendum = 2.5 # list_truncate[0] # Factor scaler
int_mom_length = 5 # Years of momentum vector
int_mom_min_win = 260 // 4 # Days
dict_mom_hl = {} # Half-life period for momentum factor, months:
dict_mom_hl['LONG_TERM'] = 30
dict_mom_hl['SHORT_TERM'] = 3
### Factors options, read later as (algorithm, concept, EER type, returns type):
dict_combinations = {}
dict_combinations['LONG_TERM'] = ('MOMENTUM', 'EXP_GDP_rate', 'REER', 'HEDGED')
dict_combinations['SHORT_TERM'] = ('MOMENTUM', 'EXP_GDP_rate', 'NEER', 'HEDGED')
dict_combinations['COMBO'] = ('COMBO', 'COMBO', 'COMBO', 'COMBO')
### Work periods: number of 'Frequency' units inside one 'Period' (default is 1, exceptions are set below):
ser_work_periods = pd.Series(1 , index = pd.MultiIndex.from_product([['Year', 'Month'], ['Y', 'M', 'D']], names = ['Period', 'Frequency']))
ser_work_periods['Year', 'M'] = 12
ser_work_periods['Year', 'D'] = 260
ser_work_periods['Month', 'Y'] = 0
ser_work_periods['Month', 'D'] = 22

In [4]:
### DEFINING MEAN MOMENTUM FUNCTION (TO CALCULATE FACTOR ONLY FOR MONTHENDS):

def rolling_cond_mean_momentum(ser_country_matrix, ser_full_source, int_numer_win, int_numer_min, int_denom_win, int_denom_min):
    """
    Fill a single-country canvas with log(mean over numerator window / mean over denominator window).

    ser_country_matrix: series indexed by (date, country); the country code is read from its first row.
    ser_full_source: source series indexed by (date, country).
    int_numer_win, int_denom_win: lengths of the trailing windows, in source observations.
    int_numer_min, int_denom_min: minimal numbers of non-missing observations required in each window.

    The canvas is modified in place and returned. Canvas dates missing in the country source are skipped.
    """
    ### Country saving:
    str_country = ser_country_matrix.index[0][1]
    ### Nothing to calculate for a country that is absent in the source vector:
    if str_country not in ser_full_source.index.get_level_values(1):
        return ser_country_matrix
    ### Filtering country vector from source:
    ser_country_source = ser_full_source.loc[All, str_country]
    ### Looping over matrix index dates:
    for iter_bm_date in ser_country_matrix.index.get_level_values(0):
        try:
            ### Exclusive right edge of both windows (position of the month end plus one):
            int_window_end = ser_country_source.index.get_loc(iter_bm_date) + 1
            ### Both windows are sign-flipped, so the signs cancel out in the ratio below:
            ser_window_numer = -ser_country_source.iloc[max(int_window_end - int_numer_win, 0) : int_window_end]
            ser_window_denom = -ser_country_source.iloc[max(int_window_end - int_denom_win, 0) : int_window_end]
            ### Checking for minimal data presence in both windows:
            bool_numer_enough = ser_window_numer.count() >= int_numer_min
            bool_denom_enough = ser_window_denom.count() >= int_denom_min
            if bool_numer_enough and bool_denom_enough:
                ### Mean momentum value calculation:
                num_ratio = ser_window_numer.mean() / ser_window_denom.mean()
                ser_country_matrix.loc[iter_bm_date, str_country] = np.log(num_ratio)
        except KeyError:
            ### The date is not available for this country: the canvas value stays as it was
            pass
    ### Resulting vector output:
    return ser_country_matrix

In [5]:
### DEFINING EXPONENTIAL WEIGHT

def exp_weight_single(halflife_len = 3, num_element = 0):
    """
    Exponential decay weight of the element number num_element.

    The weight is 1 for element 0 and halves every round(halflife_len) elements.
    """
    ### Logarithm of the per-element decay multiplier:
    num_log_decay = math.log(math.exp(math.log(0.5) / round(halflife_len)))
    ### Result output:
    return np.exp(num_log_decay * num_element)

In [6]:
def geom_weight_single(flo_ratio, flo_factor = 1, num_element = 0):
    """Element number num_element of a geometric progression: flo_factor * flo_ratio ** num_element."""
    num_power = flo_ratio ** num_element
    ### Results output:
    return flo_factor * num_power

In [7]:
### DEFINING WEIGHTED AVERAGE

def weighted_average(ser_data, ser_weight = False, int_min_count = 0):
    """
    Average of the non-missing values of ser_data, optionally weighted.

    ser_data: series of observations (missing values are ignored).
    ser_weight: series of weights covering the labels of the non-missing observations;
                any bool value (False by default) requests a simple average.
    int_min_count: the average is calculated only when the number of non-missing
                   observations is strictly greater than this value.

    Returns NaN when there are too few observations or when the selected weights sum up to zero.
    """
    ### Default output (np.nan is used because the np.NaN alias was removed in NumPy 2.0):
    num_result = np.nan
    ### Checking for data presence (strict comparison: exactly int_min_count observations are not enough):
    if (ser_data.count() > int_min_count):
        ### Checking for weights dataset:
        if isinstance(ser_weight, bool):
            ### Calculating of simple average:
            num_result = np.nanmean(ser_data.values)
        else:
            ### Non-missing observations and the weights with the same labels:
            ser_valid = ser_data.dropna()
            list_weight = ser_weight[ser_valid.index].values
            ### Checking for weights presence (zero sum of weights leaves the default output):
            if np.nansum(list_weight):
                ### Weighted average calculating:
                num_result = np.nansum(ser_valid.values * list_weight) / np.nansum(list_weight)
    ### Results output:
    return num_result

In [8]:
### DEFINING ROLLING (OPTIONALLY WEIGHTED AND CONDITIONAL) MEAN FUNCTION (TO CALCULATE FACTOR ONLY FOR MONTHENDS):

def rolling_cond_weighted_mean(ser_country_matrix, ser_full_source, int_mean_win, int_mean_min, list_weight = False, ser_full_cond = False):
    """
    Fill a single-country canvas with trailing-window means of the source series.

    ser_country_matrix: series indexed by (date, country); the country code is read from its first row.
    ser_full_source: source series indexed by (date, country).
    int_mean_win: length of the trailing window, in source observations.
    int_mean_min: passed to weighted_average as the minimal count of observations.
    list_weight: sequence of weights for the window positions; any bool value requests a simple mean.
    ser_full_cond: optional (date, country) series used to re-assign the weights inside each window;
                   any bool value switches this off.

    The canvas is modified in place and returned. Canvas dates missing in the country source are skipped.
    """
    ### Defining conditional average calculator:
    def conditional_average(ser_source, list_weight, int_min_count = 0, ser_condition = False):
        ### Weights setting (the first len(ser_source) weights, in window order):
        ser_weight = pd.Series(list_weight[ : len(ser_source.index)], ser_source.index)
        ### If we have condition we should resort the weight array:
        if not isinstance(ser_condition, bool):
            ### Labels of the condition ordered by condition value, placed against the original labels:
            ser_condition_sorted = pd.Series(ser_condition.sort_values().index, ser_condition.index)
            ser_condition_sorted.name = 'Condition'
            ### Weights re-labelled by the sorted condition labels and returned to label order:
            df_weight_cond = pd.concat([ser_weight, ser_condition_sorted], axis = 1)
            df_weight_cond = df_weight_cond.reset_index(drop = True).set_index('Condition')
            ser_weight = df_weight_cond.squeeze().sort_index()
        ### Results output:
        return weighted_average(ser_source, ser_weight, int_min_count)
    ### Country saving:
    str_country = ser_country_matrix.index[0][1]
    ### Nothing to calculate for a country that is absent in the source vector:
    if str_country not in ser_full_source.index.get_level_values(1):
        return ser_country_matrix
    ### Options that do not change from date to date:
    bool_use_cond = not isinstance(ser_full_cond, bool)
    bool_simple_mean = isinstance(list_weight, bool)
    ### Filtering country vectors from sources:
    ser_country_source = ser_full_source.loc[All, str_country]
    if bool_use_cond:
        ser_country_cond = ser_full_cond.loc[All, str_country]
    ### Looping over matrix index dates:
    for iter_bm_date in ser_country_matrix.index.get_level_values(0):
        try:
            ### Exclusive right edge of the window (position of the month end plus one):
            int_window_end = ser_country_source.index.get_loc(iter_bm_date) + 1
            ### Trailing window of source observations:
            ser_rolled_source = ser_country_source.iloc[max(int_window_end - int_mean_win, 0) : int_window_end]
            ### Condition values for the same labels, when requested:
            ser_rolled_cond = ser_country_cond.loc[ser_rolled_source.index] if bool_use_cond else False
            if bool_simple_mean:
                ### Simple mean calculation:
                num_mean = weighted_average(ser_rolled_source, False, int_mean_min)
            else:
                ### Weighted mean calculation:
                num_mean = conditional_average(ser_rolled_source, list_weight, int_mean_min, ser_rolled_cond)
            ser_country_matrix.loc[iter_bm_date, str_country] = num_mean
        except KeyError:
            ### The date (or a label) is not available: the canvas value stays as it was
            pass
    ### Resulting vector output:
    return ser_country_matrix

In [9]:
### DEFINING MULTI-STEP STANDARTIZATION FOR SEPARATE SERIES

def multistep_standartize(ser_data_source, arr_truncate, ser_weight = False, reuse_outliers = False, center_result = True, full_result = False):  
    """
    Multi-step standardization of a single series with truncation of outliers.

    For every boundary of arr_truncate the working vector is standardized with its weighted mean
    (see weighted_average) and its unweighted standard deviation, then values whose absolute
    z-score reaches the boundary are set to +/- boundary.

    ser_data_source: series to standardize (missing values are dropped).
    arr_truncate: iterable of boundaries, one per step.
    ser_weight: series of weights; any bool value requests equal weights.
    reuse_outliers: True - truncated values stay in the working vector for the next steps;
                    False - truncated values are fixed in the result and excluded from the next steps.
    center_result: True - the weighted average of the result is subtracted at the end.
    full_result: True - return (result, list of step means, list of step standard deviations).

    The result series is named '<source name>_standartized'.
    """
    ### Arrays of iterations properties:
    arr_mean = []
    arr_std = []
    ### Adding equal weights, when weights are absent:
    if isinstance(ser_weight, bool):
        ser_weight = pd.Series(1, index = ser_data_source.index)
        ser_weight.name = 'Weight'    
    ### Workhorse and resulting data vectors initialising:
    ser_data_iter = ser_data_source.dropna()
    ser_weight_iter = ser_weight.copy()
    ### np.nan is used because the np.NaN alias was removed in NumPy 2.0:
    ser_data_full = pd.Series(np.nan, index = ser_data_iter.index)
    ### Looping by boundaries array:
    for num_bound_iter in arr_truncate:
        ### Properties calculating and saving (weights are matched to the data inside weighted_average):
        num_mean_iter = weighted_average(ser_data_iter, ser_weight_iter)
        num_std_iter = ser_data_iter.std()
        arr_mean.append(num_mean_iter)
        arr_std.append(num_std_iter)
        ser_data_iter = (ser_data_iter - num_mean_iter) / num_std_iter       
        ### Standartizing:
        if reuse_outliers:
            ser_data_iter[ser_data_iter.abs() >= num_bound_iter] = np.sign(ser_data_iter) * num_bound_iter 
        else:
            ### Saving to result and excluding from further calculations truncated values.
            ### NOTE(review): from the second step on the mask covers fewer labels than ser_data_full.
            ### The in-place form is kept on purpose: it appears to leave labels missing from the mask
            ### untouched, while the non-in-place form would overwrite them - re-check before refactoring.
            ser_data_full.where(ser_data_iter.abs() < num_bound_iter, np.sign(ser_data_iter) * num_bound_iter, inplace = True)
            ser_data_iter = ser_data_iter[ser_data_iter.abs() < num_bound_iter]           
    ### Aggregating result:
    if (reuse_outliers):
        ser_data_full = ser_data_iter
    else:     
        ser_data_full[ser_data_iter.index] = ser_data_iter
    ### Centering result:
    if (center_result):
        ser_result = ser_data_full - weighted_average(ser_data_full, ser_weight) 
    else:
        ser_result = ser_data_full    
    ### Result output:
    ser_result.name = str(ser_data_source.name) + '_standartized'
    if (full_result):
        return (ser_result, arr_mean, arr_std)
    else:
        return ser_result

In [10]:
### DEFINING MULTI-STEP STANDARTIZATION BY MARKET FOR CROSS-SECTION

def ison_standartize(ser_to_manage, arr_truncate, ser_weight = False, reuse_outliers = False, center_result = True, full_result = False, within_market = False):
    """
    Multi-step standardization of a cross-section, for the whole universe or inside each market.

    within_market: True - multistep_standartize is applied separately to every 'Market' group;
                   False - it is applied once to the whole series.
    All other arguments are passed to multistep_standartize unchanged.
    """
    ### Options shared by both standartizing ways:
    tup_options = (arr_truncate, ser_weight, reuse_outliers, center_result, full_result)
    ### Full universe standartizing:
    if not within_market:
        return multistep_standartize(ser_to_manage, *tup_options)
    ### Within market standartizing:
    return ser_to_manage.groupby(by = 'Market', group_keys = False).apply(multistep_standartize, *tup_options)

In [11]:
# DEFINING MULTI-STEP STANDARTIZATION BY MARKET FOR FULL FACTOR STACK

def single_factor_standartize(ser_factor, arr_truncate, ser_weight = False, reuse_outliers = False, center_result = True, within_market = False, 
                              flag_tha = False, flo_similarity = 5 * (10 ** (-8))):
    """
    Standardize a full factor stack date by date.

    ser_factor: factor series; it is grouped by the 'Date' index level (and by 'Market' in the THA branch).
    arr_truncate, reuse_outliers, center_result: passed to the cross-sectional standardizer.
    ser_weight: series of weights joined to the factor; any bool value requests equal weights.
    within_market: passed to ison_standartize in the simple branch.
    flag_tha: falsy - simple standardization; otherwise it is also used as a key of the local
              dict_tha_pow, so it has to be 'monthly', 'quarterly' or 'annual'.
    flo_similarity: autocorrelation values closer to 1 than this tolerance are left out of the
                    cumulative mean.

    Returns the standardized series (simple branch) or the tuple
    (ser_stand_s, ser_stand_z, ser_autocorr_vector, ser_tha_coeff, ser_std) (THA branch).

    NOTE(review): the THA branch uses tha_standartize, vector_autocorr, flo_tha_ratio and
    int_tha_length, none of which is defined in the visible part of the notebook - confirm
    they exist before calling with a truthy flag_tha.
    """
    ### Local constants (power applied to the mean autocorrelation for each THA horizon):
    dict_tha_pow = {}
    dict_tha_pow['monthly'] = 1
    dict_tha_pow['quarterly'] = 1 / 3
    dict_tha_pow['annual'] = 1 / 12
    ### Weights preparing:
    if isinstance(ser_weight, bool):
        ser_weight = pd.Series(1, index = ser_factor.index)
        ser_weight.name = 'Weight'
    ### Multi-step standartizing:        
    df_factor = ser_factor.to_frame().join(ser_weight, how = 'left')
    df_factor.columns = ['Factor', 'Weight']
    ### Time-horizon adjusted standartization:
    if (flag_tha):
        ### Z-scored vector calculating:
        ser_stand_z = df_factor.groupby('Date', group_keys = False)\
                               .apply(lambda iter_df: tha_standartize(iter_df['Factor'], arr_truncate, iter_df['Weight'], reuse_outliers, center_result, False))
        ### Results output:
        ser_stand_z.name = ser_factor.name
        ### Autocorrelation vector calculating:
        ser_autocorr_vector = ser_stand_z.groupby('Market').apply(vector_autocorr, 1)
        ser_autocorr_vector.name = 'Autocorr'
        ### Expanding mean by market over the values that differ from 1 by more than the tolerance:
        ser_autocorr_cum_mean = ser_autocorr_vector.loc[np.abs(ser_autocorr_vector - 1) > flo_similarity].groupby('Market', group_keys = False).expanding().mean()
        ### THA-coeficcient calculating:
        ### Step 1: negative means are floored at zero, then raised to the horizon power:
        ser_tha_coeff = ser_autocorr_cum_mean.transform(lambda iter_mean: max(iter_mean, 0.0) ** dict_tha_pow[flag_tha])
        ### Step 2: half of the sum of int_tha_length geometric weights with ratio flo_tha_ratio * mean:
        ser_tha_coeff = ser_tha_coeff.transform(lambda iter_mean: 
                                                sum(map(lambda iter_num: geom_weight_single(flo_tha_ratio * iter_mean, 1, iter_num), range(int_tha_length))) / 2)
        ser_tha_coeff = ser_tha_coeff.swaplevel()
        ### Coefficients are spread over every date of the z-score vector (missing combinations are kept as NaN):
        ser_tha_coeff = ser_tha_coeff.unstack('Market').reindex(ser_stand_z.index.levels[0]).stack('Market', dropna = False).sort_index(level = ['Date', 'Market'])        
        ### THA-adjusted z-score calculating:
#        ser_stand_s = (ser_stand_z * ser_tha_coeff)
        ### Artifical filling values for first date of region appearance (not to loose observations):
        ser_stand_s = (ser_stand_z * ser_tha_coeff.fillna(0.5))        
        ser_stand_s = ser_stand_s[ser_stand_s.index.dropna()].reorder_levels(['Date', 'Country', 'Market']).sort_index()
        ### Standart deviation for THA-adjusted z-score calculating:
        ser_region_std = ser_stand_s.groupby(['Date', 'Market']).std()
        ser_universe_std = ser_stand_s.groupby(['Date']).std()
        ### Whole-universe deviations get the 'Overall' label on the 'Market' level:
        ser_universe_std = pd.concat([ser_universe_std], keys = ['Overall'], names = ['Market']).swaplevel()
        ser_std = pd.concat([ser_region_std, ser_universe_std], axis = 0).sort_index()
        ### Results output:
        return (ser_stand_s, ser_stand_z, ser_autocorr_vector, ser_tha_coeff, ser_std)
    ### Simple standartization:    
    else:    
        ### Each date cross-section is standartized separately (full_result is always False here):
        ser_result = df_factor.groupby('Date', group_keys = False).apply\
                     (lambda iter_df: ison_standartize(iter_df['Factor'], arr_truncate, iter_df['Weight'], reuse_outliers, center_result, False, within_market))
        ### Results output:
        ser_result.name = ser_factor.name
        return ser_result   

In [12]:
### DEFINING EXTRACTION UNIVERSE DATA FROM MS EXCEL SOURCE

def ison_membership_converting(str_path_universe, date_end):
    """
    Load the universe membership from an MS Excel file and convert it to a business-month-end series.

    str_path_universe: path to the Excel file; the first sheet is read, its first two columns form
                       the (country, date) index and the 'Region' column holds the market codes.
    date_end: last date of the resulting range for every country.

    Returns a series named 'Market' indexed by (Date, Country) with values 'DM' / 'EM' / 'FM' or NaN.
    Each country vector starts at its first available month and is forward filled up to date_end.
    """
    ### Business month end offset. The object is used instead of the 'BM' string alias:
    ### the alias is deprecated since pandas 2.2, the object works in old and new versions.
    offset_bm_end = pd.offsets.BMonthEnd()
    ### Defining business-month-end reindexation on country level:
    def country_modify(ser_raw_country, date_end):
        ### Last value of each calendar month, re-labelled to the business month end:
        ser_res_country = ser_raw_country.droplevel(0).resample('MS').last().resample(offset_bm_end).last()
        range_country = pd.date_range(ser_res_country.index[0], date_end, freq = offset_bm_end)
        return ser_res_country.reindex(range_country).ffill()
    ### Markets encoding table (np.nan is used because the np.NaN alias was removed in NumPy 2.0):
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM', 0: np.nan}
    ### Loading source file:
    df_raw_universe = pd.read_excel(io = str_path_universe, sheet_name = 0, header = 0, parse_dates = True, index_col = [0, 1],
                                 na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', 
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    ### Converting source file:
    df_raw_universe.index.names = ['Country', 'Date']
    ### Missing codes are replaced with 0 to survive the forward fill as "no market".
    ### A new series is created instead of an in-place fill on a column taken from the frame:
    ser_raw_universe = df_raw_universe['Region'].fillna(0)
    ser_raw_universe.name = 'Market'
    ### By country reindexation and translation:
    ser_res_universe = ser_raw_universe.groupby('Country').apply(country_modify, date_end)
    ser_res_universe.index.names = ['Country', 'Date']
    ser_res_universe = ser_res_universe.replace(dict_markets).reorder_levels([1, 0]).sort_index()    
    ### Results output:
    return ser_res_universe

In [13]:
### MAIN SCRIPT: BLOOMBERG STRUCTURED DATA & ISON MEMBERSHIP EXTRACTION (NO PRELIMINARY DATA USING)

### Every dataset is read from the same prepared HDF file; keys and paths come from the configuration cell.
### The variable names are used by the cells below, so the order and the names are kept as they are.
### Returns (monthly key):
ser_returns = pd.read_hdf(str_path_bb_hdf, key = str_key_ret_monthly)
### Money market rates:
ser_mmr = pd.read_hdf(str_path_bb_hdf, key = str_key_mmr)
### FX rates by country:
ser_fx_country = pd.read_hdf(str_path_bb_hdf, key = str_key_fx_country)
### Market capitalization:
ser_mcap = pd.read_hdf(str_path_bb_hdf, key = str_key_mcap)
### Effective exchange rates (real and nominal), plain and "sourced" versions:
ser_reer = pd.read_hdf(str_path_bb_hdf, key = str_key_reer)
ser_neer = pd.read_hdf(str_path_bb_hdf, key = str_key_neer)
ser_reer_sourced = pd.read_hdf(str_path_bb_hdf, key = str_key_reer_sourced)
ser_neer_sourced = pd.read_hdf(str_path_bb_hdf, key = str_key_neer_sourced)
### XCRA concepts table:
df_xcra_filled = pd.read_hdf(str_path_bb_hdf, key = str_key_xcra)
### Universe membership up to the end date of the efficacy measures:
ser_ison = ison_membership_converting(str_path_universe, datetime.strptime(str_date_end, '%Y-%m-%d'))

In [14]:
### MAIN SCRIPT: DATA PREPARING (NO PRELIMINARY DATA USING)
### Builds the returns options (dict_ser_ret), the effective exchange rates options (dict_ser_eer)
### and the concepts options (dict_ser_concept) used by the factor cells.
### NOTE: Python sets are converted to lists before being used as .loc indexers
### (set indexers raise TypeError since pandas 2.0), and np.nan replaces the np.NaN alias removed in NumPy 2.0.

### List of countries with de-facto equal returns (to impact on hedged returns calculating)
ser_ret_similarity_test = ser_returns.unstack('Currency').groupby('Country').apply(lambda df_country: (df_country['LOC'] - df_country['USD']).abs().mean())
set_ret_usd_only = set(ser_ret_similarity_test.loc[ser_ret_similarity_test < flo_returns_similarity].index)
### List of countries with insufficient data quantity:
ser_ret_completeness_test = ser_returns.groupby('Country').apply(lambda ser_country: ser_country.count() / len(ser_country.index))
set_not_complete = set(ser_ret_completeness_test.loc[ser_ret_completeness_test < flo_returns_completeness].index)
### Filtering incomplete countries:
ser_returns.loc[All, All, list(set_not_complete)] = np.nan
### Returns options preparing:
dict_ser_ret = {}
### Returns in local currency:
dict_ser_ret['LOC'] = ser_returns.loc['LOC', All, All].droplevel(0)
### Returns in USD:
dict_ser_ret['USD'] = ser_returns.loc['USD', All, All].droplevel(0)
### Hedged returns in local currency:
dict_ser_hedged = {}
### Filling data for countries with no MMR data (USD returns are taken as they are):
set_ison_countries = set(dict_ser_ret['LOC'].index.get_level_values(1).unique())
set_mmr_countries = set(ser_mmr.index.get_level_values(1).unique())
set_no_mmr_countries = (set_ison_countries - set_mmr_countries) | set_ret_usd_only
set_to_hedge_countries = set_mmr_countries - set_no_mmr_countries
dict_ser_hedged['No_MMR'] = dict_ser_ret['USD'].loc[All, list(set_no_mmr_countries)]
### Money Market rates shifting forward:
ser_mmr_shifted = ser_mmr.groupby('Country').shift(1)
### Filling data for other countries:
df_ser_hedged = pd.DataFrame()
df_ser_hedged['Returns LOC'] = dict_ser_ret['LOC'].loc[All, list(set_to_hedge_countries)]
df_ser_hedged = df_ser_hedged.join(ser_mmr_shifted, how = 'left')
df_ser_hedged.columns = ['Returns LOC', 'MMR LOC']
### Hedged return: (1 + local return) * (1 + US rate / 12) / (1 + local rate / 12) - 1, rates are the shifted ones:
dict_ser_hedged['MMR_Based'] = df_ser_hedged.groupby('Country', group_keys = False)\
                               .apply(lambda df_country: (1 + df_country['Returns LOC']) * (1 + ser_mmr_shifted.loc[All, 'US'] / 12) / (1 + df_country['MMR LOC'] / 12) - 1)
#dict_ser_hedged['MMR_Based'] = df_ser_hedged.groupby('Country', group_keys = False)\
#                               .apply(lambda df_country: (1 + df_country['Returns LOC']) * (((1 + ser_mmr.loc[All, 'US']) / (1 + df_country['MMR LOC'])) ** (1 /12)) - 1)
### Aggregating hedged returns:
dict_ser_ret['HEDGED'] = pd.concat(dict_ser_hedged).droplevel(0).sort_index()
### Effective exchange rates options preparing:
dict_ser_eer = {}
dict_ser_eer['REER'] = ser_reer.groupby('Country').ffill(limit = int_eer_fill_limit)
if bool_NEER_raw:
    ### Simple NEER usage:
    dict_ser_eer['NEER'] = ser_neer.groupby('Country').ffill(limit = int_eer_fill_limit)
else:
    ### Alternative NEER usage: NEER for the countries whose REER is sourced from IMF or BIS, REER for the rest:
    set_REER_monthly = set(ser_reer_sourced.loc[All, All, ['IMF', 'BIS']].index.get_level_values(1).unique())
    set_REER_all = set(ser_reer.index.get_level_values(1).unique())
    list_from_REER = sorted(list(set_REER_all - set_REER_monthly))
    list_from_NEER = sorted(list(set_REER_monthly))
    dict_ser_eer['NEER'] = pd.concat([ser_reer.loc[All, list_from_REER], ser_neer.loc[All, list_from_NEER]], axis = 0).groupby('Country').ffill(limit = int_eer_fill_limit)
#dict_ser_eer['FX'] = ser_fx_country.groupby('Country').ffill(limit = int_eer_fill_limit)
### Concepts options preparing:
dict_ser_concept = {}
### XCRA concept data shifting:
df_xcra_shifted = df_xcra_filled.groupby('Country').shift(int_concept_lag)
### XCRA concepts calculating:
#dict_ser_concept['EXPIMP_GDP_rate'] = (df_xcra_shifted['Imports'] + df_xcra_shifted['Exports']) / df_xcra_shifted['GDP']
dict_ser_concept['EXP_GDP_rate'] = df_xcra_shifted['Exports'] / df_xcra_shifted['GDP']
#dict_ser_concept['CA_GDP_rate'] = df_xcra_shifted['Current Account'] / df_xcra_shifted['GDP']
### XCRA concepts adjusting (scaling):
for iter_concept in dict_ser_concept:
    dict_ser_concept[iter_concept] = dict_ser_concept[iter_concept] / int_concept_divider
### XCRA concepts adjusting (log-transform clipped to the [int_concept_min, int_concept_max] interval):
for iter_concept in dict_ser_concept:
    ### Values at or below -1 are moved to -0.99 to keep the logarithm argument positive:
    dict_ser_concept[iter_concept].loc[dict_ser_concept[iter_concept] <= -1] = -0.99
    dict_ser_concept[iter_concept] = np.maximum(int_concept_min, (np.minimum(int_concept_max, np.log(1 + dict_ser_concept[iter_concept]))))
#    dict_ser_concept[iter_concept] = np.log(1 + dict_ser_concept[iter_concept])             
### Neutral concept adding:
dict_ser_concept['NO_CONCEPT'] = pd.Series(1, index = dict_ser_concept['EXP_GDP_rate'].index)
### Concept series renaming:
for iter_concept in dict_ser_concept:
    dict_ser_concept[iter_concept].name = 'Multiplicator'

In [15]:
### TESTING: SHARE OF OBSERVATIONS CAPPED AT THE MAXIMAL VALUE:

### Export to GDP ratio brought to the concept scale:
ser_concept_test = (df_xcra_shifted['Exports'] / df_xcra_shifted['GDP']) / int_concept_divider
### Log-transform capped from above only:
ser_logged_test = np.minimum(int_concept_max, np.log(1 + ser_concept_test))
### Share of non-missing observations that are equal to the cap:
(ser_logged_test == int_concept_max).sum() / ser_logged_test.count()

0.3544357469015003

In [17]:
### TESTING: RECONCILIATION WITH THE ORIGINAL SOURCE 

### Reconciliation target: factor, EER type, date and countries to compare with the original source.
str_test_factor = "LONG_TERM"
str_test_eer = "REER"
str_test_date = "1999-05-31"
list_test_country = ["RU"] # All # ['BR'] #

In [26]:
### TESTING: CALCULATION PREPARING

### Part of canvas matrix.
### The canvas is created with explicit NaN data and float dtype: a Series built from an index only
### gets a version-dependent default dtype (float64 with a DeprecationWarning in pandas 1.x).
### NOTE(review): the canvas index has two levels (date, country) while three indexers are passed
### to .loc below - confirm that the trailing `All` is accepted by the pandas version in use.
ser_test_matrix = pd.Series(np.nan, index = pd.MultiIndex.from_product([idx_date_range, ser_ison.index.get_level_values(1).unique()]), dtype = 'float64').loc[str_test_date, list_test_country, All]
### EER delta (relative change by country: difference divided by the previous level):
ser_test_delta = dict_ser_eer[str_test_eer].groupby('Country').diff()
ser_test_delta = ser_test_delta / dict_ser_eer[str_test_eer].groupby('Country').shift()
### Momentum parameters:
int_mom_hl = round(24 * 260 / 12) # = 520
int_mom_win = 5 * 260 # = 1300
int_mom_min = 260 // 4 # = 65
### Weights array (reversed: the last element of the list has the weight 1):
list_weight = list(map(lambda iter_num: exp_weight_single(int_mom_hl, iter_num), range(int_mom_win)))[::-1]

528

In [28]:
### TESTING: CALCULATION MODELLING

# rolling_cond_weighted_mean(ser_test_matrix, -ser_test_delta, int_mom_win, int_mom_min, list_weight, False)
### The cell below repeats the body of the call above step by step for a single date and country.
### NOTE(review): the call above negates the delta while this cell uses it as is - confirm which sign is intended.

### Test parameters converting to function internal parameters:
ser_full_source = ser_test_delta
str_country = list_test_country[0]
iter_bm_date = str_test_date
int_mean_win = int_mom_win
int_mean_min = int_mom_min
### Filtering country vector from source:
ser_country_source = ser_full_source.loc[All, str_country]
### Defining monthend date number in source country vector:
int_idx_num = ser_country_source.index.get_loc(iter_bm_date)
### Creating vector for mean calculation.
### The window is copied before modification: otherwise the assignment below may write
### through a view into ser_country_source, depending on the pandas version and settings.
ser_rolled_source = ser_country_source.iloc[max((int_idx_num - int_mean_win + 1), 0) : int_idx_num + 1].copy()
### First observation of the window is treated as missing (np.nan: the np.NaN alias was removed in NumPy 2.0):
ser_rolled_source.iloc[0] = np.nan
### Test parameters converting to function internal parameters:
ser_source = ser_rolled_source
int_min_count = int_mean_min
### Weights setting:
ser_weight = pd.Series(list_weight[ : len(ser_source.index)], ser_source.index)
### Test parameters converting to function internal parameters:
ser_data = ser_source
### Weights filtering.
### NOTE: list_weight is overwritten here by the filtered array, so the cell is not re-runnable
### without re-running the preparing cell first; the name is kept to mirror weighted_average.
list_weight = ser_weight[ser_data.dropna().index].values
### Data filtering:
list_data = ser_data.dropna().values
### Weighted average calculating:
num_result = np.nansum(list_data * list_weight) / np.nansum(list_weight)
num_result

0.00027089566205617974

In [22]:
### TESTING: REVISED DATA SAVING

### Export of the reconciliation inputs to MS Excel files (one file per object, index levels are not merged).
### The cell relies on ser_test_delta, ser_weight and ser_rolled_source created by the testing cells above.
### EER levels for the test countries:
dict_ser_eer[str_test_eer].loc[All, list_test_country].to_excel('Data_Files/Test_Files/Revision_EER_Level_All.xlsx', merge_cells = False)
### EER relative changes for the test countries:
ser_test_delta.loc[All, list_test_country].to_excel('Data_Files/Test_Files/Revision_EER_Ret_All.xlsx', merge_cells = False)
### Weights and data of the tested window:
ser_weight.to_excel('Data_Files/Test_Files/Revision_EER_Weights_1300.xlsx', merge_cells = False)
ser_rolled_source.to_excel('Data_Files/Test_Files/Revision_EER_Ret_1300.xlsx', merge_cells = False)

In [75]:
### MAIN SCRIPT: FACTORS CALCULATING (NO PRELIMINARY DATA USING)

### Containers for preliminary data:
dict_trans_factor_hdf = {}
### Factors looping (single factor, taken from the testing parameters):
iter_factor = str_test_factor
### Parameters loading, dict_combinations holds (algorithm, concept, EER type, returns type):
iter_algo = dict_combinations[iter_factor][0]
iter_concept = dict_combinations[iter_factor][1]
iter_eer = dict_combinations[iter_factor][2]
iter_ret = dict_combinations[iter_factor][3]    
print(f'{iter_algo} / {iter_factor} / {iter_ret} / {iter_concept} / {iter_eer}')                
### Iteration data loading:
ser_iter_eer = dict_ser_eer[iter_eer]
### Factor matrix creating.
### The canvas is created with explicit NaN data and float dtype: a Series built from an index only
### gets a version-dependent default dtype (float64 with a DeprecationWarning in pandas 1.x).
ser_iter_factor = pd.Series(np.nan, index = pd.MultiIndex.from_product([idx_date_range, ser_ison.index.get_level_values(1).unique()]), dtype = 'float64').sort_index()
ser_iter_factor.index.set_names(['Date', 'Country'], inplace = True)                
### Momentum factor data preparing (absolute EER change by country, missing values dropped):
ser_iter_delta = ser_iter_eer.groupby('Country').diff().dropna()
### Momentum parameters (half-life and window are converted to days with ser_work_periods):
int_mom_hl = dict_mom_hl[iter_factor] * ser_work_periods['Month', 'D']
int_mom_win = int_mom_length * ser_work_periods['Year', 'D']
int_mom_min = int_mom_min_win
### Weights array (reversed: the last element of the list has the weight 1):
list_weight = list(map(lambda iter_num: exp_weight_single(int_mom_hl, iter_num), range(int_mom_win)))[::-1]
### Momentum factor calculation (weighted mean of the negated delta for every country and month end):
ser_iter_factor = ser_iter_factor.groupby('Country').transform(rolling_cond_weighted_mean, -ser_iter_delta, int_mom_win, int_mom_min, list_weight, False)
### Factor ISONing (market membership becomes the third index level):
ser_iter_factor = ser_iter_factor.to_frame().join(ser_ison, how = 'left').set_index('Market', append = True).squeeze()
ser_iter_factor.name = 'Factor'
### Regions clearing:
ser_iter_factor = ser_iter_factor.loc[idx_date_range, All, list_ison]
### Countries filtering:
ser_iter_factor = ser_iter_factor.drop(list_countries_to_exclude, level = 'Country') 
### Factor standartizing (date by date, inside each market when bool_within_market is set):
ser_iter_factor_std = single_factor_standartize(ser_iter_factor, list_truncate, within_market = bool_within_market)
ser_iter_factor_std.name = 'Factor'                   

MOMENTUM / LONG_TERM / HEDGED / EXP_GDP_rate / REER


In [81]:
### TEMP



65

In [110]:
### TEMP

### Standartized factor value for the reconciliation date and countries (all markets);
### the commented line shows the same slice of the factor before standartizing:
ser_iter_factor_std.loc[str_test_date, list_test_country, All]
#ser_iter_factor.loc[str_test_date, list_test_country, All]

Date        Country  Market
1999-05-31  RU       EM        1.364668
Name: Factor, dtype: float64