In [2]:
### EER FACTOR DAILY GENERATOR (SHOULD BE IGNORED)

In [3]:
### MODULES IMPORT (PART OF THE PRODUCT CODE)

import pandas as pd
import numpy as np
from datetime import date, datetime
import math

In [4]:
### INTERNAL PARAMETERS INITIALIZATION (SHOULD BE IGNORED)

### Universe path:
str_path_universe = 'Data_Files/Source_Files/acadian_universe.xlsx'
### Bloomberg structured data extraction keys:
str_path_bb_hdf = 'Data_Files/Source_Files/Bloomberg_prepared.h5'
str_key_fx_country = 'bb_fx_country'
str_key_fx_demeaned = 'bb_fx_demeaned'
str_key_mcap = 'bb_mcap'
str_key_reer = 'bb_reer'
str_key_neer = 'bb_neer'
str_key_reer_sourced = 'bb_reer_sourced'
str_key_neer_sourced = 'bb_neer_sourced'
str_key_export_monthly = 'bb_export'
str_key_gdp = 'bb_gdp'
str_key_cpi = 'bb_cpi'
### General daily-mone ranges parameters:
str_source_date_start = '1992-01-01' ### Start date for source vectors
str_measure_date_start = '1996-08-01' ### Start date for efficacy measures
str_ison_date_start = '1994-01-31' ### Start date for ISON Universe
str_measure_date_end = '2020-08-31' ### End date for efficacy measures
idx_source_date_range = pd.date_range(str_source_date_start, str_measure_date_end, freq = 'B') ### Range for source data filtering
idx_test_date_range = pd.date_range(str_ison_date_start, str_measure_date_end, freq = 'B') ### Range for source data filtering
idx_factor_date_range = pd.date_range(str_source_date_start, str_measure_date_end, freq = 'BM') ### Range for factor data filtering
idx_measure_date_range = pd.date_range(str_measure_date_start, str_measure_date_end, freq = 'BM') ### Range for measures calculation

In [5]:
### GENERAL PARAMETERS INITIALIZATION (PART OF THE PRODUCT CODE)

### Common constants:
All = slice(None)

### Standartization parameters:
list_truncate = [2.5, 2.0] ### Standartization boundaries
bool_within_market = True ### Standartization option

### ISON filtering options:
list_ison = ['DM', 'EM', 'FM'] ### Regions filter to drop NaN region values
list_filter = ['DM', 'EM', 'FM'] ### Additional regions filter
list_countries_to_exclude = ['VE'] ### Countries not to play the game

### Export annualizing options (for interaction variable calculation):
int_rolling_exp_max = 12 ### Rolling window length, months
int_rolling_exp_min = int_rolling_exp_max // 2 ### Minimal rolling window length, months
### Lag shifts (for interaction variable calculation):
int_gdp_lag = 3 ### Lag for GDP, months
int_export_lag = 1 ### Lag for MA12 Annualized Export, months
### General interaction variable calculation options:
int_concept_divider = 1000 ### Divider to equalize Export and GDP scales
int_concept_pctile_top = 0.75 ### Maximal prctile to winsorize log(1 + EXP/GDP) vector
int_prctile_scale = 4 ### Ranking scale for interaction variable

### Export adjustment parameters (for factor source tuning):
int_season_adj_ma = 12 ### Moving average length for seasonal adjustment, months
int_season_adj_shift = math.ceil(int_season_adj_ma / 2) ### Moving average shift for seasonal adjustment, months
int_cpi_lag = 1 ### Lag for CPI, months
### FX Rate parameters (for factor source tuning):
list_extreme_fx_ret = [-0.5, 2.0] ### Boundaries to avoid denomination distortions
### General source parameters:
int_eer_fill_limit = 22 * 3 ### Days for forward fill sources inside country vectors

### General factor calculation parameters:
int_mom_length = 5 ### Years of momentum vector
### Half-life period, months:
dict_mom_hl = {} 
dict_mom_hl['LONG_TERM'] = 24
dict_mom_hl['SHORT_TERM'] = 3
### Minimal values number, months:
dict_mom_min = {} 
dict_mom_min['LONG_TERM'] = dict_mom_hl['LONG_TERM']
dict_mom_min['SHORT_TERM'] = dict_mom_hl['SHORT_TERM']
### Factors options:
dict_combinations = {}
dict_combinations['LONG_TERM_EER'] = ('LONG_TERM', 'REER')
dict_combinations['SHORT_TERM_MIXED'] = ('SHORT_TERM', 'MIXED')
dict_combinations['LONG_TERM_EXPORT'] = ('LONG_TERM', 'EXPORT')
### Factor averaging weights:
dict_factors_weights = {}
dict_factors_weights['LONG_TERM_EER'] = 1.0
dict_factors_weights['SHORT_TERM_MIXED'] = 1.0
dict_factors_weights['LONG_TERM_EXPORT'] = 1.0
### Factors signs:
dict_factors_signs = {}
dict_factors_signs['LONG_TERM_EER'] = -1.0
dict_factors_signs['SHORT_TERM_MIXED'] = -1.0
dict_factors_signs['LONG_TERM_EXPORT'] = 1.0
### Periods lengths for different frequencies:
ser_work_periods = pd.Series(1 , index = pd.MultiIndex.from_product([['Year', 'Month'], ['Y', 'M', 'D']], names = ['Period', 'Frequency']))
ser_work_periods['Year', 'M'] = 12
ser_work_periods['Year', 'D'] = 260
ser_work_periods['Month', 'Y'] = 0
ser_work_periods['Month', 'D'] = 22
flo_exp_weight_month = ser_work_periods['Year', 'D'] / ser_work_periods['Year', 'M'] ### Day in BMonth number for specific exponential weight calculation
### Standalone factors weights:
list_static_weights = [2.0, 1.0, 1.0]

In [6]:
### DEFINING MATLAB STYLE PRCTILE (PART OF THE PRODUCT CODE)

def prctile_matlab(ser_to_perc, p):
    ### Sorted list preparing:
    list_to_perc = ser_to_perc.dropna().values
    list_sorted = np.sort(list_to_perc)
    ### Length calculating:
    num_len = len(list_to_perc)    
    ### Prctile calculating:
    num_result = np.interp(np.array(p), np.linspace(1 / (2 * num_len), (2 * num_len - 1) / (2 * num_len), num_len), list_sorted)
    ### Results output:
    return num_result

In [7]:
### DEFINING EXPONENTIAL WEIGHT (PART OF THE PRODUCT CODE)

def exp_weight_single(halflife_len = 3, num_element = 0):
    ### Weight calculating:
    num_period_factor = math.exp(math.log(0.5) / round(halflife_len))
    num_weight = np.exp(math.log(num_period_factor) * num_element)
    ### Result output:
    return num_weight

In [8]:
### DEFINING GEOMETRICAL WEIGHT (PART OF THE PRODUCT CODE)

def geom_weight_single(flo_ratio, flo_factor = 1, num_element = 0):
    ### Results output:
    return flo_factor * (flo_ratio ** num_element)

In [9]:
### DEFINING WEIGHTED AVERAGE (PART OF THE PRODUCT CODE)

def weighted_average(ser_data, ser_weight = False, int_min_count = 0):
    ### Default output:
    num_result = np.NaN
    ### Checking for data presence:
    if (ser_data.count() > int_min_count):       
        ### Checking for weights dataset:
        if isinstance(ser_weight, bool):
            ### Calculating of simple average:
            num_result = np.nanmean(ser_data.values)
        else:
            ### Weights filtering:
            list_weight = ser_weight[ser_data.dropna().index].values
            ### Checking for weights presence:
            if np.nansum(list_weight):
                ### Data filtering:
                list_data = ser_data.dropna().values
                ### Weighted average calculating:
                num_result = np.nansum(list_data * list_weight) / np.nansum(list_weight)
    ### Results output:
    return num_result

In [10]:
### DEFINING WEIGHTED AVERAGE FOR DATAFRAME COLUMNS (PART OF THE PRODUCT CODE)

def columns_average(df_series, list_weights = False):
    ### Equal weights list creating:
    if isinstance(list_weights, bool):
        list_weights = [1] * len(df_series.columns)
    ### Dataframe of weights initialising:
    df_weights = pd.DataFrame([list_weights] * len(df_series.index), index = df_series.index, columns = df_series.columns)
    ### Zeroing weights for NaN values:
    for iter_col in df_weights.columns:
        df_weights.loc[df_series[iter_col].isna(), iter_col] = 0
    ### Weighted mean calulating:
    df_means = (df_series * df_weights).sum(axis = 1) / df_weights.sum(axis = 1)    
    ### Results output:
    return df_means

In [11]:
### DEFINING MEAN MOMENTUM FUNCTION (PART OF THE PRODUCT CODE)

def rolling_cond_weighted_mean(ser_country_matrix, ser_full_source, int_mean_win, int_mean_min, list_weight = False):
    ### Defining conditional average calculator:
    def conditional_average(ser_source, list_weight, int_min_count = 0, ser_condition = False):
        ### Weight setting
        ser_weight = pd.Series(list_weight[ : len(ser_source.index)], ser_source.index)
        ### Results output:
        return weighted_average(ser_source, ser_weight, int_min_count)    
    ### Country saving:
    str_country = ser_country_matrix.index[0][1]
    ### Checking for country presence in source vector:
    if (str_country in ser_full_source.index.get_level_values(1)):
        ### Filtering country vector from source:
        ser_country_source = ser_full_source.loc[All, str_country]
        ### :
        for iter_bm_date in ser_country_matrix.index.get_level_values(0):
            try:
                ### Defining monthend date number in source country vector:
                int_idx_num = ser_country_source.index.get_loc(iter_bm_date)
                ### Creating vectors for numerator and denominator means calculation:
                ser_rolled_source = ser_country_source.iloc[max((int_idx_num - int_mean_win + 1), 0) : int_idx_num + 1]
                if not isinstance(ser_full_cond, bool):
                    ser_rolled_cond = ser_country_cond.loc[ser_rolled_source.index]
                else:
                    ser_rolled_cond = False
                ### Action for MatLab compatibility:
                ser_rolled_source.iloc[0] = np.NaN
                ### Simple mean calculation:
                if isinstance(list_weight, bool):
                    ser_country_matrix.loc[iter_bm_date, str_country] = weighted_average(ser_rolled_source, False, int_mean_min)
                else:
                    ### Weighted mean calculation:
                    ser_country_matrix.loc[iter_bm_date, str_country] = conditional_average(ser_rolled_source, list_weight, int_mean_min, ser_rolled_cond)
            except KeyError:
                pass
    ### Resulting vector output:
    return ser_country_matrix

In [12]:
### DEFINING MULTI-STEP STANDARTIZATION FOR SEPARATE SERIES (PART OF THE PRODUCT CODE)

def multistep_standartize(ser_data_source, arr_truncate, ser_weight = False, reuse_outliers = False, center_result = True, full_result = False):  
    ### Arrays of iterations properties:
    arr_mean = []
    arr_std = []
    ### Adding equal weights, when weights are absent:
    if isinstance(ser_weight, bool):
        ser_weight = pd.Series(1, index = ser_data_source.index)
        ser_weight.name = 'Weight'    
    ### Workhorse and resulting data vectors initialising:
    ser_data_iter = ser_data_source.dropna()
    ser_weight_iter = ser_weight.copy()
    ser_data_full = pd.Series(np.NaN, index = ser_data_iter.index)
    ### Looping by boundaries array:
    for num_bound_iter in arr_truncate:
        ### Properties calculating and saving:
        num_mean_iter = weighted_average(ser_data_iter, ser_weight_iter)
        num_std_iter = ser_data_iter.std()
        arr_mean.append(num_mean_iter)
        arr_std.append(num_std_iter)
        ser_data_iter = (ser_data_iter - num_mean_iter) / num_std_iter       
        ### Standartizing:
        if reuse_outliers:
            ser_data_iter[ser_data_iter.abs() >= num_bound_iter] = np.sign(ser_data_iter) * num_bound_iter 
        else:
            ### Saving to result and excluding from further calculations truncated values:             
            ser_data_full.where(ser_data_iter.abs() < num_bound_iter, np.sign(ser_data_iter) * num_bound_iter, inplace = True)
            ser_data_iter = ser_data_iter[ser_data_iter.abs() < num_bound_iter]           
    ### Aggregating result:
    if (reuse_outliers):
        ser_data_full = ser_data_iter
    else:     
        ser_data_full[ser_data_iter.index] = ser_data_iter
    ### Centering result:
    if (center_result):
        ser_result = ser_data_full - weighted_average(ser_data_full, ser_weight) 
    else:
        ser_result = ser_data_full    
    ### Result output:
    ser_result.name = str(ser_data_source.name) + '_standartized'
    if (full_result):
        return (ser_result, arr_mean, arr_std)
    else:
        return ser_result

In [13]:
### DEFINING MULTI-STEP STANDARTIZATION BY MARKET FOR CROSS-SECTION (PART OF THE PRODUCT CODE)

def ison_standartize(ser_to_manage, arr_truncate, ser_weight = False, reuse_outliers = False, center_result = True, full_result = False, within_market = False):
    ### Multi-step standartizing:
    if (within_market):
    ### Within market standartizing:
        ser_result = ser_to_manage.groupby(by = 'Market', group_keys = False).apply(multistep_standartize, arr_truncate, ser_weight, 
                                                                                                  reuse_outliers, center_result, full_result)
    else:
    ### Full universe standartizing:
        ser_result = multistep_standartize(ser_to_manage, arr_truncate, ser_weight, reuse_outliers, center_result, full_result)
    ### Results output:
    return ser_result

In [14]:
# DEFINING MULTI-STEP STANDARTIZATION BY MARKET FOR FULL FACTOR STACK (PART OF THE PRODUCT CODE)

def single_factor_standartize_daily(ser_factor, arr_truncate, ser_weight = False, reuse_outliers = False, center_result = True, within_market = False):
    ### Weights preparing:
    if isinstance(ser_weight, bool):
        ser_weight = pd.Series(1, index = ser_factor.index)
        ser_weight.name = 'Weight'
    ### Multi-step standartizing:        
    df_factor = ser_factor.to_frame().join(ser_weight, how = 'left')
    df_factor.columns = ['Factor', 'Weight']
    ser_result = ison_standartize(df_factor['Factor'], arr_truncate, df_factor['Weight'], reuse_outliers, center_result, False, within_market)
    ### Results output:
    ser_result.name = ser_factor.name
    return ser_result   

In [15]:
### DEFINING EXTRACTION UNIVERSE DATA FROM MS EXCEL SOURCE (SHOULD BE IGNORED)

def ison_membership_converting(str_path_universe, date_end, bool_daily = False, int_backfill_months = 0):
    ### Defining business-month-end reindexation on country level:
    def country_modify(ser_raw_country, date_end):
        ser_res_country = ser_raw_country.droplevel(0).resample('MS').last().resample('BM').last()
        range_country = pd.date_range(ser_res_country.index[0], date_end, freq = 'BM')
        return ser_res_country.reindex(range_country).ffill()
    ### Markets encoding table:
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM', 0: np.NaN}     
    ### Loading source file:
    df_raw_universe = pd.read_excel(io = str_path_universe, sheet_name = 0, header = 0, parse_dates = True, index_col = [0, 1],
                                 na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', 
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    ### Converting source file:
    df_raw_universe.index.names = ['Country', 'Date']
    ser_raw_universe = df_raw_universe['Region']
    ser_raw_universe.fillna(0, inplace = True)
    ser_raw_universe.name = 'Market'
    ### By country reindexation and translation:
    ser_res_universe = ser_raw_universe.groupby('Country').apply(country_modify, date_end)
    ser_res_universe.index.names = ['Country', 'Date']
    ser_res_universe = ser_res_universe.replace(dict_markets).reorder_levels([1, 0]).sort_index() 
    ### Expanding membership for primary regions members by backfilling:
    if int_backfill_months:
        ### List of regions:
        list_region = list(ser_res_universe.dropna().unique())
        ### Initialising of collection of series with backfilled data for each region:
        list_ison_backfill = []
        ### Regions looping:
        for iter_region in list_region:
            ### Defining start of region date:
            date_first_valid = ser_res_universe.loc[ser_res_universe == iter_region].first_valid_index()[0]
            ### Creating dates index to backfilling:
            idx_date_backfill = pd.date_range(end = date_first_valid, periods = int_backfill_months + 1, freq = 'BM')[: -1]
            ### Creating primary countries index to backfilling:            
            idx_region_backfill = ser_res_universe.loc[ser_res_universe == iter_region].loc[date_first_valid, All].index.get_level_values('Country')
            ### Creating full index:
            idx_ison_backfill = pd.MultiIndex.from_product([idx_date_backfill, idx_region_backfill])
            ### Series with backfilled data:
            list_ison_backfill.append(pd.Series(iter_region, index = idx_ison_backfill))
        ### Combination of backfilled series and original ISON data:    
        ser_res_universe = ser_res_universe.combine_first(pd.concat(list_ison_backfill, axis = 0)).sort_index()  
        ser_res_universe.index.names = ['Date', 'Country']
    ### Converting to daily frequency:
    if bool_daily:
        ser_res_universe = ser_res_universe.reset_index('Country').groupby('Country').resample('B').ffill()['Market'].swaplevel().sort_index()    
    ### Results output:
    ser_res_universe.name = 'Market'
    return ser_res_universe

In [16]:
### DATA LOADING (SHOULD BE ADOPTED)

ser_fx_country = pd.read_hdf(str_path_bb_hdf, key = str_key_fx_country) ### FX Rates to use in Export source denomination to domestic currency
ser_fx_rate_demeaned = pd.read_hdf(str_path_bb_hdf, key = str_key_fx_demeaned) ### Demeaned FX Rates to use as a source in the Short-Term factor
ser_reer = pd.read_hdf(str_path_bb_hdf, key = str_key_reer_sourced) ### Real Effective Exchange Rate to use as a source in the Short-Term factor
ser_neer = pd.read_hdf(str_path_bb_hdf, key = str_key_neer_sourced) ### Nominal Effective Exchange Rate to use as a source in the Short-Term factor
ser_gdp = pd.read_hdf(str_path_bb_hdf, key = str_key_gdp) ### GDP to use in Interaction variable calculation
ser_export = pd.read_hdf(str_path_bb_hdf, key = str_key_export_monthly)
ser_cpi = pd.read_hdf(str_path_bb_hdf, key = str_key_cpi) ### GDP to use in Interaction variable calculation
ser_ison = ison_membership_converting(str_path_universe, datetime.strptime(str_measure_date_end, '%Y-%m-%d')) ### ISON universe, end-of-bus-month vector
ser_ison_daily = ison_membership_converting(str_path_universe, datetime.strptime(str_measure_date_end, '%Y-%m-%d'), bool_daily = True) ### ISON universe, bus-daily vector

In [17]:
### MAIN SCRIPT: FX RATES CORRECTING BY CLEARING OFF DENOMINATION DISTRORTIONS (SHOULD BE ADOPTED)

### Deleting extreme FX Rates changes, caused by currency denomination:
def kill_extreme_fx(ser_country_rate, list_extreme_fx_ret):
    ### Searching for first rate:
    idx_first_valid = ser_country_rate.first_valid_index()
    ### Calculating FX Returns:
    ser_country_ret = ser_country_rate.diff() / ser_country_rate.shift()
    ### Clearing extreme returns:
    ser_country_ret.loc[(ser_country_ret < list_extreme_fx_ret[0]) | (ser_country_ret > list_extreme_fx_ret[1])] = 0.0
    ### Recovering FX Rates from FX Returns:
    ser_country_res = (1 + ser_country_ret)
    ser_country_res.loc[idx_first_valid] = ser_country_rate.loc[idx_first_valid]
    ser_country_res = ser_country_res.cumprod()
    ### Results output:
    return ser_country_res
### FX Rates clearing from distortions:
ser_fx_rate_cleared = ser_fx_country.groupby('Country').ffill()
ser_fx_rate_cleared = ser_fx_rate_cleared.groupby('Country').apply(kill_extreme_fx, list_extreme_fx_ret)

In [18]:
### MAIN SCRIPT: EXPORT SEASONALITY ADJUSTMENT (SHOULD BE ADOPTED)

### Export data filling:
ser_export_monthly = ser_export.groupby('Country').ffill()
### Year length moving average calculating:
ser_ma_centered = ser_export_monthly.groupby('Country', group_keys = False).rolling(window = int_season_adj_ma, min_periods = int_season_adj_shift).mean()
### Year length moving average shifting:
ser_ma_centered = ser_ma_centered.groupby('Country').shift(-int_season_adj_shift)
ser_ma_centered.name = 'MA Centered'
### Vectors concatenation:
df_export_adjustment = pd.concat([ser_export_monthly, ser_ma_centered], axis = 1)
### Export / MA Ratio calculating:
df_export_adjustment['Ratio'] = df_export_adjustment['Export'] / df_export_adjustment['MA Centered']
### Adding month number to index:
df_export_adjustment['Month'] = df_export_adjustment.index.get_level_values('Date').month
df_export_adjustment.set_index(['Month'], append = True, inplace = True)
### Month number Ratio median calculating as Scale:
ser_ratio_median = df_export_adjustment['Ratio'].groupby(['Country', 'Month']).median()
ser_ratio_median.name = 'Scale'
### Scale modification:
ser_ratio_median = ser_ratio_median / ser_ratio_median.groupby('Country').mean()
### Adding Scale to data table:
df_export_adjustment = df_export_adjustment.join(ser_ratio_median, on = ['Country', 'Month'], how = 'left', sort = True).reset_index('Month', drop = True)
### Seasonally adjusted Export calculating:
ser_export_adjusted = (df_export_adjustment['Export'] / df_export_adjustment['Scale']).sort_index()

In [19]:
### MAIN SCRIPT: FACTOR SOURCES ADOPTING (SHOULD BE ADOPTED)

### Sources options preparing:
dict_ser_eer = {}
### Source vectors forward filling and reindexing:
ser_reer_source = ser_reer.droplevel('Source').unstack('Country').reindex(idx_source_date_range).ffill(limit = int_eer_fill_limit).stack('Country').sort_index()
ser_reer_source.index.names = ['Date', 'Country']
ser_neer_source = ser_neer.droplevel('Source').unstack('Country').reindex(idx_source_date_range).ffill(limit = int_eer_fill_limit).stack('Country').sort_index()
ser_neer_source.index.names = ['Date', 'Country']
ser_fx_source = ser_fx_rate_demeaned.unstack('Country').reindex(idx_source_date_range).ffill(limit = int_eer_fill_limit).stack('Country').sort_index()
ser_fx_source.index.names = ['Date', 'Country']
ser_export_source = ser_export_adjusted.unstack('Country').reindex(idx_source_date_range).ffill(limit = int_eer_fill_limit).stack('Country').sort_index()
ser_export_source.index.names = ['Date', 'Country']
### REER vector creating:
dict_ser_eer['REER'] = ser_reer_source
### MIXED vector creating:
### Selecting all ISON countries:
set_ison = set(ser_ison.dropna().index.get_level_values('Country').unique())
### Selecting all REER countries:
set_reer_all = set(ser_reer.dropna().index.get_level_values('Country').unique())
### Selecting all NEER countries:
set_neer_all = set(ser_neer.dropna().index.get_level_values('Country').unique())
### Selecting countries, where REER has monthly frequency:
set_reer_monthly = set(ser_reer.loc[All, All, ['IMF', 'BIS']].index.get_level_values(1).unique())
### Defining countries from REER to participate in NEER source:
set_reer_st = set_reer_all - set_reer_monthly
### Defining countries from NEER to participate in NEER source:
ser_neer_st = set_reer_monthly & set_neer_all
### Defining rest of countries to participate in NEER source from FX rates:
set_fx_st = set_ison - (set_reer_st | ser_neer_st)
### Converting sets to lists:
list_reer_st = sorted(list(set_reer_st))
list_neer_st = sorted(list(ser_neer_st))
list_fx_st = sorted(list(set_fx_st))
dict_ser_eer['MIXED'] = pd.concat([ser_reer_source.loc[All, list_reer_st], ser_neer_source.loc[All, list_neer_st], ser_fx_source.loc[All, list_fx_st]]).sort_index()
### EXPORT vector creating:  
### Selecting all ISON countries:    
set_ison = set(ser_ison.dropna().index.get_level_values('Country').unique())
### Monthly CPI calculating:
ser_cpi_monthly = ser_cpi.groupby('Country').transform(lambda ser_country: (1 + ser_country / 100) ** (1 /12) - 1).groupby('Country').ffill()
### CPI countries:
set_cpi_export = set(ser_cpi_monthly.dropna().index.get_level_values('Country').unique())
### FX countries:
set_fx_export = set(ser_fx_rate_cleared.dropna().index.get_level_values('Country').unique())
### USD Export countries:
set_usd_export = set_ison - (set_cpi_export & set_fx_export)
### USD Export vector:
ser_export_usd = ser_export_source.loc[All, set_usd_export]
### CPI cumprod monthly shifted:
ser_cpi_cumprod = ser_cpi_monthly.groupby('Country').transform(lambda ser_country: (1 + ser_country).cumprod()).groupby('Country').shift(int_cpi_lag)
### CPI cumprod daily:    
ser_cpi_cumprod = ser_cpi_cumprod.unstack('Country').reindex(idx_source_date_range).ffill(limit = int_eer_fill_limit).stack('Country').sort_index()
ser_cpi_cumprod.index.names = ['Date', 'Country']
### Local Export vector:
ser_export_loc = (ser_export_source / ser_fx_rate_cleared / ser_cpi_cumprod).loc[All, set_ison - set_usd_export]
### Combined Export vector:
ser_export_all = pd.concat([ser_export_usd, ser_export_loc]).sort_index() 
list_usd_export = sorted(set_usd_export)
list_loc_export = sorted(set_ison - set_usd_export)   
dict_ser_eer['EXPORT'] = ser_export_all   

In [20]:
### MAIN SCRIPT: INTERACTION VARIABLE PREPARING (SHOULD BE ADOPTED)

### Defining the way to rank the interaction variable:
def value_to_rank(ser_region):
    ser_cutted = np.maximum(0, (np.minimum(prctile_matlab(ser_region, int_concept_pctile_top), ser_region)))
    ser_cutted.loc[ser_cutted.isna()] = prctile_matlab(ser_region, 0.50)    
    ser_result = np.maximum(0, (ser_cutted - ser_cutted.min()) / (ser_cutted.max() - ser_cutted.min()) * int_prctile_scale) + 1
    return ser_result

### Interaction variable source vectors converting:
ser_export_ma12_annual = ser_export.groupby('Country', group_keys = False).rolling(int_rolling_exp_max, int_rolling_exp_min).mean() * int_rolling_exp_max
ser_export_ma12_annual = ser_export_ma12_annual.groupby(['Country']).fillna(method = 'ffill').groupby(['Country']).fillna(method = 'bfill')
### Interaction variable data shifting:
ser_gdp_concept = ser_gdp.groupby('Country').fillna(method = 'ffill').groupby('Country').fillna(method = 'bfill').groupby('Country').shift(int_gdp_lag)
ser_export_concept = ser_export_ma12_annual.shift(int_export_lag)
### Interaction variable calculating:
ser_concept_raw = ser_export_concept / ser_gdp_concept / int_concept_divider
### Interaction variable adjusting:
ser_concept_raw.name = 'Multiplicator'  
### Vector ISONing:
ser_concept_raw = ser_concept_raw.to_frame().join(ser_ison, how = 'right').set_index('Market', append = True).squeeze()   
### Logarithmization performing:
ser_concept_raw.loc[ser_concept_raw <= -1] = -0.99    
ser_concept_raw = np.log(1 + ser_concept_raw)
### Interaction variable ranking:
ser_concept_res = ser_concept_raw.groupby(['Date', 'Market']).apply(value_to_rank).sort_index()

In [33]:
### TEMP

### Standalone factors collection:
dict_iter_factor = {}
iter_date = idx_source_date_range[580]
#iter_date = idx_source_date_range[586]

print(iter_date)
date_iter_bm = iter_date + 1 * pd.tseries.offsets.BDay() + 0 * pd.tseries.offsets.BMonthEnd() - 1 * pd.tseries.offsets.BMonthEnd()
ser_iter_date_concept = ser_concept_res.loc[date_iter_bm, All, All]
ser_iter_date_concept = pd.concat({iter_date: ser_iter_date_concept.droplevel('Date')}, names = ['Date'])
### ISON countries list
idx_universe = ser_ison.index.get_level_values(1).unique()
### Factors looping:
for iter_factor in dict_combinations:
    ### Parameters loading:
    iter_term = dict_combinations[iter_factor][0]
    iter_eer = dict_combinations[iter_factor][1]
    print(f'{iter_term} / {iter_eer}')  
    ### Momentum parameters:
    int_mom_hl = dict_mom_hl[iter_term] * flo_exp_weight_month ### Not rounding here
    int_mom_win = int_mom_length * ser_work_periods['Year', 'D']
    int_mom_min = round(dict_mom_min[iter_term] * flo_exp_weight_month)
    ### Weights array:
    list_weight = list(map(lambda iter_num: exp_weight_single(int_mom_hl, iter_num), range(int_mom_win)))[::-1]    
    ### Date interval for factor calculating:
    idx_iter_date_range = pd.date_range(end = iter_date, periods = int_mom_win, freq = 'B')    
    ### Source loading:
    ser_iter_eer = dict_ser_eer[iter_eer].loc[idx_iter_date_range, All]    
    ### Source performing:
    ser_iter_delta = ser_iter_eer.groupby('Country').diff() / ser_iter_eer.groupby('Country').shift()   
    ser_iter_delta = ser_iter_delta.replace([np.inf, -np.inf], np.NaN)    
    ### Factor vector creating:
    ser_iter_factor = pd.Series(index = pd.MultiIndex.from_product([[iter_date], idx_universe])).sort_index()
    ser_iter_factor.index.set_names(['Date', 'Country'], inplace = True)    
    ### Momentum factor calculation:
    ser_iter_factor = ser_iter_factor.groupby('Country').transform(rolling_cond_weighted_mean, ser_iter_delta, int_mom_win, int_mom_min, list_weight)
    ### Factor ISONing:
    ser_iter_factor = ser_iter_factor.to_frame().join(ser_ison_daily, how = 'left').set_index('Market', append = True).squeeze()
    ser_iter_factor.name = 'Factor' 
    ### Regions clearing:
    ser_iter_factor = ser_iter_factor.loc[All, All, list_ison]
    ### Countries filtering:
    ser_iter_factor = ser_iter_factor.drop(list_countries_to_exclude, level = 'Country')     
    ### Standalone factor standartizing:
    ser_iter_factor_std = dict_factors_signs[iter_factor] * single_factor_standartize_daily(ser_iter_factor, list_truncate, within_market = bool_within_market)
    ser_iter_factor_std.name = 'Factor'        
    ### Interaction variable applying:
    ser_iter_multiplied = ser_iter_factor_std * ser_iter_date_concept
    ### Multiplied factor restandartizing:
    ser_iter_multiplied_std = single_factor_standartize_daily(ser_iter_multiplied, list_truncate, within_market = bool_within_market)
    ser_iter_multiplied_std.name = 'Factor'    
    ### Saving result to collection:
    dict_iter_factor[iter_factor] = ser_iter_multiplied_std
### Concatenating factors for averaaing:
df_weighted_factor = pd.concat(dict_iter_factor, axis = 1)
### Factors combining:
ser_combo_factor = columns_average(df_weighted_factor, list_static_weights)
### Combined factor standartizing:
ser_combo_factor_std = single_factor_standartize_daily(ser_combo_factor, list_truncate, within_market = bool_within_market)

1994-03-23 00:00:00
LONG_TERM / REER
SHORT_TERM / MIXED
LONG_TERM / EXPORT


In [21]:
### DEFINING FACTOR CREATING FUNCTION (SHOULD BE ADOPTED)

def get_currency_factor(iter_date):
    ### Standalone factors collection:
    dict_iter_factor = {}
    date_iter_bm = iter_date + 1 * pd.tseries.offsets.BDay() + 0 * pd.tseries.offsets.BMonthEnd() - 1 * pd.tseries.offsets.BMonthEnd()
    ser_iter_date_concept = ser_concept_res.loc[date_iter_bm, All, All]
    ser_iter_date_concept = pd.concat({iter_date: ser_iter_date_concept.droplevel('Date')}, names = ['Date'])
    ### ISON countries list
    idx_universe = ser_ison.index.get_level_values(1).unique()
    ### Factors looping:
    for iter_factor in dict_combinations:
        ### Parameters loading:
        iter_term = dict_combinations[iter_factor][0]
        iter_eer = dict_combinations[iter_factor][1]
        ### Momentum parameters:
        int_mom_hl = dict_mom_hl[iter_term] * flo_exp_weight_month ### Not rounding here
        int_mom_win = int_mom_length * ser_work_periods['Year', 'D']
        int_mom_min = round(dict_mom_min[iter_term] * flo_exp_weight_month)
        ### Weights array:
        list_weight = list(map(lambda iter_num: exp_weight_single(int_mom_hl, iter_num), range(int_mom_win)))[::-1]    
        ### Date interval for factor calculating:
        idx_iter_date_range = pd.date_range(end = iter_date, periods = int_mom_win, freq = 'B')    
        ### Source loading:
        ser_iter_eer = dict_ser_eer[iter_eer].loc[idx_iter_date_range, All]    
        ### Source performing:
        ser_iter_delta = ser_iter_eer.groupby('Country').diff() / ser_iter_eer.groupby('Country').shift()   
        ser_iter_delta = ser_iter_delta.replace([np.inf, -np.inf], np.NaN)    
        ### Factor vector creating:
        ser_iter_factor = pd.Series(index = pd.MultiIndex.from_product([[iter_date], idx_universe])).sort_index()
        ser_iter_factor.index.set_names(['Date', 'Country'], inplace = True)    
        ### Momentum factor calculation:
        ser_iter_factor = ser_iter_factor.groupby('Country').transform(rolling_cond_weighted_mean, ser_iter_delta, int_mom_win, int_mom_min, list_weight)
        ### Factor ISONing:
        ser_iter_factor = ser_iter_factor.to_frame().join(ser_ison_daily, how = 'left').set_index('Market', append = True).squeeze()
        ser_iter_factor.name = 'Factor' 
        ### Regions clearing:
        ser_iter_factor = ser_iter_factor.loc[All, All, list_ison]
        ### Countries filtering:
        ser_iter_factor = ser_iter_factor.drop(list_countries_to_exclude, level = 'Country')     
        ### Standalone factor standartizing:
        ser_iter_factor_std = dict_factors_signs[iter_factor] * single_factor_standartize_daily(ser_iter_factor, list_truncate, within_market = bool_within_market)
        ser_iter_factor_std.name = 'Factor'        
        ### Interaction variable applying:
        ser_iter_multiplied = ser_iter_factor_std * ser_iter_date_concept
        ### Multiplied factor restandartizing:
        ser_iter_multiplied_std = single_factor_standartize_daily(ser_iter_multiplied, list_truncate, within_market = bool_within_market)
        ser_iter_multiplied_std.name = 'Factor'    
        ### Saving result to collection:
        dict_iter_factor[iter_factor] = ser_iter_multiplied_std
    ### Concatenating factors for averaaing:
    df_weighted_factor = pd.concat(dict_iter_factor, axis = 1)
    ### Factors combining:
    ser_combo_factor = columns_average(df_weighted_factor, list_static_weights)
    ### Combined factor standartizing:
    ser_combo_factor_std = single_factor_standartize_daily(ser_combo_factor, list_truncate, within_market = bool_within_market)    
    ### Results output:
    return ser_combo_factor_std

In [26]:
### TESTING: PERFORMING FACTOR FOR DATE RANGE (SHOULD BE ADOPTED)

#iter_date = idx_source_date_range[580]
#iter_date = idx_source_date_range[586]
#print(iter_date)

### Local testing parameters:
int_interval = 100
dict_factor_by_date = {}
date_start = datetime.utcnow()
date_control = datetime.utcnow()
### Test performing:
print('Start time:', date_start)
for iter_num, iter_date in enumerate(idx_test_date_range[580 : 587]): # enumerate(idx_test_date_range): # 
    if not (divmod(iter_num, int_interval)[1]):
        if iter_num:
            print('Counter marker:', iter_num, '/', len(idx_test_date_range))
            timedelta_interval = datetime.utcnow() - date_control
            print('Time interval since last marker:', datetime.utcnow() - date_control)            
            print('Average interval for single date:', str(timedelta_interval / int_interval))
        date_control = datetime.utcnow()
    dict_factor_by_date[iter_date] = get_currency_factor(iter_date)
date_finish = datetime.utcnow()
print('Finish time:', date_finish)
print('Full interval:', date_finish - date_start)
print('Average interval for single date:', str((date_finish - date_start) / len(idx_test_date_range)))
ser_factor_full = pd.concat(dict_factor_by_date, axis = 0).droplevel(0)

Start time: 2021-01-20 10:32:12.515120
Finish time: 2021-01-20 10:32:36.778365
Full interval: 0:00:24.263245
Average interval for single date: 0:00:00.003498


In [27]:
### TEMP

ser_factor_full

Date        Country  Market
1996-04-22  AT       DM       -0.229720
            AU       DM       -1.370300
            BE       DM       -0.094994
            CA       DM        1.788951
            CH       DM       -0.389441
                                 ...   
1996-04-30  NO       DM        0.919579
            NZ       DM       -1.916358
            SE       DM       -1.654135
            SG       DM       -0.644592
            US       DM       -0.023909
Length: 147, dtype: float64

In [38]:
### TESTING: SAVING FACTOR TO COMPARE RESULTS (SHOULD BE IGNORED)

str_path_trans_hdf = 'Data_Files/Test_Files/EER_factors_transitional.h5'
str_key_trans_factor = 'trans_factor'
iter_term = 'TRIPLE'
iter_algo = 'COMBO'
iter_concept = 'COMBO'
iter_eer = 'COMBO'
iter_ret = 'COMBO'
iter_region = ['DM', 'EM', 'FM']
str_iter_key = '__'.join([iter_term, iter_algo, iter_concept, iter_eer, iter_ret]) 
ser_combo_research = pd.read_hdf(str_path_trans_hdf, key = str_key_trans_factor + '__' + str_iter_key).loc[All, All, iter_region]
ser_combo_test = (ser_factor_full.loc[idx_factor_date_range, All, All] - ser_combo_research).abs()
print('Errors sum:', ser_combo_test.sum())
print('Errors list:', ser_combo_test.loc[ser_combo_test > 0])
print('Count error:', (ser_factor_full.loc[idx_factor_date_range, All, All].count() - ser_combo_research.count()))

Errors sum: 0.0
Errors list: Series([], dtype: float64)
Count error: 0
