In [1]:
### EER FACTOR DAILY GENERATOR (TO BE IGNORED IN PRODUCT CODE)

In [2]:
### MODULES IMPORT (PART OF THE PRODUCT CODE)

import pandas as pd
import numpy as np
from datetime import date, datetime
import math

In [3]:
### INTERNAL PARAMETERS INITIALIZATION (TO BE IGNORED IN PRODUCT CODE)

import os ### To work with csv files

### Universe path (MS Excel workbook read by ison_membership_converting):
str_path_universe = 'Data_Files/Source_Files/acadian_universe.xlsx'
### Bloomberg structured data: HDF5 file path and the keys of the datasets stored in it:
str_path_bb_hdf = 'Data_Files/Source_Files/Bloomberg_prepared.h5'
str_key_fx_country = 'bb_fx_country'
str_key_fx_demeaned = 'bb_fx_demeaned'
str_key_mcap = 'bb_mcap'
str_key_reer = 'bb_reer'
str_key_neer = 'bb_neer'
str_key_reer_sourced = 'bb_reer_sourced' ### The only key actually read in this notebook (REER source for the factor)
str_key_neer_sourced = 'bb_neer_sourced'
str_key_export_monthly = 'bb_export'
str_key_gdp = 'bb_gdp'
str_key_cpi = 'bb_cpi'
### General date range parameters (daily and monthly):
str_source_date_start = '1992-01-01' ### Start date for source vectors
str_measure_date_start = '1996-08-01' ### Start date for efficacy measures
str_ison_date_start = '1994-01-31' ### Start date for ISON Universe
str_measure_date_end = '2020-08-31' ### End date for efficacy measures
idx_source_date_range = pd.date_range(str_source_date_start, str_measure_date_end, freq = 'B') ### Business days range for source data filtering
idx_test_monthly_date_range = pd.date_range(str_ison_date_start, str_measure_date_end, freq = 'BM') ### Business month ends from ISON start: dates iterated by the factor testing loop
idx_test_daily_date_range = pd.date_range(str_ison_date_start, str_measure_date_end, freq = 'B') ### Business days from ISON start: source of the single debugging date
idx_factor_date_range = pd.date_range(str_source_date_start, str_measure_date_end, freq = 'BM') ### Business month ends range for factor data filtering
idx_measure_date_range = pd.date_range(str_measure_date_start, str_measure_date_end, freq = 'BM') ### Business month ends range for measures calculation
### Results saving:
str_lt_reer_raw_csv = 'Data_Files/Test_Files/acadian_mode_lt_reer_raw.csv' ### Raw factor values, appended date by date by get_lt_reer_factor
str_lt_reer_std_xlsx = 'Data_Files/Test_Files/acadian_mode_lt_reer_std.xlsx' ### Standardized factor stack exported at the end of the notebook

In [4]:
### GENERAL PARAMETERS INITIALIZATION (PART OF THE PRODUCT CODE)

### Common constants:
All = slice(None) ### Full-level selector used inside .loc[...] calls on MultiIndex series
date_source_start = pd.to_datetime('1992-01-01') ### Start date for source vectors: lower bound for every lookback window

### Standardization parameters:
list_truncate = [2.5, 2.0] ### Truncation boundaries: one standardization pass is performed per each element
bool_within_market = True ### Standardization option: passed as within_market flag (standardizing inside each 'Market' group)

### ISON filtering options:
list_ison = ['DM', 'EM', 'FM'] ### Regions filter: only these 'Market' values are kept, so NaN region values are dropped
list_countries_to_exclude = ['VE'] ### Countries dropped from the factor before standardization

In [5]:
### DEFINING EXPONENTIAL WEIGHT (PART OF THE PRODUCT CODE)

def exp_weight_single(halflife_len = 3, num_element = 0):
    """
    Exponential decay weight of a single element.

    Parameters:
        halflife_len: number of steps after which the weight is halved; rounded to integer with built-in round() before use
        num_element: distance of the element (in steps) from the zero-lag element, which has weight 1.0
    Returns:
        float weight equal to 0.5 ** (num_element / round(halflife_len))
    Raises:
        ZeroDivisionError when halflife_len is rounded to zero
    """
    ### Decay multiplier for the single step:
    num_step_decay = math.exp(math.log(0.5) / round(halflife_len))
    ### Multiplier raised to the element number power (through log/exp pair):
    return math.exp(num_element * math.log(num_step_decay))

In [6]:
### DEFINING WEIGHTED AVERAGE (PART OF THE PRODUCT CODE)

def weighted_average(ser_data, ser_weight = False, int_min_count = 0):
    """
    Average of the non-NaN values of ser_data, optionally weighted.

    Parameters:
        ser_data: pd.Series of values (NaN values are ignored)
        ser_weight: pd.Series of weights, indexed by (at least) the labels of non-NaN values of ser_data;
            any bool value (default False) means "no weights" and leads to the simple mean
        int_min_count: the result is calculated only if the number of non-NaN values is strictly greater than this threshold
    Returns:
        float average, or np.nan when there are too few observations or the weights sum is zero
    """
    ### Checking for data presence (np.nan is used instead of np.NaN alias, that is absent in NumPy 2.x):
    if not (ser_data.count() > int_min_count):
        return np.nan
    ### Simple average, when weights are absent:
    if isinstance(ser_weight, bool):
        return np.nanmean(ser_data.values)
    ### Data and weights filtering (NaN data points are dropped once and reused):
    ser_data_valid = ser_data.dropna()
    arr_weight = ser_weight[ser_data_valid.index].values
    ### Checking for weights presence:
    num_weight_sum = np.nansum(arr_weight)
    if not num_weight_sum:
        return np.nan
    ### Weighted average calculating (elements with NaN weight are skipped both in numerator and denominator):
    return np.nansum(ser_data_valid.values * arr_weight) / num_weight_sum

In [7]:
### DEFINING MULTI-STEP STANDARDIZATION FOR SEPARATE SERIES (TO BE IGNORED IN PRODUCT CODE)

def multistep_standartize(ser_data_source, arr_truncate, ser_weight = False, reuse_outliers = False, center_result = True, full_result = False):
    """
    Multi-step standardization of a single series: one z-scoring and truncating pass per each boundary.

    On each pass the working data is demeaned with the weighted mean, divided by the plain (unweighted)
    sample standard deviation and then truncated at +/- boundary.

    Parameters:
        ser_data_source: pd.Series to standardize (NaN values are dropped before calculations)
        arr_truncate: iterable of truncation boundaries, one per pass
        ser_weight: pd.Series of weights for mean calculating; any bool value means equal weights
        reuse_outliers: if True, truncated values stay in the working data for the next passes;
            if False, they are fixed at the boundary and excluded from the next passes
        center_result: if True, the weighted mean of the final vector is subtracted from it
        full_result: if True, per-pass means and standard deviations are returned too
    Returns:
        pd.Series named '<source name>_standartized', or tuple (series, list of means, list of stds) when full_result is True
    """
    ### Arrays of iterations properties:
    arr_mean = []
    arr_std = []
    ### Adding equal weights, when weights are absent:
    if isinstance(ser_weight, bool):
        ser_weight = pd.Series(1, index = ser_data_source.index)
        ser_weight.name = 'Weight'
    ### Workhorse and resulting data vectors initialising (np.nan instead of np.NaN alias, that is absent in NumPy 2.x):
    ser_data_iter = ser_data_source.dropna()
    ser_weight_iter = ser_weight.copy()
    ser_data_full = pd.Series(np.nan, index = ser_data_iter.index)
    ### Looping by boundaries array:
    for num_bound_iter in arr_truncate:
        ### Properties calculating and saving:
        num_mean_iter = weighted_average(ser_data_iter, ser_weight_iter)
        num_std_iter = ser_data_iter.std()
        arr_mean.append(num_mean_iter)
        arr_std.append(num_std_iter)
        ser_data_iter = (ser_data_iter - num_mean_iter) / num_std_iter
        ### Standartizing:
        if reuse_outliers:
            ### Outliers are clipped to the signed boundary and stay in the working vector:
            ser_data_iter[ser_data_iter.abs() >= num_bound_iter] = np.sign(ser_data_iter) * num_bound_iter
        else:
            ### Saving to result and excluding from further calculations truncated values:
            ### NOTE(review): after the first pass the condition covers only a part of ser_data_full index; values truncated on the previous
            ### passes survive here only if in-place where() keeps the rows that are absent in the condition - confirm for the pandas version in use
            ser_data_full.where(ser_data_iter.abs() < num_bound_iter, np.sign(ser_data_iter) * num_bound_iter, inplace = True)
            ser_data_iter = ser_data_iter[ser_data_iter.abs() < num_bound_iter]
    ### Aggregating result:
    if (reuse_outliers):
        ser_data_full = ser_data_iter
    else:
        ### Values that were never truncated are taken from the working vector:
        ser_data_full[ser_data_iter.index] = ser_data_iter
    ### Centering result:
    if (center_result):
        ser_result = ser_data_full - weighted_average(ser_data_full, ser_weight)
    else:
        ser_result = ser_data_full
    ### Result output:
    ser_result.name = str(ser_data_source.name) + '_standartized'
    if (full_result):
        return (ser_result, arr_mean, arr_std)
    else:
        return ser_result

In [8]:
### DEFINING MULTI-STEP STANDARDIZATION BY MARKET FOR CROSS-SECTION (TO BE IGNORED IN PRODUCT CODE)

def ison_standartize(ser_to_manage, arr_truncate, ser_weight = False, reuse_outliers = False, center_result = True, full_result = False, within_market = False):
    """
    Multi-step standardization of a cross-section: for the whole vector at once or separately inside each 'Market' group.

    Parameters:
        ser_to_manage: pd.Series to standardize; 'Market' has to be a groupable index level when within_market is True
        arr_truncate, ser_weight, reuse_outliers, center_result, full_result: passed to multistep_standartize unchanged
        within_market: if True, multistep_standartize is applied to each 'Market' group (group keys are not added to the result)
    Returns:
        result of multistep_standartize for the full vector, or the groupby-apply result when within_market is True
    """
    ### Options shared by both modes:
    tup_options = (arr_truncate, ser_weight, reuse_outliers, center_result, full_result)
    ### Full universe standartizing:
    if not within_market:
        return multistep_standartize(ser_to_manage, *tup_options)
    ### Within market standartizing:
    return ser_to_manage.groupby(by = 'Market', group_keys = False).apply(multistep_standartize, *tup_options)

In [9]:
### DEFINING MULTI-STEP STANDARDIZATION BY MARKET FOR FULL FACTOR STACK (TO BE IGNORED IN PRODUCT CODE)

def single_factor_standartize_daily(ser_factor, arr_truncate, ser_weight = False, reuse_outliers = False, center_result = True, within_market = False):
    """
    Standardization of a factor vector with weights aligned to the factor index.

    Parameters:
        ser_factor: pd.Series of factor values
        arr_truncate: iterable of truncation boundaries, passed to ison_standartize
        ser_weight: pd.Series of weights joined to the factor by index (left join); any bool value means equal weights
        reuse_outliers, center_result, within_market: passed to ison_standartize unchanged
    Returns:
        pd.Series of standardized values carrying the name of ser_factor
    """
    ### Weights preparing:
    if isinstance(ser_weight, bool):
        ser_weight = pd.Series(1, index = ser_factor.index)
    ### Both vectors are renamed before joining: DataFrame.join needs a named Series and fails on overlapping column names,
    ### so unnamed weights or weights sharing the factor name would raise an error otherwise:
    df_factor = ser_factor.rename('Factor').to_frame().join(ser_weight.rename('Weight'), how = 'left')
    ### Multi-step standartizing (full_result option is always off here):
    ser_result = ison_standartize(df_factor['Factor'], arr_truncate, df_factor['Weight'], reuse_outliers, center_result, False, within_market)
    ### Results output:
    ser_result.name = ser_factor.name
    return ser_result

In [10]:
### DEFINING EXTRACTION UNIVERSE DATA FROM MS EXCEL SOURCE (TO BE IGNORED IN PRODUCT CODE)

def ison_membership_converting(str_path_universe, date_end, bool_daily = False, int_backfill_months = 0):
    """
    Loading the universe membership from MS Excel source and converting it to a (Date, Country) -> Market series.

    Parameters:
        str_path_universe: path to the workbook; the first sheet is read, its first two columns form the (Country, Date) index
            and the 'Region' column holds numeric region codes
        date_end: last date of the business-month-end range built for each country
        bool_daily: if True, the result is additionally resampled to business days inside each country with forward filling
        int_backfill_months: if non-zero, the earliest members of each region are extended into the past by this number of business month ends
    Returns:
        pd.Series named 'Market' with market labels ('DM', 'EM', 'FM' or NaN)
    """
    ### Defining business-month-end reindexation on country level:
    def country_modify(ser_raw_country, date_end):
        ### Country level is dropped, data is taken as last value of the month and put to business month ends:
        ser_res_country = ser_raw_country.droplevel(0).resample('MS').last().resample('BM').last()
        ### Extending to date_end with the last known value:
        range_country = pd.date_range(ser_res_country.index[0], date_end, freq = 'BM')
        return ser_res_country.reindex(range_country).ffill()
    ### Markets encoding table (np.nan instead of np.NaN alias, that is absent in NumPy 2.x):
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM', 0: np.nan}
    ### Loading source file:
    df_raw_universe = pd.read_excel(io = str_path_universe, sheet_name = 0, header = 0, parse_dates = True, index_col = [0, 1],
                                 na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    ### Converting source file (missing region codes become 0; chained call is used instead of in-place filling of a column slice):
    df_raw_universe.index.names = ['Country', 'Date']
    ser_raw_universe = df_raw_universe['Region'].fillna(0).rename('Market')
    ### By country reindexation and translation:
    ser_res_universe = ser_raw_universe.groupby('Country').apply(country_modify, date_end)
    ser_res_universe.index.names = ['Country', 'Date']
    ser_res_universe = ser_res_universe.replace(dict_markets).reorder_levels([1, 0]).sort_index()
    ### Expanding membership for primary regions members by backfilling:
    if int_backfill_months:
        ### List of regions:
        list_region = list(ser_res_universe.dropna().unique())
        ### Initialising of collection of series with backfilled data for each region:
        list_ison_backfill = []
        ### Regions looping:
        for iter_region in list_region:
            ### Defining start of region date (first element of the first (Date, Country) label):
            date_first_valid = ser_res_universe.loc[ser_res_universe == iter_region].first_valid_index()[0]
            ### Creating dates index to backfilling (start date itself is excluded):
            idx_date_backfill = pd.date_range(end = date_first_valid, periods = int_backfill_months + 1, freq = 'BM')[: -1]
            ### Creating primary countries index to backfilling:
            idx_region_backfill = ser_res_universe.loc[ser_res_universe == iter_region].loc[date_first_valid, All].index.get_level_values('Country')
            ### Creating full index:
            idx_ison_backfill = pd.MultiIndex.from_product([idx_date_backfill, idx_region_backfill])
            ### Series with backfilled data:
            list_ison_backfill.append(pd.Series(iter_region, index = idx_ison_backfill))
        ### Combination of backfilled series and original ISON data (original values have priority):
        ser_res_universe = ser_res_universe.combine_first(pd.concat(list_ison_backfill, axis = 0)).sort_index()
        ser_res_universe.index.names = ['Date', 'Country']
    ### Converting to daily frequency:
    if bool_daily:
        ser_res_universe = ser_res_universe.reset_index('Country').groupby('Country').resample('B').ffill()['Market'].swaplevel().sort_index()
    ### Results output:
    ser_res_universe.name = 'Market'
    return ser_res_universe

In [11]:
### DEFINING MEAN MOMENTUM FUNCTION (PART OF THE PRODUCT CODE)

def weighted_mean(ser_country_source, list_weight, int_mean_min):
    """
    Weighted mean of a series with the weights taken from the tail of the weights list.

    Parameters:
        ser_country_source: pd.Series of values
        list_weight: list of weights; its last len(ser_country_source) elements are aligned with the series values in order
        int_mean_min: threshold passed to weighted_average (number of non-NaN values has to be strictly greater)
    Returns:
        float weighted mean; np.nan for empty input, for too few observations or when KeyError is raised during calculation
    """
    int_length = len(ser_country_source.index)
    ### Empty input: list_weight[-0 : ] would return the whole list, so the weights series could not be built for the empty index:
    if (int_length == 0):
        return np.nan
    try:
        ### Weight setting:
        ser_weight = pd.Series(list_weight[-int_length : ], ser_country_source.index)
        ### Weighted mean calculation:
        return weighted_average(ser_country_source, ser_weight, int_mean_min)
    except KeyError:
        ### NOTE(review): presumably guards the label-based weights lookup inside weighted_average - confirm
        return np.nan

In [12]:
### DEFINING FILTERING DATE INTERVAL, REINDEXING FILTERED VECTOR TO BUSINESS DATES/MONTHS FREQUENCY AND FILLING DATA (PART OF THE PRODUCT CODE)

def get_country_interval(ser_filtered, date_start, date_end, int_fill_limit = 1):
    """
    Reindexing a single country vector to business days of the interval with limited forward filling.

    Parameters:
        ser_filtered: pd.Series with MultiIndex containing 'Country' level and a dates level
        date_start, date_end: boundaries of the resulting business days index
        int_fill_limit: maximum number of consecutive NaN values to forward fill
    Returns:
        pd.Series indexed by business days from date_start to date_end, index named 'Date'
    """
    ### Drop level to avoid stack/unstack manipulations:
    ser_filtered = ser_filtered.droplevel('Country')
    ### Business day filter:
    idx_date_business = pd.date_range(start = date_start, end = date_end, freq = 'B')
    ### Frequency checker (both dates are None for a vector without valid values, so the check is skipped explicitly):
    date_first = ser_filtered.first_valid_index()
    date_last = ser_filtered.last_valid_index()
    if (date_first is not None) and (date_last is not None):
        ### Average number of calendar days per single valid observation:
        num_days_per_obs = (date_last - date_first).days / len(ser_filtered.dropna().index)
        ### Resampling to business month for the data that is sparser than daily:
        if (num_days_per_obs > 3.0):
            ser_filtered = ser_filtered.resample('MS').last().resample('BM').last()
    ### Reindexation and forward filling (ffill() is used instead of the deprecated fillna(method = 'ffill')):
    ser_reindexed = (ser_filtered.resample('B').ffill()
                                 .ffill(limit = int_fill_limit)
                                 .reindex(idx_date_business)
                                 .ffill(limit = int_fill_limit))
    ### Results output:
    ser_reindexed.index.names = ['Date']
    return ser_reindexed

In [13]:
### DATA LOADING (TO BE IGNORED IN PRODUCT CODE)

### REER dataset from the Bloomberg HDF5 file; downstream cells drop its 'Source' index level and group it by 'Country':
ser_reer = pd.read_hdf(str_path_bb_hdf, key = str_key_reer_sourced) ### Real Effective Exchange Rate to use as a factor data source
### Universe membership up to the measures end date (the date string is parsed to datetime first):
ser_ison_daily = ison_membership_converting(str_path_universe, datetime.strptime(str_measure_date_end, '%Y-%m-%d'), bool_daily = True) ### ISON universe, bus-daily vector

In [17]:
### TEMP

### NOTE(review): debugging leftover - apply() is used here only for the print side effect, so each country vector is dumped
### to the output and the returned series holds None for every country; consider removing this cell before sharing
ser_ison_daily.groupby('Country').apply(lambda ser_country: print(ser_country))

Country  Date      
AE       2007-01-31    504.0
         2014-05-30      0.0
         2014-06-30     57.0
Name: AE, dtype: float64
Country  Date      
AR       1998-10-30     57.0
         2001-03-30      0.0
         2002-10-31     57.0
         2010-11-30    504.0
         2019-05-31     57.0
Name: AR, dtype: float64
Country  Date      
AT       1994-01-31    50.0
Name: AT, dtype: float64
Country  Date      
AU       1994-01-31    50.0
Name: AU, dtype: float64
Country  Date      
BD       2007-01-31    504.0
Name: BD, dtype: float64
Country  Date      
BE       1994-01-31    50.0
Name: BE, dtype: float64
Country  Date      
BG       2007-01-31    504.0
Name: BG, dtype: float64
Country  Date      
BH       2007-01-31    504.0
Name: BH, dtype: float64
Country  Date      
BR       1998-10-30    57.0
Name: BR, dtype: float64
Country  Date      
BW       2007-01-31    504.0
Name: BW, dtype: float64
Country  Date      
CA       1994-01-31    50.0
Name: CA, dtype: float64
Country  Date    

Country
AE    None
AR    None
AT    None
AU    None
BD    None
      ... 
UG    None
US    None
VN    None
ZA    None
ZM    None
Name: Market, Length: 84, dtype: object

In [15]:
### TESTING: LONG-TERM REER BASED FACTOR CALCULATION: DIRECT DAILY SCRIPT SINGLE RUN FOR DEBUGGING (TO BE IGNORED IN PRODUCT CODE)

iter_date = idx_test_daily_date_range[580] # '1996-04-22'
#iter_date = pd.to_datetime('2006-12-29')
#iter_date = idx_test_daily_date_range[6586] # '2019-04-30'

### Momentum parameters:
int_mom_hl = 520 ### Without rounding here
int_mom_win = 1300
int_mom_min = 520
### Weights array (reversed to put the biggest weight at the last, i.e. the most recent, element):
list_weight = list(map(lambda iter_num: exp_weight_single(int_mom_hl, iter_num), range(int_mom_win)))[::-1]
### Source loading:
int_fill_limit = 66
### Window and loading start dates, not earlier than the source start (built-in max for the pair of scalar dates):
date_start_win = max(iter_date - pd.tseries.offsets.BDay(int_mom_win - 1), date_source_start)
date_start_loc = max(iter_date - pd.tseries.offsets.BDay(int_mom_win + int_fill_limit), date_source_start)
### Datasource for particular date (should be substituted by SQL query):
ser_iter_source_raw = ser_reer.droplevel('Source').loc[pd.date_range(start = date_start_loc, end = iter_date, freq = 'D'), All]
### Data source resampling:
ser_iter_source = ser_iter_source_raw.groupby('Country').apply(get_country_interval, date_start_win, iter_date, int_fill_limit).swaplevel().sort_index()
### Source performing (one-step relative change inside each country):
ser_iter_delta = ser_iter_source.groupby('Country').diff() / ser_iter_source.groupby('Country').shift()
### Infinite values are cleared (np.nan instead of np.NaN alias, that is absent in NumPy 2.x):
ser_iter_delta = ser_iter_delta.replace([np.inf, -np.inf], np.nan)
### Momentum factor calculation:
ser_iter_factor = ser_iter_delta.groupby('Country').apply(weighted_mean, list_weight, int_mom_min)
### Date index recreating:
ser_iter_factor = pd.concat({iter_date: ser_iter_factor}, names = ['Date'])
### ISON Universe for the date loading (should be substituted by SQL query):
ser_ison_iter_date = ser_ison_daily.loc[iter_date, All]
### Factor ISONing:
ser_iter_factor = ser_iter_factor.to_frame().join(ser_ison_iter_date, how = 'left').set_index('Market', append = True).squeeze()
ser_iter_factor.name = 'Factor'
### Regions clearing:
ser_iter_factor = ser_iter_factor.loc[All, All, list_ison]
### Countries filtering:
ser_iter_factor = ser_iter_factor.drop(list_countries_to_exclude, level = 'Country')
### Standalone factor standartizing and sign changing:
ser_iter_factor_std = -single_factor_standartize_daily(ser_iter_factor, list_truncate, within_market = bool_within_market)
ser_iter_factor_std.name = 'Factor'
#### Interaction variable applying:
#ser_iter_multiplied = ser_iter_factor_std * ser_iter_date_concept
#### Multiplied factor restandartizing:
#ser_iter_multiplied_std = single_factor_standartize_daily(ser_iter_multiplied, list_truncate, within_market = bool_within_market)
#ser_iter_multiplied_std.name = 'Factor'
#### Saving result to collection:
#dict_iter_factor[iter_factor] = ser_iter_multiplied_std

KeyboardInterrupt: 

In [None]:
### TESTING: LONG-TERM REER BASED FACTOR CALCULATION: DIRECT DAILY SCRIPT SINGLE RUN FOR DEBUGGING (TO BE IGNORED IN PRODUCT CODE)



In [None]:
### DEFINING FACTOR CREATING FUNCTION (PART OF THE PRODUCT CODE)

def get_lt_reer_factor(iter_date, int_mom_hl = 520, int_mom_win = 1300, int_mom_min = 520, int_fill_limit = 66):
    """
    Long-term REER based factor for a single date: negated exponentially weighted mean of REER relative changes by country.

    The function reads module level objects ser_reer, date_source_start, All and str_lt_reer_raw_csv
    and appends the result to the csv file as a side effect.

    Parameters:
        iter_date: date to calculate the factor for (end of the lookback window)
        int_mom_hl: half-life of exponential weights, in business days
        int_mom_win: lookback window length, in business days
        int_mom_min: threshold of valid changes number passed to weighted_mean (the number has to be strictly greater)
        int_fill_limit: forward filling limit for source data, also used to extend the loading interval
    Returns:
        pd.Series named 'Factor', indexed by 'Country'
    """
    ### Local import: os is imported at the notebook level only inside the cell, that is marked to be ignored in product code:
    import os
    ### Weights array (reversed to put the biggest weight at the last, i.e. the most recent, element):
    list_weight = [exp_weight_single(int_mom_hl, iter_num) for iter_num in range(int_mom_win)][::-1]
    ### Source load parameters: window and loading start dates, not earlier than the source start:
    date_start_win = max(iter_date - pd.tseries.offsets.BDay(int_mom_win - 1), date_source_start)
    date_start_loc = max(iter_date - pd.tseries.offsets.BDay(int_mom_win + int_fill_limit), date_source_start)
    ### Datasource for particular date (should be substituted by SQL query):
    ser_iter_source_raw = ser_reer.droplevel('Source').loc[pd.date_range(start = date_start_loc, end = iter_date, freq = 'D'), All]
    ### Data source resampling:
    ser_iter_source = ser_iter_source_raw.groupby('Country').apply(get_country_interval, date_start_win, iter_date, int_fill_limit).swaplevel().sort_index()
    ### Source performing (one-step relative change inside each country):
    ser_iter_delta = ser_iter_source.groupby('Country').diff() / ser_iter_source.groupby('Country').shift()
    ### Infinite values are cleared (np.nan instead of np.NaN alias, that is absent in NumPy 2.x):
    ser_iter_delta = ser_iter_delta.replace([np.inf, -np.inf], np.nan)
    ser_iter_delta.index.names = ['Date', 'Country']
    ### Momentum factor calculation:
    ser_iter_factor = ser_iter_delta.groupby('Country').apply(weighted_mean, list_weight, int_mom_min)
    ser_iter_factor.name = 'Factor'
    ### Sign changing:
    ser_iter_factor = -ser_iter_factor
    ### Add to csv file (should be substituted by SQL query); header is written only when the file is created:
    ser_iter_factor_csv = pd.concat({iter_date: ser_iter_factor}, names = ['Date'])
    ser_iter_factor_csv.to_csv(str_lt_reer_raw_csv, mode = 'a', header = not os.path.exists(str_lt_reer_raw_csv))
    ### Results output:
    return ser_iter_factor

In [None]:
### TESTING: PERFORMING FACTOR FOR DATE RANGE (TO BE IGNORED IN PRODUCT CODE)

### Removing csv files before loop running (get_lt_reer_factor appends to the file on each call):
if os.path.exists(str_lt_reer_raw_csv):
    os.remove(str_lt_reer_raw_csv)
### List for date vectors collecting:
list_factor_by_date = []
### Local testing parameters:
int_interval = 10 ### Interval of progress displaying
date_start = datetime.utcnow() ### Start time of calculations
date_control = datetime.utcnow() ### Control time to display
idx_test_date_range = idx_test_monthly_date_range ### Can be sliced for a quick run, e.g. idx_test_monthly_date_range[311 : 320]
### Test performing:
print('Start time:', date_start)
for iter_num, iter_date in enumerate(idx_test_date_range):
    ### Progress printing on each int_interval-th date:
    if (iter_num % int_interval == 0):
        if iter_num:
            ### Elapsed time is measured once, so both printed figures are based on the same value:
            timedelta_interval = datetime.utcnow() - date_control
            print('Counter marker:', iter_num, '/', len(idx_test_date_range))
            print('Time interval since last marker:', timedelta_interval)
            print('Average interval for single date:', str(timedelta_interval / int_interval))
        date_control = datetime.utcnow()
    ### Long-term REER based factor calculating:
    ser_iter_factor = get_lt_reer_factor(iter_date)
    ### Date index recreating:
    ser_iter_factor = pd.concat({iter_date: ser_iter_factor}, names = ['Date'])
    ### ISON Universe for the date loading (should be substituted by SQL query):
    ser_ison_iter_date = ser_ison_daily.loc[iter_date, All]
    ### Factor ISONing:
    ser_iter_factor = ser_iter_factor.to_frame().join(ser_ison_iter_date, how = 'left').set_index('Market', append = True).squeeze()
    ### Regions clearing:
    ser_iter_factor = ser_iter_factor.loc[All, All, list_ison]
    ### Countries filtering:
    ser_iter_factor = ser_iter_factor.drop(list_countries_to_exclude, level = 'Country')
    ### Standalone factor standartizing:
    ser_iter_factor_std = single_factor_standartize_daily(ser_iter_factor, list_truncate, within_market = bool_within_market)
    ser_iter_factor_std.name = 'Factor'
    ### Collecting date result for comparison with research mode results:
    list_factor_by_date.append(ser_iter_factor_std)
date_finish = datetime.utcnow()
### Overall statistics printing:
print('Finish time:', date_finish)
print('Full interval:', date_finish - date_start)
print('Average interval for single date:', str((date_finish - date_start) / len(idx_test_date_range)))
### Results aggregating for comparison with research mode results:
ser_factor_full = pd.concat(list_factor_by_date, axis = 0).sort_index()
ser_factor_full.name = 'Combo Factor'

In [None]:
### TESTING: PERFORMING FACTOR FOR DATE RANGE (TO BE IGNORED IN PRODUCT CODE)

### Exporting the standardized factor stack, collected by the date range loop, to the MS Excel file set in internal parameters:
ser_factor_full.to_excel(str_lt_reer_std_xlsx, merge_cells = False)