In [1]:
### CONVERTING BLOOMBERG RAW DATA TO DATAFRAMES AND SERIES

In [2]:
### INITIALIZATION

import pandas as pd
import numpy as np
from datetime import date

In [3]:
### GENERAL DATA PREPARATION

### Constants:
### Full-range slicer, used as a "take everything" placeholder for a level in multi-level .loc[] lookups:
All = slice(None)
### Data source:
### Relative paths to the raw Bloomberg MS Excel workbooks (one workbook per concept):
str_path_bb_tr_source = 'Data_Files/Source_Files/Bloomberg_TR.xlsx'
str_path_bb_mmr_source = 'Data_Files/Source_Files/Bloomberg_MMR.xlsx'
str_path_bb_fx_source = 'Data_Files/Source_Files/Bloomberg_FX.xlsx'
str_path_bb_mcap_source = 'Data_Files/Source_Files/Bloomberg_MCap.xlsx'
str_path_bb_eer_source = 'Data_Files/Source_Files/Bloomberg_EER.xlsx'
str_path_bb_xcra_source = 'Data_Files/Source_Files/Bloomberg_XCRA.xlsx'
### Ret TR options:
### Sheet name slice bounds: tot_ret_ind_export() keeps str_sheet_name[int_type_start : int_type_end] as the index type label:
int_type_start = 8
int_type_end = -6
### Boundaries of the business-month-end date grid built by tot_ret_ind_converter():
date_start = date(1992, 1, 1)
date_end = date(2020, 4, 30)
### MMR options:
### Replacement map applied by mmr_export() after the country codes are moved to a column:
### NOTE(review): looks like a fix for a mistyped country code ('Cl' with a lower-case L -> 'CI') - confirm against the workbook
dict_mmr_replace = {'Cl': 'CI'}
### EER options:
### Workbook sheet name -> '<Type> <NN-Source>' label; sheets that are not listed here are skipped by eer_export():
### NOTE(review): sheet 'CTG REER B D (2)' is labelled as NEER - presumably it really holds NEER data under an auto-generated sheet name; confirm
dict_eer_sources = {'JPM REER B C D': 'REER 01-JPM', 'CTG REER B D': 'REER 02-CTG', 'IMF REER B M': 'REER 03-IMF', 'BIS REER B M': 'REER 04-BIS',
                    'JPM NEER B D': 'NEER 01-JPM', 'CTG REER B D (2)': 'NEER 02-CTG', 'BIS NEER B D': 'NEER 03-BIS'}
### XCRA options:
### Rolling window length and minimal number of observations for the Imports / Exports moving average in xcra_export():
int_rolling_win_max = 12
int_rolling_win_min = int_rolling_win_max // 2
### Results saving:
### Target HDF5 file and one storage key per prepared dataset:
str_path_bb_hdf = 'Data_Files/Source_Files/Bloomberg_prepared.h5'
str_key_ret = 'bb_ret'
str_key_mmr = 'bb_mmr'
str_key_fx = 'bb_fx'
str_key_mcap = 'bb_mcap'
str_key_reer = 'bb_reer'
str_key_neer = 'bb_neer'
str_key_xcra = 'bb_xcra'

In [4]:
### DEFINING EXTRACTION UNIVERSE DATA FROM GENERAL MS EXCEL SOURCE

def market_membership(convert_to_daily = False):
    """
    Load the universe membership table and return the market region of each country by date.

    Parameters:
        convert_to_daily: when truthy, the monthly observations of each country are first expanded
            to a gapless business-month-end grid and then forward filled to business-day frequency.

    Returns:
        Series named 'Market' holding the region labels ('DM', 'EM' or 'FM'); indexed by
        (Date, Country) for the monthly variant, and by (Country, Date) sorted by both levels
        for the daily variant.
    """
    ### Local import keeps the function self-contained:
    import pandas as pd
    ### Local constants:
    str_universe_path = 'Data_Files/Source_Files/sample_universe.xlsx' ### Path for membership source
    str_universe_sheet = 'universe_joined'
    list_regions_kept = ['DM', 'FM', 'EM']
    dict_region_codes = {50 : 'DM', 57 : 'EM', 504 : 'FM'}
    list_na_markers = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                       '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null']
    ### Helper: stretching single country history to all business month ends between its first and last valid dates:
    def expand_to_month_ends(ser_country):
        idx_month_ends = pd.date_range(ser_country.first_valid_index(), ser_country.last_valid_index(), freq = 'BM')
        return ser_country.reindex(idx_month_ends)
    ### Reading the source sheet:
    df_raw = pd.read_excel(io = str_universe_path, sheet_name = str_universe_sheet, skiprows = [0, 2], header = 0, parse_dates = True,
                           na_values = list_na_markers, keep_default_na = False)
    ### Keeping and renaming the needed columns only:
    df_members = df_raw.loc[slice(None), ['dates', 'region', 'ctry']]
    df_members.columns = ['Date', 'Market', 'Country']
    ### Turning the table to a sorted series with region codes replaced by region labels:
    ser_members = df_members.set_index(['Date', 'Country']).squeeze()
    ser_members = ser_members.sort_index(level = [0, 1])
    ser_members = ser_members.replace(dict_region_codes)
    ### Dropping observations outside of the needed regions:
    ser_market_membership = ser_members[ser_members.isin(list_regions_kept)]
    ### Monthly variant is ready at this point:
    if not convert_to_daily:
        return ser_market_membership
    ### Showing absent months explicitly, so they are not covered by the daily forward filling:
    ser_monthly = ser_market_membership.groupby('Country').apply(lambda ser_group: expand_to_month_ends(ser_group.droplevel(1)))
    ser_monthly.index.names = ['Country', 'Date']
    ser_monthly = ser_monthly.swaplevel()
    ### Resampling each country to business days with forward filling:
    df_daily = ser_monthly.reset_index('Country').groupby('Country').resample('B').ffill()
    ser_daily = df_daily.drop('Country', axis = 1).squeeze()
    ### Results output:
    return ser_daily.swaplevel().sort_index(level = ['Country', 'Date'])

In [5]:
### DEFINING TOTAL RETURN INDEXES LOADING

def tot_ret_ind_export(str_path_bb_tr_source):
    """
    Load every sheet of the total return workbook and stack them into a single series.

    Parameters:
        str_path_bb_tr_source: path to the MS Excel workbook with total return indexes.

    Returns:
        Series indexed by (Currency, Type, Date, Country). Currency is 'USD' for sheets whose name
        contains 'USD' and 'LOC' otherwise; Type is the sheet name slice defined by the notebook
        level constants int_type_start / int_type_end; dates are business month ends.
    """
    list_na_markers = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                       '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null', '#N/A Invalid Security']
    ### Reading all the sheets at once (dictionary: sheet name -> table):
    dict_sheets = pd.read_excel(io = str_path_bb_tr_source, sheet_name = None, skiprows = list(range(27)), index_col = 0, header = 0, parse_dates = True,
                                na_values = list_na_markers, keep_default_na = False)
    ### Collecting one labelled series per sheet:
    list_labelled = []
    for str_sheet_name, df_sheet in dict_sheets.items():
        ### Descriptive labels taken from the sheet name:
        str_currency = 'USD' if ('USD' in str_sheet_name) else 'LOC'
        str_ind_type = str_sheet_name[int_type_start : int_type_end]
        ### Country columns to index level:
        ser_sheet = df_sheet.stack(dropna = False)
        ser_sheet.index.names = ['Date', 'Country']
        ### Moving possible calendar month ends to business month ends (through month starts):
        df_monthly = ser_sheet.unstack('Country').resample('MS').last().resample('BM').last()
        ser_monthly = df_monthly.stack('Country', dropna = False).squeeze()
        ### Appending source description levels:
        df_described = ser_monthly.to_frame().assign(Currency = str_currency, Type = str_ind_type)
        list_labelled.append(df_described.set_index(['Type', 'Currency'], append = True).squeeze())
    ### Consolidating and ordering levels as (Currency, Type, Date, Country):
    return pd.concat(list_labelled).reorder_levels([3, 2, 0, 1])

In [6]:
### DEFINING TOTAL RETURN INDEXES CONVERSION TO MONTHLY RETURNS

def tot_ret_ind_converter(str_path_bb_tr_source, date_start, date_end):
    """
    Convert raw total return indexes to monthly returns and combine index types to a single column.

    Parameters:
        str_path_bb_tr_source: path to the MS Excel workbook, passed to tot_ret_ind_export().
        date_start, date_end: boundaries of the business-month-end date grid.

    Returns:
        DataFrame indexed by (Currency, Date, Country) with one column of returns per index type
        and the additional column 'Combined', which takes 'MSCI' first, then 'Old MSCI', then 'Main Index'.
    """
    ### Levels that identify a single index history:
    list_history_levels = ['Currency', 'Type', 'Country']
    ### Data loading:
    ser_tr_raw_data = tot_ret_ind_export(str_path_bb_tr_source)
    ### Data reindexing and forward filling:
    index_ret_dates = pd.date_range(date_start, date_end, freq = 'BM')
    ser_tr_reindex = ser_tr_raw_data.reindex(index_ret_dates, level = 'Date')
    ### Remembering originally empty points to blank their returns after forward filling:
    idx_tr_empty = ser_tr_reindex.loc[ser_tr_reindex.isna()].index
    ser_tr_reindex = ser_tr_reindex.groupby(list_history_levels).ffill().sort_index(level = list_history_levels + ['Date'])
    ### Returns calculating (groupby shift keeps the original index under any pandas version, unlike apply with default group_keys):
    ser_ret_reindex = ser_tr_reindex / ser_tr_reindex.groupby(list_history_levels).shift(1) - 1
    ### Dropping zero returns, generated by forward filling after reindexation (np.nan: the np.NaN alias is absent in NumPy 2):
    ser_ret_reindex.loc[idx_tr_empty] = np.nan
    ### Dropping zero returns, generated by bloomberg request:
    ser_ret_reindex.loc[ser_ret_reindex == 0] = np.nan
    ### Returns combining:
    df_ret_reindex = ser_ret_reindex.unstack('Type')
    df_ret_reindex['Combined'] = df_ret_reindex.groupby(['Currency', 'Country'], group_keys = False)\
                                               .apply(lambda iter_group: iter_group['MSCI'].combine_first(iter_group['Old MSCI'])
                                                                                           .combine_first(iter_group['Main Index']))
    ### Results output:
    return df_ret_reindex

In [7]:
### DEFINING MONEY MARKET RATES LOADING

def mmr_export(str_path_bb_mmr_source, dict_mmr_replace):
    """
    Load money market rates, combine primary and secondary tickers and return business-daily rates.

    Parameters:
        str_path_bb_mmr_source: path to the MS Excel workbook with money market rates.
        dict_mmr_replace: replacement map applied after the country codes are moved to a column.

    Returns:
        Series named 'MMR' indexed by (Date, Country); source values are divided by 100.
    """
    list_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a',
                      'nan', 'null', '#N/A Invalid Security']
    ### Defining multipurpose country-level function:
    ###   1) Resampling to BM for monthly tickers
    ###   2) Reindexing for BD
    ###   3) Forward filling tickers before combining
    ###   4) Combining tickers
    def mmr_convertion(df_mmr_country, ser_mmr_freq, idx_mmr_date_range):
        ### Forward filling limit (in rows of the business-daily date range):
        num_fill_limit = 23
        ### Country code is the second index level of any row of the group:
        str_country_code = df_mmr_country.index[0][1]
        ### Dictionary for source frequencies (priority -> frequency letter, empty string for absent source):
        dict_freq = dict(ser_mmr_freq.loc[str_country_code, All].droplevel('Country'))
        ### Dictionary for transitional results:
        dict_source = {}
        ### Looping over sources inside country data:
        for num_priority in dict_freq:
            if (dict_freq[num_priority] == 'M'):
                ### Resampling to Business-Month-Ends for monthly frequency (then reindexing for proper date range):
                dict_source[num_priority] = df_mmr_country.loc[All, num_priority].droplevel('Country')\
                                                          .resample('MS').last().resample('BM').last().reindex(idx_mmr_date_range)
            elif (dict_freq[num_priority] == 'D'):
                ### Reindexing for proper date range for not monthly frequencies:
                dict_source[num_priority] = df_mmr_country.loc[All, num_priority].droplevel('Country').reindex(idx_mmr_date_range)
        ### Limited forward filling for primary source (longer gaps stay open for the secondary source):
        ser_mmr_country = dict_source[1].ffill(limit = num_fill_limit)
        ### Combining (if we have secondary source to combine with):
        if (dict_freq[2]):
            ### Combining sources (with preliminary forward filling for secondary source):
            ser_mmr_country = ser_mmr_country.combine_first(dict_source[2].ffill(limit = num_fill_limit))
        ### Results output (remaining gaps are forward filled without limit):
        ser_mmr_country = ser_mmr_country.to_frame().assign(Country = str_country_code).set_index('Country', append = True).squeeze().ffill()
        return ser_mmr_country
    ### Loading raw excel source (third sheet row is skipped here and read separately below as the frequency row):
    df_mmr_source = pd.read_excel(io = str_path_bb_mmr_source, index_col = 0, header = [0, 1], skiprows = [2], parse_dates = True,
                                  na_values = list_na_values, keep_default_na = False)
    df_mmr_stacked = df_mmr_source.stack(['Code', 'Priority'], dropna = False).unstack('Priority')
    df_mmr_stacked.index.names = ['Date', 'Country']
    ### Loading MMR source frequencies:
    df_mmr_freq = pd.read_excel(io = str_path_bb_mmr_source, index_col = 0, header = [0, 1], nrows = 1, parse_dates = True,
                                na_values = list_na_values, keep_default_na = False)
    ### Absent frequencies become empty strings (np.nan: the np.NaN alias is absent in NumPy 2):
    ser_mmr_freq = df_mmr_freq.stack(['Code', 'Priority'], dropna = False).droplevel(0).replace(np.nan, '')
    ser_mmr_freq.index.names = ['Country', 'Priority']
    ### Date range constructing:
    idx_mmr_dates = df_mmr_stacked.index.get_level_values(0).unique()
    idx_mmr_date_range = pd.date_range(idx_mmr_dates.min(), idx_mmr_dates.max(), freq = 'B')
    ### Multi-purpose mmr raw data convertion:
    ser_mmr_combined = df_mmr_stacked.groupby('Country', group_keys = False).apply(mmr_convertion, ser_mmr_freq, idx_mmr_date_range).squeeze()
    ser_mmr_combined = ser_mmr_combined.reset_index('Country').replace(dict_mmr_replace).set_index('Country', append = True).squeeze()
    ser_mmr_combined = ser_mmr_combined / 100.00
    ser_mmr_combined.name = 'MMR'
    ser_mmr_combined.index.names = ['Date', 'Country']
    ### Results output:
    return ser_mmr_combined

In [8]:
### DEFINING FOREIGN EXCHANGE RATES LOADING

def fx_export(str_path_bb_fx_source, str_code = 'Country'):
    """
    Load exchange rates, add a constant rate of 1 for ('US', 'USD') and return business-daily rates divided by their factors.

    Parameters:
        str_path_bb_fx_source: path to the MS Excel workbook with exchange rates.
        str_code: 'Country' keeps the country codes as identifiers; any other value keeps the
            currency codes (duplicated currency entries are dropped, the first one is kept).

    Returns:
        Series indexed by (Date, <str_code>), reindexed to business days, forward filled
        and divided by the factor read from the first data row of the workbook.
    """
    list_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                      '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null', '#N/A Invalid Security']
    ### Defining code-level function for exchange rate division by factor:
    def fx_convertion(ser_fx_code, ser_fx_factor, idx_fx_date_range):
        ### Exchange rate factor extracting (local name differs from the outer str_code parameter to avoid shadowing):
        str_group_code = ser_fx_code.index[0][1]
        num_factor = ser_fx_factor[str_group_code]
        ### Reindexing, forward filling and dividing exchange rate:
        ser_fx_converted = ser_fx_code.droplevel('Code').reindex(idx_fx_date_range).ffill() / num_factor
        return ser_fx_converted
    ### Loading raw excel source:
    df_fx_source = pd.read_excel(io = str_path_bb_fx_source, sheet_name = 'FX Data', skiprows = list(range(2, 8)), index_col = 0, header = [0, 1], parse_dates = True,
                                 na_values = list_na_values, keep_default_na = False)
    ser_fx_data = df_fx_source.stack([0, 1])
    ### Adding USD rates:
    ser_fx_usd = pd.Series(1, index = pd.MultiIndex.from_product([ser_fx_data.index.get_level_values(0).unique(), ['US'], ['USD']], names = ser_fx_data.index.names))
    ser_fx_data = pd.concat([ser_fx_data, ser_fx_usd]).sort_index()
    ### Dropping redundant index level:
    if (str_code == 'Country'):
        ser_fx_data = ser_fx_data.droplevel(2)
    else:
        ser_fx_data = ser_fx_data.droplevel(1)
        ser_fx_data = ser_fx_data[~ser_fx_data.index.duplicated()]
    ser_fx_data.index.names = ['Date', 'Code']
    ser_fx_data.name = 'FX'
    ### Extracting factor:
    ### NOTE(review): no sheet_name is passed here, so the first sheet of the workbook is read - confirm that it is 'FX Data'
    df_fx_factor = pd.read_excel(io = str_path_bb_fx_source, index_col = 0, header = [0, 1], nrows = 1,
                                 na_values = list_na_values, keep_default_na = False)
    ser_fx_factor = df_fx_factor.stack([0, 1]).droplevel(0)
    if (str_code == 'Country'):
        ser_fx_factor = ser_fx_factor.droplevel(1)
    else:
        ser_fx_factor = ser_fx_factor.droplevel(0)
        ser_fx_factor = ser_fx_factor[~ser_fx_factor.index.duplicated()]
    ser_fx_factor.index.names = ['Code']
    ser_fx_factor.name = 'Factor'
    ### Date range constructing:
    idx_fx_dates = ser_fx_data.index.get_level_values(0).unique()
    idx_fx_date_range = pd.date_range(idx_fx_dates.min(), idx_fx_dates.max(), freq = 'B')
    ### Results preparing (group keys become the first index level, then they are swapped with dates):
    ser_fx_ready = ser_fx_data.groupby('Code', group_keys = True).apply(fx_convertion, ser_fx_factor, idx_fx_date_range).squeeze().swaplevel()
    ser_fx_ready.index.names = ['Date', str_code]
    ### Results output:
    return ser_fx_ready

In [9]:
### DEFINING MARKET CAPITALIZATIONS LOADING

def mcap_export(str_path_bb_mcap_source):
    """
    Load market capitalizations, multiply them by exchange rates and attach market regions.

    Parameters:
        str_path_bb_mcap_source: path to the MS Excel workbook with market capitalizations.

    Returns:
        Series named 'Market Cap' indexed by (Date, Country, Market), rounded to 2 digits,
        forward filled and then back filled inside each country.

    Notes:
        The exchange rates workbook path is taken from the notebook level variable
        str_path_bb_fx_source, and the regions come from market_membership().
    """
    list_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a',
                      'nan', 'null', '#N/A Invalid Security']
    ### Loading raw excel source:
    df_mcap_source = pd.read_excel(io = str_path_bb_mcap_source, index_col = 0, header = 0, skiprows = list(range(28)), parse_dates = True,
                                   na_values = list_na_values, keep_default_na = False)
    ser_mcap_stacked = df_mcap_source.stack(dropna = False).squeeze()
    ser_mcap_stacked = ser_mcap_stacked.astype('float32')
    ser_mcap_stacked.index.names = ['Date', 'Country']
    ser_mcap_stacked.name = 'Market Cap'
    ### Loading market capitalization currencies (fifth row of the description part of the sheet):
    df_mcap_currency = pd.read_excel(io = str_path_bb_mcap_source, index_col = 0, header = 0, skiprows = list(range(10)), nrows = 5, parse_dates = True,
                                     na_values = list_na_values, keep_default_na = False)
    ser_mcap_currency = df_mcap_currency.iloc[4, All].squeeze()
    ser_mcap_currency.index.names = ['Country']
    ser_mcap_currency.name = 'Currency'
    ### Loading FX rates for currencies (path comes from the notebook level variable, not from the function arguments):
    ser_fx_curr = fx_export(str_path_bb_fx_source, 'Currency')
    ### Denominating to USD by Country -> Currency -> FX rate connection:
    df_mcap_usd = ser_mcap_stacked.to_frame().join(ser_mcap_currency, how = 'left').set_index('Currency', append = True).join(ser_fx_curr, how = 'left')
    ser_mcap_usd = df_mcap_usd['Market Cap'].mul(df_mcap_usd['FX']).droplevel('Currency')
    ser_mcap_usd.name = 'Market Cap'
    ### Adding ISON regions:
    ser_ison_membership = market_membership()
    ser_mcap_usd = ser_mcap_usd.to_frame().join(ser_ison_membership, how = 'left').set_index('Market', append = True).squeeze()
    ### Filling values for all-empty countries (only in ISON universe) with the mean of their region on the same date:
    list_empty_countries = ser_mcap_usd.groupby('Country').filter(lambda iter_country: iter_country.count() == 0).index.get_level_values(1).unique()
    ### group_keys = False keeps the (Date, Country, Market) index of the filled series under any pandas version, so the slicing below stays aligned:
    ser_mcap_filled = ser_mcap_usd.groupby(['Date', 'Market'], group_keys = False).apply(lambda iter_group: iter_group.fillna(iter_group.mean()))
    ser_mcap_usd.loc[All, list_empty_countries, All] = ser_mcap_filled.loc[All, list_empty_countries, All]
    ser_mcap_usd = ser_mcap_usd.round(2)
    ### Forward-filling all gaps (and back-filling the leading ones):
    ser_mcap_usd = ser_mcap_usd.groupby(['Country']).ffill().groupby(['Country']).bfill()
    ### Results output:
    return ser_mcap_usd

In [10]:
### DEFINING EXTRACTION BLOOMBERG EER DATA FROM GENERAL MS EXCEL SOURCE

def eer_export(str_path_bb_eer_source, dict_eer_sources, bool_unique_countries = True):
    """
    Load effective exchange rates from every listed sheet and optionally keep one source per country.

    Parameters:
        str_path_bb_eer_source: path to the MS Excel workbook with effective exchange rates.
        dict_eer_sources: sheet name -> '<Type> <NN-Source>' label; sheets that are not listed are skipped.
        bool_unique_countries: when False, the full series indexed by (Type, Source, Date, Country) is returned.

    Returns:
        Series with all sources if bool_unique_countries is False, otherwise tuple (ser_reer, ser_neer),
        where each country is taken from the first source (in sorted source label order) that has data for it.
    """
    ### Defining multipurpose country-level function:
    ###   1) Resampling to BM for monthly tickers
    ###   2) Reindexing for BD
    def eer_convertion(ser_eer_country, chr_eer_freq, idx_eer_date_range):
        ser_eer_dated = ser_eer_country.droplevel('Country')
        ### Resampling to Business-Month-Ends for monthly frequency only:
        if (chr_eer_freq == 'M'):
            ser_eer_dated = ser_eer_dated.resample('MS').last().resample('BM').last()
        ### Reindexing for proper date range:
        ser_eer_converted = ser_eer_dated.reindex(idx_eer_date_range)
        ### Results output:
        ser_eer_converted.index.names = ['Date']
        return ser_eer_converted
    ### Defining source combining function for a single EER type ('REER' or 'NEER'):
    def eer_combine_sources(ser_eer_all, str_eer_type):
        df_type_data = ser_eer_all.loc[str_eer_type, All, All, All].unstack('Source').swaplevel().sort_index(axis = 1)
        dict_type_combined = {}
        set_prev_countries = set()
        ### Sources looping in priority order (labels start with the priority number):
        for iter_source in sorted(df_type_data.columns):
            ### Selecting countries that have data in this source and are not covered by the previous sources:
            set_iter_countries = set(df_type_data[iter_source].dropna().index.get_level_values(1).unique()) - set_prev_countries
            set_prev_countries = set_prev_countries | set_iter_countries
            ### Creating dataset from source (sorted list: pandas 2 rejects set indexers, and the order becomes deterministic):
            dict_type_combined[iter_source.split('-')[1]] = df_type_data[iter_source].loc[All, sorted(set_iter_countries)]
        ### Combined source creating:
        df_type_combined = pd.concat(dict_type_combined).reset_index(0).sort_index(level = ['Date', 'Country'])
        df_type_combined.columns = ['Source', 'EER']
        return df_type_combined['EER']
    ### Loading raw excel source:
    dict_eer_source = pd.read_excel(io = str_path_bb_eer_source, sheet_name = None, skiprows = list(range(4)), index_col = 0, header = 0, parse_dates = True,
                                    na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                                                 '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null', '#N/A Invalid Security'], keep_default_na = False)
    ### Preparing datasets concatenation:
    list_eer_data = []
    for str_sheet_name in dict_eer_source:
        ### Filtering Broad NEER or Broad CPI-Based REER:
        if str_sheet_name in dict_eer_sources:
            ### Future additional indexes (type and source labels):
            list_iter_index = dict_eer_sources[str_sheet_name].split()
            ### Checking for monthly frequency (sheet name ends with 'M'):
            chr_iter_freq = 'D'
            if (str_sheet_name.endswith('M')):
                chr_iter_freq = 'M'
            ### Stacking country codes for making series:
            ser_iter_set = dict_eer_source[str_sheet_name].stack(dropna = False)
            ### Main index levels renaming:
            ser_iter_set.index.names = ['Date', 'Country']
            ### Date range constructing:
            idx_iter_dates = ser_iter_set.index.get_level_values(0).unique()
            idx_eer_date_range = pd.date_range(idx_iter_dates.min(), idx_iter_dates.max(), freq = 'B')
            ### Multi-purpose eer raw data convertion:
            ser_iter_eer = ser_iter_set.groupby('Country', group_keys = True).apply(eer_convertion, chr_iter_freq, idx_eer_date_range).squeeze()
            ### Adding index levels for source description:
            ser_iter_eer = ser_iter_eer.to_frame().assign(Type = list_iter_index[0])\
                                                  .assign(Source = list_iter_index[1])\
                                                  .set_index(['Type', 'Source'], append = True).squeeze()
            ### Data aggregation for concatenation:
            list_eer_data.append(ser_iter_eer)
    ### Consolidated dataset preparing:
    ser_eer_data = pd.concat(list_eer_data).reorder_levels([2, 3, 0, 1])
    ### Results output without source selection:
    if (not bool_unique_countries):
        return ser_eer_data
    ### Results output with single source for each country:
    ser_reer = eer_combine_sources(ser_eer_data, 'REER')
    ser_neer = eer_combine_sources(ser_eer_data, 'NEER')
    return (ser_reer, ser_neer)

In [11]:
### DEFINING EXTRACTION BLOOMBERG XCRA DATA FROM GENERAL MS EXCEL SOURCE

def xcra_export(str_path_bb_xcra_source, bool_fill_and_ma = True):
    """
    Load XCRA concepts (one workbook sheet per concept) and optionally smooth and fill them.

    Parameters:
        str_path_bb_xcra_source: path to the MS Excel workbook with XCRA concepts.
        bool_fill_and_ma: when False, the consolidated monthly table is returned as is.

    Returns:
        DataFrame indexed by (Date, Country) with one column per sheet. With bool_fill_and_ma = True
        'Imports' and 'Exports' are replaced by their rolling mean (window and minimal periods come from
        the notebook level constants int_rolling_win_max / int_rolling_win_min) multiplied by 12,
        and all columns are forward filled and then back filled inside each country.
    """
    ### Loading raw excel source:
    dict_xcra_source = pd.read_excel(io = str_path_bb_xcra_source, sheet_name = None, skiprows = list(range(5)), index_col = 0, header = 0, parse_dates = True,
                                     na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                                                  '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null', '#N/A Invalid Security'], keep_default_na = False)
    ### Resampling from possible calendar month ends to business month ends:
    dict_xcra_stacked = {}
    for iter_concept in dict_xcra_source:
        dict_xcra_stacked[iter_concept] = dict_xcra_source[iter_concept].resample('MS').last().resample('BM').last().stack(dropna = False).squeeze()
    ### Data consolidating:
    df_xcra_stacked = pd.concat(dict_xcra_stacked, axis = 1)
    df_xcra_stacked.index.names = ['Date', 'Country']
    ### Exit without additional data preparation:
    if (not bool_fill_and_ma):
        return df_xcra_stacked
    ### Imports and Exports annual MA modifying:
    ### (transform keeps the original (Date, Country) index, so the assignment aligns regardless of
    ###  how the installed pandas version labels the output of groupby-rolling)
    list_flow_concepts = ['Imports', 'Exports']
    df_flows_ma = df_xcra_stacked[list_flow_concepts].groupby('Country')\
                                                     .transform(lambda iter_flow: iter_flow.rolling(int_rolling_win_max, int_rolling_win_min).mean())
    ### Monthly average to annual level:
    df_xcra_stacked[list_flow_concepts] = df_flows_ma * 12
    ### XCRA concepts forward filling and back filling for first observation:
    df_xcra_filled = df_xcra_stacked.groupby(['Country']).ffill().groupby(['Country']).bfill()
    ### Results output:
    return df_xcra_filled

In [14]:
### MAIN SCRIPT

### Data export (each loader reads its own Bloomberg workbook):
df_ret_reindex = tot_ret_ind_converter(str_path_bb_tr_source, date_start, date_end)
ser_returns = df_ret_reindex['Combined']
ser_mmr = mmr_export(str_path_bb_mmr_source, dict_mmr_replace)
ser_ison_membership = market_membership()
ser_fx_country = fx_export(str_path_bb_fx_source)
ser_mcap = mcap_export(str_path_bb_mcap_source)
ser_reer, ser_neer = eer_export(str_path_bb_eer_source, dict_eer_sources)
df_xcra_filled = xcra_export(str_path_bb_xcra_source)
### Data saving (the first dataset recreates the file, the rest are added to the existing one):
list_to_save = [(str_key_ret, ser_returns),
                (str_key_mmr, ser_mmr),
                (str_key_fx, ser_fx_country),
                (str_key_mcap, ser_mcap),
                (str_key_reer, ser_reer),
                (str_key_neer, ser_neer),
                (str_key_xcra, df_xcra_filled)]
for iter_num, (iter_key, iter_data) in enumerate(list_to_save):
    iter_data.to_hdf(str_path_bb_hdf, key = iter_key, mode = 'w' if (iter_num == 0) else 'r+')

In [None]:
### TEMP

### Scratch check: mean absolute difference between 'LOC' and 'USD' combined returns for each country:
ser_test_equal = ser_returns.unstack('Currency').groupby('Country').apply(lambda df_country: (df_country['LOC'] - df_country['USD']).abs().mean())

### Both currency variants for the 'EG' country code (not the last expression of the cell, so the result is not displayed):
ser_returns.unstack('Currency').loc[(All, 'EG'), All]

### Countries with mean absolute difference below 0.01, largest first (displayed as the cell output):
ser_test_equal.loc[ser_test_equal < 0.01].sort_values(ascending = False)