In [1]:
### ADDITIONAL POLITICAL DATA OBSERVATION

In [1]:
### DEFINING EXTRACTION OF UNIVERSE MEMBERSHIP DATA FROM THE GENERAL MS EXCEL SOURCE
def get_market_membership_from_excel(path_msci):
    """Load the market membership of each country on each date from an Excel workbook.

    The sheet 'universe_joined' is read, the columns 'dates', 'region' and 'ctry' are kept,
    numeric region codes are mapped to market labels and only rows whose label is one of
    'DM', 'FM' or 'EM' are returned.

    Parameters
    ----------
    path_msci : str or path-like
        Location of the workbook; passed to ``pd.read_excel`` as ``io``.

    Returns
    -------
    pd.Series
        Series named 'Market', indexed by ('Date', 'Country') and sorted by both levels,
        holding the market label of each (date, country) pair.
    """
    ### Importing standard modules and date-special modules:
    import pandas as pd
    ### Declaring local constants & variables:
    tab_monthly = 'universe_joined'
    arr_markets_needed = ['DM', 'FM', 'EM']
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM'} ### Region code -> market label
    ### Extracting universe data (sheet rows 0 and 2 are skipped, so sheet row 1 supplies the header):
    df_universe = pd.read_excel(io = path_msci, sheet_name = tab_monthly, skiprows = [0, 2], header = 0, parse_dates = True,
                                na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    df_universe = df_universe.loc[:, ['dates', 'region', 'ctry']]
    df_universe.columns = ['Date', 'Market', 'Country']
    ### Selecting the column explicitly: a bare squeeze() would collapse a single-row table to a scalar
    ser_universe = df_universe.set_index(['Date', 'Country'])['Market']
    ser_universe = ser_universe.sort_index(level = [0, 1])
    ser_universe = ser_universe.replace(dict_markets)
    ### Keeping only the rows that belong to one of the needed markets:
    ser_market_membership = ser_universe[ser_universe.isin(arr_markets_needed)]

    return ser_market_membership

In [2]:
def get_observation_data(path_source, path_msci):
    """Build a monthly (Country, Date, Market) x Indicator table from a quarterly Excel source.

    The source workbook is reshaped to long format, each observation date is shifted to a
    business quarter end, every (Country, Indicator, Group) series is resampled to business
    month ends with backward filling, market membership is attached, and only countries that
    appear in the membership universe are kept.

    Parameters
    ----------
    path_source : str or path-like
        Workbook with the observations; columns 0, 5 and 6 are used as the index and the
        columns from position 12 onward (after the index columns are set aside) are stacked.
        NOTE(review): those columns are presumably date-labelled, since their labels are
        parsed with ``pd.to_datetime`` - confirm against the source file.
    path_msci : str or path-like
        Workbook with universe membership, forwarded to ``get_market_membership_from_excel``.

    Returns
    -------
    pd.DataFrame
        One column per indicator; rows indexed by Date, Country and Market and sorted by
        Country, then Date. Market is NaN where the (Date, Country) pair has no membership
        record (left join).
    """
    ### Importing standard modules and date-special modules:
    import pandas as pd
    ### Loading data:
    df_politics = pd.read_excel(path_source, index_col = [0, 5, 6], parse_dates = True,
                                na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    ### Mungling table (wide -> long, one row per country / indicator / group / column label):
    df_politics = df_politics.iloc[: , 12 : ].stack().to_frame()
    df_politics.index.names = ['Country', 'Indicator', 'Group', 'Date_Q']
    df_politics = df_politics.reset_index('Date_Q')
    ### Date manipulating (rolling each label forward to a business quarter end):
    df_politics['Date'] = pd.to_datetime(df_politics['Date_Q']) + pd.offsets.BQuarterEnd()
    df_politics = df_politics.set_index('Date', append = True).drop('Date_Q', axis = 1)
    ### Upsampling each series to business month ends; 'BM' is the alias used by this file
    ### (newer pandas releases spell it 'BME'); gaps are filled with the next known value:
    df_politics = df_politics.groupby(['Country', 'Indicator', 'Group']).\
        apply(lambda iter_group: iter_group.droplevel([0, 1, 2]).resample('BM').bfill())
    df_politics = df_politics.reorder_levels(['Date', 'Country', 'Indicator', 'Group'])
    df_politics.columns = ['Value']
    ### Market membership importing:
    ser_market_membership = get_market_membership_from_excel(path_msci)
    ### Adding membership column to result dataframe:
    df_politics = df_politics.join(ser_market_membership, on = ['Date', 'Country'], how = 'left')
    df_politics = df_politics.set_index('Market', drop = True, append = True)
    ### Dropping countries with no participation in ISON universe.
    ### A boolean mask is used instead of a list-based .loc lookup so that universe countries
    ### missing from the source do not raise KeyError and no module-level slicer is needed:
    idx_universe_countries = ser_market_membership.index.get_level_values(1).unique()
    arr_in_universe = df_politics.index.get_level_values('Country').isin(idx_universe_countries)
    ### Selecting the column explicitly: a bare squeeze() would collapse a single-row table to a scalar
    ser_politics = df_politics.loc[arr_in_universe, 'Value']
    ### Dropping group index and formatting table:
    ser_politics = ser_politics.droplevel('Group')
    df_result = ser_politics.unstack('Indicator').sort_index(level = ['Country', 'Date'])

    return df_result

In [3]:
### MAIN SCRIPT
### Importing standard modules and date-special modules:
import numpy as np
import pandas as pd
### Declaring global constants & variables:
path_msci = 'Data_Files/Source_Files/sample_universe.xlsx' ### Path for membership source
path_composite_indicators = 'Data_Files/Source_Files/OD_Composite_Indicators.xlsx' ### Path for Composite Indicators
path_growth_potential = 'Data_Files/Source_Files/OD_Growth_Potential.xlsx' ### Path for Growth Potential
path_social_inclusion = 'Data_Files/Source_Files/OD_Social_Inclusion.xlsx' ### Path for Social Inclusion
All = slice(None) ### Full-range slicer for index selection
### Building the observation tables (the membership workbook is read inside get_observation_data,
### so no separate membership call is needed here):
df_composite_indicators = get_observation_data(path_composite_indicators, path_msci)
df_growth_potential = get_observation_data(path_growth_potential, path_msci)
df_social_inclusion = get_observation_data(path_social_inclusion, path_msci)
### Exporting results (merge_cells = False writes every index label on every row):
df_composite_indicators.to_excel('Data_Files/Test_Files/OD_Composite_Indicators.xlsx', merge_cells = False)
df_growth_potential.to_excel('Data_Files/Test_Files/OD_Growth_Potential.xlsx', merge_cells = False)
df_social_inclusion.to_excel('Data_Files/Test_Files/OD_Social_Inclusion.xlsx', merge_cells = False)


In [4]:
### TESTING: SAVING TO HDF FOR TESTING PURPOSES

path_growth_potential_hdf = 'Data_Files/Source_Files/OD_Growth_Potential.h5'
key_growth_potential = 'growth_potential'
### Passing key by keyword: positional arguments other than the path are deprecated in recent pandas
df_growth_potential.to_hdf(path_growth_potential_hdf, key = key_growth_potential)