In [1]:
### Importing standard modules and date-special modules:
import numpy as np
import pandas as pd
### Declaring global constants:
### `All` is a full slice (same as `:`), used below as a "take every label" placeholder inside multi-level .loc indexers:
All = slice(None)

In [2]:
### DEFINING EXTRACTION UNIVERSE DATA FROM GENERAL MS EXCEL SOURCE
def get_market_membership_from_excel():
    """
    Load the country-to-market membership history from the universe workbook.

    Reads sheet 'universe_joined' of 'Data_Files/Source_Files/sample_universe.xlsx', keeps the
    'dates', 'region' and 'ctry' columns, translates the numeric region codes (50, 57, 504) to the
    market labels ('DM', 'EM', 'FM') and drops every row whose label is not one of those three.

    Relies on `pd` (pandas) imported in the notebook's import cell.

    Returns:
        pd.Series named 'Market', indexed by a ('Date', 'Country') MultiIndex sorted by both levels.
    """
    ### Declaring local constants & variables:
    path_msci = 'Data_Files/Source_Files/sample_universe.xlsx' ### Path for membership source
    tab_monthly = 'universe_joined'
    arr_markets_needed = ['DM', 'FM', 'EM']
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM'}
    dict_columns = {'dates' : 'Date', 'region' : 'Market', 'ctry' : 'Country'}
    list_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                      '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null']
    ### Extracting universe data (sheet rows 0 and 2 are skipped, the first remaining row is the header):
    df_universe = pd.read_excel(io = path_msci, sheet_name = tab_monthly, skiprows = [0, 2], header = 0, parse_dates = True,
                                na_values = list_na_values, keep_default_na = False)
    ### Building the membership series without in-place mutation.
    ### The 'Market' column is selected by name instead of squeeze(), which would return a scalar for a one-row sheet:
    ser_universe = df_universe[list(dict_columns)].rename(columns = dict_columns)\
                                                  .set_index(['Date', 'Country'])['Market']\
                                                  .sort_index(level = [0, 1])\
                                                  .replace(dict_markets)
    ### Keeping only the rows that belong to one of the needed markets:
    ser_market_membership = ser_universe[ser_universe.isin(arr_markets_needed)]

    return ser_market_membership

In [3]:
### DEFINING EXTRACTION MARKET CAPITALIZATION DATA FROM GENERAL MS EXCEL SOURCE
def get_mcaps_from_excel(str_sheet_name):
    """
    Load one market capitalization sheet and return it in long (stacked) form.

    Reads sheet `str_sheet_name` of 'Data_Files/Source_Files/MSCI_mcaps.xlsx', skipping the first
    28 rows; the next row is used as the header and the first column as the index.

    Relies on `np` (numpy) and `pd` (pandas) imported in the notebook's import cell.

    Args:
        str_sheet_name: name of the workbook sheet to read.

    Returns:
        pd.Series named 'Market_Cap' with a ('Date', 'Country') MultiIndex, sorted by Country and
        then Date. Empty cells are kept as NaN rows.
    """
    ### Constants declaring:
    path_msci_data = 'Data_Files/Source_Files/MSCI_mcaps.xlsx'
    arr_rows_to_skip = np.arange(0, 28)
    list_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                      '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null', '#N/A Invalid Security']
    ### Extracting market caps data:
    df_mcap = pd.read_excel(io = path_msci_data, sheet_name = str_sheet_name, skiprows = arr_rows_to_skip, header = 0, index_col = 0,
                            na_values = list_na_values, keep_default_na = False)
    ### Stacking index data:
    ### unstack() on a frame with a flat index yields a (column, row) indexed series that keeps NaN cells,
    ### so it replaces stack(dropna = False), whose `dropna` argument newer pandas releases deprecate:
    ser_mcap = df_mcap.unstack().swaplevel().rename_axis(['Date', 'Country'])
    ser_mcap = ser_mcap.sort_index(level = ['Country', 'Date'])
    ser_mcap.name = 'Market_Cap'

    return ser_mcap

In [4]:
### DEFINING EXTRACTION CURRENCY EXCHANGE DATA FROM GENERAL MS EXCEL SOURCE
def get_fx_from_excel(str_sheet_name):
    """
    Load one currency exchange sheet, stack it to long form and divide it by the per-country quote factor.

    The sheet of 'Data_Files/Source_Files/MSCI_FX.xlsx' is read twice:
      * the rate table: the first 28 rows are skipped, the next row is the header, the first column is the index;
      * the header area: the first 16 data rows, of which row 9 supplies the country identifiers and
        row 15 the quote factors (the first column of both rows is ignored).
    A missing quote factor is replaced with 1. Countries that do not appear among the identifiers
    get no factor from the left join, so their result is NaN.

    Relies on `np` (numpy) and `pd` (pandas) imported in the notebook's import cell.

    Args:
        str_sheet_name: name of the workbook sheet to read.

    Returns:
        Unnamed pd.Series with a ('Date', 'Country') MultiIndex, sorted by Country and then Date,
        holding rate / quote factor.
    """
    ### Constants declaring:
    path_msci_data = 'Data_Files/Source_Files/MSCI_FX.xlsx'
    arr_rows_to_skip = np.arange(0, 28)
    num_rows_for_quote = 16
    num_country_id_for_quote = 9
    num_factor_for_quote = 15
    list_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                      '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null', '#N/A Invalid Security']
    ### Extracting currency exchange rates data:
    df_fx = pd.read_excel(io = path_msci_data, sheet_name = str_sheet_name, skiprows = arr_rows_to_skip, header = 0, index_col = 0,
                          na_values = list_na_values, keep_default_na = False)
    ### Extracting quote factors:
    df_quote_factor = pd.read_excel(io = path_msci_data, sheet_name = str_sheet_name, nrows = num_rows_for_quote,
                                    na_values = list_na_values, keep_default_na = False)
    ser_factor = pd.Series(df_quote_factor.iloc[num_factor_for_quote].values[1:],
                           index = df_quote_factor.iloc[num_country_id_for_quote].values[1:])
    ### The factor row is cut from columns that also hold text, so it is coerced to numbers explicitly
    ### before the gaps are filled with the neutral factor 1:
    ser_factor = pd.to_numeric(ser_factor).fillna(1)
    ser_factor.name = 'Quote_Factor'
    ### Stacking index data:
    ### unstack() on a frame with a flat index yields a (column, row) indexed series that keeps NaN cells,
    ### so it replaces stack(dropna = False), whose `dropna` argument newer pandas releases deprecate:
    ser_fx = df_fx.unstack().swaplevel().rename_axis(['Date', 'Country'])
    ser_fx = ser_fx.sort_index(level = ['Country', 'Date'])
    ser_fx.name = 'FX'
    ### Applying quote factors:
    df_fx = ser_fx.to_frame().join(ser_factor, on = 'Country', how = 'left')
    ser_fx = df_fx['FX'] / df_fx['Quote_Factor']

    return ser_fx

In [5]:
### MARKET CAP MIXED CASTING TO USD AND FILLING THE GAPS:
### Requires get_mcaps_from_excel, get_fx_from_excel, get_market_membership_from_excel and the global `All`
### from the cells above.
### Constants declaring:
str_sheet_mc_USD = 'Market Cap USD'
str_sheet_mc_mixed = 'Market Cap MIXED'
str_sheet_fx = 'FX Rates LOCUSD'
### Market caps data reading:
ser_mcap_USD = get_mcaps_from_excel(str_sheet_mc_USD)
ser_mcap_mixed = get_mcaps_from_excel(str_sheet_mc_mixed)
### FX data reading and filling currency exchange coefficients for USD-nominated countries with 1.
### A country without a single FX quote is treated as USD-nominated; the filter is evaluated once and reused:
ser_fx = get_fx_from_excel(str_sheet_fx)
idx_fx_USD = ser_fx.groupby('Country').filter(lambda iter_group: iter_group.count() == 0).index
arr_fx_USD_countries = list(idx_fx_USD.get_level_values('Country').unique())
ser_fx.loc[idx_fx_USD] = 1
### Multiplying market caps to get USD-nominated values:
ser_mcap_fx = ser_mcap_mixed.mul(ser_fx)
ser_mcap_fx.name = 'Market Cap'
### Receiving MSCI membership data:
ser_market_membership = get_market_membership_from_excel()
### Adding market membership as a third index level (NaN where a (Date, Country) pair has no membership row):
df_mcap_fx = ser_mcap_fx.to_frame().join(ser_market_membership, on = ['Date', 'Country'], how = 'left')
ser_mcap = df_mcap_fx.set_index('Market', drop = True, append = True).sort_index(level = ['Country', 'Date'])['Market Cap']
### Filling values for all-empty countries (only in ISON universe) with the mean of their (Date, Market) group.
### group_keys = False keeps the original index layout of the apply result regardless of the pandas version,
### so the three-level .loc selection below always addresses (Date, Country, Market):
arr_empty_countries = ser_mcap.groupby('Country').filter(lambda iter_group: iter_group.count() == 0).index.get_level_values('Country').unique()
ser_mcap.loc[All, arr_empty_countries, All] = \
ser_mcap.groupby(['Date', 'Market'], group_keys = False).apply(lambda iter_group: iter_group.fillna(iter_group.mean())).loc[All, arr_empty_countries, All]
ser_mcap = ser_mcap.round(2)
### Forward-filling all gaps inside each country, then back-filling the leading gap from the first valid value.
### The outcome equals "back-fill the head, then forward-fill", but a country that is still entirely NaN
### simply stays NaN instead of raising on a missing first valid index:
ser_mcap = ser_mcap.groupby('Country').ffill().groupby('Country').bfill()

In [6]:
### MARKET CAPITALIZATIONS SAVING
### Writes the prepared market cap series to an HDF5 file, overwriting any existing file (mode = 'w').
path_market_cap = 'Data_Files/Source_Files/Market_Cap.h5'
key_market_cap = 'mcap'
### `key` is passed by keyword: pandas >= 2.1 deprecates positional arguments after the path in to_hdf
ser_mcap.to_hdf(path_market_cap, key = key_market_cap, mode = 'w', format = 'fixed')

In [7]:
### MARKET CAPS EXPORT RESULTS
### Reporting helpers (the same lines are printed for several sources):
def print_source_header(str_title, ser_values, ser_country_count):
    """Print the upper-cased section title, the non-NaN value count and the number of countries."""
    print(str_title.upper())
    print('Not NaN values number:', ser_values.count())
    print('Nominal countries number:', ser_country_count.count())

def print_only_nan_countries(ser_country_count):
    """Print how many countries have a zero non-NaN count, followed by their codes."""
    ser_only_nan = ser_country_count[ser_country_count == 0]
    print('Countries with only NaN values:', ser_only_nan.count(), ':', list(ser_only_nan.index))

### Generating tables for visual control (non-NaN observations per country for every source):
ser_mcap_USD_country, ser_mcap_mixed_country, ser_fx_country, ser_mcap_fx_country = \
    [iter_source.groupby('Country').count() for iter_source in (ser_mcap_USD, ser_mcap_mixed, ser_fx, ser_mcap_fx)]
### Side-by-side comparison of nominal USD caps, mixed caps and FX-casted caps with the relative deviation:
df_mcap_compare = pd.concat([ser_mcap_USD, ser_mcap_mixed, ser_mcap_fx], keys = ['USD', 'Mixed', 'FX_Casted'], axis = 1)
ser_relative_gap = df_mcap_compare['FX_Casted'].sub(df_mcap_compare['USD']).div(df_mcap_compare['USD'])
df_mcap_compare['Deviation'] = ser_relative_gap.abs()
### Last fully populated row of each country, largest deviation first:
df_mcap_compare_last = df_mcap_compare.dropna().groupby('Country').last()
df_mcap_compare_top = df_mcap_compare_last.sort_values(['Deviation'], ascending = False)
### Results printing:
print_source_header('All about USD Market Cap data:', ser_mcap_USD, ser_mcap_USD_country)
print_only_nan_countries(ser_mcap_USD_country)

print_source_header('All about mixed Market Cap data:', ser_mcap_mixed, ser_mcap_mixed_country)
print_only_nan_countries(ser_mcap_mixed_country)

print_source_header('All about currency exchange data:', ser_fx, ser_fx_country)
print('USD-nominated countries:', len(arr_fx_USD_countries), ':', arr_fx_USD_countries)

print_source_header('All about mixed Market Cap casted to USD data by FX:', ser_mcap_fx, ser_mcap_fx_country)
print_only_nan_countries(ser_mcap_fx_country)

print('Sources comparision:'.upper())
mask_usd_present = df_mcap_compare['USD'].notna()
mask_fx_present = df_mcap_compare['FX_Casted'].notna()
print('USD Not NaN and FX NaN count:', len(df_mcap_compare.loc[mask_usd_present & ~mask_fx_present].index))
print('USD NaN and FX Not NaN count:', len(df_mcap_compare.loc[~mask_usd_present & mask_fx_present].index))
mask_large_deviation = df_mcap_compare_top['Deviation'] > 0.01
print('Non-negligible deviations between nominal USD Market Caps and Mixed Market Caps casted to USD by FX rate for last date values:\n', 
      df_mcap_compare_top.loc[mask_large_deviation])

ALL ABOUT USD MARKET CAP DATA:
Not NaN values number: 6825
Nominal countries number: 84
Countries with only NaN values: 8 : ['CI', 'CR', 'EC', 'MT', 'PA', 'SA', 'UG', 'ZM']
ALL ABOUT MIXED MARKET CAP DATA:
Not NaN values number: 18594
Nominal countries number: 84
Countries with only NaN values: 4 : ['CR', 'EC', 'UG', 'ZM']
ALL ABOUT CURRENCY EXCHANGE DATA:
Not NaN values number: 24424
Nominal countries number: 84
USD-nominated countries: 14 : ['AR', 'BR', 'CI', 'CL', 'CO', 'EC', 'IL', 'KZ', 'MX', 'PE', 'QA', 'RU', 'US', 'ZM']
ALL ABOUT MIXED MARKET CAP CASTED TO USD DATA BY FX:
Not NaN values number: 18380
Nominal countries number: 84
Countries with only NaN values: 4 : ['CR', 'EC', 'UG', 'ZM']
SOURCES COMPARISION:
USD Not NaN and FX NaN count: 0
USD NaN and FX Not NaN count: 11555
Non-negligible deviations between nominal USD Market Caps and Mixed Market Caps casted to USD by FX rate for last date values:
                  USD      Mixed    FX_Casted  Deviation
Country                

In [8]:
### DEFINING EXTRACTION RETURNS DATA FROM GENERAL MS EXCEL SOURCE
def get_msci_returns_from_excel(str_sheet_name):
    """
    Load one total-return index sheet and convert the index levels to period returns.

    Reads sheet `str_sheet_name` of 'Data_Files/Source_Files/MSCI_Returns.xlsx', skipping the first
    27 rows; the next row is used as the header and the first column as the index. Index levels are
    forward-filled within each country, and the return is level / previous level - 1 per country.
    Each date is then moved to the first day of its month and on to the business month end.

    Relies on `np` (numpy) and `pd` (pandas) imported in the notebook's import cell.

    Args:
        str_sheet_name: name of the workbook sheet to read.

    Returns:
        pd.Series named 'Total_Return' with a ('Date', 'Country') MultiIndex, ordered by Country and
        then Date.
    """
    ### Constants declaring:
    path_msci_data = 'Data_Files/Source_Files/MSCI_Returns.xlsx'
    arr_rows_to_skip = np.arange(0, 27)
    list_na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                      '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null', '#N/A Invalid Security']
    ### Extracting returns data:
    df_index_return = pd.read_excel(io = path_msci_data, sheet_name = str_sheet_name, skiprows = arr_rows_to_skip, header = 0, index_col = 0,
                                    na_values = list_na_values, keep_default_na = False)
    ### Stacking index data:
    ### unstack() on a frame with a flat index yields a (column, row) indexed series that keeps NaN cells,
    ### so it replaces stack(dropna = False), whose `dropna` argument newer pandas releases deprecate:
    ser_index_return = df_index_return.unstack().swaplevel().rename_axis(['Date', 'Country'])
    ser_index_return = ser_index_return.sort_index(level = ['Country', 'Date'])
    ser_index_return.name = 'Total_Return'
    ### Filling the gaps (groupby(...).ffill() replaces the deprecated fillna(method = 'ffill')):
    ser_index_return = ser_index_return.groupby('Country').ffill()
    ### Extracting return from index:
    ser_index_return_shifted = ser_index_return.groupby('Country').shift()
    ser_return = ser_index_return.div(ser_index_return_shifted) - 1
    ser_return.name = 'Total_Return'
    ### Reindexing to confirm business-end-of-month indexation:
    ### NOTE(review): a date that already is the first day of a month is moved back by MonthBegin() and
    ### ends up on the previous month's business end - confirm source dates are never month starts.
    df_return = ser_return.reset_index()
    df_return['Date'] = df_return['Date'] - pd.offsets.MonthBegin() + pd.offsets.BMonthEnd()
    ser_return = df_return.set_index(['Date', 'Country'])['Total_Return']

    return ser_return

In [9]:
### DEFINING SHEETS PRIORITY FOR RETURNS EXTRACTING

### Sheet lists ordered from the highest priority to the lowest:
arr_sheet_LOC = ['Returns LOC', 'Returns LOC - Old MSCI Code', 'Returns LOC - Main Index']
arr_sheet_USD = ['Returns USD', 'Returns USD - Old MSCI Code', 'Returns USD - Main Index']

def combine_sheets_by_priority(arr_sheets):
    """Read the sheets in the given order; each later sheet only fills the gaps left by the earlier ones."""
    ser_combined = None
    for iter_sheet in arr_sheets:
        ser_sheet = get_msci_returns_from_excel(iter_sheet)
        if ser_combined is None:
            ### The first sheet is the prior data:
            ser_combined = ser_sheet
        else:
            ### Every next sheet fills the gaps in prior data:
            ser_combined = ser_combined.combine_first(ser_sheet)
    return ser_combined

### Extracting and combining USD returns:
ser_return_USD = combine_sheets_by_priority(arr_sheet_USD)
### Extracting and combining LOC returns:
ser_return_LOC = combine_sheets_by_priority(arr_sheet_LOC)
### Filling the gaps in USD returns with LOC ones:
ser_return_common = ser_return_USD.combine_first(ser_return_LOC)
ser_return_common = ser_return_common.sort_index(level = ['Country', 'Date'])
### Receiving MSCI membership data:
ser_market_membership = get_market_membership_from_excel()
### Adding market membership as a third index level:
df_return_common = ser_return_common.to_frame().join(ser_market_membership, on = ['Date', 'Country'], how = 'left')
df_return_common = df_return_common.set_index('Market', drop = True, append = True)
ser_return = df_return_common.sort_index(level = ['Country', 'Date']).squeeze()
ser_return.name = 'Return'

In [37]:
### INTEGRATED RETURNS SAVING
### Writes the combined return series to an HDF5 file, overwriting any existing file (mode = 'w').
path_return = 'Data_Files/Source_Files/Returns_Integrated.h5'
key_return = 'returns'
### `key` is passed by keyword: pandas >= 2.1 deprecates positional arguments after the path in to_hdf
ser_return.to_hdf(path_return, key = key_return, mode = 'w', format = 'fixed')

In [None]:
###########################################################################################################################################################################

In [1]:
### EVENT STUDY INITIALISING
### Importing standard modules and date-special modules:
import numpy as np
import pandas as pd
### Declaring global constants:
### `All` is a full slice (same as `:`), a "take every label" placeholder for multi-level .loc indexers:
All = slice(None)

In [2]:
### LOADING DATA FOR EVENT STUDY
### Source files and HDF keys of the three inputs:
path_collapsed_marked = 'Data_Files/Source_Files/Collapsed_Rank_Marked.h5'
path_market_cap = 'Data_Files/Source_Files/Market_Cap.h5'
path_return = 'Data_Files/Source_Files/Returns_Integrated.h5'
key_collapsed = 'Rank'
key_market_cap = 'mcap'
key_return = 'returns'
### Credit debt collapsed ranking loading:
ser_ranking = pd.read_hdf(path_collapsed_marked, key = key_collapsed)
### Market Caps loading:
ser_mcap = pd.read_hdf(path_market_cap, key = key_market_cap)
### Returns loading:
ser_return = pd.read_hdf(path_return, key = key_return)

In [8]:
### RETURN CHANGE DIRECTION EXPORT
### Sign of the period-over-period change of the return within each country: 1.0 / -1.0 / 0.0, NaN where the
### difference is undefined. np.sign replaces the masked x / |x| assignment, so no 0 / 0 division is evaluated
### and an infinite difference keeps its sign instead of turning into NaN:
ser_return_diff = np.sign(ser_return.groupby('Country').diff())
ser_return_diff.to_excel('Data_Files/Test_Files/Test_Event_Study.xlsx')