In [1]:
### WIOT MATRICES DEMONSTRATION

In [2]:
### INITIALIZATION

import pandas as pd
import numpy as np
import os
import gc
from pandarallel import pandarallel

In [3]:
### PARAMETERS

### Start of the monthly dates range for the dummy Industry Momentum (passed as `start` to pd.date_range):
str_year_start = '1994'
### MultiIndex level slice constant (selects all values of a level inside .loc[]):
All = slice(None)
### Universe path (MS Excel workbook read by ison_membership_converting):
str_path_universe = 'Data_Files/Source_Files/acadian_universe.xlsx'
### Commodity to Industry shares (bilateral and aggregated HDF stores; both are read with the same key):
str_path_unc_ind_weights_hdf = 'Data_Files/Result_Files/unc_ind_weights.h5'
str_path_unc_ind_weights_agg_hdf = 'Data_Files/Result_Files/unc_ind_weights_agg.h5'
str_gics_key = 'gics_io'
### Product / Industry mapping path and sheet name (source of the GICS Industries list):
str_path_matrix_map = 'Data_Files/Source_Files/WIOT_mapping_detailed.xlsx'
str_sheet_matrix = 'GICS 2018'
### Augmented bilateral export (HDF store path and key):
str_path_export_bilateral = 'Data_Files/Source_Files/comtrade_export_bilateral.h5'
str_key_unc_export = 'export_augmented'
### Date / Country / Commodity GICS Group Momentum (resulting HDF store path and key):
str_path_momentum = 'Data_Files/Result_Files/country_group_momentum.h5'
str_key_momentum = 'momentum'

In [4]:
### DEFINING COUNTRY CODES EXTRACTOR

def get_country_codes(use_local_copy = False):
    """
    Build a table of ISO country codes indexed by upper-cased country name.

    Parameters
    ----------
    use_local_copy : bool, default False
        If True, the table is parsed from the saved HTML copy instead of the online page
        (to be used when the URL is unavailable).

    Returns
    -------
    pd.DataFrame
        Columns 'ISO SHORT' and 'ISO LONG', obtained by splitting the 'ISO CODES' column
        of the first parsed table on ' / '.
    """
    ### Source choosing: saved HTML copy or online page:
    str_source = 'Data_Files/Source_Files/countrycode.html' if use_local_copy else 'https://countrycode.org/'
    ### First table of the page, indexed by country name:
    df_page_table = pd.read_html(str_source, index_col = 'COUNTRY')[0]
    ### Splitting combined codes into short and long ISO codes:
    df_iso_codes = df_page_table['ISO CODES'].str.split(' / ', expand = True)
    df_iso_codes.columns = ['ISO SHORT', 'ISO LONG']
    ### Sorting by original country name and upper-casing the names afterwards:
    df_iso_codes = df_iso_codes.sort_index()
    df_iso_codes.index = df_iso_codes.index.str.upper()
    ### Results output:
    return df_iso_codes
    
### World Country Codes table (online source; set use_local_copy = True to parse the saved HTML copy instead):
df_country_codes = get_country_codes(use_local_copy = False)

In [5]:
### DEFINING UNIVERSE DATA EXTRACTION FROM MS EXCEL SOURCE (TO BE IGNORED IN PRODUCTION CODE)

def ison_membership_converting(str_path_universe, date_end, bool_daily = False, int_backfill_months = 0):
    """
    Load universe membership from the 'Switchers' sheet of an MS Excel workbook and convert it
    to a series of market labels indexed by (Date, Country).

    Parameters
    ----------
    str_path_universe : str
        Path to the workbook. Its first two columns become the (Country, Date) index and
        the 'Region' column carries numeric market codes.
    date_end : datetime-like
        Last date of the business-month-end range each country is forward-filled to.
    bool_daily : bool, default False
        If True, the monthly result is resampled by country to business-day frequency with forward fill.
    int_backfill_months : int, default 0
        Number of business-month-ends to prepend before the first date of each market label;
        the countries carrying that label on its first date are backfilled with it.

    Returns
    -------
    pd.Series
        Series named 'Market' indexed by (Date, Country). Codes 50 / 57 / 504 are translated to
        'DM' / 'EM' / 'FM'; code 0 (which also replaces missing source values) becomes NaN.
    """
    ### Business-month-end offset object (used instead of the 'BM' string alias, which is deprecated in recent pandas releases):
    offset_bm_end = pd.tseries.offsets.BMonthEnd()
    ### Defining business-month-end reindexation on country level:
    def country_modify(ser_raw_country, date_end):
        ser_res_country = ser_raw_country.droplevel(0).resample('MS').last().resample(offset_bm_end).last()
        range_country = pd.date_range(ser_res_country.index[0], date_end, freq = offset_bm_end)
        return ser_res_country.reindex(range_country).ffill()
    ### Markets encoding table (np.nan is used, since the np.NaN alias was removed in NumPy 2.0):
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM', 0: np.nan}
    ### Loading source file:
    df_raw_universe = pd.read_excel(engine = 'openpyxl', io = str_path_universe, sheet_name = 'Switchers', header = 0, parse_dates = True, index_col = [0, 1],
                                 na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', 
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    ### Converting source file (non-mutating chain instead of in-place filling of a column taken from the DataFrame):
    df_raw_universe.index.names = ['Country', 'Date']
    ser_raw_universe = df_raw_universe['Region'].fillna(0).rename('Market')
    ### By country reindexation and translation:
    ser_res_universe = ser_raw_universe.groupby('Country').apply(country_modify, date_end)
    ser_res_universe.index.names = ['Country', 'Date']
    ser_res_universe = ser_res_universe.replace(dict_markets).reorder_levels([1, 0]).sort_index()
    ### Expanding membership for primary regions members by backfilling:
    if int_backfill_months:
        ### List of regions:
        list_region = list(ser_res_universe.dropna().unique())
        ### Initialising of collection of series with backfilled data for each region:
        list_ison_backfill = []
        ### Regions looping:
        for iter_region in list_region:
            ### Defining start of region date:
            date_first_valid = ser_res_universe.loc[ser_res_universe == iter_region].first_valid_index()[0]
            ### Creating dates index to backfilling (the region start date itself is excluded):
            idx_date_backfill = pd.date_range(end = date_first_valid, periods = int_backfill_months + 1, freq = offset_bm_end)[: -1]
            ### Creating primary countries index to backfilling:
            idx_region_backfill = ser_res_universe.loc[ser_res_universe == iter_region].loc[date_first_valid, All].index.get_level_values('Country')
            ### Creating full index:
            idx_ison_backfill = pd.MultiIndex.from_product([idx_date_backfill, idx_region_backfill])
            ### Series with backfilled data:
            list_ison_backfill.append(pd.Series(iter_region, index = idx_ison_backfill))
        ### Combination of backfilled series and original ISON data (original values have priority):
        ser_res_universe = ser_res_universe.combine_first(pd.concat(list_ison_backfill, axis = 0)).sort_index()
        ser_res_universe.index.names = ['Date', 'Country']
    ### Naming the result before daily conversion, which selects the 'Market' column by name:
    ser_res_universe.name = 'Market'
    ### Converting to daily frequency:
    if bool_daily:
        ser_res_universe = ser_res_universe.reset_index('Country').groupby('Country').resample('B').ffill()['Market'].swaplevel().sort_index()
    ### Results output:
    return ser_res_universe

In [6]:
### DUMMY INDUSTRY RETURNS MOMENTUM GENERATION

### Index of business-month-end dates (offset object is used instead of the 'BM' string alias, which is deprecated in recent pandas releases):
idx_dates = pd.date_range(start = str_year_start, end = pd.to_datetime('today'), freq = pd.tseries.offsets.BMonthEnd())
### List of countries:
list_countries = df_country_codes['ISO SHORT'].to_list()# + ['WW']
### List of GICS Industries:
list_industries = pd.read_excel(engine = 'openpyxl', io = str_path_matrix_map, sheet_name = str_sheet_matrix, dtype = str, skiprows = 4, header = [0], 
                                usecols = [4], index_col = None).dropna().squeeze().values
### MultiIndex creation:
idx_momentum = pd.MultiIndex.from_product([idx_dates, list_countries, list_industries])
### Seeded random generator, so that the dummy data is reproducible after Restart & Run All:
int_random_seed = 0
rng_momentum = np.random.default_rng(int_random_seed)
### Dummy Series generation: twice as many standard normal draws as needed, keeping the first len(idx_momentum) of those within [-1.0, 1.0]:
ser_future_values = pd.Series(rng_momentum.normal(0.0, 1.0, len(idx_momentum) * 2))
ser_future_values = ser_future_values[ser_future_values.abs() <= 1.0][: len(idx_momentum)]
ser_momentum = pd.Series(ser_future_values.values, index = idx_momentum)
ser_momentum.index.names = ['Date', 'Importer', 'GICS_Industry_Code']
ser_momentum.name = 'Industry_Momentum'
### Downcasting to reduce memory footprint:
ser_momentum = ser_momentum.astype('float16')

In [7]:
### TRANSFORMATION TO COUNTRY / EXPORT GICS GROUP MOMENTUM

def get_gics_group_mom(df_group):
    """
    Share-weighted average of Industry Momentum over the rows of one group.

    Parameters
    ----------
    df_group : pd.DataFrame
        Rows of a single group with columns 'Share' and 'Industry_Momentum'.

    Returns
    -------
    scalar
        sum(Share * Industry_Momentum) / sum(Share). Rows with missing momentum are skipped
        in the numerator, while their shares still count in the denominator.
    """
    ### Only column values are aggregated, so the index is left untouched (dropping levels for every group is pure overhead):
    ser_share = df_group['Share']
    ser_result = (ser_share * df_group['Industry_Momentum']).sum() / ser_share.sum()
    return ser_result

def get_gics_country_mom(df_group):
    """
    Export-weighted average of Commodity Momentum over the rows of one group.

    Parameters
    ----------
    df_group : pd.DataFrame
        Rows of a single group with columns 'Export' and 'Commodity_Momentum'.

    Returns
    -------
    scalar
        sum(Export * Commodity_Momentum) / sum(Export). Rows with missing momentum are skipped
        in the numerator, while their export values still count in the denominator.
    """
    ### Only column values are aggregated, so the index is left untouched (dropping levels for every group is pure overhead):
    ser_export = df_group['Export']
    ser_result = (ser_export * df_group['Commodity_Momentum']).sum() / ser_export.sum()
    return ser_result

### Removing previously saved results, since each processed date is appended to the HDF table below:
if (os.path.exists(str_path_momentum)):
    os.remove(str_path_momentum)

### Looping over monthly Industry Returns Momentum dates (most recent date first):
for iter_date in idx_dates[::-1]:
    gc.collect()
    print('Date to work with: ', iter_date.date())
    ### Dates shifting to find nearest lagged BYearEnd:
    dt_bm_begin = pd.tseries.offsets.BMonthBegin().rollback(iter_date)
    dt_bm_lagged = pd.tseries.offsets.BMonthEnd().rollforward(dt_bm_begin - pd.DateOffset(months=12))
    dt_by_lagged = pd.tseries.offsets.BYearEnd().rollback(dt_bm_lagged)
    print('Date to search in annual datasets:', dt_by_lagged.date())

    ### Export Distribution Shares Extraction (dt_by_lagged is referenced by name inside the where-expression):
    ser_ind_weights_bil = pd.read_hdf(path_or_buf = str_path_unc_ind_weights_hdf, key = str_gics_key, where = "(Date in [dt_by_lagged])")\
                            .droplevel('Date').astype('float16')
    ser_ind_weights_agg = pd.read_hdf(path_or_buf = str_path_unc_ind_weights_agg_hdf, key = str_gics_key, where = "(Date in [dt_by_lagged])")\
                            .droplevel('Date').astype('float16')
    ### WIOT Importers list extraction ('YY' and 'WW' codes are filtered out; no error is raised if one of them is absent):
    list_wiot_importers = [iter_importer for iter_importer in ser_ind_weights_bil.index.levels[1] if iter_importer not in ('YY', 'WW')]
    print('WIOT Weights Loaded')
    ### Industry Returns Momentum Extraction: values for current date are split to WIOT importers and the rest of importers:
    ser_ind_mom = ser_momentum[iter_date]
    ser_ind_mom_wiot = ser_ind_mom[list_wiot_importers]
    ser_ind_mom_row = ser_ind_mom.loc[~ser_ind_mom.index.get_level_values('Importer').isin(list_wiot_importers)]
    print('Industry Returns Momentum Loaded')
    ### Adding Industry Momentum values to Export Distribution Shares (bilateral weights plus aggregated weights of 'YY' exporter):
    df_ind_mom_wiot_wiot = ser_ind_weights_bil.to_frame().join(ser_ind_mom_wiot)
    df_ind_mom_yy_wiot = ser_ind_weights_agg.loc[['YY'], list_wiot_importers].to_frame().join(ser_ind_mom_wiot)
    df_ind_mom_wiot = pd.concat([df_ind_mom_wiot_wiot, df_ind_mom_yy_wiot], axis = 0)
    ### Releasing memory of intermediate objects:
    del ser_ind_weights_bil
    del ser_ind_mom_wiot
    del df_ind_mom_wiot_wiot
    del df_ind_mom_yy_wiot
    gc.collect()
    df_ind_mom_wiot = df_ind_mom_wiot.reorder_levels(['Exporter', 'Importer', 'Commodity_ID', 'Commodity_Group_Code', 'GICS_Industry_Code']).sort_index()
    df_ind_mom_wiot.index = df_ind_mom_wiot.index.set_levels(df_ind_mom_wiot.index.levels[1].astype('category'), level = 'Importer')
    df_ind_mom_wiot.index = df_ind_mom_wiot.index.set_levels(df_ind_mom_wiot.index.levels[4].astype('category'), level = 'GICS_Industry_Code')
    print('Dataset with WIOT Importers Prepared to Calculate Exporter / Importer / Commodity_ID Momentum')
    ### Share-weighted momentum by group (pandarallel alternative: pandarallel.initialize(progress_bar = False) and .parallel_apply instead of .apply):
    ser_comm_mom_wiot = df_ind_mom_wiot.groupby(['Exporter', 'Importer', 'Commodity_ID', 'Commodity_Group_Code'], observed = True).apply(get_gics_group_mom)
    del df_ind_mom_wiot
    gc.collect()
    print('WIOT Importers Exporter / Importer / Commodity_ID Momentum Calculated')
    ### Aggregated weights with 'YY' importer joined to momentum of the non-WIOT importers:
    df_ind_mom_row = ser_ind_weights_agg.loc[:, 'YY'].to_frame().join(ser_ind_mom_row)
    del ser_ind_weights_agg
    del ser_ind_mom_row
    gc.collect()
    df_ind_mom_row = df_ind_mom_row.reorder_levels(['Exporter', 'Importer', 'Commodity_ID', 'Commodity_Group_Code', 'GICS_Industry_Code']).sort_index()
    df_ind_mom_row.index = df_ind_mom_row.index.set_levels(df_ind_mom_row.index.levels[1].astype('category'), level = 'Importer')
    df_ind_mom_row.index = df_ind_mom_row.index.set_levels(df_ind_mom_row.index.levels[4].astype('category'), level = 'GICS_Industry_Code')
    print('Dataset with RoW Importers Prepared to Calculate Exporter / Importer / Commodity_ID Momentum')
    ### Share-weighted momentum by group (pandarallel alternative is the same as for WIOT importers):
    ser_comm_mom_row = df_ind_mom_row.groupby(['Exporter', 'Importer', 'Commodity_ID', 'Commodity_Group_Code'], observed = True).apply(get_gics_group_mom)
    del df_ind_mom_row
    gc.collect()
    print('RoW Importers Exporter / Importer / Commodity_ID Momentum Calculated')
    ### Combining WIOT and RoW importers results:
    ser_comm_mom = pd.concat([ser_comm_mom_wiot, ser_comm_mom_row], axis = 0).sort_index()
    ser_comm_mom.index = ser_comm_mom.index.set_levels(ser_comm_mom.index.levels[1].astype('category'), level = 'Importer')
    del ser_comm_mom_wiot
    del ser_comm_mom_row
    gc.collect()
    print('All Importers Exporter / Importer / Commodity_ID Momentum Concatenated')
    ### ISON membership:
    ### NOTE(review): the universe file is re-read on every iteration, and every country present in the index for the date is taken,
    ### including those whose market value is NaN - confirm that this is intended
    list_ison_members = ison_membership_converting(str_path_universe, pd.to_datetime(dt_by_lagged))[dt_by_lagged].index.to_list()
    ### Replacing YY as Exporter: 'YY' values are copied to new columns of non-WIOT ISON countries by forward filling along columns:
    list_non_wiot_ison = sorted(list(set(list_ison_members) - set(list_wiot_importers)))
    df_comm_mom_yy = ser_comm_mom[['YY']].unstack('Exporter')
    ### np.nan is used, since the np.NaN alias was removed in NumPy 2.0:
    df_comm_mom_yy[list_non_wiot_ison] = np.nan
    ser_comm_mom_yy = df_comm_mom_yy.ffill(axis = 1).drop('YY', axis = 1).stack('Exporter').astype('float16').reorder_levels([3, 0, 1, 2]).sort_index()
    print('YY as Exporter Momentum Propagated on Rest of ISON Countries')
    ser_comm_mom_ison = pd.concat([ser_comm_mom, ser_comm_mom_yy], axis = 0).drop('YY', level = 'Exporter')
    del df_comm_mom_yy
    del ser_comm_mom
    del ser_comm_mom_yy
    gc.collect()
    ser_comm_mom_ison.name = 'Commodity_Momentum'
    ### UN Comtrade Bilateral Export Flows Extraction:
    ser_unc_export = pd.read_hdf(str_path_export_bilateral, key = str_key_unc_export, where = "Date in [dt_by_lagged]").droplevel(['Date', 'Type']).sort_index()
    ser_unc_export.index.names = ['Exporter', 'Importer', 'Commodity_ID']
    print('Bilateral Export Flows Loaded')
    ### Only positive export flows are joined; rows with missing momentum or export are dropped:
    df_comm_mom = ser_comm_mom_ison.to_frame().join(ser_unc_export[ser_unc_export > 0.0]).dropna()
    del ser_comm_mom_ison
    del ser_unc_export
    gc.collect()
    print('Bilateral Export Flows Joined to Exporter / Importer / Commodity_ID Momentum')
    ### Export-weighted momentum by Exporter / Commodity GICS Group:
    ser_country_mom = df_comm_mom.groupby(['Exporter', 'Commodity_Group_Code'], observed = True).apply(get_gics_country_mom)
    ser_country_mom.name = 'Momentum'
    ### Adding Date level and appending the result to the HDF table:
    pd.concat([ser_country_mom], keys = [iter_date], names = ['Date'])\
                                                    .to_hdf(str_path_momentum, key = str_key_momentum, mode = 'a', format = 'table', append = True)
    print('Exporter / Industry Group Momentum Calculated & Saved')
    ### Demonstration mode: only the most recent date is processed
    break

Date to work with:  2023-11-30
Date to search in annual datasets: 2021-12-31
WIOT Weights Loaded
Industry Returns Momentum Loaded
Dataset with WIOT Importers Prepared to Calculate Exporter / Importer / Commodity_ID Momentum
WIOT Importers Exporter / Importer / Commodity_ID Momentum Calculated
Dataset with RoW Importers Prepared to Calculate Exporter / Importer / Commodity_ID Momentum
RoW Importers Exporter / Importer / Commodity_ID Momentum Calculated
All Importers Exporter / Importer / Commodity_ID Momentum Concatenated
YY as Exporter Momentum Propagated on Rest of ISON Countries
Bilateral Export Flows Loaded
Bilateral Export Flows Joined to Exporter / Importer / Commodity_ID Momentum
Exporter / Industry Group Momentum Calculated & Saved


In [8]:
### SAVED RESULTS CHECK: READING MOMENTUM BACK FROM THE HDF STORE

ser_test = pd.read_hdf(str_path_momentum, key = str_key_momentum)
### Last expression of the cell, so that the loaded series is displayed as cell output:
ser_test

Date        Exporter  Commodity_Group_Code
2023-11-30  AT        3020                    0.061567
                      1510                   -0.030936
                      2020                   -0.038711
                      5020                    0.100973
                      2530                   -0.032308
                                                ...   
            ZM        3030                   -0.086733
                      2520                   -0.066546
                      2010                   -0.078679
                      5010                   -0.018112
                      4520                   -0.064264
Name: Momentum, Length: 1333, dtype: float32

In [None]:
### TEMP

