In [1]:
### WIOT MATRICES DEMONSTRATION

In [2]:
### INITIALIZATION

import pandas as pd
import numpy as np
import os
import gc

In [3]:
### PARAMETERS

### Start of the monthly date range (passed as `start` to pd.date_range in the dummy momentum cell):
str_year_start = '1994'
### MultiIndex level slice constant (full slice; not referenced by any cell visible in this notebook):
All = slice(None)
### Commodity to Industry shares (HDF5 stores read with pd.read_hdf; the same key is used for both files):
str_path_unc_ind_weights_hdf = 'Data_Files/Result_Files/unc_ind_weights.h5'
str_path_unc_ind_weights_agg_hdf = 'Data_Files/Result_Files/unc_ind_weights_agg.h5'
str_gics_key = 'gics_io'
### Product / Industry mapping path (Excel workbook and the sheet the GICS Industries list is read from):
str_path_matrix_map = 'Data_Files/Source_Files/WIOT_mapping_detailed.xlsx'
str_sheet_matrix = 'GICS 2018'
### Augmented bilateral export (HDF5 store and key; currently referenced only by commented-out code):
str_path_export_bilateral = 'Data_Files/Source_Files/comtrade_export_bilateral.h5'
str_key_unc_export = 'export_augmented'

In [4]:
### DEFINING COUNTRY CODES EXTRACTOR

def get_country_codes(use_local_copy = False):
    """
    Build a table of ISO country codes indexed by upper-cased country name.

    Parameters
    ----------
    use_local_copy : bool, default False
        When True, the table is parsed from the saved page
        'Data_Files/Source_Files/countrycode.html'; otherwise it is parsed
        from 'https://countrycode.org/'.

    Returns
    -------
    pd.DataFrame
        Columns 'ISO SHORT' and 'ISO LONG' (the two parts of the source
        'ISO CODES' column, which is split on ' / '); rows are sorted by the
        original country name, then the index is converted to upper case.
    """
    ### Source selection (local file is a fallback for the case when the URL is unavailable):
    str_source = 'Data_Files/Source_Files/countrycode.html' if use_local_copy else 'https://countrycode.org/'
    ### First table of the page, indexed by country name:
    df_page_table = pd.read_html(str_source, index_col = 'COUNTRY')[0]
    ### Splitting the combined codes column into short and long codes:
    df_iso_codes = df_page_table['ISO CODES'].str.split(' / ', expand = True)
    df_iso_codes.columns = ['ISO SHORT', 'ISO LONG']
    ### Sorting by country name before upper-casing the labels:
    df_iso_codes = df_iso_codes.sort_index()
    df_iso_codes.index = df_iso_codes.index.str.upper()
    ### Results output:
    return df_iso_codes

### World Country Codes (ISO SHORT / ISO LONG table; parsed from the online page because use_local_copy keeps its default of False):
df_country_codes = get_country_codes()

In [5]:
### DUMMY INDUSTRY RETURNS MOMENTUM GENERATION

### Seed of the dummy data generator (fixed so that Restart & Run All reproduces the same dummy values):
int_dummy_seed = 42
### Index of dates (business month ends from the starting year up to the run date):
### NOTE(review): newer pandas releases deprecate the 'BM' alias in favour of 'BME' - confirm against the installed version.
idx_dates = pd.date_range(start = str_year_start, end = pd.to_datetime('today'), freq = 'BM')
### List of countries:
list_countries = df_country_codes['ISO SHORT'].to_list()# + ['WW']
### List of GICS Industries (fifth column of the mapping sheet, read as strings, empty rows dropped):
list_industries = pd.read_excel(engine = 'openpyxl', io = str_path_matrix_map, sheet_name = str_sheet_matrix, dtype = str, skiprows = 4, header = [0], 
                                usecols = [4], index_col = None).dropna().squeeze().values
### MultiIndex creation (every Date x Importer x GICS Industry combination):
idx_momentum = pd.MultiIndex.from_product([idx_dates, list_countries, list_industries])
### Dummy Series generation: standard normal draws restricted to [-1.0, 1.0].
### Draws are repeated until enough values are accepted, so the number of values always matches the index length
### (a single batch of doubled size is not guaranteed to contain enough accepted values):
rng_dummy = np.random.default_rng(int_dummy_seed)
int_values_needed = len(idx_momentum)
int_values_accepted = 0
list_accepted_batches = []
while int_values_accepted < int_values_needed:
    arr_batch = rng_dummy.normal(0.0, 1.0, max(2 * (int_values_needed - int_values_accepted), 1))
    arr_batch = arr_batch[np.abs(arr_batch) <= 1.0]
    list_accepted_batches.append(arr_batch)
    int_values_accepted += len(arr_batch)
### Empty index is a valid edge case (no batches are drawn then):
arr_accepted = np.concatenate(list_accepted_batches)[: int_values_needed] if list_accepted_batches else np.empty(0)
ser_future_values = pd.Series(arr_accepted)
ser_momentum = pd.Series(ser_future_values.values, index = idx_momentum)
ser_momentum.index.names = ['Date', 'Importer', 'GICS_Industry_Code']
ser_momentum.name = 'Industry_Momentum'
### Downcasting to reduce memory footprint of the full Date x Importer x Industry product:
ser_momentum = ser_momentum.astype('float16')

In [6]:
### TEMP
### Work-in-progress cell: for a momentum date it loads the export distribution shares of the lagged year end
### and attaches the dummy Industry Momentum values to them. The result is left in df_ind_mom_all.

### Looping over monthly Industry Returns Momentum dates:
### (dates are visited from the latest one backwards; the `break` at the end of the body stops the loop after the first pass,
### so only the latest date is actually processed)
for iter_date in idx_dates[::-1]:
    gc.collect()
    print('Date to work with: ', iter_date.date())
    ### Dates shifting to find nearest lagged BYearEnd:
    ### (roll back to the business month begin, step 12 months back, roll forward to the business month end,
    ### then roll back to the business year end; the recorded output shows 2023-10-31 -> 2021-12-31)
    dt_bm_begin = pd.tseries.offsets.BMonthBegin().rollback(iter_date)
    dt_bm_lagged = pd.tseries.offsets.BMonthEnd().rollforward(dt_bm_begin - pd.DateOffset(months=12))
    dt_by_lagged = pd.tseries.offsets.BYearEnd().rollback(dt_bm_lagged)
    print('Date to search in annual datasets:', dt_by_lagged.date())  

    ### Export Distribution Shares Extraction:
    ### (rows are filtered inside the HDF store by the lagged year end; the 'Date' level is dropped and values are downcast to float16)
    ser_ind_weights_bil = pd.read_hdf(path_or_buf = str_path_unc_ind_weights_hdf, key = str_gics_key, where = "(Date in [dt_by_lagged])")\
                            .droplevel('Date').astype('float16')
    ser_ind_weights_agg = pd.read_hdf(path_or_buf = str_path_unc_ind_weights_agg_hdf, key = str_gics_key, where = "(Date in [dt_by_lagged])")\
                            .droplevel('Date').astype('float16')
    ### WIOT Importers list extraction:
    ### (level 1 is presumably 'Importer' once 'Date' is dropped - the index printed at the end of the notebook
    ### is ordered Date, Exporter, Importer, ...; confirm against the stored data)
    list_wiot_importers = ser_ind_weights_bil.index.levels[1].to_list()    
    ### 'YY' and 'WW' look like aggregate pseudo-importers rather than countries - TODO confirm.
    ### list.remove raises ValueError when the label is absent from the level:
    list_wiot_importers.remove('YY')
    list_wiot_importers.remove('WW')    
    ### Industry Returns Momentum Extraction:
    ### (cross-section for the current date, split into importers present in the shares data and all the other importers)
    ser_ind_mom = ser_momentum[iter_date]
    ser_ind_mom_wiot = ser_ind_mom[list_wiot_importers]
    ser_ind_mom_row = ser_ind_mom.loc[~ser_ind_mom.index.get_level_values('Importer').isin(list_wiot_importers)] 
    ### Adding Industry Momentum values to Export Distribution Shares:
    ### (the momentum Series is joined to the shares frame; presumably matched on the shared index level names - verify)
    df_ind_mom_wiot = ser_ind_weights_bil.to_frame().join(ser_ind_mom_wiot)
    ### Sources are released right after the join to limit peak memory usage:
    del ser_ind_weights_bil
    del ser_ind_mom_wiot        
    gc.collect()        
    df_ind_mom_wiot = df_ind_mom_wiot.reorder_levels(['Exporter', 'Importer', 'Commodity_ID', 'Commodity_Group_Code', 'GICS_Industry_Code']).sort_index()
    ### Aggregated 'YY' shares are joined with the momentum of the remaining importers
    ### (presumably the 'YY' selection drops the 'Importer' level and the join brings it back from the momentum Series - verify):
    df_ind_mom_row = ser_ind_weights_agg.loc[:, 'YY'].to_frame().join(ser_ind_mom_row)
    del ser_ind_weights_agg    
    del ser_ind_mom_row        
    gc.collect()        
    df_ind_mom_row = df_ind_mom_row.reorder_levels(['Exporter', 'Importer', 'Commodity_ID', 'Commodity_Group_Code', 'GICS_Industry_Code']).sort_index()
    ### Both parts are stacked into a single frame:
    df_ind_mom_all = pd.concat([df_ind_mom_row, df_ind_mom_wiot], axis = 0)  
    ### 'Importer' (position 1) and 'GICS_Industry_Code' (position 4) levels are converted to categoricals:
    df_ind_mom_all.index = df_ind_mom_all.index.set_levels(df_ind_mom_all.index.levels[1].astype('category'), level = 'Importer')
    df_ind_mom_all.index = df_ind_mom_all.index.set_levels(df_ind_mom_all.index.levels[4].astype('category'), level = 'GICS_Industry_Code')
    df_ind_mom_all = df_ind_mom_all.sort_index()
    del df_ind_mom_wiot
    del df_ind_mom_row
    gc.collect()            
#    ### UN Comtrade Bilateral Export Flows Extraction:
#    ser_unc_export = pd.read_hdf(str_path_export_bilateral, key = str_key_unc_export, where = "Date in [dt_by_lagged]").droplevel(['Date', 'Type']).sort_index() 
#    ser_unc_export.index.names = ['Exporter', 'Importer', 'Commodity_ID']

    ### Only the first (latest) date is processed while the cell is under development:
    break

Date to work with:  2023-10-31
Date to search in annual datasets: 2021-12-31


In [None]:
### TEMP
### Share-weighted Industry Momentum inside each Exporter / Importer / Commodity / Commodity Group combination

gc.collect()
### Index levels that identify a single group:
list_group_levels = ['Exporter', 'Importer', 'Commodity_ID', 'Commodity_Group_Code']

def _share_weighted_momentum(df_group):
    """Multiply Share by Industry_Momentum row-wise and divide by the total Share of the group."""
    ser_weighted = df_group['Share'] * df_group['Industry_Momentum']
    return ser_weighted / df_group['Share'].sum()

### Groups are formed from observed level combinations only:
df_group_mom_all = df_ind_mom_all.groupby(list_group_levels, observed = True).apply(_share_weighted_momentum)

In [14]:
### TEMP
### GICS Group code column: first four characters of the GICS Industry code taken from the index

gc.collect()
idx_gics_industry = df_ind_mom_all.index.get_level_values('GICS_Industry_Code')
df_ind_mom_all['GICS_Group_Code'] = idx_gics_industry.str[: 4].astype('category')
### Alternative kept for reference - moving the new column into the index:
#df_ind_mom_all = df_ind_mom_all.set_index('GICS_Group_Code', append = True)
### Column types check:
df_ind_mom_all.dtypes

Share                 float16
Industry_Momentum     float16
GICS_Group_Code      category
dtype: object

In [4]:
### DATA EXTRACTION
### Bilateral and aggregated shares for a single date, stacked into one sorted Series

### NOTE(review): list_dates is not defined in any visible cell - presumably left over from an earlier session;
### confirm its source before running this cell on a fresh kernel.
### Second to last date is the only one processed:
iter_date = list_dates[-2]
### The 'Date' level is kept in both extractions (dropping it is left disabled):
ser_ind_weights_bil = pd.read_hdf(str_path_unc_ind_weights_hdf, key = str_gics_key, where = "(Date in [iter_date])")
ser_ind_weights_agg = pd.read_hdf(str_path_unc_ind_weights_agg_hdf, key = str_gics_key, where = "(Date in [iter_date])")
list_weights_parts = [ser_ind_weights_bil, ser_ind_weights_agg]
ser_ind_weights = pd.concat(list_weights_parts, axis = 0).sort_index()

In [7]:
### TEMP
### Stacking bilateral and aggregated shares into one sorted Series

list_weights_to_stack = [ser_ind_weights_bil, ser_ind_weights_agg]
ser_ind_weights = pd.concat(list_weights_to_stack, axis = 0).sort_index()

In [8]:
### TEMP
### Visual check of the stacked shares (the bare expression makes the notebook display the Series)

ser_ind_weights

Date        Exporter  Importer  Commodity_ID  Commodity_Group_Code  GICS_Industry_Code
2021-12-31  AT        AU        04            3020                  151030                0.006100
                                                                    151050                0.002773
                                                                    201030                0.001387
                                                                    201070                0.007283
                                                                    202020                0.009713
                                                                                            ...   
            WW        US        97            2520                  551020                0.000135
                                                                    551030                0.000135
                                                                    551050                0.000270
                      