In [1]:
### COMTRADE DATASETS EXTRACTION

In [2]:
### RUN EVERY TIME: INITIALIZATION

import pandas as pd
import numpy as np
import requests
import gc
import os
import time

In [3]:
### DISABLING OF WARNINGS

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [4]:
### CONSTANTS (RESEARCH VERSION ONLY)

### Universe path:
str_path_universe = 'Data_Files/Source_Files/acadian_universe.xlsx'
### Service codes conversion path (MS Excel workbook):
str_path_ebops = 'Data_Files/Source_Files/goods_to_industries_2023.xlsx'
### Sheet names inside the service codes conversion workbook:
str_ebops_2002 = 'EBOPS_2002'
str_ebops_2010 = 'EBOPS_2010'
### Cell values to be treated as NA when reading MS Excel files:
list_na_excel_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null',
                        '#N/A Requesting Data...', '#N/A Invalid Security', '#N/A Field Not Applicable', '---']
### UN Comtrade adopted data containers (HDF5 files):
str_path_unc_res_all_annual = 'Data_Files/Source_Files/unc_res_all_annual.h5'
str_path_unc_eb_am_services_annual = 'Data_Files/Source_Files/unc_eb_am_services_annual.h5'
str_path_unc_eb_pm_services_annual = 'Data_Files/Source_Files/unc_eb_pm_services_annual.h5'
str_path_unc_eb10_pm_services_annual = 'Data_Files/Source_Files/unc_eb10_pm_services_annual.h5'
### File with aggregated flows:
str_path_unc_res_flows = 'Data_Files/Source_Files/unc_res_flows.h5'
### Universal HDF5 key (defined once; it used to be assigned twice in this cell with the same value):
str_key_unc_res = 'unc_res'
### Augmented bilateral export:
str_path_export_bilateral = 'Data_Files/Source_Files/comtrade_export_bilateral.h5'
### Export key:
str_key_unc_export = 'export_augmented'
### Augmented bilateral import:
str_path_import_bilateral = 'Data_Files/Source_Files/comtrade_import_bilateral.h5'
### Import key:
str_key_unc_import = 'import_augmented'

In [5]:
### MAIN CONSTANTS

### Dates (first and last date of the data collection window):
date_start = pd.Timestamp(year = 1989, month = 12, day = 29)
date_end = pd.Timestamp(year = 2022, month = 12, day = 30)

In [13]:
### EBOPS SERVICE'S CODES PREPARATION

### Reading options shared by both sheets of the conversion workbook:
dict_ebops_read_options = {'engine': 'openpyxl', 'io': str_path_ebops, 'header': 0, 'index_col': 0,
                           'na_values': list_na_excel_values + ['None'], 'keep_default_na': False}
### EBOPS 2010 codes (index values of the rows having a GICS Group Code):
df_ebops_2010_sheet = pd.read_excel(sheet_name = str_ebops_2010, **dict_ebops_read_options)
list_ebops_2010 = df_ebops_2010_sheet['GICS Group Code'].dropna().sort_index().index.to_list()
### EBOPS 2002 codes mapped with EBOPS 2010 (all cells are read as strings):
df_ebops_2002_sheet = pd.read_excel(sheet_name = str_ebops_2002, dtype = str, **dict_ebops_read_options)
ser_ebops_2002 = df_ebops_2002_sheet['EBOPS 2010 Correspondent ID'].dropna().sort_index()
ser_ebops_2002.index = ser_ebops_2002.index.astype(str)
### Only the mappings pointing to a selected EBOPS 2010 code are kept:
ser_ebops_2002 = ser_ebops_2002[ser_ebops_2002.isin(list_ebops_2010)].rename('ebops_mapping')

In [14]:
### DEFINING & LAUNCH COUNTRY CODES EXTRACTOR (RESEARCH VERSION ONLY)

def get_country_codes(use_local_copy = False):
    """
    Load the table of world country codes and return the ISO codes of every country.

    Parameters:
        use_local_copy: when True, the table is read from the saved HTML page 'Data_Files/Source_Files/countrycode.html'
            (for the case when the URL is unavailable); otherwise it is read from 'https://countrycode.org/'.
    Returns:
        pd.DataFrame indexed by upper-cased country name (sorted before upper-casing) with two columns:
        'ISO SHORT' and 'ISO LONG', obtained by splitting the 'ISO CODES' column on ' / '.
    """
    ### Source selection (local copy or online page):
    url_country_code = 'Data_Files/Source_Files/countrycode.html' if use_local_copy else 'https://countrycode.org/'
    ### The first table found at the source is the one used:
    df_codes_table = pd.read_html(url_country_code, index_col = 'COUNTRY')[0]
    ### Splitting combined codes into separate columns:
    list_iso_columns = ['ISO SHORT', 'ISO LONG']
    df_codes_table[list_iso_columns] = df_codes_table['ISO CODES'].str.split(' / ', expand = True)
    df_result = df_codes_table[list_iso_columns].sort_index()
    df_result.index = df_result.index.str.upper()
    ### Results output:
    return df_result

### World Country Codes (online source; pass use_local_copy = True to read the saved HTML page instead):
df_country_codes = get_country_codes(use_local_copy = False)

In [15]:
### UN COMTRADE: CODELISTS LOADING (PRODUCT VERSION)

def get_unc_reference_table(obj_session, str_file_uri):
    """
    Download one UN Comtrade reference file and return its 'results' section as a table.

    Parameters:
        obj_session: requests.Session used to perform the GET request.
        str_file_uri: address of the reference JSON file.
    Returns:
        pd.DataFrame built from the 'results' entry of the JSON response.
    Raises:
        requests.HTTPError: when the server answers with an error status (raised by raise_for_status).
    """
    obj_response = obj_session.get(str_file_uri)
    ### Failing with an explicit HTTP error instead of an obscure JSON / key error further down:
    obj_response.raise_for_status()
    return pd.DataFrame(obj_response.json()['results'])

def get_unc_country_series(obj_session, str_file_uri, str_iso_column, str_name):
    """
    Load a reporter / partner reference table and reduce it to the mapping: zero-padded UN code -> ISO alpha-2 code.

    Uses the module-level df_country_codes ('ISO SHORT' column) and date_start for filtering.

    Parameters:
        obj_session: requests.Session used to perform the GET request.
        str_file_uri: address of the reference JSON file.
        str_iso_column: name of the column with ISO alpha-2 codes in the reference table.
        str_name: name to assign to the resulting series.
    Returns:
        tuple (filtered reference table, series indexed by 3-character 'id' with ISO alpha-2 values).
    """
    df_codes = get_unc_reference_table(obj_session, str_file_uri)
    df_codes['id'] = df_codes['id'].astype(str).str.zfill(3)
    df_codes['entryEffectiveDate'] = pd.to_datetime(df_codes['entryEffectiveDate'])
    df_codes['entryExpiredDate'] = pd.to_datetime(df_codes['entryExpiredDate'])
    ### Filtering to exclude aggregated and regional values (need to be replaced with SQL-based code):
    df_codes = df_codes[df_codes[str_iso_column].isin(df_country_codes['ISO SHORT'])]
    ### Non-actual country codes filtering out (entries expired on or before the start date):
    df_codes = df_codes[df_codes['entryExpiredDate'].isna() | (df_codes['entryExpiredDate'] > date_start)]
    ### Column selection already gives a series; no squeeze, so a single-row table does not collapse to a scalar:
    ser_codes = df_codes.set_index('id')[str_iso_column].rename(str_name)
    return df_codes, ser_codes

### Codelists container:
dict_codelist = {}
### List of reference tables for categories (request parameters) loading:
request_session = requests.Session()
obj_unc_reference = request_session.get('https://comtradeapi.un.org/files/v1/app/reference/ListofReferences.json')
obj_unc_reference.raise_for_status()
df_cat_reference = pd.DataFrame(obj_unc_reference.json()['results']).set_index('category')
### Parameters:
ser_type_code = pd.Series(['Goods', 'Services'], index = ['C', 'S'])
ser_type_code.index.names = ['id']
ser_type_code.name = 'typeCode'
dict_codelist['typeCode'] = ser_type_code
### Commodity HS Codes:
df_hs_comm = get_unc_reference_table(request_session, df_cat_reference.loc['cmd:HS']['fileuri'])
### Filtering needed level of commodity groups aggregation (the first 5 characters of each description are cut off):
ser_hs_comm_ag2 = df_hs_comm[df_hs_comm['aggrLevel'] == 2].drop(['parent', 'isLeaf', 'aggrLevel'], axis = 1).set_index('id').squeeze().str[5: ]
ser_hs_comm_ag2.name = 'clCode'
dict_codelist['clCode'] = {}
dict_codelist['clCode']['C'] = ser_hs_comm_ag2
### Service EBOPS 2002 Codes:
df_eb_serv_02 = get_unc_reference_table(request_session, df_cat_reference.loc['cmd:EB02']['fileuri'])
### Children of the rows whose parent is '200'.
### NOTE(review): df_eb_serv_ag2 is not used in the visible part of the notebook; kept in case later cells rely on it.
df_eb_serv_ag2 = df_eb_serv_02[df_eb_serv_02['parent'].isin(df_eb_serv_02[df_eb_serv_02['parent'] == '200']['id'])]
### Only the EBOPS 2002 codes having an EBOPS 2010 mapping are kept:
ser_eb_serv_ag2 = df_eb_serv_02[df_eb_serv_02['id'].isin(ser_ebops_2002.index)].set_index('id')['text']
ser_eb_serv_ag2.name = 'clCode'
dict_codelist['clCode']['S_old'] = ser_eb_serv_ag2
### Service EBOPS 2010 Codes:
### NOTE(review): from here df_eb_serv_02 / df_eb_serv_ag2 / ser_eb_serv_ag2 hold EBOPS 2010 data despite their names;
### the names are kept unchanged because later cells may depend on them.
df_eb_serv_02 = get_unc_reference_table(request_session, df_cat_reference.loc['cmd:EB10']['fileuri'])
df_eb_serv_ag2 = df_eb_serv_02[df_eb_serv_02['parent'].isin(df_eb_serv_02[df_eb_serv_02['parent'] == '200']['id'])]
### Only the selected EBOPS 2010 codes are kept:
ser_eb_serv_ag2 = df_eb_serv_02[df_eb_serv_02['id'].isin(list_ebops_2010)].set_index('id')['text']
ser_eb_serv_ag2.name = 'clCode'
dict_codelist['clCode']['S_new'] = ser_eb_serv_ag2
### United codes:
dict_codelist['clCode']['T'] = pd.concat([dict_codelist['clCode']['C'], dict_codelist['clCode']['S_old'], dict_codelist['clCode']['S_new']], axis = 0)
### Reporter Codes:
df_reporter_raw, ser_reporter_code = get_unc_country_series(request_session, df_cat_reference.loc['reporter']['fileuri'],
                                                            'reporterCodeIsoAlpha2', 'Reporter')
dict_codelist['reporterCode'] = ser_reporter_code
### Partner Codes:
df_partner_raw, ser_partner_code = get_unc_country_series(request_session, df_cat_reference.loc['partner']['fileuri'],
                                                          'PartnerCodeIsoAlpha2', 'Partner')
dict_codelist['partnerCode'] = ser_partner_code
### Trade Flow Codes:
ser_flow_code = get_unc_reference_table(request_session, df_cat_reference.loc['flow']['fileuri']).set_index('id').squeeze()
ser_flow_code.name = 'flowCode'
dict_codelist['flowCode'] = ser_flow_code

In [16]:
### UN COMTRADE: DATA REQUEST EXECUTION

def _convert_un_comtrade_records(list_records):
    """
    Convert raw UN Comtrade API records to the notebook's standard flows table.

    Relies on the module-level dict_codelist ('flowCode', 'typeCode', 'reporterCode', 'partnerCode' and
    dict_codelist['clCode']['T']) both for code translation and for the full lists of categories.

    Parameters:
        list_records: content of the 'data' section of the API response; it is passed to pd.DataFrame and must provide
            the 'flowCode', 'typeCode', 'period', 'reporterCode', 'partnerCode', 'cmdCode' and 'primaryValue' fields.
    Returns:
        pd.DataFrame with columns 'Date', 'Reporter', 'Partner', 'Flow', 'Type', 'Commodity_ID', 'Value';
        all columns except 'Date' and 'Value' are ordered categoricals.
    """
    df_dataset_raw = pd.DataFrame(list_records)
    ### A fresh frame is filled column by column: no writes into a slice of the raw data.
    df_dataset_res = pd.DataFrame(index = df_dataset_raw.index)
    ### Year to Date transformation (period is parsed and moved forward to the business year end):
    df_dataset_res['Date'] = pd.to_datetime(df_dataset_raw['period']) + pd.offsets.BYearEnd()
    ### Replacing Reporter / Partner codes with ISO IDs; values outside of the categories list become NaN.
    ### NOTE(review): as in the original code, Reporter is expanded with the PARTNER codes list - presumably so that both
    ### columns share identical categories and can be compared with each other below; confirm.
    list_country_ids = sorted(dict_codelist['partnerCode'].unique())
    ser_reporter = df_dataset_raw['reporterCode'].astype(str).str.zfill(3).replace(dict_codelist['reporterCode'])
    df_dataset_res['Reporter'] = pd.Categorical(ser_reporter, categories = list_country_ids, ordered = True)
    ser_partner = df_dataset_raw['partnerCode'].astype(str).str.zfill(3).replace(dict_codelist['partnerCode'])
    df_dataset_res['Partner'] = pd.Categorical(ser_partner, categories = list_country_ids, ordered = True)
    ### Replacing Flow codes with Flow names, categories being the full list of possible values:
    df_dataset_res['Flow'] = pd.Categorical(df_dataset_raw['flowCode'].replace(dict_codelist['flowCode']),
                                            categories = sorted(dict_codelist['flowCode'].values), ordered = True)
    ### Replacing Type codes with Type names, categories being the full list of possible values:
    df_dataset_res['Type'] = pd.Categorical(df_dataset_raw['typeCode'].replace(dict_codelist['typeCode']),
                                            categories = sorted(dict_codelist['typeCode'].values), ordered = True)
    ### Commodity Type categorization:
    df_dataset_res['Commodity_ID'] = pd.Categorical(df_dataset_raw['cmdCode'],
                                                    categories = sorted(dict_codelist['clCode']['T'].index), ordered = True)
    ### Values scaling (division by 1000), transformation to integer and replacing negative values with zero:
    df_dataset_res['Value'] = (df_dataset_raw['primaryValue'] / 1000).astype('int32').clip(lower = 0)
    ### Data clearing: dropping the SA -> TW pair and flows of a country with itself.
    ### NOTE(review): the reason for excluding the SA / TW pair is not documented in the source.
    ser_to_drop = ((df_dataset_res['Reporter'] == 'SA') & (df_dataset_res['Partner'] == 'TW')) \
                  | (df_dataset_res['Reporter'] == df_dataset_res['Partner'])
    return df_dataset_res[~ser_to_drop]

def get_un_comtrade_data(str_type, str_freq, str_classification, str_trade_flow, list_reporters, list_partners, list_commodities, list_periods, str_api_key, 
                         int_limit = 99999, int_retry_pause_sec = 5, int_max_attempts = None):
    """
    Request bilateral trade data from the UN Comtrade API and return it as a standardized table.

    The request is repeated until the API returns a response containing the 'count' field.

    Parameters:
        str_type: type code placed to the URL path (the notebook uses 'C' for goods and 'S' for services).
        str_freq: frequency code placed to the URL path (the notebook uses 'A').
        str_classification: classification code placed to the URL path (the notebook uses 'HS', 'EB', 'EB10').
        str_trade_flow: value of the flowCode request parameter (the notebook uses 'X' and 'M').
        list_reporters: list of reporter codes (strings), sent comma-separated.
        list_partners: list of partner codes (strings), sent comma-separated.
        list_commodities: list of commodity / service codes (strings), sent comma-separated.
        list_periods: list of periods (strings), sent comma-separated.
        str_api_key: subscription key sent in the 'Ocp-Apim-Subscription-Key' header.
        int_limit: value of the maxrecords request parameter.
        int_retry_pause_sec: pause in seconds before repeating a failed request.
        int_max_attempts: maximum number of requests to send; None means retrying without limit.
    Returns:
        pd.DataFrame with columns 'Date', 'Reporter', 'Partner', 'Flow', 'Type', 'Commodity_ID', 'Value',
        or None when the API reports zero records.
    Raises:
        RuntimeError: when int_max_attempts requests have failed.
        Exceptions raised by requests while sending the request are not caught.
    """
    ### Request preparation:
    str_url_base = 'https://comtradeapi.un.org/data/v1/get/'
    list_url_parameters = ['flowCode=' + str_trade_flow,
                           'reporterCode=' + ','.join(list_reporters),
                           'partnerCode=' + ','.join(list_partners),
                           'cmdCode=' + ','.join(list_commodities),
                           'customsCode=' + 'C00',
                           'motCode=' + '0',
                           'partner2Code=' + '000',
                           'period=' + ','.join(list_periods),
                           'maxrecords=' + str(int_limit)]
    str_url_request = str_url_base + '/'.join([str_type, str_freq, str_classification]) + '?' + '&'.join(list_url_parameters)
    dict_request_headers = {'Cache-Control': 'no-cache', 'Ocp-Apim-Subscription-Key': str_api_key}
    ### Request sending (one session for all the attempts, closed on exit):
    int_attempt = 0
    with requests.Session() as request_session:
        request_session.headers.update(dict_request_headers)
        while True:
            int_attempt = int_attempt + 1
            print(str_url_request)
            obj_unc_dataset = request_session.get(str_url_request)
            ### Response body is parsed once; a non-JSON body is treated as a loading error:
            try:
                dict_response = obj_unc_dataset.json()
            except ValueError:
                dict_response = {'error': 'Non-JSON response received, HTTP status: ' + str(obj_unc_dataset.status_code)}
            ### Request error marker:
            if ('count' in dict_response):
                int_dataset_length = dict_response['count']
            else:
                ### Error replies are not guaranteed to carry the 'error' key, so the whole payload is the fallback:
                print(dict_response.get('error', dict_response))
                int_dataset_length = -1
            ### Response result transformation:
            if (int_dataset_length > 0):
                df_dataset_res = _convert_un_comtrade_records(dict_response['data'])
                print('Loaded Observations Number:', int_dataset_length)
                return df_dataset_res
            elif (int_dataset_length == 0):
                print('Empty Dataset')
                return None
            print('Loading Error. Let\'s try once more...')
            if ((int_max_attempts is not None) and (int_attempt >= int_max_attempts)):
                raise RuntimeError('UN Comtrade request failed after ' + str(int_attempt) + ' attempts')
            ### Giving the service a break instead of repeating the request immediately:
            time.sleep(int_retry_pause_sec)

In [10]:
### UN COMTRADE: GOODS DATA REQUEST PARAMETERS

### Primary key to authorize: read from the environment, so that the secret is not stored inside the notebook.
### NOTE(review): the subscription keys that used to be written here should be treated as exposed and regenerated.
str_primary_key = os.environ.get('UN_COMTRADE_API_KEY', '')
if (len(str_primary_key) == 0):
    raise RuntimeError('UN Comtrade subscription key is not set: define the UN_COMTRADE_API_KEY environment variable')
### Type: Goods
str_goods_type = 'C'
### Annual frequency:
str_freq = 'A'
### Goods classification:
str_goods_class = 'HS'
### Flow: Export
str_export_flow = 'X'
### Flow: Import
str_import_flow = 'M'
### Reporters list:
list_un_reporters = dict_codelist['reporterCode'].index.to_list()
### Partners list:
list_un_partners = dict_codelist['partnerCode'].index.to_list()
### Goods classification codes:
list_un_goods_ag2 = dict_codelist['clCode'][str_goods_type].index.to_list()
### Years to collect data (every year from the start date to the end date inclusive):
list_periods = list(map(str, range(date_start.year, date_end.year + 1)))
### Request tuning (pauses after a non-empty / an empty response and number of years per request):
int_pause_short_sec = 10
int_pause_long_sec = 10
int_goods_period_portion = 5

In [11]:
### GOODS ONLY DATA LOADING ENGINE

### The cell downloads export & import flows by (periods portion, commodity) chunks and appends each chunk to the HDF5 file.
### When the file already exists, loading is resumed after the chunk the last saved observation belongs to.

gc.collect()
### Checking of file status & loading last observation saved:
if (os.path.exists(str_path_unc_res_all_annual)):
    ser_last_row = pd.read_hdf(str_path_unc_res_all_annual, key = str_key_unc_res, start = -1)
    ### Index levels order: Date, Reporter, Partner, Flow, Type, Commodity_ID
    str_last_comm_id = ser_last_row.index[0][5]
    date_last_year = ser_last_row.index[0][0]
    bool_break_flag = True
    print('Last saved observation options:', date_last_year.year, '/', str_last_comm_id)
    ### Resuming is impossible if the saved commodity is unknown (otherwise everything would be silently skipped):
    if (str_last_comm_id not in list_un_goods_ag2):
        raise ValueError('Last saved commodity is not in the list of commodities to load: ' + str(str_last_comm_id))
else:
    date_last_year = pd.to_datetime('1900-01-01')
    print(date_last_year.year)
    bool_break_flag = False

### Looping over period portions (number of portions is a ceiling division):
for iter_portion in range(-(-len(list_periods) // int_goods_period_portion)):
    gc.collect()
    ### Selecting periods:
    list_iter_periods = list_periods[iter_portion * int_goods_period_portion : (iter_portion + 1) * int_goods_period_portion]
    ### Portions that end before the last saved year are already loaded:
    if (int(list_iter_periods[-1]) < date_last_year.year):
        continue
    ### Commodities data loading:
    for iter_comm_id in list_un_goods_ag2:
        ### Starting point searching: everything up to and including the last saved commodity is skipped
        if bool_break_flag:
            if (iter_comm_id == str_last_comm_id):
                bool_break_flag = False
            continue
        ### Container initialization:
        list_un_collection = []
        ### Export & Import Data Requests:
        for str_flow_name, str_flow_code in (('Export', str_export_flow), ('Import', str_import_flow)):
            print(iter_comm_id, '/', list_iter_periods, '/ ' + str_flow_name + ': Loading through the API')
            df_iter_dataset = get_un_comtrade_data(str_goods_type, str_freq, str_goods_class, str_flow_code, list_un_reporters, list_un_partners,
                                                   [iter_comm_id], list_iter_periods, str_primary_key)
            ### Adding dataset to container:
            if (df_iter_dataset is not None):
                list_un_collection.append(df_iter_dataset)
                time.sleep(int_pause_short_sec)
            else:
                time.sleep(int_pause_long_sec)
        ### Downloaded Data concatenation and indexation:
        if (len(list_un_collection) > 0):
            df_full_dataset = pd.concat(list_un_collection, axis = 0, sort = False, ignore_index = True)
            ### Explicit column selection instead of squeeze(): a single-row chunk must stay a series, not turn into a scalar
            ser_full_dataset = df_full_dataset.set_index(['Date', 'Reporter', 'Partner', 'Flow', 'Type', 'Commodity_ID'])['Value'].sort_index()
            ### Dataset saving (need to be replaced with SQL Request):
            ser_full_dataset.to_hdf(path_or_buf = str_path_unc_res_all_annual, key = str_key_unc_res, mode = 'a', format = 'table', complevel = 9,
                                    append = True)
            print(iter_comm_id, '/', list_iter_periods, ': Flows saved to database')
        else:
            print(iter_comm_id, '/', list_iter_periods, ': Both flow\'s datasets are empty')

Last saved observation options: 2022 / 99


In [21]:
### UN COMTRADE: OLD STYLE CLASSIFIED SERVICES DATA REQUEST PARAMETERS

### Primary key to authorize: read from the environment, so that the secret is not stored inside the notebook.
### NOTE(review): the subscription keys that used to be written here should be treated as exposed and regenerated.
str_primary_key = os.environ.get('UN_COMTRADE_API_KEY', '')
if (len(str_primary_key) == 0):
    raise RuntimeError('UN Comtrade subscription key is not set: define the UN_COMTRADE_API_KEY environment variable')
### Type: Services
str_services_type = 'S'
### Annual frequency:
str_freq = 'A'
### Services classification:
str_services_class = 'EB'
### Flow: Export
str_export_flow = 'X'
### Flow: Import
str_import_flow = 'M'
### Reporters list:
list_un_reporters = dict_codelist['reporterCode'].index.to_list()
### Partners list:
list_un_partners = dict_codelist['partnerCode'].index.to_list()
### Services classification codes (EBOPS 2002 codes selected in the codelists cell):
list_un_services_ag2 = dict_codelist['clCode']['S_old'].index.to_list()
### Years to collect data (2000 to 2013 inclusive):
list_periods = list(map(str, range(2000, 2014)))
### Request tuning (pauses after a non-empty / an empty response and number of years per request):
int_pause_short_sec = 10
int_pause_long_sec = 10
int_services_period_portion = 10

In [23]:
### OLD STYLE CLASSIFIED SERVICES ONLY DATA LOADING ENGINE

### The cell downloads export & import flows by (periods portion, service) chunks and appends each chunk to the HDF5 file.
### When the file already exists, loading is resumed after the chunk the last saved observation belongs to.

gc.collect()
### Checking of file status & loading last observation saved:
if (os.path.exists(str_path_unc_eb_am_services_annual)):
    ser_last_row = pd.read_hdf(str_path_unc_eb_am_services_annual, key = str_key_unc_res, start = -1)
    ### Index levels order: Date, Reporter, Partner, Flow, Type, Commodity_ID
    str_last_comm_id = ser_last_row.index[0][5]
    date_last_year = ser_last_row.index[0][0]
    bool_break_flag = True
    print('Last saved observation options:', date_last_year.year, '/', str_last_comm_id)
    ### Resuming is impossible if the saved service code is unknown (otherwise everything would be silently skipped):
    if (str_last_comm_id not in list_un_services_ag2):
        raise ValueError('Last saved service code is not in the list of services to load: ' + str(str_last_comm_id))
else:
    date_last_year = pd.to_datetime('1900-01-01')
    print(date_last_year.year)
    bool_break_flag = False

### Looping over period portions (number of portions is a ceiling division):
for iter_portion in range(-(-len(list_periods) // int_services_period_portion)):
    gc.collect()
    ### Selecting periods:
    list_iter_periods = list_periods[iter_portion * int_services_period_portion : (iter_portion + 1) * int_services_period_portion]
    ### Portions that end before the last saved year are already loaded:
    if (int(list_iter_periods[-1]) < date_last_year.year):
        continue
    ### Services data loading:
    for iter_comm_id in list_un_services_ag2:
        ### Starting point searching: everything up to and including the last saved service is skipped
        if bool_break_flag:
            if (iter_comm_id == str_last_comm_id):
                bool_break_flag = False
            continue
        ### Container initialization:
        list_un_collection = []
        ### Export & Import Data Requests:
        for str_flow_name, str_flow_code in (('Export', str_export_flow), ('Import', str_import_flow)):
            print(iter_comm_id, '/', list_iter_periods, '/ ' + str_flow_name + ': Loading through the API')
            df_iter_dataset = get_un_comtrade_data(str_services_type, str_freq, str_services_class, str_flow_code, list_un_reporters, list_un_partners,
                                                   [iter_comm_id], list_iter_periods, str_primary_key)
            ### Adding dataset to container:
            if (df_iter_dataset is not None):
                list_un_collection.append(df_iter_dataset)
                time.sleep(int_pause_short_sec)
            else:
                time.sleep(int_pause_long_sec)
        ### Downloaded Data concatenation and indexation:
        if (len(list_un_collection) > 0):
            df_full_dataset = pd.concat(list_un_collection, axis = 0, sort = False, ignore_index = True)
            ### Explicit column selection instead of squeeze(): a single-row chunk must stay a series, not turn into a scalar
            ser_full_dataset = df_full_dataset.set_index(['Date', 'Reporter', 'Partner', 'Flow', 'Type', 'Commodity_ID'])['Value'].sort_index()
            ### Dataset saving (need to be replaced with SQL Request):
            ser_full_dataset.to_hdf(path_or_buf = str_path_unc_eb_am_services_annual, key = str_key_unc_res, mode = 'a', format = 'table', complevel = 9,
                                    append = True)
            print(iter_comm_id, '/', list_iter_periods, ': Flows saved to database')
        else:
            print(iter_comm_id, '/', list_iter_periods, ': Both flow\'s datasets are empty')

Last saved observation options: 2013 / 289


In [19]:
### UN COMTRADE: NEW STYLE CLASSIFIED AS REPORTED SERVICES DATA REQUEST PARAMETERS

### Primary key to authorize: read from the environment, so that the secret is not stored inside the notebook.
### NOTE(review): the subscription keys that used to be written here should be treated as exposed and regenerated.
str_primary_key = os.environ.get('UN_COMTRADE_API_KEY', '')
if (len(str_primary_key) == 0):
    raise RuntimeError('UN Comtrade subscription key is not set: define the UN_COMTRADE_API_KEY environment variable')
### Type: Services
str_services_type = 'S'
### Annual frequency:
str_freq = 'A'
### Services classification:
str_services_class = 'EB'
### Flow: Export
str_export_flow = 'X'
### Flow: Import
str_import_flow = 'M'
### Reporters list:
list_un_reporters = dict_codelist['reporterCode'].index.to_list()
### Partners list:
list_un_partners = dict_codelist['partnerCode'].index.to_list()
### Services classification codes (EBOPS 2010 codes selected in the codelists cell), without the codes
### the API responded to as invalid; filtering does not fail when such a code is already absent:
list_un_services_ag2 = [iter_code for iter_code in dict_codelist['clCode']['S_new'].index.to_list() if iter_code not in ('3.10', '3.11')]
### Years to collect data (from 2010 to the end date year inclusive):
list_periods = list(map(str, range(2010, date_end.year + 1)))
### Request tuning (pauses after a non-empty / an empty response and number of years per request):
int_pause_short_sec = 10
int_pause_long_sec = 10
int_services_period_portion = 10

In [21]:
### NEW STYLE CLASSIFIED AS REPORTED SERVICES ONLY DATA LOADING ENGINE

### The cell downloads export & import flows by (periods portion, service) chunks and appends each chunk to the HDF5 file.
### When the file already exists, loading is resumed after the chunk the last saved observation belongs to.

gc.collect()
### Checking of file status & loading last observation saved:
if (os.path.exists(str_path_unc_eb_pm_services_annual)):
    ser_last_row = pd.read_hdf(str_path_unc_eb_pm_services_annual, key = str_key_unc_res, start = -1)
    ### Index levels order: Date, Reporter, Partner, Flow, Type, Commodity_ID
    str_last_comm_id = ser_last_row.index[0][5]
    date_last_year = ser_last_row.index[0][0]
    bool_break_flag = True
    print('Last saved observation options:', date_last_year.year, '/', str_last_comm_id)
    ### Resuming is impossible if the saved service code is unknown (otherwise everything would be silently skipped):
    if (str_last_comm_id not in list_un_services_ag2):
        raise ValueError('Last saved service code is not in the list of services to load: ' + str(str_last_comm_id))
else:
    date_last_year = pd.to_datetime('1900-01-01')
    print(date_last_year.year)
    bool_break_flag = False

### Looping over period portions (number of portions is a ceiling division):
for iter_portion in range(-(-len(list_periods) // int_services_period_portion)):
    gc.collect()
    ### Selecting periods:
    list_iter_periods = list_periods[iter_portion * int_services_period_portion : (iter_portion + 1) * int_services_period_portion]
    ### Portions that end before the last saved year are already loaded:
    if (int(list_iter_periods[-1]) < date_last_year.year):
        continue
    ### Services data loading:
    for iter_comm_id in list_un_services_ag2:
        ### Starting point searching: everything up to and including the last saved service is skipped
        if bool_break_flag:
            if (iter_comm_id == str_last_comm_id):
                bool_break_flag = False
            continue
        ### Container initialization:
        list_un_collection = []
        ### Export & Import Data Requests:
        for str_flow_name, str_flow_code in (('Export', str_export_flow), ('Import', str_import_flow)):
            print(iter_comm_id, '/', list_iter_periods, '/ ' + str_flow_name + ': Loading through the API')
            df_iter_dataset = get_un_comtrade_data(str_services_type, str_freq, str_services_class, str_flow_code, list_un_reporters, list_un_partners,
                                                   [iter_comm_id], list_iter_periods, str_primary_key)
            ### Adding dataset to container:
            if (df_iter_dataset is not None):
                list_un_collection.append(df_iter_dataset)
                time.sleep(int_pause_short_sec)
            else:
                time.sleep(int_pause_long_sec)
        ### Downloaded Data concatenation and indexation:
        if (len(list_un_collection) > 0):
            df_full_dataset = pd.concat(list_un_collection, axis = 0, sort = False, ignore_index = True)
            ### Explicit column selection instead of squeeze(): a single-row chunk must stay a series, not turn into a scalar
            ser_full_dataset = df_full_dataset.set_index(['Date', 'Reporter', 'Partner', 'Flow', 'Type', 'Commodity_ID'])['Value'].sort_index()
            ### Dataset saving (need to be replaced with SQL Request):
            ser_full_dataset.to_hdf(path_or_buf = str_path_unc_eb_pm_services_annual, key = str_key_unc_res, mode = 'a', format = 'table', complevel = 9,
                                    append = True)
            print(iter_comm_id, '/', list_iter_periods, ': Flows saved to database')
        else:
            print(iter_comm_id, '/', list_iter_periods, ': Both flow\'s datasets are empty')

Last saved observation options: 2021 / 9.3


In [22]:
### UN COMTRADE: NEW STYLE CLASSIFIED EB10 SERVICES DATA REQUEST PARAMETERS

### Primary key to authorize: read from the environment, so that the secret is not stored inside the notebook.
### NOTE(review): the subscription keys that used to be written here should be treated as exposed and regenerated.
str_primary_key = os.environ.get('UN_COMTRADE_API_KEY', '')
if (len(str_primary_key) == 0):
    raise RuntimeError('UN Comtrade subscription key is not set: define the UN_COMTRADE_API_KEY environment variable')
### Type: Services
str_services_type = 'S'
### Annual frequency:
str_freq = 'A'
### Services classification:
str_services_class = 'EB10'
### Flow: Export
str_export_flow = 'X'
### Flow: Import
str_import_flow = 'M'
### Reporters list:
list_un_reporters = dict_codelist['reporterCode'].index.to_list()
### Partners list:
list_un_partners = dict_codelist['partnerCode'].index.to_list()
### Services classification codes (EBOPS 2010 codes selected in the codelists cell), without the codes
### the API responded to as invalid; filtering does not fail when such a code is already absent:
list_un_services_ag2 = [iter_code for iter_code in dict_codelist['clCode']['S_new'].index.to_list() if iter_code not in ('3.10', '3.11')]
### Years to collect data (from 2010 to the end date year inclusive):
list_periods = list(map(str, range(2010, date_end.year + 1)))
### Request tuning (pauses after a non-empty / an empty response and number of years per request):
int_pause_short_sec = 10
int_pause_long_sec = 10
int_services_period_portion = 10

In [None]:
### NEW STYLE CLASSIFIED EB10 SERVICES ONLY DATA LOADING ENGINE
### For each portion of years and each service code both Export and Import flows are requested and appended to the HDF5 container.
### When the container already exists, loading is resumed right after the service code of its last saved row.

gc.collect()
### Checking of file status & loading last observation saved:
if (os.path.exists(str_path_unc_eb10_pm_services_annual)):
    ser_last_row = pd.read_hdf(str_path_unc_eb10_pm_services_annual, key = str_key_unc_res, start = -1)
    ### Index levels follow the saving order below: Date (0), Reporter (1), Partner (2), Flow (3), Type (4), Commodity_ID (5)
    str_last_comm_id = ser_last_row.index[0][5]
    date_last_year = ser_last_row.index[0][0]
    ### Resuming is impossible if the saved code is not requested anymore (explicit error instead of skipping everything silently):
    if (str_last_comm_id not in list_un_services_ag2):
        raise ValueError('Last saved service code is not in the list of codes to request: ' + str(str_last_comm_id))
    bool_break_flag = True
    print('Last saved observation options:', date_last_year.year, '/', str_last_comm_id)
else:
    date_last_year = pd.to_datetime('1900-01-01')
    print(date_last_year.year)
    bool_break_flag = False

### Looping over period portions (ceiling division covers the last incomplete portion):
for iter_portion in range(-(-len(list_periods) // int_services_period_portion)):
    gc.collect()
    ### Selecting periods:
    list_iter_periods = list_periods[iter_portion * int_services_period_portion : (iter_portion + 1) * int_services_period_portion]
    ### Portions that end before the last saved year are already in the container:
    if (int(list_iter_periods[-1]) < date_last_year.year):
        continue
    ### Commodities data loading:
    for iter_comm_id in list_un_services_ag2:
        ### Starting point searching: every code up to and including the last saved one is skipped
        if bool_break_flag:
            if (iter_comm_id == str_last_comm_id):
                bool_break_flag = False
            continue
        ### Container initialization:
        list_un_collection = []
        ### Export & Import Data Requests:
        for str_iter_flow, str_iter_flow_name in [(str_export_flow, 'Export'), (str_import_flow, 'Import')]:
            print(iter_comm_id, '/', list_iter_periods, '/ ' + str_iter_flow_name + ': Loading through the API')
            ### Services flow request (the result is treated as missing when the function returns None):
            df_iter_dataset = get_un_comtrade_data(str_services_type, str_freq, str_services_class, str_iter_flow, list_un_reporters, list_un_partners,
                                                   [iter_comm_id], list_iter_periods, str_primary_key)
            ### Adding dataset to container:
            if (df_iter_dataset is not None):
                list_un_collection.append(df_iter_dataset)
                time.sleep(int_pause_short_sec)
            else:
                time.sleep(int_pause_long_sec)
        ### Downloaded Data concatenation and indexation:
        if (len(list_un_collection) > 0):
            df_full_dataset = pd.concat(list_un_collection, axis = 0, sort = False, ignore_index = True)
            ### Only the columns axis is squeezed: a plain squeeze() turns a single-row response into a scalar and sort_index() fails
            ser_full_dataset = df_full_dataset.set_index(['Date', 'Reporter', 'Partner', 'Flow', 'Type', 'Commodity_ID']).squeeze(axis = 1).sort_index()
            ### Dataset saving (need to be replaced with SQL Request):
            ser_full_dataset.to_hdf(path_or_buf = str_path_unc_eb10_pm_services_annual, key = str_key_unc_res, mode = 'a', format = 'table', complevel = 9,
                                    append = True)
            print(iter_comm_id, '/', list_iter_periods, ': Flows saved to database')
        else:
            print(iter_comm_id, '/', list_iter_periods, ': Both flow\'s datasets are empty')

In [40]:
### TEMP: MERGING OF SEPARATELY DOWNLOADED SERVICES DATASETS INTO THE COMMON ANNUAL CONTAINER

### Services datasets loading:
ser_eb_am = pd.read_hdf(str_path_unc_eb_am_services_annual)
ser_eb_pm = pd.read_hdf(str_path_unc_eb_pm_services_annual)
ser_eb10_pm = pd.read_hdf(str_path_unc_eb10_pm_services_annual)
### Service codes recoding with the ser_ebops_2002 mapping (the result used to be discarded, so unconverted codes were concatenated):
### NOTE(review): presumably maps EBOPS 2002 codes to the codes used by the other two datasets - confirm against the mapping preparation cell
ser_eb_am = ser_eb_am.reset_index('Commodity_ID').replace(ser_ebops_2002.to_dict()).set_index('Commodity_ID', append = True)\
                     .squeeze(axis = 1).sort_index()
### Datasets concatenation:
ser_services = pd.concat([ser_eb_am, ser_eb_pm, ser_eb10_pm]).sort_index()
### Dataset saving (need to be replaced with SQL Request):
### NOTE: rows are appended to the existing container, so running this cell twice writes the same rows twice
ser_services.to_hdf(path_or_buf = str_path_unc_res_all_annual, key = str_key_unc_res, mode = 'a', format = 'table', complevel = 9, append = True)

In [None]:
### SERVICES DATA RESEARCH : 1989 - 1999 TEST
### Export flows of two sample service codes are requested under each of the EB / EB02 / EB10 classifications.
### NOTE: the loop overwrites str_services_class and iter_comm_id, which are also used by the loading engine cells.

gc.collect()

str_am_id = '206' # '247'
str_pm_id = '3.1' # '9.1'

list_iter_periods = list(map(str, range(1989, 2000)))
### Container initialization:
list_un_collection = []

### Looping over (classification, service code) pairs:
for iter_tup in [('EB', str_am_id), ('EB02', str_am_id), ('EB10', str_am_id), ('EB', str_pm_id), ('EB02', str_pm_id), ('EB10', str_pm_id)]:
    str_services_class, iter_comm_id = iter_tup
    ### Export Data Requests:
    print(str_services_class, '/', iter_comm_id, '/', list_iter_periods, '/ Export: Loading through the API')
    ### Export of Services:
    df_iter_dataset = get_un_comtrade_data(str_services_type, str_freq, str_services_class, str_export_flow, list_un_reporters, list_un_partners,
                                           [iter_comm_id], list_iter_periods, str_primary_key)
    ### Adding dataset to container:
    if (df_iter_dataset is not None):
        ### Column selection already gives a Series; squeeze() is dropped since it turned a single observation into a scalar
        ser_iter_dataset = df_iter_dataset.set_index(['Date', 'Reporter', 'Partner'])['Value'].sort_index()
        ### Adding Class & ID index levels to distinguish requests after concatenation:
        ser_iter_dataset = pd.concat({str_services_class : pd.concat({iter_comm_id : ser_iter_dataset}, names = ['ID'])}, names = ['Class'])
        list_un_collection.append(ser_iter_dataset)
    ### Pause between requests:
    time.sleep(int_pause_long_sec)

In [13]:
### SERVICES DATA RESEARCH : 1989 - 1999 TEST

### Showing collected series (the list stays empty when none of the requests returned a dataset):
print(list_un_collection)

[]


In [None]:
### SERVICES DATA RESEARCH : 2000 - 2009 TEST
### Export flows of two sample service codes are requested under each of the EB / EB02 / EB10 classifications.
### NOTE: the loop overwrites str_services_class and iter_comm_id, which are also used by the loading engine cells.

gc.collect()

str_am_id = '206' # '247'
str_pm_id = '3.1' # '9.1'

list_iter_periods = list(map(str, range(2000, 2010)))
### Container initialization:
list_un_collection = []

### Looping over (classification, service code) pairs:
for iter_tup in [('EB', str_am_id), ('EB02', str_am_id), ('EB10', str_am_id), ('EB', str_pm_id), ('EB02', str_pm_id), ('EB10', str_pm_id)]:
    str_services_class, iter_comm_id = iter_tup
    ### Export Data Requests:
    print(str_services_class, '/', iter_comm_id, '/', list_iter_periods, '/ Export: Loading through the API')
    ### Export of Services:
    df_iter_dataset = get_un_comtrade_data(str_services_type, str_freq, str_services_class, str_export_flow, list_un_reporters, list_un_partners,
                                           [iter_comm_id], list_iter_periods, str_primary_key)
    ### Adding dataset to container:
    if (df_iter_dataset is not None):
        ### Column selection already gives a Series; squeeze() is dropped since it turned a single observation into a scalar
        ser_iter_dataset = df_iter_dataset.set_index(['Date', 'Reporter', 'Partner'])['Value'].sort_index()
        ### Adding Class & ID index levels to distinguish requests after concatenation:
        ser_iter_dataset = pd.concat({str_services_class : pd.concat({iter_comm_id : ser_iter_dataset}, names = ['ID'])}, names = ['Class'])
        list_un_collection.append(ser_iter_dataset)
    ### Pause between requests:
    time.sleep(int_pause_long_sec)

In [15]:
### SERVICES DATA RESEARCH : 2000 - 2009 TEST

### Stacking all collected series into a single one:
ser_test_id = pd.concat(list_un_collection)
### Number of observations for each Class / ID pair (rows) and Date (columns):
ser_test_count = ser_test_id.groupby(['Class', 'ID', 'Date']).apply(len)
display(ser_test_count.unstack('Date'))

Unnamed: 0_level_0,Date,2000-12-29,2001-12-31,2002-12-31,2003-12-31,2004-12-31,2005-12-30,2006-12-29,2007-12-31,2008-12-31,2009-12-31
Class,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
EB,206,84,108,226,251,438,524,602,568,594,550


In [None]:
### SERVICES DATA RESEARCH : 2010 - 2015 TEST
### Export flows of two sample service codes are requested under each of the EB / EB02 / EB10 classifications.
### NOTE: the loop overwrites str_services_class and iter_comm_id, which are also used by the loading engine cells.

gc.collect()

str_am_id = '206' # '247'
str_pm_id = '3.1' # '9.1'

list_iter_periods = list(map(str, range(2010, 2016)))
### Container initialization:
list_un_collection = []

### Looping over (classification, service code) pairs:
for iter_tup in [('EB', str_am_id), ('EB02', str_am_id), ('EB10', str_am_id), ('EB', str_pm_id), ('EB02', str_pm_id), ('EB10', str_pm_id)]:
    str_services_class, iter_comm_id = iter_tup
    ### Export Data Requests:
    print(str_services_class, '/', iter_comm_id, '/', list_iter_periods, '/ Export: Loading through the API')
    ### Export of Services:
    df_iter_dataset = get_un_comtrade_data(str_services_type, str_freq, str_services_class, str_export_flow, list_un_reporters, list_un_partners,
                                           [iter_comm_id], list_iter_periods, str_primary_key)
    ### Adding dataset to container:
    if (df_iter_dataset is not None):
        ### Column selection already gives a Series; squeeze() is dropped since it turned a single observation into a scalar
        ser_iter_dataset = df_iter_dataset.set_index(['Date', 'Reporter', 'Partner'])['Value'].sort_index()
        ### Adding Class & ID index levels to distinguish requests after concatenation:
        ser_iter_dataset = pd.concat({str_services_class : pd.concat({iter_comm_id : ser_iter_dataset}, names = ['ID'])}, names = ['Class'])
        list_un_collection.append(ser_iter_dataset)
    ### Pause between requests:
    time.sleep(int_pause_long_sec)

In [35]:
### SERVICES DATA RESEARCH : 2010 - 2015 TEST

### Stacking all collected series & spreading Class / ID pairs to columns:
ser_test_id = pd.concat(list_un_collection)
df_test_id = ser_test_id.unstack(['Class', 'ID'])
### Number of observations for each Class / ID pair (rows) and Date (columns):
print('Number of values:')
ser_test_count = ser_test_id.groupby(['Class', 'ID', 'Date']).apply(len)
display(ser_test_count.unstack('Date'))

### Rows where all three of the listed Class / ID columns are missing:
ser_test_missing = df_test_id[('EB', str_am_id)].isna()
ser_test_missing = ser_test_missing & df_test_id[('EB', str_pm_id)].isna()
ser_test_missing = ser_test_missing & df_test_id[('EB10', str_pm_id)].isna()
df_test_id[ser_test_missing]

Number of values:


Unnamed: 0_level_0,Date,2010-12-31,2011-12-30,2012-12-31,2013-12-31,2014-12-31,2015-12-31
Class,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
EB,206.0,48.0,58.0,6.0,,,
EB,3.1,74.0,74.0,74.0,80.0,80.0,79.0
EB02,206.0,349.0,352.0,496.0,560.0,609.0,611.0
EB10,3.1,346.0,349.0,484.0,548.0,597.0,600.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Class,EB,EB02,EB,EB10
Unnamed: 0_level_1,Unnamed: 1_level_1,ID,206,206,3.1,3.1
Date,Reporter,Partner,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2


In [None]:
### SERVICES DATA RESEARCH : 2016 - 2022 TEST
### Export flows of two sample service codes are requested under each of the EB / EB02 / EB10 classifications.
### NOTE: the loop overwrites str_services_class and iter_comm_id, which are also used by the loading engine cells.

gc.collect()

str_am_id = '206' # '247'
str_pm_id = '3.1' # '9.1'

list_iter_periods = list(map(str, range(2016, 2023)))
### Container initialization:
list_un_collection = []

### Looping over (classification, service code) pairs:
for iter_tup in [('EB', str_am_id), ('EB02', str_am_id), ('EB10', str_am_id), ('EB', str_pm_id), ('EB02', str_pm_id), ('EB10', str_pm_id)]:
    str_services_class, iter_comm_id = iter_tup
    ### Export Data Requests:
    print(str_services_class, '/', iter_comm_id, '/', list_iter_periods, '/ Export: Loading through the API')
    ### Export of Services:
    df_iter_dataset = get_un_comtrade_data(str_services_type, str_freq, str_services_class, str_export_flow, list_un_reporters, list_un_partners,
                                           [iter_comm_id], list_iter_periods, str_primary_key)
    ### Adding dataset to container:
    if (df_iter_dataset is not None):
        ### Column selection already gives a Series; squeeze() is dropped since it turned a single observation into a scalar
        ser_iter_dataset = df_iter_dataset.set_index(['Date', 'Reporter', 'Partner'])['Value'].sort_index()
        ### Adding Class & ID index levels to distinguish requests after concatenation:
        ser_iter_dataset = pd.concat({str_services_class : pd.concat({iter_comm_id : ser_iter_dataset}, names = ['ID'])}, names = ['Class'])
        list_un_collection.append(ser_iter_dataset)
    ### Pause between requests:
    time.sleep(int_pause_long_sec)

In [41]:
### SERVICES DATA RESEARCH : 2016 - 2022 TEST

### Stacking all collected series & spreading Class / ID pairs to columns:
ser_test_id = pd.concat(list_un_collection)
df_test_id = ser_test_id.unstack(['Class', 'ID'])
### Number of observations for each Class / ID pair (rows) and Date (columns):
print('Number of values:')
ser_test_count = ser_test_id.groupby(['Class', 'ID', 'Date']).apply(len)
display(ser_test_count.unstack('Date'))

### Rows where both of the listed Class / ID columns are missing:
ser_test_missing = df_test_id[('EB', str_pm_id)].isna()
ser_test_missing = ser_test_missing & df_test_id[('EB10', str_pm_id)].isna()
df_test_id[ser_test_missing]

Number of values:


Unnamed: 0_level_0,Date,2016-12-30,2017-12-29,2018-12-31,2019-12-31,2020-12-31,2021-12-31
Class,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
EB,3.1,81,194,194,307,312,314
EB02,206.0,610,742,721,618,633,896
EB10,3.1,599,618,605,506,527,582


Unnamed: 0_level_0,Unnamed: 1_level_0,Class,EB02,EB,EB10
Unnamed: 0_level_1,Unnamed: 1_level_1,ID,206,3.1,3.1
Date,Reporter,Partner,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2


In [15]:
### SERVICES DATA RESEARCH : 2016 - 2022 TEST

### Stacking all collected series & spreading Class / ID pairs to columns:
ser_test_id = pd.concat(list_un_collection)
df_test_id = ser_test_id.unstack(['Class', 'ID'])
### Number of observations for each Class / ID pair (rows) and Date (columns):
print('Number of values:')
ser_test_count = ser_test_id.groupby(['Class', 'ID', 'Date']).apply(len)
display(ser_test_count.unstack('Date'))
### For each Class / ID column: rows where this column is the only one filled (number, share among the column's filled rows, rows themselves)
for str_iter_title, tup_iter_column in [('Number of unique EB02 | AM values:', ('EB02', '206')),
                                        ('Number of unique EB | PM values:', ('EB', '3.1')),
                                        ('Number of unique EB10 | PM values:', ('EB10', '3.1'))]:
    print(str_iter_title)
    ### Rows where the column is filled:
    df_iter_filled = df_test_id.dropna(subset = [tup_iter_column])
    ### Number of filled columns in each of these rows:
    ser_test_unique = df_iter_filled.notna().sum(axis = 1)
    ### Rows with a single filled column:
    idx_iter_single = ser_test_unique[ser_test_unique == 1].index
    print(len(idx_iter_single))
    print(len(idx_iter_single) / len(df_iter_filled))
    display(df_test_id.loc[idx_iter_single])

Number of values:


Unnamed: 0_level_0,Date,2016-12-30,2017-12-29,2018-12-31,2019-12-31,2020-12-31,2021-12-31
Class,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
EB,3.1,81,194,194,307,312,314
EB02,206.0,610,742,721,618,633,896
EB10,3.1,599,618,605,506,527,582


Number of unique EB02 | AM values:
0
0.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Class,EB02,EB,EB10
Unnamed: 0_level_1,Unnamed: 1_level_1,ID,206,3.1,3.1
Date,Reporter,Partner,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2


Number of unique EB | PM values:
619
0.44151212553495006


Unnamed: 0_level_0,Unnamed: 1_level_0,Class,EB02,EB,EB10
Unnamed: 0_level_1,Unnamed: 1_level_1,ID,206,3.1,3.1
Date,Reporter,Partner,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2016-12-30,US,CA,,331000.0,
2016-12-30,US,CH,,1249000.0,
2016-12-30,US,CL,,73000.0,
2016-12-30,US,CN,,1200000.0,
2016-12-30,US,CY,,52000.0,
...,...,...,...,...,...
2020-12-31,UA,CY,,17911.0,
2020-12-31,UA,DE,,19434.0,
2020-12-31,UA,GB,,12018.0,
2020-12-31,UA,TR,,42551.0,


Number of unique EB10 | PM values:
0
0.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Class,EB02,EB,EB10
Unnamed: 0_level_1,Unnamed: 1_level_1,ID,206,3.1,3.1
Date,Reporter,Partner,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2


In [18]:
### DATA CONCATENATION AND SAVING

#### Downloaded Data concatenation and indexation:
#df_full_dataset = pd.concat(list_un_collection, axis = 0, sort = False, ignore_index = True)
#del list_un_collection
#ser_full_dataset = df_full_dataset.set_index(['Date', 'Reporter', 'Partner', 'Flow', 'Type', 'Commodity_ID']).squeeze().sort_index()
#### Dataset saving (need to be replaced with SQL Request):
#ser_full_dataset.to_hdf(path_or_buf = str_path_unc_res_all_annual, key = str_key_unc_res, mode = 'w', format = 'table', complevel = 9)

In [None]:
### EXPORT AND REVERTED IMPORT CONCATENATION
### For each portion of countries reported Export flows are put side by side with the mirrored Import flows of their counterparties.

gc.collect()
### Flows container is rebuilt from scratch (need to be replaced with SQL Request):
if (os.path.exists(str_path_unc_res_flows)):
    os.remove(str_path_unc_res_flows)
### Results container:
list_export_aug = []
### Countries portion length:
int_portion = 5
list_unc_countries = sorted(dict_codelist['partnerCode'].unique())
### Looping over countries portions (stepping through the sorted list never gives an empty portion):
for iter_num, int_iter_start in enumerate(range(0, len(list_unc_countries), int_portion)):
    gc.collect()
    ### Portion of countries selecting (the where-clauses below refer to this variable by name):
    list_iter_countries = list_unc_countries[int_iter_start : int_iter_start + int_portion]
    print(list_iter_countries)
    ### Export data loading:
    ser_unc_export = pd.read_hdf(str_path_unc_res_all_annual, key = str_key_unc_res,
                                 where = "(Flow = 'Export') & (Reporter in list_iter_countries) & (Partner != 'World')")
    ser_unc_export = ser_unc_export.droplevel('Flow')
    print('Export dataset loaded')
    ### Import data loading:
    ser_unc_import = pd.read_hdf(str_path_unc_res_all_annual, key = str_key_unc_res,
                                 where = "(Flow = 'Import') & (Partner in list_iter_countries)")
    ser_unc_import = ser_unc_import.droplevel('Flow')
    print('Import dataset loaded')
    ### Import data reverting: levels 1 & 2 exchange their names and then their positions
    ser_unc_import.index = ser_unc_import.index.set_names(['Partner', 'Reporter'], level = [1, 2])
    ser_unc_import = ser_unc_import.swaplevel('Reporter', 'Partner').sort_index()
    print('Import dataset reverted')
    ### Datasets concatenation:
    df_export_aug = pd.concat([ser_unc_export, ser_unc_import], axis = 1, names = 'Source Flow', keys = ['Export', 'Import'])
    df_export_aug = df_export_aug.astype('float32')
    del ser_unc_export, ser_unc_import
    gc.collect()
    print('Export and reverted Import dataset concatenated')
    ### Aggregated dataset saving (need to be replaced with SQL Request):
    df_export_aug.to_hdf(str_path_unc_res_flows, key = str_key_unc_res, mode = 'a', format = 'table', complevel = 9, append = True)
    print('Aggregated dataset added to database')

In [None]:
### CIF COEFFICIENTS CALCULATION & IMPLEMENTATION

gc.collect()
### Result containers are rebuilt from scratch (need to be replaced with SQL Request):
for str_iter_path in [str_path_export_bilateral, str_path_import_bilateral]:
    if (os.path.exists(str_iter_path)):
        os.remove(str_iter_path)
### Getting full list of commodities met at the reference date (the where-clause refers to str_date by name):
str_date = '2020-12-31'
df_date_flows = pd.read_hdf(str_path_unc_res_flows, key = str_key_unc_res, where = "Date in str_date")
list_commodity_id = sorted(df_date_flows.index.get_level_values('Commodity_ID').unique())
del df_date_flows
### Bounds to filter bilateral Import to Export ratio before median calculation:
flo_lower_bound = 1.0
flo_upper_bound = 2.0
### Bilateral median calculation procedure:
def get_obs_median(df_comm, flo_lower = None, flo_upper = None):
    """
    Median of the Import to Export ratio over the observations of df_comm whose ratio lies within the bounds.

    Parameters:
        df_comm : DataFrame with 'Import' and 'Export' columns.
        flo_lower : lower bound of the accepted ratio (inclusive); flo_lower_bound of the notebook is used when omitted.
        flo_upper : upper bound of the accepted ratio (inclusive); flo_upper_bound of the notebook is used when omitted.

    Returns:
        Median of the accepted ratios; NaN when no ratio falls within the bounds.
    """
    ### Notebook level bounds are the defaults, so groupby().apply(get_obs_median) works as before:
    if flo_lower is None:
        flo_lower = flo_lower_bound
    if flo_upper is None:
        flo_upper = flo_upper_bound
    ### Import to Export ratio:
    ser_obs_coeff = df_comm['Import'] / df_comm['Export']
    ### Ratio filtering (missing and infinite ratios do not pass the comparison):
    ser_obs_coeff = ser_obs_coeff.loc[(ser_obs_coeff >= flo_lower) & (ser_obs_coeff <= flo_upper)]
    ### Filtered timeseries median as a result:
    return ser_obs_coeff.median()
### Calculation of CIF coefficient for each commodity:
### Missing Export values are filled with Import / coefficient and missing Import values with Export * coefficient.
for iter_commodity in list_commodity_id:
    gc.collect()
    ### Commodity flows loading (need to be replaced with SQL Request; the where-clause refers to iter_commodity by name):
    df_iter_flows = pd.read_hdf(str_path_unc_res_flows, key = str_key_unc_res, where = "Commodity_ID = iter_commodity")
    ### Bilateral Commodity CIF Median calculation:
    ser_cif_median = df_iter_flows.droplevel('Commodity_ID').groupby(['Reporter', 'Partner']).apply(get_obs_median)
    ### General Commodity Median calculation:
    flo_median = ser_cif_median.median()
    print(iter_commodity, ':', flo_median)
    ### Filling missed bilateral values with general commodity median:
    if not (np.isnan(flo_median)):
        ser_cif_median = ser_cif_median.fillna(flo_median)
    ser_cif_median.name = 'CIF_Coefficient'
    ### Adding CIF coefficients to dataset (joined by Reporter & Partner index levels):
    df_export_cif = df_iter_flows.merge(ser_cif_median, left_index = True, right_index = True)
    df_export_cif = df_export_cif.reorder_levels(['Date', 'Reporter', 'Partner', 'Type', 'Commodity_ID'])
    ### Import correction:
    df_export_cif['Import_Corrected'] = df_export_cif['Import'] / df_export_cif['CIF_Coefficient']
    ### Export correction:
    df_export_cif['Export_Corrected'] = df_export_cif['Export'] * df_export_cif['CIF_Coefficient']
    ### Combining Export & Import data:
    ser_export_cif = df_export_cif['Export'].combine_first(df_export_cif['Import_Corrected']).astype('float32')
    ser_import_cif = df_export_cif['Import'].combine_first(df_export_cif['Export_Corrected']).astype('float32')
    gc.collect()
    ### Import data reverting back to original order (levels exchange their positions and then their names):
    ser_import_cif = ser_import_cif.reorder_levels(['Date', 'Partner', 'Reporter', 'Type', 'Commodity_ID']).sort_index()
    ser_import_cif.index.names = ['Date', 'Reporter', 'Partner', 'Type', 'Commodity_ID']
    gc.collect()
    ### Augmented flows saving (need to be replaced with SQL Request):
    ### Both objects are Series already; squeeze() is dropped since it turned a single observation into a scalar without to_hdf()
    ser_export_cif.to_hdf(str_path_export_bilateral, key = str_key_unc_export, mode = 'a', format = 'table', complevel = 9, append = True,
                          min_itemsize = {'Type': 8, 'Commodity_ID': 3})
    ser_import_cif.to_hdf(str_path_import_bilateral, key = str_key_unc_import, mode = 'a', format = 'table', complevel = 9, append = True,
                          min_itemsize = {'Type': 8, 'Commodity_ID': 3})