In [47]:
### IMF CDIS: BILATERAL EQUITY & DEBT INVESTMENT POSITIONS

In [48]:
### RUN EVERY TIME: INITIALIZATION

import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', -1) ### To display long strings
import math
import requests
import json ### To correct JSON structure before unpacking
import gc
import os
import datetime
import time
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import seaborn as sns
#%load_ext line_profiler

In [49]:
### RUN EVERY TIME: VERSION CONTROL

from platform import python_version
### Reporting the library and interpreter versions the notebook is executed with:
for str_version_label, str_version_value in (('pandas version: ', pd.__version__),
                                             ('python version: ', python_version())):
    print(str_version_label, str_version_value)

pandas version:  0.25.3
python version:  3.7.4


In [50]:
### RUN EVERY TIME: MAIN CONSTANTS

### MultiIndex level slice constant (selects every value of a level inside .loc[] tuples):
All = slice(None)
### Universe path (MS Excel file read by ison_membership_converting):
str_path_universe = 'Data_Files/Source_Files/acadian_universe.xlsx'
### IMF CDIS datasets: one HDF5 file per dataset family, one key per stored object.
### Naming of keys: do/di - presumably direct outward / direct inward (matches the 'outward' / 'inward' key text):
str_path_imf_cdis_dataset = 'Data_Files/Source_Files/cdis_assets.h5'
str_key_do_total_imf_cdis_dataset = 'cdis_total_outward_assets'
str_key_di_total_imf_cdis_dataset = 'cdis_total_inward_assets'
str_key_do_debt_imf_cdis_dataset = 'cdis_debt_outward_assets'
str_key_di_debt_imf_cdis_dataset = 'cdis_debt_inward_assets'
str_key_do_equity_imf_cdis_dataset = 'cdis_equity_outward_assets'
str_key_di_equity_imf_cdis_dataset = 'cdis_equity_inward_assets'
### IMF CDIS augmented datasets (outward only):
str_path_imf_cdis_augmented = 'Data_Files/Source_Files/cdis_augmented_assets.h5'
str_key_do_total_imf_cdis_augmented = 'cdis_total_outward_augmented_assets'
str_key_do_debt_imf_cdis_augmented = 'cdis_debt_outward_augmented_assets'
str_key_do_equity_imf_cdis_augmented = 'cdis_equity_outward_augmented_assets'
### IMF CDIS options datasets:
str_path_imf_cdis_options = 'Data_Files/Source_Files/cdis_options_assets.h5'
str_key_total_imf_cdis_options = 'cdis_total_outward_options_assets'
str_key_debt_imf_cdis_options = 'cdis_debt_outward_options_assets'
str_key_equity_imf_cdis_options = 'cdis_equity_outward_options_assets'
### OECD FDI datasets:
str_path_oecd_fdi_augmented = 'Data_Files/Source_Files/oecd_augmented_assets.h5'
str_key_do_total_oecd_fdi_augmented = 'fdi_total_outward_augmented_assets'
str_key_do_equity_oecd_fdi_augmented = 'fdi_equity_outward_augmented_assets'
### OECD FDI options datasets (read below for the comparison with IMF CDIS data):
str_path_oecd_fdi_options = 'Data_Files/Source_Files/oecd_options_assets.h5'
str_key_total_oecd_fdi_options = 'fdi_total_outward_options_assets'
str_key_equity_oecd_fdi_options = 'fdi_equity_outward_options_assets'
### Combined datasets:
str_path_direct_total_augmented = 'Data_Files/Source_Files/direct_total_augmented_assets.h5'
str_path_direct_equity_augmented = 'Data_Files/Source_Files/direct_equity_augmented_assets.h5'
str_key_direct_augmented = 'direct_augmented_assets'
str_path_total_direct_options = 'Data_Files/Source_Files/direct_total_options_assets.h5'
str_key_total_direct_options = 'direct_total_options_assets'
str_path_equity_direct_options = 'Data_Files/Source_Files/direct_equity_options_assets.h5'
str_key_equity_direct_options = 'direct_equity_options_assets'
### Technical Constants:
### Last date of the universe history (string form is also used for .loc[] lookups):
str_date_end = '2022-10-31'
date_start = pd.Timestamp('1989-12-29')
date_end = pd.Timestamp(str_date_end)
### NOTE(review): presumably the first date of ISON membership history - confirm against the universe file:
date_ison = pd.Timestamp('1994-12-31')

In [51]:
### DEFINING COUNTRY CODES EXTRACTOR

def get_country_codes(use_local_copy = False):
    """
    Build the table of ISO country codes indexed by upper-cased country name.

    Parameters:
        use_local_copy: when truthy, the saved HTML page
            'Data_Files/Source_Files/countrycode.html' is parsed instead of
            'https://countrycode.org/' (for the case when the URL is unavailable).

    Returns:
        DataFrame with columns 'ISO SHORT' and 'ISO LONG', sorted by country name
        (sorting is done before the names are upper-cased).
    """
    ### Source choice: local copy or online page:
    str_codes_source = 'Data_Files/Source_Files/countrycode.html' if use_local_copy else 'https://countrycode.org/'
    ### First table of the page, indexed by the 'COUNTRY' column:
    df_codes_table = pd.read_html(str_codes_source, index_col = 'COUNTRY')[0]
    ### Column 'ISO CODES' holds both codes separated by ' / ':
    df_iso_pair = df_codes_table['ISO CODES'].str.split(' / ', expand = True)
    df_iso_pair.columns = ['ISO SHORT', 'ISO LONG']
    df_iso_pair = df_iso_pair.sort_index()
    df_iso_pair.index = df_iso_pair.index.str.upper()
    ### Results output:
    return df_iso_pair

In [52]:
### DEFINING EXTRACTION UNIVERSE DATA FROM MS EXCEL SOURCE (TO BE IGNORED IN PRODUCT CODE)

def ison_membership_converting(str_path_universe, date_end, bool_daily = False, int_backfill_months = 0):
    """
    Load universe membership history from the MS Excel source and convert it
    to a series of market labels with a regular date grid.

    Parameters:
        str_path_universe: path to the Excel file; sheet 'Switchers' is read with
            its first two columns as index (renamed to 'Country' and 'Date') and
            the 'Region' column as values.
        date_end: last date of the business-month-end grid each country is extended to.
        bool_daily: when truthy, the result is additionally resampled to business days
            with forward filling inside each country.
        int_backfill_months: when non-zero, for each market label the countries carrying it
            on the first date that label appears get it for this number of preceding
            business month ends (only where no value exists yet).

    Returns:
        Series named 'Market' indexed by ('Date', 'Country'), sorted by index, with values
        'DM' / 'EM' / 'FM' or NaN (codes are translated through dict_markets).
    """
    ### Defining business-month-end reindexation on country level:
    def country_modify(ser_raw_country, date_end):
        ### Country level is dropped, then the last value of each month is placed on the business month end:
        ser_res_country = ser_raw_country.droplevel(0).resample('MS').last().resample('BM').last()
        ### Grid from the first country date to date_end; gaps and the tail are forward filled:
        range_country = pd.date_range(ser_res_country.index[0], date_end, freq = 'BM')
        return ser_res_country.reindex(range_country).ffill()
    ### Markets encoding table (0 is the filler for missing source values, see fillna below):
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM', 0: np.NaN}     
    ### Loading source file:
    ### Default NA strings are switched off and an explicit list is given; the list has no 'NA' entry -
    ### presumably to keep the 'NA' country code from being read as missing (verify against the source file):
    df_raw_universe = pd.read_excel(engine = 'openpyxl', io = str_path_universe, sheet_name = 'Switchers', header = 0, parse_dates = True, index_col = [0, 1],
                                 na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', 
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    ### Converting source file:
    df_raw_universe.index.names = ['Country', 'Date']
    ser_raw_universe = df_raw_universe['Region']
    ### Missing values become 0, so they survive resampling / forward filling as a separate state and turn into NaN on translation:
    ser_raw_universe.fillna(0, inplace = True)
    ser_raw_universe.name = 'Market'
    ### By country reindexation and translation:
    ser_res_universe = ser_raw_universe.groupby('Country').apply(country_modify, date_end)
    ser_res_universe.index.names = ['Country', 'Date']
    ### Codes are translated to labels, levels are reordered to ('Date', 'Country'):
    ser_res_universe = ser_res_universe.replace(dict_markets).reorder_levels([1, 0]).sort_index() 
    ### Expanding membership for primary regions members by backfilling:
    if int_backfill_months:
        ### List of regions:
        list_region = list(ser_res_universe.dropna().unique())
        ### Initialising of collection of series with backfilled data for each region:
        list_ison_backfill = []
        ### Regions looping:
        for iter_region in list_region:
            ### Defining start of region date (first element of the first index tuple, i.e. the 'Date' level):
            date_first_valid = ser_res_universe.loc[ser_res_universe == iter_region].first_valid_index()[0]
            ### Creating dates index to backfilling (business month ends before the start date, the start date itself is cut off):
            idx_date_backfill = pd.date_range(end = date_first_valid, periods = int_backfill_months + 1, freq = 'BM')[: -1]
            ### Creating primary countries index to backfilling (countries of the region on its start date; All is the module-level slice(None)):
            idx_region_backfill = ser_res_universe.loc[ser_res_universe == iter_region].loc[date_first_valid, All].index.get_level_values('Country')
            ### Creating full index:
            idx_ison_backfill = pd.MultiIndex.from_product([idx_date_backfill, idx_region_backfill])
            ### Series with backfilled data:
            list_ison_backfill.append(pd.Series(iter_region, index = idx_ison_backfill))
        ### Combination of backfilled series and original ISON data (original values have priority in combine_first):
        ser_res_universe = ser_res_universe.combine_first(pd.concat(list_ison_backfill, axis = 0)).sort_index()  
        ser_res_universe.index.names = ['Date', 'Country']
    ### Converting to daily frequency:
    if bool_daily:
        ### Business-day forward filling inside each country, then levels are swapped back to ('Date', 'Country'):
        ser_res_universe = ser_res_universe.reset_index('Country').groupby('Country').resample('B').ffill()['Market'].swaplevel().sort_index()    
    ### Results output:
    ser_res_universe.name = 'Market'
    return ser_res_universe

In [53]:
### RUN EVERY TIME: COMMON DATA EXTRACTION STEPS

### World Country Codes table:
df_country_codes = get_country_codes()
### ISON membership history up to the end date:
ser_ison_membership = ison_membership_converting(str_path_universe, pd.to_datetime(str_date_end))
### ISON LONG IDs list: long codes of the countries whose short code is in the membership history:
idx_ison_short = ser_ison_membership.index.get_level_values('Country').unique()
ser_is_ison_country = df_country_codes['ISO SHORT'].isin(idx_ison_short)
list_ison_long = df_country_codes.loc[ser_is_ison_country, 'ISO LONG'].values.tolist()
### ISON current status: membership on the end date:
ser_ison_status = ser_ison_membership.loc[str_date_end].droplevel('Date')
### ISON stats: size of the full universe and of each market:
int_ison_number = len(list_ison_long)
list_regions = ['DM', 'EM', 'FM']
dict_ison_len = {'Full Universe': int_ison_number}
for iter_region in list_regions:
    dict_ison_len[iter_region] = int((ser_ison_status == iter_region).sum())
ser_market_len = pd.Series(dict_ison_len).rename_axis('Market')

In [54]:
### IMF CDIS: GENERAL DATA PREPARATION

### Constants:
### MultiIndex level slice constant (same value as in the main constants cell):
All = slice(None)
### Browser-like User-Agent header attached to every request of the session:
dict_request_headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
### IMF SDMX JSON service root and the method suffixes appended to it:
str_imf_base_url = 'http://dataservices.imf.org/REST/SDMX_JSON.svc/'
str_imf_dataflow_add = 'DataFlow'
str_imf_datastructure_add = 'DataStructure/'
str_imf_codelist_add = 'CodeList/'
str_imf_dataset_add = 'CompactData/'
### Pause between consecutive requests, seconds:
int_seconds_to_sleep = 1
### NOTE(review): not referenced in the visible part of the notebook; presumably the maximum number of countries per request - confirm:
int_imf_country_limit = 30

In [55]:
### IMF CDIS: REQUESTS SESSION INITIALIZING

### One session is shared by all IMF requests below:
request_session = requests.Session()
### For avoiding data request errors from IMF Data Service (the User-Agent header is sent with every request):
request_session.headers.update(dict_request_headers)

In [56]:
### IMF CDIS: DATAFLOW SEARCHING

### Full list of dataflows published by the service:
str_imf_dataflow_url = str_imf_base_url + str_imf_dataflow_add
obj_imf_dataflow_list = request_session.get(str_imf_dataflow_url).json()
df_imf_dataflow = pd.DataFrame(obj_imf_dataflow_list['Structure']['Dataflows']['Dataflow'])
### Text description is nested inside the 'Name' field under the '#text' key:
arr_dataflow_text = df_imf_dataflow['Name'].apply(pd.Series)['#text'].values
df_imf_dataflow = df_imf_dataflow[['@id']].assign(Description = arr_dataflow_text)
ser_imf_dataflow = df_imf_dataflow.set_index('@id', drop = True).squeeze()
### Searching DataFlow code for further requests (first dataflow with 'CDIS' in description, 'DS-' prefix removed):
ser_is_cdis_flow = ser_imf_dataflow.str.contains('CDIS')
str_imf_cdis_id = ser_imf_dataflow[ser_is_cdis_flow].index[0].replace('DS-', '')
print(str_imf_cdis_id)

CDIS


In [57]:
### IMF CDIS: DATASTRUCTURE SEARCHING

### Structure description of the CDIS dataflow:
str_imf_cdis_structure_url = str_imf_base_url + str_imf_datastructure_add + str_imf_cdis_id
obj_imf_cdis_structure = request_session.get(str_imf_cdis_structure_url).json()
list_cdis_dimensions = obj_imf_cdis_structure['Structure']['KeyFamilies']['KeyFamily']['Components']['Dimension']
### Receiving DataFlow parameters and code lists for each of them:
list_cdis_param_columns = ['@conceptRef', '@codelist', '@isFrequencyDimension']
df_imf_cdis_params = pd.DataFrame(list_cdis_dimensions)[list_cdis_param_columns]
print(df_imf_cdis_params)

        @conceptRef          @codelist @isFrequencyDimension
0  FREQ              CL_FREQ            true                
1  REF_AREA          CL_AREA_CDIS       NaN                 
2  INDICATOR         CL_INDICATOR_CDIS  NaN                 
3  COUNTERPART_AREA  CL_AREA_CDIS       NaN                 


In [58]:
### IMF CDIS: CODES DESCRIPTIONS LOADING

### Code list of the INDICATOR dimension is selected by concept name instead of by row position,
### so a changed order of dimensions in the service response can not lead to loading a wrong list:
ser_indicator_codelist = df_imf_cdis_params.loc[df_imf_cdis_params['@conceptRef'] == 'INDICATOR', '@codelist']
if ser_indicator_codelist.empty:
    raise ValueError('INDICATOR dimension is not found in the CDIS data structure')
str_param_code = ser_indicator_codelist.iloc[0]
### Pause before the next request to the service:
time.sleep(int_seconds_to_sleep)
obj_imf_cdis_param = request_session.get(str_imf_base_url + str_imf_codelist_add + str_param_code).json()
df_imf_cdis_param = pd.DataFrame(obj_imf_cdis_param['Structure']['CodeLists']['CodeList']['Code'])
### Receiving values for the code list (text is nested inside the 'Description' field under the '#text' key):
df_imf_cdis_param = df_imf_cdis_param.assign(Text = df_imf_cdis_param['Description'].apply(pd.Series)['#text'].values)[['@value', 'Text']]
### Indicator code -> indicator text description:
dict_indicator = dict(zip(df_imf_cdis_param['@value'], df_imf_cdis_param['Text']))

### Sorted short codes of all countries of the membership history:
list_ison_countries = sorted(map(str, ser_ison_membership.index.get_level_values('Country').unique()))
### Frequency code for data requests:
str_cdis_freq = 'A' # 'B' # 

In [None]:
### TEMP

### Displaying the full mapping of indicator codes to their text descriptions:
dict_indicator

In [14]:
### TEMP

### Indicators with 'O' as the second code letter, with '_USD' code ending and without ', Derived' in description:
dict_to_download = {
    str_indicator_code: str_indicator_text
    for str_indicator_code, str_indicator_text in dict_indicator.items()
    if (str_indicator_code[1] == 'O') and str_indicator_code.endswith('_USD') and (', Derived' not in str_indicator_text)
}
dict_to_download
#pd.Series(dict_indicator).to_excel('Data_Files/Test_Files/IMF_CDIS_Indicators.xlsx')

{'IOWDA_BP6_USD': 'Outward Debt Instruments Assets Positions (Gross), US Dollars',
 'IOWDL_BP6_USD': 'Outward Debt Instruments Liabilities Positions (Gross), US Dollars',
 'IOWD_BP6_USD': 'Outward Debt Instruments Positions (Net), US Dollars',
 'IOWDN_BP6_USD': 'Outward Debt Positions (Net): Resident Enterprises that are not Financial Intermediaries, US Dollars',
 'IOWDM_BP6_USD': 'Outward Debt Positions (Net): Resident Financial Intermediaries, US Dollars',
 'IOWFA_BP6_USD': 'Outward Direct Investment Assets Positions (Gross) with Fellow Enterprises, US Dollars',
 'IOWFL_BP6_USD': 'Outward Direct Investment Liabilities Positions (Gross) with Fellow Enterprises, US Dollars',
 'IOWF_BP6_USD': 'Outward Direct Investment Positions (Net) with Fellow Enterprises, US Dollars',
 'IOW_BP6_USD': 'Outward Direct Investment Positions, US Dollars',
 'IOWE_BP6_USD': 'Outward Equity Positions (Net), US Dollars'}

In [61]:
### IMF CDIS: REPORTED DIRECT INVESTMENT INDICATORS COMPARISON

gc.collect()
### Extracting needed part of indicators: 'O' as the second code letter, '_USD' code ending, no ', Derived' in description:
dict_to_download = {iter_key: dict_indicator[iter_key] for iter_key in dict_indicator \
                    if ((iter_key[1] == 'O') and iter_key.endswith('_USD') and (', Derived' not in dict_indicator[iter_key]))}
### List of bilateral dataframes for future concatenation:
list_cdis_bilateral = []
### Beginning of request URL:
str_cdis_const_url = str_imf_base_url + str_imf_dataset_add + str_imf_cdis_id + '/'
### Looping over reporter (three reporters are enough for the comparison; list_ison_countries gives the full universe):
for iter_investor in ['GB', 'JP', 'US']:
    ### Looping over indicator:
    for iter_indicator in dict_to_download:
        ### Request key is FREQ.REF_AREA.INDICATOR.COUNTERPART_AREA: 'O' indicators are requested by reporter, others by counterpart:
        if (iter_indicator[1] == 'O'):
            str_cdis_full_url = str_cdis_const_url + '.'.join([str_cdis_freq, iter_investor, iter_indicator, ''])
        else:
            str_cdis_full_url = str_cdis_const_url + '.'.join([str_cdis_freq, '', iter_indicator, iter_investor])
        obj_cdis_set = request_session.get(str_cdis_full_url)
        ### Failed request should stop loading with HTTP error instead of an unclear JSON parsing error:
        obj_cdis_set.raise_for_status()
        ### Data reading as JSON (status key is renamed, so observations having only a status flag get it as their value):
        dict_cdis_set = json.loads(obj_cdis_set.text.replace('@OBS_STATUS', '@OBS_VALUE'))
        dict_cdis_data = dict_cdis_set['CompactData']['DataSet']
        ### Converting each bilateral dataset to dataframe and munging it:
        if ('Series' in dict_cdis_data):
            ### Single series comes as a dictionary, several ones - as a list:
            list_series = dict_cdis_data['Series']
            if not isinstance(list_series, list):
                list_series = [list_series]
            for dict_cdis_pair in list_series:
                ### Series without observations has nothing to add:
                if ('Obs' not in dict_cdis_pair):
                    continue
                ### Single observation comes as a dictionary, several ones - as a list:
                list_bilateral = dict_cdis_pair['Obs']
                if not isinstance(list_bilateral, list):
                    list_bilateral = [list_bilateral]
                df_cdis_bilateral = pd.DataFrame(list_bilateral)
                if '@OBS_VALUE' in df_cdis_bilateral.columns:
                    df_cdis_bilateral = df_cdis_bilateral[['@TIME_PERIOD', '@OBS_VALUE']]\
                                            .rename(columns = {'@TIME_PERIOD': 'Date', '@OBS_VALUE': 'Value'})
                    df_cdis_bilateral = df_cdis_bilateral.assign(Indicator = dict_cdis_pair['@INDICATOR'])
                    df_cdis_bilateral = df_cdis_bilateral.assign(Reporter_ID = dict_cdis_pair['@REF_AREA'])
                    df_cdis_bilateral = df_cdis_bilateral.assign(Partner_ID = dict_cdis_pair['@COUNTERPART_AREA'])
                    list_cdis_bilateral.append(df_cdis_bilateral)
        else:
            print('No data in response of the next request:\n', str_cdis_full_url)
        time.sleep(int_seconds_to_sleep)
    print(iter_investor, ': loading completed')
### Bilateral datasets aggregating:
df_cdis_raw = pd.concat(list_cdis_bilateral, axis = 0, ignore_index = True, sort = False)
### Period labels are moved to the last business day of the year:
df_cdis_raw['Date'] = pd.to_datetime(df_cdis_raw['Date']) + pd.offsets.BYearEnd()
### Non-numeric flags 'C' and '-' are treated as missing values:
df_cdis_raw.loc[df_cdis_raw['Value'].isin(['C', '-']), 'Value'] = np.nan
### Dropping self-reported pairs and partners that are not in the country codes table:
df_cdis_raw = df_cdis_raw[df_cdis_raw['Reporter_ID'] != df_cdis_raw['Partner_ID']]
df_cdis_raw = df_cdis_raw[df_cdis_raw['Partner_ID'].isin(df_country_codes['ISO SHORT'].values)]
print('Unique partners number:', len(df_cdis_raw['Partner_ID'].unique()))
df_cdis_raw = df_cdis_raw.rename({'Reporter_ID': 'Reporter', 'Partner_ID': 'Partner'}, axis = 1)
df_cdis_raw = df_cdis_raw.astype({'Indicator': 'str', 'Reporter': 'str', 'Partner': 'str',
                                  'Value': 'float32'})

No data in response of the next request:
 http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/CDIS/A.GB.IOWDN_BP6_USD.
No data in response of the next request:
 http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/CDIS/A.GB.IOWDM_BP6_USD.
GB : loading completed
No data in response of the next request:
 http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/CDIS/A.JP.IOWDN_BP6_USD.
No data in response of the next request:
 http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/CDIS/A.JP.IOWDM_BP6_USD.
No data in response of the next request:
 http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/CDIS/A.JP.IOWFA_BP6_USD.
No data in response of the next request:
 http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/CDIS/A.JP.IOWFL_BP6_USD.
No data in response of the next request:
 http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/CDIS/A.JP.IOWF_BP6_USD.
JP : loading completed
No data in response of the next request:
 http://dataservices.imf.org/RES

In [63]:
### IMF CDIS: INDICATORS TEST

### Translating indicator codes to their text descriptions (the column of the loaded frame is overwritten):
df_cdis_raw['Indicator'] = df_cdis_raw['Indicator'].replace(dict_indicator)
### Values indexed by indicator, date, reporter and partner (squeeze turns the single remaining column into a series):
ser_cdis_raw = df_cdis_raw.set_index(['Indicator', 'Date', 'Reporter', 'Partner']).squeeze()
### All indicators for reporter US and partner JP on 2020-12-31:
ser_cdis_raw[:, '2020-12-31', 'US', 'JP']

Indicator
Outward Debt Instruments Assets Positions (Gross), US Dollars                                           10493.0 
Outward Debt Instruments Liabilities Positions (Gross), US Dollars                                      42040.0 
Outward Debt Instruments Positions (Net), US Dollars                                                   -31547.0 
Outward Debt Positions (Net): Resident Enterprises that are not Financial Intermediaries, US Dollars   NaN      
Outward Debt Positions (Net): Resident Financial Intermediaries, US Dollars                            NaN      
Outward Direct Investment Positions, US Dollars                                                         121809.0
Outward Equity Positions (Net), US Dollars                                                              153356.0
Name: Value, dtype: float32

In [64]:
### IMF CDIS: INDICATORS TEST: OECD FDI LOADING

### Key of the observation to check: date, then two country codes:
tup_oecd_check_key = ('2020-12-31', 'GB', 'US')
### 'Assets_Only' column of total and equity OECD FDI options datasets:
ser_total_oecd_assets = pd.read_hdf(str_path_oecd_fdi_options, key = str_key_total_oecd_fdi_options)['Assets_Only']
ser_equity_oecd_assets = pd.read_hdf(str_path_oecd_fdi_options, key = str_key_equity_oecd_fdi_options)['Assets_Only']
### Showing the same observation from both series:
for ser_oecd_to_check in (ser_total_oecd_assets, ser_equity_oecd_assets):
    display(ser_oecd_to_check[tup_oecd_check_key])

633428.1

562428.06

In [None]:
### IMF CDIS: REPORTED DIRECT INVESTMENT NET EQUITY RESEARCH

gc.collect()
### Extracting needed part of indicators:
dict_to_download = {iter_key: dict_indicator[iter_key] for iter_key in ('IOWDA_BP6_USD', 'IOWE_BP6_USD')}
### List of bilateral dataframes for future concatenation:
list_cdis_bilateral = []
### Beginning of request URL:
str_cdis_const_url = str_imf_base_url + str_imf_dataset_add + str_imf_cdis_id + '/'
### Looping over reporter:
for iter_investor in list_ison_countries:
    ### Looping over indicator:
    for iter_indicator in dict_to_download:
        ### Request key is FREQ.REF_AREA.INDICATOR.COUNTERPART_AREA: 'O' indicators are requested by reporter, others by counterpart:
        if (iter_indicator[1] == 'O'):
            str_cdis_full_url = str_cdis_const_url + '.'.join([str_cdis_freq, iter_investor, iter_indicator, ''])
        else:
            str_cdis_full_url = str_cdis_const_url + '.'.join([str_cdis_freq, '', iter_indicator, iter_investor])
        obj_cdis_set = request_session.get(str_cdis_full_url)
        ### Failed request should stop loading with HTTP error instead of an unclear JSON parsing error:
        obj_cdis_set.raise_for_status()
        ### Data reading as JSON (status key is renamed, so observations having only a status flag get it as their value):
        dict_cdis_set = json.loads(obj_cdis_set.text.replace('@OBS_STATUS', '@OBS_VALUE'))
        dict_cdis_data = dict_cdis_set['CompactData']['DataSet']
        ### Converting each bilateral dataset to dataframe and munging it:
        if ('Series' in dict_cdis_data):
            ### Single series comes as a dictionary, several ones - as a list:
            list_series = dict_cdis_data['Series']
            if not isinstance(list_series, list):
                list_series = [list_series]
            for dict_cdis_pair in list_series:
                ### Series without observations has nothing to add:
                if ('Obs' not in dict_cdis_pair):
                    continue
                ### Single observation comes as a dictionary, several ones - as a list:
                list_bilateral = dict_cdis_pair['Obs']
                if not isinstance(list_bilateral, list):
                    list_bilateral = [list_bilateral]
                df_cdis_bilateral = pd.DataFrame(list_bilateral)
                if '@OBS_VALUE' in df_cdis_bilateral.columns:
                    df_cdis_bilateral = df_cdis_bilateral[['@TIME_PERIOD', '@OBS_VALUE']]\
                                            .rename(columns = {'@TIME_PERIOD': 'Date', '@OBS_VALUE': 'Value'})
                    df_cdis_bilateral = df_cdis_bilateral.assign(Indicator = dict_cdis_pair['@INDICATOR'])
                    df_cdis_bilateral = df_cdis_bilateral.assign(Reporter_ID = dict_cdis_pair['@REF_AREA'])
                    df_cdis_bilateral = df_cdis_bilateral.assign(Partner_ID = dict_cdis_pair['@COUNTERPART_AREA'])
                    list_cdis_bilateral.append(df_cdis_bilateral)
        else:
            print('No data in response of the next request:\n', str_cdis_full_url)
        time.sleep(int_seconds_to_sleep)
    print(iter_investor, ': loading completed')
### Bilateral datasets aggregating:
df_cdis_raw = pd.concat(list_cdis_bilateral, axis = 0, ignore_index = True, sort = False)
### Period labels are moved to the last business day of the year:
df_cdis_raw['Date'] = pd.to_datetime(df_cdis_raw['Date']) + pd.offsets.BYearEnd()
### Non-numeric flags 'C' and '-' are treated as missing values:
df_cdis_raw.loc[df_cdis_raw['Value'].isin(['C', '-']), 'Value'] = np.nan
### Dropping self-reported pairs and partners that are not in the country codes table:
df_cdis_raw = df_cdis_raw[df_cdis_raw['Reporter_ID'] != df_cdis_raw['Partner_ID']]
df_cdis_raw = df_cdis_raw[df_cdis_raw['Partner_ID'].isin(df_country_codes['ISO SHORT'].values)]
print('Unique partners number:', len(df_cdis_raw['Partner_ID'].unique()))
df_cdis_raw = df_cdis_raw.rename({'Reporter_ID': 'Reporter', 'Partner_ID': 'Partner'}, axis = 1)
df_cdis_raw = df_cdis_raw.astype({'Indicator': 'str', 'Reporter': 'str', 'Partner': 'str',
                                  'Value': 'float32'})
### Explicit column assignment: in-place methods called on a selected column are not guaranteed to change the frame:
### Negative values are replaced with zero:
df_cdis_raw['Value'] = df_cdis_raw['Value'].clip(lower = 0.0)
### Indicator codes are replaced with their text descriptions:
df_cdis_raw['Indicator'] = df_cdis_raw['Indicator'].replace(dict_to_download)
### First word of description is the direction, second one is the type ('Direct' is renamed to 'Total'):
df_cdis_raw['Direction'] = df_cdis_raw['Indicator'].str.partition(' ')[0]
df_cdis_raw['Type'] = df_cdis_raw['Indicator'].str.partition(' ')[2].str.partition(' ')[0].replace({'Direct': 'Total'})
### Data saving:
ser_cdis_parts = df_cdis_raw.set_index(['Type', 'Direction', 'Date', 'Reporter', 'Partner'])['Value'].sort_index()
### Total is the sum of all loaded types for each direction, date and pair:
ser_cdis_total = ser_cdis_parts.groupby(['Direction', 'Date', 'Reporter', 'Partner']).sum()
ser_cdis_total = pd.concat({'Total': ser_cdis_total}, names = ['Type'])
ser_cdis_full = pd.concat([ser_cdis_parts, ser_cdis_total]).sort_index()
del df_cdis_raw

In [31]:
### IMF CDIS: INDICATORS TEST

### IMF CDIS 'Equity' part without the direction level:
ser_cdis_equity = ser_cdis_full['Equity'].droplevel('Direction').rename('IMF_Net')
### 'Assets_Only' column of the OECD FDI equity options dataset:
obj_oecd_equity_options = pd.read_hdf(path_or_buf = str_path_oecd_fdi_options, key = str_key_equity_oecd_fdi_options)
ser_oecd_equity = obj_oecd_equity_options['Assets_Only'].rename('OECD_Asset')
### Side by side table; rows with a zero or missing value in any column are dropped:
df_equity_compare = pd.concat([ser_cdis_equity, ser_oecd_equity], axis = 1)
df_equity_compare = df_equity_compare.replace({0.0: np.NaN}).dropna().sort_index()

In [46]:
### TEMP

### Ratio of the absolute sum of both values to the OECD value:
ser_equity_both = df_equity_compare['IMF_Net'] + df_equity_compare['OECD_Asset']
df_equity_compare['Ratio'] = ser_equity_both.abs() / df_equity_compare['OECD_Asset']
### Five observations with the largest ratio, then five with the smallest one:
for idx_ratio_extreme in (df_equity_compare['Ratio'].nlargest(5).index, df_equity_compare['Ratio'].nsmallest(5).index):
    display(df_equity_compare.loc[idx_ratio_extreme])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,IMF_Net,OECD_Asset,Ratio
Date,Reporter,Partner,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-12-31,NO,PK,2165.953857,1.81,1197.659668
2019-12-31,TR,MG,13.0,0.05,261.0
2019-12-31,TR,MD,1765.0,16.49,108.034569
2019-12-31,TR,MK,17504.0,210.889999,84.000618
2020-12-31,TR,MD,1956.0,24.200001,81.826439


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,IMF_Net,OECD_Asset,Ratio
Date,Reporter,Partner,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-12-31,KR,,1.2e-05,0.68,1.000018
2013-12-31,KR,YE,0.396095,302.040009,1.001311
2019-12-31,TR,NL,57.0,17504.220703,1.003256
2021-12-31,TR,NL,67.0,20308.279297,1.003299
2020-12-31,TR,NL,151.0,19285.730469,1.00783


In [45]:
### IMF CDIS: INDICATORS TEST

### Sums over all dates for each pair of reporter and partner:
df_equity_sum = df_equity_compare.groupby(['Reporter', 'Partner']).sum()
### Ratio is recalculated from the summed values (summed observation ratios are overwritten):
ser_equity_sum_both = df_equity_sum['IMF_Net'] + df_equity_sum['OECD_Asset']
df_equity_sum['Ratio'] = ser_equity_sum_both.abs().div(df_equity_sum['OECD_Asset'])
### Median ratio across partners for each reporter:
ser_equity_ratio_median = df_equity_sum['Ratio'].groupby('Reporter').median()
display(ser_equity_ratio_median)

Reporter
BE    1.973411
CA    1.995688
CH    2.000035
CR    2.000005
CZ    1.996521
DE    1.998893
DK    1.990427
EE    1.993750
ES    1.993533
FI    1.995604
FR    1.993535
GB    1.987922
GR    1.993202
HU    1.990750
IE    1.992307
IS    1.997585
IT    1.994282
KR    2.000018
LT    1.895999
LV    1.992681
NL    1.994210
NO    1.996090
PL    1.991884
SE    1.993668
SI    1.994673
SK    1.994451
TR    2.000109
US    2.000000
Name: Ratio, dtype: float32