In [7]:
### OECD FDI: FOREIGN DIRECT INVESTMENT

In [8]:
### RUN EVERY TIME: INITIALIZATION

import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', -1) ### To display long strings
import math
import requests
import json ### To correct JSON structure before unpacking
import xml.etree.ElementTree as et
import gc
import os
import datetime
import time
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import seaborn as sns
#%load_ext line_profiler

In [9]:
### RUN EVERY TIME: VERSION CONTROL

from platform import python_version
### Reporting the library and interpreter versions the notebook is executed with:
for str_version_label, str_version_value in (('pandas version: ', pd.__version__), ('python version: ', python_version())):
    print(str_version_label, str_version_value)

pandas version:  0.25.3
python version:  3.7.4


In [10]:
### RUN EVERY TIME: MAIN CONSTANTS

### MultiIndex level slice constant (selects every label of a level inside .loc[] tuples):
All = slice(None)
### Universe path (MS Excel source read by ison_membership_converting):
str_path_universe = 'Data_Files/Source_Files/acadian_universe.xlsx'
### OECD FDI datasets:
### Full set of downloaded FDI positions (HDF5 file and key the positions series is saved to and re-read from):
str_path_oecd_fdi_full = 'Data_Files/Source_Files/oecd_full.h5'
str_key_oecd_fdi_full = 'fdi_full'

### NOTE(review): the paths and keys below are not referenced in the visible cells of this notebook;
### presumably they are consumed by cells or notebooks outside this view - confirm before removing.
str_path_oecd_fdi_dataset = 'Data_Files/Source_Files/oecd_assets.h5'
str_key_do_total_oecd_fdi_dataset = 'fdi_total_outward_assets'
str_key_di_total_oecd_fdi_dataset = 'fdi_total_inward_assets'
str_key_do_equity_oecd_fdi_dataset = 'fdi_equity_outward_assets'
str_key_di_equity_oecd_fdi_dataset = 'fdi_equity_inward_assets'
str_path_oecd_fdi_augmented = 'Data_Files/Source_Files/oecd_augmented_assets.h5'
str_key_do_total_oecd_fdi_augmented = 'fdi_total_outward_augmented_assets'
str_key_do_equity_oecd_fdi_augmented = 'fdi_equity_outward_augmented_assets'
str_path_oecd_fdi_options = 'Data_Files/Source_Files/oecd_options_assets.h5'
str_path_oecd_fdi_total_options = 'Data_Files/Source_Files/oecd_total_assets.h5'
str_path_oecd_fdi_equity_options = 'Data_Files/Source_Files/oecd_equity_assets.h5'
str_key_total_oecd_fdi_options = 'fdi_total_outward_options_assets'
str_key_equity_oecd_fdi_options = 'fdi_equity_outward_options_assets'

### Technical Constants:
### Last date of the universe history (string form is also used for label-based selection):
str_date_end = '2022-10-31'
### Only the years of date_start / date_end are used as startTime / endTime of the OECD data request:
date_start = pd.Timestamp('1989-12-29')
date_end = pd.Timestamp(str_date_end)
### NOTE(review): date_ison is not used in the visible cells - purpose to be confirmed.
date_ison = pd.Timestamp('1994-12-31')

In [11]:
### DEFINING COUNTRY CODES EXTRACTOR

def get_country_codes(use_local_copy = False):
    """
    Load the table of country ISO codes published on countrycode.org.

    Parameters:
        use_local_copy: when truthy, the saved page 'Data_Files/Source_Files/countrycode.html'
                        is parsed instead of requesting 'https://countrycode.org/'.
    Returns:
        DataFrame indexed by upper-cased country name (rows are sorted by name before upper-casing)
        with two columns, 'ISO SHORT' and 'ISO LONG', produced by splitting the 'ISO CODES'
        column of the page table on ' / '.
    """
    ### Source selection (the local copy is a fallback for an unavailable URL):
    str_codes_source = 'Data_Files/Source_Files/countrycode.html' if use_local_copy else 'https://countrycode.org/'
    ### First table of the page, indexed by country name:
    df_page_table = pd.read_html(str_codes_source, index_col = 'COUNTRY')[0]
    ### Combined codes splitting into separate columns:
    df_iso_codes = df_page_table['ISO CODES'].str.split(' / ', expand = True)
    df_iso_codes.columns = ['ISO SHORT', 'ISO LONG']
    ### Ordering by original country names, then upper-casing them:
    df_iso_codes = df_iso_codes.sort_index()
    df_iso_codes.index = df_iso_codes.index.str.upper()
    ### Results output:
    return df_iso_codes

In [12]:
### DEFINING EXTRACTION UNIVERSE DATA FROM MS EXCEL SOURCE (TO BE IGNORED IN PRODUCT CODE)

def ison_membership_converting(str_path_universe, date_end, bool_daily = False, int_backfill_months = 0):
    """
    Build the history of universe membership (market type by date and country) from the MS Excel source.

    The 'Switchers' sheet is read with its first two columns as index (renamed to 'Country', 'Date');
    the 'Region' column is taken, missing values are set to 0 and the numeric codes are translated with
    dict_markets (50 -> 'DM', 57 -> 'EM', 504 -> 'FM', 0 -> NaN).

    Parameters:
        str_path_universe: path to the Excel file.
        date_end: last date of the business-month-end range each country is extended (forward filled) to.
        bool_daily: when truthy, the result is additionally resampled to business days with forward filling.
        int_backfill_months: when non-zero, countries that belong to a region on the first date this region
                             appears at get this region for the given number of preceding business month ends
                             (existing values take precedence, since combine_first is used).
    Returns:
        Series named 'Market' with ('Date', 'Country') MultiIndex.
    """
    ### Defining business-month-end reindexation on country level:
    def country_modify(ser_raw_country, date_end):
        """Bring a single country history to business month ends and forward fill it till date_end."""
        ### Country level is dropped, then the last value of each month is labelled with its business month end:
        ser_res_country = ser_raw_country.droplevel(0).resample('MS').last().resample('BM').last()
        range_country = pd.date_range(ser_res_country.index[0], date_end, freq = 'BM')
        return ser_res_country.reindex(range_country).ffill()
    ### Markets encoding table:
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM', 0: np.NaN}     
    ### Loading source file:
    df_raw_universe = pd.read_excel(engine = 'openpyxl', io = str_path_universe, sheet_name = 'Switchers', header = 0, parse_dates = True, index_col = [0, 1],
                                 na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', 
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    ### Converting source file:
    df_raw_universe.index.names = ['Country', 'Date']
    ser_raw_universe = df_raw_universe['Region']
    ### Missing codes become 0, which dict_markets translates back to NaN after the reindexation:
    ser_raw_universe.fillna(0, inplace = True)
    ser_raw_universe.name = 'Market'
    ### By country reindexation and translation:
    ser_res_universe = ser_raw_universe.groupby('Country').apply(country_modify, date_end)
    ser_res_universe.index.names = ['Country', 'Date']
    ### Levels are reordered to (Date, Country):
    ser_res_universe = ser_res_universe.replace(dict_markets).reorder_levels([1, 0]).sort_index() 
    ### Expanding membership for primary regions members by backfilling:
    if int_backfill_months:
        ### List of regions:
        list_region = list(ser_res_universe.dropna().unique())
        ### Initialising of collection of series with backfilled data for each region:
        list_ison_backfill = []
        ### Regions looping:
        for iter_region in list_region:
            ### Defining start of region date:
            date_first_valid = ser_res_universe.loc[ser_res_universe == iter_region].first_valid_index()[0]
            ### Creating dates index to backfilling (the region start date itself is excluded by [: -1]):
            idx_date_backfill = pd.date_range(end = date_first_valid, periods = int_backfill_months + 1, freq = 'BM')[: -1]
            ### Creating primary countries index to backfilling:            
            idx_region_backfill = ser_res_universe.loc[ser_res_universe == iter_region].loc[date_first_valid, All].index.get_level_values('Country')
            ### Creating full index:
            idx_ison_backfill = pd.MultiIndex.from_product([idx_date_backfill, idx_region_backfill])
            ### Series with backfilled data:
            list_ison_backfill.append(pd.Series(iter_region, index = idx_ison_backfill))
        ### Combination of backfilled series and original ISON data:    
        ser_res_universe = ser_res_universe.combine_first(pd.concat(list_ison_backfill, axis = 0)).sort_index()  
        ser_res_universe.index.names = ['Date', 'Country']
    ### Converting to daily frequency:
    if bool_daily:
        ser_res_universe = ser_res_universe.reset_index('Country').groupby('Country').resample('B').ffill()['Market'].swaplevel().sort_index()    
    ### Results output:
    ser_res_universe.name = 'Market'
    return ser_res_universe

In [13]:
### RUN EVERY TIME: COMMON DATA EXTRACTION STEPS

### World Country Codes:
df_country_codes = get_country_codes()
### ISON membership history:
ser_ison_membership = ison_membership_converting(str_path_universe, pd.to_datetime(str_date_end))
### ISON LONG IDs list (codes table rows whose ISO SHORT code is met in the universe):
idx_ison_short = ser_ison_membership.index.get_level_values('Country').unique()
list_ison_long = list(df_country_codes.loc[df_country_codes['ISO SHORT'].isin(idx_ison_short), 'ISO LONG'].values)
### ISON current status:
ser_ison_status = ser_ison_membership.loc[str_date_end].droplevel('Date')
### ISON stats (universe size followed by the number of current members of each market):
int_ison_number = len(list_ison_long)
list_regions = ['DM', 'EM', 'FM']
dict_ison_len = {'Full Universe': int_ison_number}
for iter_region in list_regions:
    dict_ison_len[iter_region] = int((ser_ison_status == iter_region).sum())
ser_market_len = pd.Series(dict_ison_len)
ser_market_len.index.names = ['Market']

In [28]:
### OECD FDI: GENERAL DATA PREPARATION

### Constants:
### MultiIndex level slice constant (repeats the definition from the main constants cell):
All = slice(None)
### Base URL of the SDMX-JSON data requests:
str_oecd_base_url = 'https://stats.oecd.org/sdmx-json/data/'
### Base URL of the data structure requests (concepts, dimensions and codelists are parsed from its XML response):
str_oecd_structure_url = 'https://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/'
### Dataset identifier appended to both URLs:
str_fdi_pos_dataset_add = 'FDI_POS_CTRY'

In [29]:
### OECD FDI: REQUESTS SESSION INITIALIZING

### Browser-like User-Agent header (for avoiding data request errors):
dict_header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
### Single session reused by the structure and data requests:
request_session = requests.Session()
request_session.headers.update(dict_header)

In [30]:
### OECD FDI: FDI POSITION STRUCTURE REQUEST

### Requesting the dataset structure definition:
obj_oecd_structure = request_session.get(str_oecd_structure_url + str_fdi_pos_dataset_add)
### Failing fast on HTTP errors: otherwise an error page would be handed over to the XML parser
### and surface as an unrelated parsing error (or as silently empty dictionaries):
obj_oecd_structure.raise_for_status()
xml_tree_root = et.fromstring(obj_oecd_structure.content)
### Concept ID -> concept name:
dict_concepts = {}
### Concept ID -> ID of the codelist describing the dimension values:
dict_dimensions = {}
### Codelist ID -> {code -> code description}:
dict_codelists = {}
### The three sections are told apart by the tag ending, so the namespace prefix of the tag is ignored:
for xml_tree_child in xml_tree_root:
    if xml_tree_child.tag.endswith('Concepts'):
        for xml_tree_grand in xml_tree_child:
            ### Concept name is the text of the first child element:
            str_concept_id = xml_tree_grand.attrib['id']
            dict_concepts[str_concept_id] = xml_tree_grand[0].text
    elif xml_tree_child.tag.endswith('KeyFamilies'):
        for xml_tree_family in xml_tree_child:
            for xml_tree_component in xml_tree_family:
                if xml_tree_component.tag.endswith('Components'):
                    for xml_tree_measure in xml_tree_component:
                        ### Any tag ending with 'Dimension' is taken, not only the plain 'Dimension' one:
                        if xml_tree_measure.tag.endswith('Dimension'):
                            str_concept_id = xml_tree_measure.attrib['conceptRef']
                            dict_dimensions[str_concept_id] = xml_tree_measure.attrib['codelist']
    elif xml_tree_child.tag.endswith('CodeLists'):
        for xml_tree_grand in xml_tree_child:
            str_codelist_id = xml_tree_grand.attrib['id']
            ### Code description is the text of the first child element of the code:
            dict_codelists[str_codelist_id] = {xml_tree_code.attrib['value']: xml_tree_code[0].text
                                               for xml_tree_code in xml_tree_grand if xml_tree_code.tag.endswith('Code')}

In [31]:
### OECD FDI: DIMENSIONS

### Concept names (column 0) against codelist IDs (column 1); concepts having no codelist are dropped:
df_dimensions = pd.concat([pd.Series(dict_concepts), pd.Series(dict_dimensions)], axis = 1, sort = False)
df_dimensions.dropna()

Unnamed: 0,0,1
COU,Reporting country,CL_FDI_POS_CTRY_COU
MEASURE,Currency,CL_FDI_POS_CTRY_MEASURE
MEASURE_PRINCIPLE,Measurement principle,CL_FDI_POS_CTRY_MEASURE_PRINCIPLE
FDI_TYPE,Type of FDI,CL_FDI_POS_CTRY_FDI_TYPE
TYPE_ENTITY,Type of entity,CL_FDI_POS_CTRY_TYPE_ENTITY
ACCOUNTING_ENTRY,Accounting entry,CL_FDI_POS_CTRY_ACCOUNTING_ENTRY
LEVEL_COUNTERPART,Level of counterpart,CL_FDI_POS_CTRY_LEVEL_COUNTERPART
COUNTERPART_AREA,Partner country/territory,CL_FDI_POS_CTRY_COUNTERPART_AREA
TIME,Year,CL_FDI_POS_CTRY_TIME


In [32]:
### OECD FDI: FDI POSITION CONCEPT SOURCE CODELISTS:

### Codes and descriptions of the codelist attached to the 'FDI_TYPE' dimension (rendered as the cell output):
dict_codelists['CL_FDI_POS_CTRY_FDI_TYPE']

{'LE_FA_F': 'FDI positions -Total',
 'LE_FA_F5': 'FDI positions - Equity (including reinvestment of earnings)',
 'LE_FA_FL': 'FDI positions - Debt'}

In [33]:
### OECD FDI: FDI TOTAL POSITION PARAMETERS PREPARATION:

### Several values of the same dimension are joined with '+' inside the request key.
### Currency:
str_measure = 'USD'
### Direction (inward 'DI' and outward 'DO' positions):
str_direction = '+'.join(('DI', 'DO'))
### Investment type (total 'LE_FA_F' and equity 'LE_FA_F5' positions):
str_fdi_type = '+'.join(('LE_FA_F', 'LE_FA_F5'))
### Residence defining:
str_residence = 'ALL'
### Accounting way (assets 'A', net 'NET' and liabilities 'L'):
str_accounting = '+'.join(('A', 'NET', 'L'))
### Level of counterpart (presumably 'IMC' stands for immediate counterpart - TODO confirm against the codelist):
str_counterpart = 'IMC'

In [34]:
### OECD FDI: FDI POSITION PARAMETERS PREPARATION:

### ISON Countries collecting:
### Codes table is re-indexed by ISO SHORT (country name level is dropped):
df_ison_countries = df_country_codes.set_index('ISO SHORT', append = True).reset_index('COUNTRY', drop = True)
### Only the countries met in the second level of the membership index are kept:
df_ison_countries = df_ison_countries.reindex(ser_ison_membership.index.get_level_values(1).unique().to_list())
### Series of ISO SHORT codes indexed by ISO LONG codes:
ser_ison_countries = df_ison_countries.reset_index().set_index('ISO LONG').squeeze()
### OECD reporters vs ISON members:
ser_oecd_reporters = pd.Series(dict_codelists['CL_FDI_POS_CTRY_COU'])
### Join on the codelist codes; column 0 (code description) is dropped, leaving the ISO SHORT code or NaN:
ser_oecd_reporters = ser_oecd_reporters.to_frame().join(ser_ison_countries).drop(0, axis = 1).squeeze()
### NOTE(review): a reporter is left with NaN by the join when its code is absent from ser_ison_countries.index,
### so the membership check below looks like it can never be satisfied - confirm the intended condition:
for iter_iso_long in (ser_oecd_reporters[ser_oecd_reporters.isna()].index.get_level_values(0)):
    if iter_iso_long in ser_ison_countries.index:
        print('OECD Reporter country with no ISON match:', iter_iso_long)
### ISON countries with no OECD reporter match:
set_no_reporters = set(ser_ison_countries.dropna().index) - set(ser_oecd_reporters.index)
print('ISON countries with no OECD reporter match:', sorted(list(set_no_reporters)), '(', len(set_no_reporters), ')')           
### OECD partners vs ISON members (same procedure applied to the partner codelist):
ser_oecd_partners = pd.Series(dict_codelists['CL_FDI_POS_CTRY_COUNTERPART_AREA'])
ser_oecd_partners = ser_oecd_partners.to_frame().join(ser_ison_countries).drop(0, axis = 1).squeeze()
### NOTE(review): same remark as for the reporters loop - the check looks unreachable:
for iter_iso_long in (ser_oecd_partners[ser_oecd_partners.isna()].index.get_level_values(0)):
    if iter_iso_long in ser_ison_countries.index:
        print('OECD Partner country with no ISON match:', iter_iso_long)
### ISON countries with no OECD partner match:
set_no_partners = set(ser_ison_countries.dropna().index) - set(ser_oecd_partners.index)
print('ISON countries with no OECD partner match:', sorted(list(set_no_partners)), '(', len(set_no_partners), ')')

ISON countries with no OECD reporter match: ['ARE', 'ARG', 'BGD', 'BGR', 'BHR', 'BRA', 'BWA', 'CHN', 'CIV', 'CYP', 'ECU', 'EGY', 'GHA', 'HKG', 'HRV', 'IDN', 'IND', 'JOR', 'KAZ', 'KEN', 'KWT', 'LBN', 'LKA', 'MAR', 'MLT', 'MUS', 'MYS', 'NAM', 'NGA', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'QAT', 'ROU', 'RUS', 'SAU', 'SGP', 'SRB', 'THA', 'TUN', 'TWN', 'UGA', 'UKR', 'VNM', 'ZAF', 'ZMB'] ( 48 )
ISON countries with no OECD partner match: ['ROU'] ( 1 )


In [35]:
### OECD FDI: FDI POSITION REQUEST CONSTRUCTING

### Request key: one position per dataset dimension, joined with '.';
### the empty first and last positions put no restriction on reporters and partners:
str_fdi_pos_request_params = '.'.join(['', str_measure, str_direction, str_fdi_type, str_residence, str_accounting, str_counterpart, ''])
### Only the years of the technical dates are passed as the period borders:
str_fdi_pos_request = str_oecd_base_url + str_fdi_pos_dataset_add + '/' + str_fdi_pos_request_params + '/all?startTime=' + str(date_start.year) + \
                      '&endTime=' + str(date_end.year) + '&detail=DataOnly'
obj_fdi_pos_response = request_session.get(str_fdi_pos_request)
### Failing fast on HTTP errors: otherwise an error page would surface as an unrelated JSON decoding error:
obj_fdi_pos_response.raise_for_status()
obj_fdi_pos_dataset = obj_fdi_pos_response.json()

In [36]:
### OECD FDI: FDI POSITION INDEX DATA COLLECTING:

### Dates (IDs of the observation dimension values moved forward to the business year end):
list_idx_dates = [pd.to_datetime(dict_date_value['id']) + pd.offsets.BYearEnd()
                  for dict_date_value in obj_fdi_pos_dataset['structure']['dimensions']['observation'][0]['values']]
### Parameters (list of value IDs for each of the series dimensions):
list_idx_library = [[dict_param_value['id'] for dict_param_value in dict_dimension['values']]
                    for dict_dimension in obj_fdi_pos_dataset['structure']['dimensions']['series']]
### Result (dates are placed after the series dimensions):
list_idx_library.append(list_idx_dates)
### Converting to dictionaries for future replacing (position number as a string -> value):
list_idx_dict = [{str(num_position): iter_value for num_position, iter_value in enumerate(iter_values)}
                 for iter_values in list_idx_library]

In [37]:
### OECD FDI: FDI POSITION DATASET RESAMPLING

dict_datasets_source = obj_fdi_pos_dataset['dataSets'][0]['series']
### Parameters and date indexes integration:
### series key and observation key are glued with ':', the first element of the observation list is the value:
dict_datasets_res = {
    str_series_key + ':' + str_observation_key: list_observation[0]
    for str_series_key, dict_series in dict_datasets_source.items()
    for str_observation_key, list_observation in dict_series['observations'].items()
}

In [38]:
### OECD FDI: FDI POSITION DATASET REINDEXATION

gc.collect()
### Flat 'series key:observation key' labels are split into separate levels of positional codes:
df_fdi_pos_data = pd.Series(dict_datasets_res)
df_fdi_pos_data.index = pd.MultiIndex.from_arrays(zip(*df_fdi_pos_data.index.str.split(':')))
int_levels_number = df_fdi_pos_data.index.nlevels
### After resetting, levels become columns 'level_0', 'level_1', ... and values go to column 0:
df_fdi_pos_data = df_fdi_pos_data.reset_index()
### Additional renaming applied after decoding, by level number:
### 0 (reporter) and 7 (partner): long ISO names -> short ISO names; 2: directions; 3: investment types; 5: flow types.
dict_iso_long_to_short = dict(zip(df_country_codes['ISO LONG'].values, df_country_codes['ISO SHORT'].values))
dict_level_renaming = {
    0: dict_iso_long_to_short,
    7: dict_iso_long_to_short,
    2: {'DI': 'Inward', 'DO': 'Outward'},
    3: {'LE_FA_F': 'Total', 'LE_FA_F5': 'Equity'},
    5: {'NET': 'Net', 'A': 'Asset', 'L': 'Liability'},
}
### Replacing numbers with parameter values:
### (results are assigned back to the column: in-place replacing of a selected column is a chained
### modification, which is not guaranteed to reach the parent frame)
for iter_level in range(int_levels_number):
    str_level_column = 'level_' + str(iter_level)
    ser_level_values = df_fdi_pos_data[str_level_column].replace(list_idx_dict[iter_level])
    if iter_level in dict_level_renaming:
        ser_level_values = ser_level_values.replace(dict_level_renaming[iter_level])
    df_fdi_pos_data[str_level_column] = ser_level_values

### Integrated observations dropping (rows whose reporter or partner was not translated to a short ISO name):
df_fdi_pos_data = df_fdi_pos_data.loc[
                                      df_fdi_pos_data['level_0'].isin(df_country_codes['ISO SHORT'].values) & 
                                      df_fdi_pos_data['level_7'].isin(df_country_codes['ISO SHORT'].values)
                                     ]
### Indexes defining (levels 1, 4 and 6 are not needed for indexing and are dropped):
ser_fdi_pos_data = df_fdi_pos_data.drop(['level_1', 'level_4', 'level_6'], axis = 1)\
                    .set_index(['level_3', 'level_2', 'level_5', 'level_8', 'level_0', 'level_7']).squeeze()
ser_fdi_pos_data.index.names = ['Type', 'Direction', 'Account', 'Date', 'Reporter', 'Partner']
ser_fdi_pos_data = ser_fdi_pos_data.sort_index()
### Positions of a country against itself are excluded:
ser_fdi_pos_data = ser_fdi_pos_data[ser_fdi_pos_data.index.get_level_values('Reporter') != ser_fdi_pos_data.index.get_level_values('Partner')]
#ser_fdi_pos_data[ser_fdi_pos_data < 0.0] = 0.0
ser_fdi_pos_data.name = 'FDI Positions'
### Saving (the file is rewritten from scratch):
ser_fdi_pos_data.to_hdf(path_or_buf = str_path_oecd_fdi_full, key = str_key_oecd_fdi_full, mode = 'w', format = 'fixed')

In [39]:
### TEMP

### Spot check: total positions between 'CL' and 'GB' (both directions, all accounts) for the date given:
ser_fdi_pos_data.loc[('Total', All, All, '2012-12-31', ['CL', 'GB'], ['CL', 'GB'])]

Type   Direction  Account  Date        Reporter  Partner
Total  Inward     Net      2012-12-31  CL        GB         2613.815192
       Outward    Net      2012-12-31  CL        GB        -254.151301 
Name: FDI Positions, dtype: float64

In [40]:
### OECD FDI: FDI POSITION DATASET CONVERTING TO ASSET / LIABILITY DIMENSION: EMPTY VALUES FILLING

### Total positions re-read from the saved file, with accounts ('Asset', 'Liability', 'Net') turned into columns:
df_fdi_pos_acc = pd.read_hdf(path_or_buf = str_path_oecd_fdi_full, key = str_key_oecd_fdi_full).loc['Total'].unstack('Account')

### STEP 1: only Net is filled - the sign of Net decides which side receives the value:
### Outward: Net < 0 -> Liability = -Net; Net >= 0 -> Asset = Net
### Inward:  Net < 0 -> Asset = -Net;     Net >= 0 -> Liability = Net
### (the order of the statements matters: each mask is recalculated on the already modified table)
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & (df_fdi_pos_acc['Net'] < 0.0) & \
            (df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].isna())
df_fdi_pos_acc.loc[idx_fill, 'Liability'] = -df_fdi_pos_acc.loc[idx_fill, 'Net'].values
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & (df_fdi_pos_acc['Net'] >= 0.0) & \
            (df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].isna())
df_fdi_pos_acc.loc[idx_fill, 'Asset'] = df_fdi_pos_acc.loc[idx_fill, 'Net'].values
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & (df_fdi_pos_acc['Net'] < 0.0) & \
            (df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].isna())
df_fdi_pos_acc.loc[idx_fill, 'Asset'] = -df_fdi_pos_acc.loc[idx_fill, 'Net'].values
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & (df_fdi_pos_acc['Net'] >= 0.0) & \
            (df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].isna())
df_fdi_pos_acc.loc[idx_fill, 'Liability'] = df_fdi_pos_acc.loc[idx_fill, 'Net'].values

### Control: rows where Net is still the only filled value:
print('Only Net is filled:\n', df_fdi_pos_acc[df_fdi_pos_acc['Net'].notna() & (df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].isna())])

### STEP 2: Net is empty - calculated from both sides (stays empty when any of the sides is empty):
### Outward: Net = Asset - Liability; Inward: Net = Liability - Asset
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & df_fdi_pos_acc['Net'].isna()
df_fdi_pos_acc.loc[idx_fill, 'Net'] = (df_fdi_pos_acc.loc[idx_fill, 'Asset'] - df_fdi_pos_acc.loc[idx_fill, 'Liability']).values
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & df_fdi_pos_acc['Net'].isna()
df_fdi_pos_acc.loc[idx_fill, 'Net'] = (df_fdi_pos_acc.loc[idx_fill, 'Liability'] - df_fdi_pos_acc.loc[idx_fill, 'Asset']).values

### Control: rows with both sides filled and Net still empty:
print('Empty Net value when Asset & Liability are filled:\n', 
      df_fdi_pos_acc[df_fdi_pos_acc['Net'].isna() & df_fdi_pos_acc['Asset'].notna() & df_fdi_pos_acc['Liability'].notna()])

### STEP 3: Net is still empty and a single side is filled - Net is taken from this side with the proper sign:
### Outward: Net = Asset or Net = -Liability; Inward: Net = -Asset or Net = Liability
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & df_fdi_pos_acc['Net'].isna() & df_fdi_pos_acc['Asset'].notna()
df_fdi_pos_acc.loc[idx_fill, 'Net'] = df_fdi_pos_acc.loc[idx_fill, 'Asset'].values
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & df_fdi_pos_acc['Net'].isna() & df_fdi_pos_acc['Liability'].notna()
df_fdi_pos_acc.loc[idx_fill, 'Net'] = -df_fdi_pos_acc.loc[idx_fill, 'Liability'].values
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & df_fdi_pos_acc['Net'].isna() & df_fdi_pos_acc['Asset'].notna()
df_fdi_pos_acc.loc[idx_fill, 'Net'] = -df_fdi_pos_acc.loc[idx_fill, 'Asset'].values
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & df_fdi_pos_acc['Net'].isna() & df_fdi_pos_acc['Liability'].notna()
df_fdi_pos_acc.loc[idx_fill, 'Net'] = df_fdi_pos_acc.loc[idx_fill, 'Liability'].values

### Control: rows with at least one side filled and Net still empty:
print('Empty Net value when Asset or Liability are filled:\n', 
      df_fdi_pos_acc[df_fdi_pos_acc['Net'].isna() & (df_fdi_pos_acc['Asset'].notna() | df_fdi_pos_acc['Liability'].notna())])

### STEP 4: Net and one side are filled - the other side is restored from the same identities:
### Outward: Liability = Asset - Net, Asset = Net + Liability; Inward: Liability = Net + Asset, Asset = Liability - Net
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & df_fdi_pos_acc['Net'].notna() & \
            df_fdi_pos_acc['Asset'].notna() & df_fdi_pos_acc['Liability'].isna()
df_fdi_pos_acc.loc[idx_fill, 'Liability'] = (df_fdi_pos_acc.loc[idx_fill, 'Asset'] - df_fdi_pos_acc.loc[idx_fill, 'Net']).values
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & df_fdi_pos_acc['Net'].notna() & \
            df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].notna()
df_fdi_pos_acc.loc[idx_fill, 'Asset'] = (df_fdi_pos_acc.loc[idx_fill, 'Net'] + df_fdi_pos_acc.loc[idx_fill, 'Liability']).values
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & df_fdi_pos_acc['Net'].notna() & \
            df_fdi_pos_acc['Asset'].notna() & df_fdi_pos_acc['Liability'].isna()
df_fdi_pos_acc.loc[idx_fill, 'Liability'] = (df_fdi_pos_acc.loc[idx_fill, 'Net'] + df_fdi_pos_acc.loc[idx_fill, 'Asset']).values
idx_fill = (df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & df_fdi_pos_acc['Net'].notna() & \
            df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].notna()
df_fdi_pos_acc.loc[idx_fill, 'Asset'] = (df_fdi_pos_acc.loc[idx_fill, 'Liability'] - df_fdi_pos_acc.loc[idx_fill, 'Net']).values

### Control: index entries having at least two filled values but not all three of them:
print('Empty Net / Asset / Liability value when two others are filled:\n', set(df_fdi_pos_acc.dropna(thresh = 2).index) - set(df_fdi_pos_acc.dropna().index))

Only Net is filled:
 Empty DataFrame
Columns: [Asset, Liability, Net]
Index: []
Empty Net value when Asset & Liability are filled:
 Empty DataFrame
Columns: [Asset, Liability, Net]
Index: []
Empty Net value when Asset or Liability are filled:
 Empty DataFrame
Columns: [Asset, Liability, Net]
Index: []
Empty Net / Asset / Liability value when two others are filled:
 set()


In [42]:
### TEMP

### Sum of absolute values of each account column (rendered as the cell output):
df_fdi_pos_acc.abs().sum()

Account
Asset        3.438384e+08
Liability    2.889626e+08
Net          5.045866e+08
dtype: float64

In [43]:
### OECD FDI: OBSERVATIONS ADDED BY NET TO ASSET PROCEDURE

def _net_share_by_date(df_fdi_side, str_account):
    """
    Share of strictly positive observations of the account column that are equal to the Net column,
    by date: 'Volume' is the ratio of sums, 'Number' is the ratio of counts.
    """
    ### All strictly positive observations of the account:
    ser_positive = df_fdi_side.loc[(df_fdi_side[str_account] > 0.0), str_account]
    ### The ones coinciding with Net value:
    ser_equal_to_net = df_fdi_side.loc[(df_fdi_side[str_account] > 0.0) & 
                                       (df_fdi_side[str_account] == df_fdi_side['Net']), str_account]
    df_share = pd.DataFrame()
    df_share['Volume'] = ser_equal_to_net.groupby('Date').sum() / ser_positive.groupby('Date').sum()
    df_share['Number'] = ser_equal_to_net.groupby('Date').count() / ser_positive.groupby('Date').count()
    return df_share

### Outward positions of the reporters taken from the current ISON status index:
df_fdi_outward = df_fdi_pos_acc.loc['Outward', All, ser_ison_status.index]
print('Share of Observations Added to Outward Assets by Net Values:')
df_net_share = _net_share_by_date(df_fdi_outward, 'Asset')
display(df_net_share)

### Inward positions (no filter on reporters is applied here):
df_fdi_inward = df_fdi_pos_acc.loc['Inward']
print('Share of Observations Added to Inward Liabilities by Net Values:')
df_net_share = _net_share_by_date(df_fdi_inward, 'Liability')
display(df_net_share)

Share of Observations Added to Outward Assets by Net Values:


Unnamed: 0_level_0,Volume,Number
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2005-12-30,0.026585,0.528226
2006-12-29,0.022426,0.514706
2007-12-31,0.019717,0.55
2008-12-31,0.019063,0.466667
2009-12-31,0.135499,0.54811
2010-12-31,0.165361,0.545763
2011-12-30,0.156171,0.59828
2012-12-31,0.164436,0.61615
2013-12-31,0.340785,0.57617
2014-12-31,0.343209,0.485464


Share of Observations Added to Inward Liabilities by Net Values:


Unnamed: 0_level_0,Volume,Number
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2005-12-30,0.075391,0.550898
2006-12-29,0.072659,0.566298
2007-12-31,0.074081,0.525606
2008-12-31,0.041151,0.405039
2009-12-31,0.180978,0.453689
2010-12-31,0.205014,0.454829
2011-12-30,0.203535,0.495564
2012-12-31,0.183152,0.491853
2013-12-31,0.306022,0.440503
2014-12-31,0.302532,0.412727


In [44]:
### OECD FDI: OPTION CONTAINER INITIALIZING

### Empty container to be filled with the aggregation options by their names:
dict_option = dict()

In [45]:
### OECD FDI: TOTAL SUM DATA AGGREGATION: SUM OF DIRECTIONAL VALUES BY ACCOUNTS

### Directions are placed side by side, Asset and Liability rows are selected and added up across directions;
### after that the account level is moved to the first place:
ser_fdi_sum_acc = (
    df_fdi_pos_acc.stack('Account', dropna = False)
                  .unstack('Direction')
                  .sort_index()
                  .loc[(All, All, All, ['Asset', 'Liability']), All]
                  .sum(axis = 1)
                  .reorder_levels([-1, 0, 1, 2])
                  .sort_index()
)
### First level renaming:
ser_fdi_sum_acc.index.rename('Position', level = 0, inplace = True)

In [46]:
### OECD FDI: TOTAL SUM DATA AGGREGATION: DATASETS PREPARATION

### Summed assets (explicit copy: negative values clipping must not write through to ser_fdi_sum_acc):
ser_fdi_sum_asset = ser_fdi_sum_acc['Asset'].copy()
ser_fdi_sum_asset[ser_fdi_sum_asset < 0.0] = 0.0
### Zero assets are treated as missing; replace() returns a new Series, so the result has to be assigned back
### (the same step of the outward aggregation is performed in place):
ser_fdi_sum_asset = ser_fdi_sum_asset.replace({0.0: np.NaN})
ser_fdi_sum_asset.name = 'Asset'
### Summed liabilities with Reporter and Partner exchanged; the index names are then taken from the assets,
### so that a liability is labelled as a position of the partner country:
ser_fdi_sum_liability = ser_fdi_sum_acc['Liability'].swaplevel('Reporter', 'Partner').sort_index()
ser_fdi_sum_liability[ser_fdi_sum_liability < 0.0] = 0.0
ser_fdi_sum_liability.index.names = ser_fdi_sum_asset.index.names
ser_fdi_sum_liability.name = 'Liability_Inverted'
### Both sources side by side:
df_fdi_sum_to_augment = pd.concat([ser_fdi_sum_asset, ser_fdi_sum_liability], axis = 1, names = 'Data Source').sort_index().astype('float32').round(2)

In [47]:
### OECD FDI: TOTAL SUM DATA AGGREGATION: DATA COMBINING

### Rows selection for each option: second index level is limited by the ISON status index in both cases,
### third level is either unrestricted or limited by the same index:
dict_sum_selection = {'Sum_Full': (All, ser_ison_status.index, All),
                      'Sum_ISON': (All, ser_ison_status.index, ser_ison_status.index)}
### Assets have the priority, inverted liabilities fill the gaps, zeros are treated as missing values:
for iter_option, tup_selection in dict_sum_selection.items():
    ser_iter_asset = df_fdi_sum_to_augment.loc[tup_selection, 'Asset'].replace({0.0: np.NaN})
    ser_iter_liability = df_fdi_sum_to_augment.loc[tup_selection, 'Liability_Inverted']
    dict_option[iter_option] = ser_iter_asset.combine_first(ser_iter_liability).replace({0.0: np.NaN})

In [54]:
### OECD FDI: TOTAL OUTWARD DATA AGGREGATION: DATA COMBINING

### Outward assets: negative values are zeroed, zeros are treated as missing values:
ser_fdi_clear_asset = df_fdi_pos_acc.loc[('Outward', All, All, All), 'Asset'].droplevel('Direction')
ser_fdi_clear_asset[ser_fdi_clear_asset < 0.0] = 0.0
ser_fdi_clear_asset = ser_fdi_clear_asset.replace({0.0: np.NaN})
ser_fdi_clear_asset.name = 'Asset'
### Inward liabilities with Reporter and Partner exchanged; the index names are then taken from the assets:
ser_fdi_clear_liability = df_fdi_pos_acc.loc[('Inward', All, All, All), 'Liability']
ser_fdi_clear_liability = ser_fdi_clear_liability.swaplevel('Reporter', 'Partner').droplevel('Direction').sort_index()
ser_fdi_clear_liability[ser_fdi_clear_liability < 0.0] = 0.0
ser_fdi_clear_liability.index.names = ser_fdi_clear_asset.index.names
ser_fdi_clear_liability.name = 'Liability_Inverted'
### Both sources side by side:
df_fdi_clear_to_augment = pd.concat([ser_fdi_clear_asset, ser_fdi_clear_liability], axis = 1, names = 'Data Source').sort_index().astype('float32').round(2)
### Rows selection for each option (third level is either unrestricted or limited by the ISON status index):
dict_outward_selection = {'Outward_World': (All, ser_ison_status.index, All),
                          'Outward_ISON': (All, ser_ison_status.index, ser_ison_status.index)}
### Assets have the priority, inverted liabilities fill the gaps, zeros are treated as missing values:
for iter_option, tup_selection in dict_outward_selection.items():
    ser_iter_asset = df_fdi_clear_to_augment.loc[tup_selection, 'Asset'].replace({0.0: np.NaN})
    ser_iter_liability = df_fdi_clear_to_augment.loc[tup_selection, 'Liability_Inverted']
    dict_option[iter_option] = ser_iter_asset.combine_first(ser_iter_liability).replace({0.0: np.NaN})

In [55]:
### OECD FDI: TOTAL OUTWARD DATA AGGREGATION: DATA SAVING

### Option name -> destination file (each file is rewritten from scratch, all under the same key):
tup_option_files = (
    ('Sum_Full', 'Data_Files/Source_Files/oecd_old_style_world.h5'),
    ('Sum_ISON', 'Data_Files/Source_Files/oecd_old_style_ison.h5'),
    ('Outward_ISON', 'Data_Files/Source_Files/oecd_outward_ison.h5'),
    ('Outward_World', 'Data_Files/Source_Files/oecd_outward_world.h5'),
)
for iter_option, str_option_path in tup_option_files:
    dict_option[iter_option].to_hdf(str_option_path, key = 'old_style', mode = 'w')

In [56]:
### OECD FDI: TEST: TOTAL OUTWARD DATA AGGREGATION: RESULTS COMPARISON

gc.collect()

### Previously saved result brought to the precision of the current one:
ser_old_version = pd.read_hdf('Data_Files/Source_Files/oecd_combined_old.h5').set_index(['Date', 'Reporter', 'Partner'])['Asset_Augmented']
ser_old_version = ser_old_version.astype('float32').round(2)
ser_old_version.name = 'Old_Version'
### Both versions side by side, zeros are treated as missing values:
df_test = pd.concat([dict_option['Sum_ISON'], ser_old_version], axis = 1).replace({0.0: np.NaN})
### Totals to compare:
for str_test_title, str_test_column in (('Sum of the original attempt values:', 'Old_Version'),
                                        ('Sum of the repeated attempt values:', 'Asset')):
    print(str_test_title)
    display(df_test[str_test_column].sum())

Sum of the original attempt values:


280995520.0

Sum of the repeated attempt values:


281379140.0

In [57]:
### TEMP

### Saved outward positions are read once and reused by both checks:
ser_outward_world = pd.read_hdf('Data_Files/Source_Files/oecd_outward_world.h5')
### Explicit display is needed for the spot check: only the last expression of a cell is rendered,
### so a bare expression placed before it would be calculated and silently dropped:
display(ser_outward_world.dropna().loc[['2021-12-31'], ['AE'], ['AU']])
### Total of the saved positions (rendered as the cell output):
ser_outward_world.sum()

302012260.0