In [1]:
### OECD FDI: FOREIGN DIRECT INVESTMENT

In [2]:
### RUN EVERY TIME: INITIALIZATION

import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', -1) ### To display long strings
import math
import requests
import json ### To correct JSON structure before unpacking
import xml.etree.ElementTree as et
import gc
import os
import datetime
import time
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import seaborn as sns
#%load_ext line_profiler

In [3]:
### RUN EVERY TIME: VERSION CONTROL

from platform import python_version
print('pandas version: ', pd.__version__)
print('python version: ', python_version())

pandas version:  0.25.3
python version:  3.7.4


In [4]:
### RUN EVERY TIME: MAIN CONSTANTS

### MultiIndex level slice constant (stands for "every label of this level" inside .loc tuples):
All = slice(None)
### Universe path (MS Excel workbook read by ison_membership_converting):
str_path_universe = 'Data_Files/Source_Files/acadian_universe.xlsx'
### OECD FDI dataset (HDF5 files and the keys used inside them):
str_path_fdi_dataset = 'Data_Files/Source_Files/oecd_dataset.h5'
str_fdi_flow_oecd_dataset = 'fdi_flow_dataset'
str_fdi_pos_oecd_dataset = 'fdi_pos_dataset'
str_path_oecd_fdi_combined = 'Data_Files/Source_Files/oecd_combined.h5'
str_full_oecd_fdi_combined = 'oecd_full_combined'
str_path_oecd_fdi_out_net = 'Data_Files/Source_Files/oecd_outward_net.h5'
str_full_oecd_fdi_out_net = 'oecd_outward_net'
### Technical Constants:
### Last date of the universe history; its year is also the endTime of the OECD request:
str_date_end = '2022-10-31'
### The year of date_start is used as the startTime of the OECD request:
date_start = pd.Timestamp('1989-12-29')
date_end = pd.Timestamp(str_date_end)
### NOTE(review): date_ison is not referenced in the visible cells - confirm whether it is still needed:
date_ison = pd.Timestamp('1994-12-31')

In [5]:
### DEFINING COUNTRY CODES EXTRACTOR

def get_country_codes(use_local_copy = False):
    """
    Build the country name -> ISO codes lookup table.

    Parameters
    ----------
    use_local_copy : bool, default False
        When truthy, the table is parsed from the saved page
        'Data_Files/Source_Files/countrycode.html' instead of the live
        'https://countrycode.org/' site (fallback for an unavailable URL).

    Returns
    -------
    pandas.DataFrame
        Columns 'ISO SHORT' and 'ISO LONG', indexed by the upper-cased
        'COUNTRY' label and sorted by the original country name.
    """
    ### Source selection: saved page or online extraction:
    str_codes_source = 'Data_Files/Source_Files/countrycode.html' if use_local_copy else 'https://countrycode.org/'
    ### First table of the page, indexed by country name:
    df_codes_table = pd.read_html(str_codes_source, index_col = 'COUNTRY')[0]
    ### 'ISO CODES' cells look like 'XX / XXX' and are split into two separate columns:
    df_iso_parts = df_codes_table['ISO CODES'].str.split(' / ', expand = True)
    df_codes_table[['ISO SHORT', 'ISO LONG']] = df_iso_parts
    ### Only the ISO columns are kept; sorting is done before the index is upper-cased:
    df_iso_codes = df_codes_table.loc[:, ['ISO SHORT', 'ISO LONG']].sort_index()
    df_iso_codes.index = df_iso_codes.index.str.upper()
    ### Results output:
    return df_iso_codes

In [6]:
### DEFINING EXTRACTION UNIVERSE DATA FROM MS EXCEL SOURCE (TO BE IGNORED IN PRODUCT CODE)

def ison_membership_converting(str_path_universe, date_end, bool_daily = False, int_backfill_months = 0):
    """
    Load the universe membership history from the MS Excel source and convert it
    to a (Date, Country) indexed series of market labels.

    Parameters
    ----------
    str_path_universe : str
        Path of the workbook; the 'Switchers' sheet is read with its first two
        columns as the (Country, Date) index and the 'Region' column as data.
    date_end : date-like
        Last date of the business-month-end grid built for every country.
    bool_daily : bool, default False
        When truthy, the monthly result is forward-filled to business-day frequency.
    int_backfill_months : int, default 0
        When non-zero, the countries present at the first date of each region are
        additionally labelled with that region for this many preceding month ends.

    Returns
    -------
    pandas.Series
        Series named 'Market' with ('Date', 'Country') index and values
        'DM' / 'EM' / 'FM' (NaN for the 0 / missing region code).
    """
    ### Business-month-end offset object: works on every pandas version, while the 'BM' string alias is deprecated since pandas 2.2:
    offset_bm = pd.offsets.BMonthEnd()
    ### Defining business-month-end reindexation on country level:
    def country_modify(ser_raw_country, date_end):
        ser_res_country = ser_raw_country.droplevel(0).resample('MS').last().resample(offset_bm).last()
        range_country = pd.date_range(ser_res_country.index[0], date_end, freq = offset_bm)
        return ser_res_country.reindex(range_country).ffill()
    ### Markets encoding table (np.nan instead of the np.NaN alias removed in NumPy 2.0):
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM', 0: np.nan}
    ### Loading source file:
    df_raw_universe = pd.read_excel(engine = 'openpyxl', io = str_path_universe, sheet_name = 'Switchers', header = 0, parse_dates = True, index_col = [0, 1],
                                 na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', 
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    ### Converting source file:
    df_raw_universe.index.names = ['Country', 'Date']
    ### Missing regions are encoded as 0 (assignment instead of an in-place fill on a column selection):
    ser_raw_universe = df_raw_universe['Region'].fillna(0)
    ser_raw_universe.name = 'Market'
    ### By country reindexation and translation:
    ser_res_universe = ser_raw_universe.groupby('Country').apply(country_modify, date_end)
    ser_res_universe.index.names = ['Country', 'Date']
    ser_res_universe = ser_res_universe.replace(dict_markets).reorder_levels([1, 0]).sort_index() 
    ### Expanding membership for primary regions members by backfilling:
    if int_backfill_months:
        ### List of regions:
        list_region = list(ser_res_universe.dropna().unique())
        ### Initialising of collection of series with backfilled data for each region:
        list_ison_backfill = []
        ### Regions looping:
        for iter_region in list_region:
            ### Defining start of region date:
            date_first_valid = ser_res_universe.loc[ser_res_universe == iter_region].first_valid_index()[0]
            ### Creating dates index to backfilling (the first valid date itself is excluded):
            idx_date_backfill = pd.date_range(end = date_first_valid, periods = int_backfill_months + 1, freq = offset_bm)[: -1]
            ### Creating primary countries index to backfilling:            
            idx_region_backfill = ser_res_universe.loc[ser_res_universe == iter_region].loc[date_first_valid, All].index.get_level_values('Country')
            ### Creating full index:
            idx_ison_backfill = pd.MultiIndex.from_product([idx_date_backfill, idx_region_backfill])
            ### Series with backfilled data:
            list_ison_backfill.append(pd.Series(iter_region, index = idx_ison_backfill))
        ### Combination of backfilled series and original ISON data (original values take priority):    
        ser_res_universe = ser_res_universe.combine_first(pd.concat(list_ison_backfill, axis = 0)).sort_index()  
        ser_res_universe.index.names = ['Date', 'Country']
    ### Converting to daily frequency:
    if bool_daily:
        ser_res_universe = ser_res_universe.reset_index('Country').groupby('Country').resample('B').ffill()['Market'].swaplevel().sort_index()    
    ### Results output:
    ser_res_universe.name = 'Market'
    return ser_res_universe

In [7]:
### RUN EVERY TIME: COMMON DATA EXTRACTION STEPS

### World Country Codes (country name -> ISO SHORT / ISO LONG):
df_country_codes = get_country_codes()
### ISON membership history (built up to str_date_end):
ser_ison_membership = ison_membership_converting(str_path_universe, pd.to_datetime(str_date_end))
### ISON LONG IDs list (long codes of the countries whose short code occurs in the membership history):
idx_ison_short = ser_ison_membership.index.get_level_values('Country').unique()
ser_is_ison_member = df_country_codes['ISO SHORT'].isin(idx_ison_short)
list_ison_long = list(df_country_codes.loc[ser_is_ison_member, 'ISO LONG'].values)
### ISON current status (membership cross-section at str_date_end, indexed by country only):
ser_ison_status = ser_ison_membership.loc[str_date_end].droplevel('Date')
### ISON stats (universe size and number of current members per market):
int_ison_number = len(list_ison_long)
list_regions = ['DM', 'EM', 'FM']
dict_ison_len = {'Full Universe': int_ison_number}
for iter_region in list_regions:
    dict_ison_len[iter_region] = len(ser_ison_status.loc[ser_ison_status == iter_region])
ser_market_len = pd.Series(dict_ison_len)
ser_market_len.index.names = ['Market']

In [8]:
### OECD FDI: GENERAL DATA PREPARATION

### Constants:
### MultiIndex level slice constant (same value as in the main constants cell, repeated here):
All = slice(None)
### Base URL of the data requests (SDMX-JSON) and of the data structure requests (SDMX XML):
str_oecd_base_url = 'https://stats.oecd.org/sdmx-json/data/'
str_oecd_structure_url = 'https://stats.oecd.org/restsdmx/sdmx.ashx/GetDataStructure/'
### Dataset identifiers appended to the URLs above (only the position dataset is requested in the visible cells):
str_fdi_flow_dataset_add = 'FDI_FLOW_CTRY'
str_fdi_pos_dataset_add = 'FDI_POS_CTRY'

In [9]:
### OECD FDI: REQUESTS SESSION INITIALIZING

### Single session shared by the structure and data requests below:
request_session = requests.Session()
### For avoiding data request errors (a browser-like User-Agent is sent with every request of the session):
dict_header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
request_session.headers.update(dict_header)

In [10]:
### OECD FDI: FDI POSITION STRUCTURE REQUEST

obj_oecd_structure = request_session.get(str_oecd_structure_url + str_fdi_pos_dataset_add)
### Failing fast on an HTTP error instead of passing an error page to the XML parser:
obj_oecd_structure.raise_for_status()
xml_tree_root = et.fromstring(obj_oecd_structure.content)
### Concept ID -> concept name:
dict_concepts = {}
### Concept ID -> ID of the codelist describing the dimension:
dict_dimensions = {}
### Codelist ID -> {code value -> code description}:
dict_codelists = {}
### Tags are compared by suffix because ElementTree prefixes them with the XML namespace:
for xml_tree_child in xml_tree_root:
    if xml_tree_child.tag.endswith('Concepts'):
        for xml_tree_grand in xml_tree_child:
            str_concept_id = xml_tree_grand.attrib['id']
            ### The first child element text is used as the concept name:
            str_concept_name = xml_tree_grand[0].text
            dict_concepts[str_concept_id] = str_concept_name
    elif xml_tree_child.tag.endswith('KeyFamilies'):
        for xml_tree_family in xml_tree_child:
            for xml_tree_component in xml_tree_family:
                if xml_tree_component.tag.endswith('Components'):
                    for xml_tree_measure in xml_tree_component:
                        if xml_tree_measure.tag.endswith('Dimension'):
                            str_concept_id = xml_tree_measure.attrib['conceptRef']
                            str_concept_cl_id = xml_tree_measure.attrib['codelist']
                            dict_dimensions[str_concept_id] = str_concept_cl_id
    elif xml_tree_child.tag.endswith('CodeLists'):
        for xml_tree_grand in xml_tree_child:
            str_codelist_id = xml_tree_grand.attrib['id']
            dict_codelist = {}
            for xml_tree_codelist in xml_tree_grand:
                if xml_tree_codelist.tag.endswith('Code'):
                    str_code_id = xml_tree_codelist.attrib['value']
                    ### The first child element text is used as the code description:
                    str_code_value = xml_tree_codelist[0].text
                    dict_codelist[str_code_id] = str_code_value
            dict_codelists[str_codelist_id] = dict_codelist

In [11]:
### OECD FDI: DIMENSIONS

### Concept names (column 0) next to their codelist IDs (column 1); concepts that are not dimensions are dropped:
pd.concat([pd.Series(dict_concepts), pd.Series(dict_dimensions)], axis = 1, sort = False).dropna()

Unnamed: 0,0,1
COU,Reporting country,CL_FDI_POS_CTRY_COU
MEASURE,Currency,CL_FDI_POS_CTRY_MEASURE
MEASURE_PRINCIPLE,Measurement principle,CL_FDI_POS_CTRY_MEASURE_PRINCIPLE
FDI_TYPE,Type of FDI,CL_FDI_POS_CTRY_FDI_TYPE
TYPE_ENTITY,Type of entity,CL_FDI_POS_CTRY_TYPE_ENTITY
ACCOUNTING_ENTRY,Accounting entry,CL_FDI_POS_CTRY_ACCOUNTING_ENTRY
LEVEL_COUNTERPART,Level of counterpart,CL_FDI_POS_CTRY_LEVEL_COUNTERPART
COUNTERPART_AREA,Partner country/territory,CL_FDI_POS_CTRY_COUNTERPART_AREA
TIME,Year,CL_FDI_POS_CTRY_TIME


In [13]:
### OECD FDI: FDI POSITION CONCEPT SOURCE CODELISTS:

### Codes available for the 'Type of FDI' dimension (one of them is chosen as str_fdi_type):
dict_codelists['CL_FDI_POS_CTRY_FDI_TYPE']

{'LE_FA_F': 'FDI positions -Total',
 'LE_FA_F5': 'FDI positions - Equity (including reinvestment of earnings)',
 'LE_FA_FL': 'FDI positions - Debt'}

In [12]:
### OECD FDI: FDI POSITION PARAMETERS PREPARATION: Non-country parameters:

### Several codes of one dimension are combined with '+' inside the request key.
### Currency:
str_measure = 'USD'
### Direction ('DI' and 'DO' are renamed to 'Inward' and 'Outward' in the reindexation cell):
str_direction = '+'.join(['DI', 'DO'])
### Investment type ('LE_FA_F' is listed as 'FDI positions -Total' in the FDI type codelist):
str_fdi_type = 'LE_FA_F'
### Residence defining:
str_residence = 'ALL'
### Accounting way ('NET', 'A' and 'L' are renamed to 'Net', 'Asset' and 'Liability' in the reindexation cell):
str_accounting = '+'.join(['NET', 'A', 'L']) # 'NET' # 
### Level of counterpart:
### NOTE(review): 'IMC' presumably means the immediate counterpart - confirm against the CL_FDI_POS_CTRY_LEVEL_COUNTERPART codelist:
str_counterpart = 'IMC'

In [13]:
### OECD FDI: FDI POSITION PARAMETERS PREPARATION: Reporters and partners control and preparation

### ISON Countries collecting:
### Code table re-indexed by ISO SHORT and restricted to the countries of the membership history:
df_ison_countries = df_country_codes.set_index('ISO SHORT', append = True).reset_index('COUNTRY', drop = True)
df_ison_countries = df_ison_countries.reindex(ser_ison_membership.index.get_level_values(1).unique().to_list())
### Series of ISO SHORT codes indexed by ISO LONG codes:
ser_ison_countries = df_ison_countries.reset_index().set_index('ISO LONG').squeeze()
### OECD reporters vs ISON members:
### Reporter codes of the codelist are joined (on the ISO LONG index) with the ISON short codes; NaN marks a reporter without ISON match:
ser_oecd_reporters = pd.Series(dict_codelists['CL_FDI_POS_CTRY_COU'])
ser_oecd_reporters = ser_oecd_reporters.to_frame().join(ser_ison_countries).drop(0, axis = 1).squeeze()
### Unmatched reporter codes are reported only when the code itself is present in the ISON index:
for iter_iso_long in (ser_oecd_reporters[ser_oecd_reporters.isna()].index.get_level_values(0)):
    if iter_iso_long in ser_ison_countries.index:
        print('OECD Reporter country with no ISON match:', iter_iso_long)
### ISON countries with no OECD reporter match:
set_no_reporters = set(ser_ison_countries.dropna().index) - set(ser_oecd_reporters.index)
print('ISON countries with no OECD reporter match:', set_no_reporters, '(', len(set_no_reporters), ')')           
### OECD partners vs ISON members (same control as for the reporters, based on the counterpart area codelist):
ser_oecd_partners = pd.Series(dict_codelists['CL_FDI_POS_CTRY_COUNTERPART_AREA'])
ser_oecd_partners = ser_oecd_partners.to_frame().join(ser_ison_countries).drop(0, axis = 1).squeeze()
for iter_iso_long in (ser_oecd_partners[ser_oecd_partners.isna()].index.get_level_values(0)):
    if iter_iso_long in ser_ison_countries.index:
        print('OECD Partner country with no ISON match:', iter_iso_long)
### ISON countries with no OECD partner match:
set_no_partners = set(ser_ison_countries.dropna().index) - set(ser_oecd_partners.index)
print('ISON countries with no OECD partner match:', set_no_partners, '(', len(set_no_partners), ')')
### Lists preparation ('+'-joined ISO LONG codes of the matched reporters and partners):
### NOTE(review): the request constructing cell leaves the reporter and partner positions empty, so these two strings look unused - confirm:
str_reporters_all = '+'.join(ser_oecd_reporters.dropna().index.to_list())
str_partners_all = '+'.join(ser_oecd_partners.dropna().index.to_list())

ISON countries with no OECD reporter match: {'PER', 'MYS', 'PAK', 'CIV', 'NGA', 'KEN', 'SRB', 'MLT', 'BHR', 'BGR', 'SAU', 'EGY', 'TUN', 'MAR', 'OMN', 'BRA', 'ROU', 'KAZ', 'ECU', 'TWN', 'MUS', 'JOR', 'LKA', 'ZAF', 'CHN', 'CYP', 'ARG', 'RUS', 'VNM', 'THA', 'BWA', 'PHL', 'NAM', 'QAT', 'BGD', 'SGP', 'PAN', 'IND', 'UKR', 'UGA', 'HRV', 'GHA', 'ARE', 'IDN', 'HKG', 'ZMB', 'LBN', 'KWT'} ( 48 )
ISON countries with no OECD partner match: {'ROU'} ( 1 )


In [14]:
### OECD FDI: FDI POSITION REQUEST CONSTRUCTING

### Dot-separated dimension key; the empty first and last positions (reporter and partner) leave these dimensions unrestricted:
str_fdi_pos_request_params = '.'.join(['', str_measure, str_direction, str_fdi_type, str_residence, str_accounting, str_counterpart, ''])
### Full request URL limited by the years of date_start and date_end:
str_fdi_pos_request = str_oecd_base_url + str_fdi_pos_dataset_add + '/' + str_fdi_pos_request_params + '/all?startTime=' + str(date_start.year) + \
                      '&endTime=' + str(date_end.year) + '&detail=DataOnly'
obj_fdi_pos_response = request_session.get(str_fdi_pos_request)
### Failing fast on an HTTP error instead of trying to decode an error page as JSON:
obj_fdi_pos_response.raise_for_status()
obj_fdi_pos_dataset = obj_fdi_pos_response.json()

In [15]:
### OECD FDI: FDI POSITION INDEX DATA COLLECTING:

### Dates (each observation period ID is converted to a timestamp and rolled forward to the business year end):
list_idx_dates = [pd.to_datetime(tup_date['id']) + pd.offsets.BYearEnd()
                  for tup_date in obj_fdi_pos_dataset['structure']['dimensions']['observation'][0]['values']]
### Parameters (one list of value IDs per series dimension, in the order given by the response):
list_idx_library = [[tup_parameter['id'] for tup_parameter in iter_position['values']]
                    for iter_position in obj_fdi_pos_dataset['structure']['dimensions']['series']]
### Result (dates are added as the last dimension):
list_idx_library.append(list_idx_dates)
### Converting to dictionary for future replacing (string position number -> value, one dictionary per dimension):
list_idx_dict = [{str(num_position): iter_value for num_position, iter_value in enumerate(iter_list)}
                 for iter_list in list_idx_library]

In [16]:
### OECD FDI: FDI POSITION DATASET RESAMPLING

dict_datasets_source = obj_fdi_pos_dataset['dataSets'][0]['series']
### Parameters and date indexes integration:
### every observation gets the key '<series key>:<observation key>' and keeps only the first element of its value list:
dict_datasets_res = {
    iter_dataset + ':' + iter_observation: list_observation_values[0]
    for iter_dataset, dict_series in dict_datasets_source.items()
    for iter_observation, list_observation_values in dict_series['observations'].items()
}

In [17]:
### OECD FDI: FDI POSITION DATASET REINDEXATION

gc.collect()
### Flat observations series: keys are position numbers of the dimensions joined with ':':
df_fdi_pos_data = pd.Series(dict_datasets_res)
df_fdi_pos_data.index = pd.MultiIndex.from_arrays(zip(*df_fdi_pos_data.index.str.split(':')))
int_levels_number = df_fdi_pos_data.index.nlevels
### Levels become columns 'level_0', 'level_1', ...; observation values stay in column 0:
df_fdi_pos_data = df_fdi_pos_data.reset_index()
### Long ISO names -> short ISO names mapping (built once for both country levels):
dict_iso_long_to_short = dict(zip(df_country_codes['ISO LONG'].values, df_country_codes['ISO SHORT'].values))
### Replacing numbers with parameter values:
### (results are assigned back to the frame: an in-place replace on a column selection does not update the frame under pandas Copy-on-Write)
for iter_level in range(int_levels_number):
    str_level_column = 'level_' + str(iter_level)
    ser_level_values = df_fdi_pos_data[str_level_column].replace(list_idx_dict[iter_level])
    ### Replacing long ISO names with short ISO names (reporter is level 0, partner is level 7):
    if iter_level in (0, 7):
        ser_level_values = ser_level_values.replace(dict_iso_long_to_short)
    ### Directions renaming:
    elif (iter_level == 2):
        ser_level_values = ser_level_values.replace({'DI': 'Inward', 'DO': 'Outward'})
    ### Flow types renaming:
    elif (iter_level == 5):
        ser_level_values = ser_level_values.replace({'NET': 'Net', 'A': 'Asset', 'L': 'Liability'})
    df_fdi_pos_data[str_level_column] = ser_level_values

### Integrated observations dropping (rows whose reporter or partner is not a known ISO SHORT code):
df_fdi_pos_data = df_fdi_pos_data.loc[
                                      df_fdi_pos_data['level_0'].isin(df_country_codes['ISO SHORT'].values) & 
                                      df_fdi_pos_data['level_7'].isin(df_country_codes['ISO SHORT'].values)
                                     ]
### Indexes defining (levels 3, 4 and 6 are dropped; level 8 holds the dates):
ser_fdi_pos_data = df_fdi_pos_data.drop(['level_3', 'level_4', 'level_6'], axis = 1)\
                    .set_index(['level_2', 'level_8', 'level_0', 'level_7', 'level_1', 'level_5']).squeeze()
ser_fdi_pos_data.index.names = ['Direction', 'Date', 'Reporter_ID', 'Partner_ID', 'Currency', 'Accounting']
ser_fdi_pos_data = ser_fdi_pos_data.sort_index()
### Dropping positions of a country against itself:
ser_fdi_pos_data = ser_fdi_pos_data[ser_fdi_pos_data.index.get_level_values('Reporter_ID') != ser_fdi_pos_data.index.get_level_values('Partner_ID')]
ser_fdi_pos_data.name = 'FDI Positions'

In [18]:
### OECD FDI: FDI POSITION DATASET CONVERTING TO ASSET / LIABILITY DIMENSION: EMPTY VALUES FILLING

### The statements below rely on the identities used throughout this cell:
###     Outward: Net = Asset - Liability
###     Inward:  Net = Liability - Asset
### Every step mutates df_fdi_pos_acc and the later masks read the already filled values, so the statement order matters.

### One row per (Direction, Date, Reporter_ID, Partner_ID) with the accounting entries as columns:
df_fdi_pos_acc = ser_fdi_pos_data.droplevel('Currency').unstack('Accounting')

### Step 1: only Net is known: the sign of Net and the direction decide which of Asset / Liability receives |Net|
### (the remaining column is completed in Step 4):
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & 
                   (df_fdi_pos_acc['Net'] < 0.0) & (df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].isna()), 'Liability'] = -df_fdi_pos_acc['Net']
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & 
                   (df_fdi_pos_acc['Net'] >= 0.0) & (df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].isna()), 'Asset'] = df_fdi_pos_acc['Net']
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & 
                   (df_fdi_pos_acc['Net'] < 0.0) & (df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].isna()), 'Asset'] = -df_fdi_pos_acc['Net']
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & 
                   (df_fdi_pos_acc['Net'] >= 0.0) & (df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].isna()), 'Liability'] = df_fdi_pos_acc['Net']
### Control output: rows that still have Net only (expected to be empty after Step 1):
print('Only Net is filled:\n', df_fdi_pos_acc[df_fdi_pos_acc['Net'].notna() & (df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].isna())])

### Step 2: Net is missing: it is computed from Asset and Liability (the result stays NaN when either of them is missing):
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & df_fdi_pos_acc['Net'].isna(), 'Net'] = \
                                                                      df_fdi_pos_acc['Asset'] - df_fdi_pos_acc['Liability']
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & df_fdi_pos_acc['Net'].isna(), 'Net'] = \
                                                                      df_fdi_pos_acc['Liability'] - df_fdi_pos_acc['Asset']
### Control output: rows with both Asset and Liability but still without Net:
print('Empty Net value when Asset & Liability are filled:\n', 
      df_fdi_pos_acc[df_fdi_pos_acc['Net'].isna() & df_fdi_pos_acc['Asset'].notna() & df_fdi_pos_acc['Liability'].notna()])

### Step 3: Net is still missing and only one of Asset / Liability is known: Net takes the known value with the sign given by the direction:
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & df_fdi_pos_acc['Net'].isna() & df_fdi_pos_acc['Asset'].notna(), 'Net'] = \
                                                                                                                                      df_fdi_pos_acc['Asset']
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & df_fdi_pos_acc['Net'].isna() & df_fdi_pos_acc['Liability'].notna(), 'Net'] = \
                                                                                                                                     -df_fdi_pos_acc['Liability']
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & df_fdi_pos_acc['Net'].isna() & df_fdi_pos_acc['Asset'].notna(), 'Net'] = \
                                                                                                                                    -df_fdi_pos_acc['Asset']
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & df_fdi_pos_acc['Net'].isna() & df_fdi_pos_acc['Liability'].notna(), 'Net'] = \
                                                                                                                                     df_fdi_pos_acc['Liability']
### Control output: rows with Asset or Liability but still without Net:
print('Empty Net value when Asset or Liability are filled:\n', 
      df_fdi_pos_acc[df_fdi_pos_acc['Net'].isna() & (df_fdi_pos_acc['Asset'].notna() | df_fdi_pos_acc['Liability'].notna())])

### Step 4: Net and exactly one of Asset / Liability are known: the missing one is derived from the identities above:
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & 
                   df_fdi_pos_acc['Net'].notna() & df_fdi_pos_acc['Asset'].notna() & df_fdi_pos_acc['Liability'].isna(), 'Liability'] = \
                                                                                      df_fdi_pos_acc['Asset'] - df_fdi_pos_acc['Net']
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Outward') & 
                   df_fdi_pos_acc['Net'].notna() & df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].notna(), 'Asset'] = \
                                                                              df_fdi_pos_acc['Net'] + df_fdi_pos_acc['Liability']
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & 
                   df_fdi_pos_acc['Net'].notna() & df_fdi_pos_acc['Asset'].notna() & df_fdi_pos_acc['Liability'].isna(), 'Liability'] = \
                                                                                      df_fdi_pos_acc['Net'] + df_fdi_pos_acc['Asset']
df_fdi_pos_acc.loc[(df_fdi_pos_acc.index.get_level_values('Direction') == 'Inward') & 
                   df_fdi_pos_acc['Net'].notna() & df_fdi_pos_acc['Asset'].isna() & df_fdi_pos_acc['Liability'].notna(), 'Asset'] = \
                                                                              df_fdi_pos_acc['Liability'] - df_fdi_pos_acc['Net']
### Control output: index entries with at least two filled columns but not all three (expected to be an empty set):
print('Empty Net / Asset / Liability value when two others are filled:\n', set(df_fdi_pos_acc.dropna(thresh = 2).index) - set(df_fdi_pos_acc.dropna().index))

Only Net is filled:
 Empty DataFrame
Columns: [Asset, Liability, Net]
Index: []
Empty Net value when Asset & Liability are filled:
 Empty DataFrame
Columns: [Asset, Liability, Net]
Index: []
Empty Net value when Asset or Liability are filled:
 Empty DataFrame
Columns: [Asset, Liability, Net]
Index: []
Empty Net / Asset / Liability value when two others are filled:
 set()


In [25]:
### OECD FDI: FDI POSITION DATASET CONVERTING TO ASSET / LIABILITY DIMENSION: CALCULATING & SAVING

### Inward and Outward values are summed for each (Date, Reporter_ID, Partner_ID, Accounting) entry; only Asset and Liability entries are kept:
ser_fdi_pos_acc = df_fdi_pos_acc.stack('Accounting', dropna = False).unstack('Direction').sort_index().loc[(All, All, All, ['Asset', 'Liability']), All].sum(axis = 1)
### The accounting entry becomes the first index level and is renamed to 'Position':
ser_fdi_pos_acc = ser_fdi_pos_acc.reorder_levels([-1, 0, 1, 2]).sort_index()
ser_fdi_pos_acc.index.rename('Position', level = 0, inplace = True)
### Zero sums (including entries where both directions are empty) are stored as missing values;
### np.nan is used because the np.NaN alias was removed in NumPy 2.0.
### NOTE(review): mode = 'w' recreates the whole file, so any other key stored under str_path_fdi_dataset is lost - confirm this is intended:
ser_fdi_pos_acc.replace({0.0 : np.nan}).to_hdf(path_or_buf = str_path_fdi_dataset, key = str_fdi_pos_oecd_dataset, mode = 'w', format = 'fixed')

In [19]:
### OECD FDI: FDI POSITION NET ASSET DATASET SAVING

### Net values of the Outward direction with zeros turned into missing values
### (np.nan is used because the np.NaN alias was removed in NumPy 2.0):
ser_fdi_asset = df_fdi_pos_acc.loc[('Outward', All, All, All), 'Net'].droplevel('Direction').replace({0.0 : np.nan})
ser_fdi_asset.index.names = ['Date', 'Reporter', 'Partner']
ser_fdi_asset.sort_index().to_hdf(path_or_buf = str_path_oecd_fdi_out_net, key = str_full_oecd_fdi_out_net, mode = 'w', format = 'fixed')

In [26]:
### TEMP

### Two previously saved outward net series (equity and total) are loaded under the same key and placed side by side:
ser_out_net_equity = pd.read_hdf(path_or_buf = 'Data_Files/Source_Files/oecd_outward_net_equity.h5', key = str_full_oecd_fdi_out_net).rename('Equity')
ser_out_net_total = pd.read_hdf(path_or_buf = 'Data_Files/Source_Files/oecd_outward_net_total.h5', key = str_full_oecd_fdi_out_net).rename('Total')
df_test = pd.concat([ser_out_net_equity, ser_out_net_total], axis = 1)

In [30]:
## TEMP

### Rows where the equity value equals the total value:
df_test[df_test['Equity'] == df_test['Total']]

85242

In [None]:
### OECD FDI: LIABILITY DATA INCORPORATION

### Saved positions are loaded once and reused for both selections below:
ser_fdi_pos_saved = pd.read_hdf(path_or_buf = str_path_fdi_dataset, key = str_fdi_pos_oecd_dataset)
list_ison_short = ser_ison_status.index.to_list()
### Assets reported by the ISON countries themselves:
ser_fdi_asset = ser_fdi_pos_saved.loc['Asset', :, list_ison_short, :].droplevel(0).sort_index().astype('float32')
ser_fdi_asset.index.names = ['Date', 'Reporter', 'Partner']
### Liabilities reported by any country against an ISON partner; the two country levels are swapped by renaming,
### so the ISON country takes the 'Reporter' place:
ser_fdi_liability_inv = ser_fdi_pos_saved.loc['Liability', :, :, list_ison_short].droplevel(0).sort_index().astype('float32')
ser_fdi_liability_inv.index.names = ['Date', 'Partner', 'Reporter']
ser_fdi_liability_inv = ser_fdi_liability_inv.reorder_levels(['Date', 'Reporter', 'Partner']).sort_index()
### Both sources side by side, with the current market of the reporter added as an index level:
df_asset_aug_total = pd.concat([ser_fdi_asset, ser_fdi_liability_inv], axis = 1, names = 'Source Position', keys = ['Asset', 'Liability'])
df_asset_aug_total = df_asset_aug_total.join(ser_ison_status, on = 'Reporter').set_index('Market', append = True)
### Own asset value has priority; the counterpart liability fills the gaps:
df_asset_aug_total['Asset_Augmented'] = df_asset_aug_total['Asset'].combine_first(df_asset_aug_total['Liability'])

In [77]:
### OECD FDI: FDI AUGMENTED DATASET SAVING

### The file is recreated on every run (mode = 'w'):
df_asset_aug_total.to_hdf(path_or_buf = str_path_oecd_fdi_combined, key = str_full_oecd_fdi_combined, mode = 'w', format = 'fixed')

In [78]:
### TEMP

### Reloading the saved augmented dataset and showing the rows with a negative 'Asset' value:
df_asset_aug_total = pd.read_hdf(path_or_buf = str_path_oecd_fdi_combined, key = str_full_oecd_fdi_combined)
df_asset_aug_total[df_asset_aug_total['Asset'] < 0.0]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Asset,Liability,Asset_Augmented
Date,Reporter,Partner,Market,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2005-12-30,EE,DM,FM,-0.138025,,-0.138025
2005-12-30,EE,MD,FM,-0.460083,,-0.460083
2005-12-30,FR,GQ,DM,-1.179700,,-1.179700
2006-12-29,EE,DM,FM,-0.169893,,-0.169893
2007-12-31,EE,CA,FM,-0.373913,,-0.373913
...,...,...,...,...,...,...
2021-12-31,TR,MG,EM,-1.740000,,-1.740000
2021-12-31,TR,ML,EM,-11.680000,,-11.680000
2021-12-31,TR,PA,EM,-0.860000,,-0.860000
2021-12-31,TR,PH,EM,-1.310000,,-1.310000
