In [1]:
### IMF CPIS: BILATERAL EQUITY & DEBT INVESTMENT POSITIONS

In [2]:
### RUN EVERY TIME: INITIALIZATION

import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', -1) ### To display long strings
import math
import requests
import json ### To correct JSON structure before unpacking
import gc
import os
import datetime
import time
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import seaborn as sns
%load_ext line_profiler

In [3]:
### RUN EVERY TIME: VERSION CONTROL

### Printing the pandas and Python interpreter versions the notebook is executed with:
from platform import python_version
print('pandas version: ', pd.__version__)
print('python version: ', python_version())

pandas version:  0.25.3
python version:  3.7.4


In [4]:
### RUN EVERY TIME: MAIN CONSTANTS

### MultiIndex level slice constant (stands for "every label of this level" inside .loc[] selections):
All = slice(None)
### Universe path (MS Excel workbook passed to ison_membership_converting):
str_path_universe = 'Data_Files/Source_Files/acadian_universe.xlsx'
### IMF CPIS dataset of total investment values (HDF5 file path and the keys of the objects stored in it):
str_path_imf_cpis_dataset = 'Data_Files/Source_Files/cpis_dataset.h5'
str_asset_imf_cpis_dataset = 'cpis_asset_dataset'
str_liability_imf_cpis_dataset = 'cpis_liability_dataset'
str_full_imf_cpis_dataset = 'cpis_full_dataset'
### Combined (asset augmented with inverted liability) dataset path and key:
str_path_imf_cpis_combined = 'Data_Files/Source_Files/cpis_combined.h5'
str_full_imf_cpis_combined = 'cpis_full_combined'
### Detailed IMF CPIS dataset (raw asset and liability series are saved under separate keys of the same file):
str_path_imf_cpis_detailed_raw = 'Data_Files/Source_Files/cpis_detailed_raw.h5'
str_key_imf_cpis_assets = 'cpis_detailed_assets'
str_key_imf_cpis_liabilities = 'cpis_detailed_liabilities'
### Filtered IMF CPIS dataset:
str_path_imf_cpis_filtered = 'Data_Files/Source_Files/cpis_filtered.h5'
str_key_imf_cpis_filtered = 'cpis_filtered'
### Technical Constants:
### Last date of the ISON membership history (string form is also used for .loc[] selection of the current status):
str_date_end = '2022-10-31'
### NOTE(review): date_start and date_ison are not referenced in the cells visible here - confirm they are still needed:
date_start = pd.Timestamp('1989-12-29')
date_end = pd.Timestamp(str_date_end)
date_ison = pd.Timestamp('1994-12-31')

In [5]:
### DEFINING COUNTRY CODES EXTRACTOR

def get_country_codes(use_local_copy = False):
    """
    Load the table of ISO country codes indexed by upper-cased country name.

    Parameters:
        use_local_copy: if True, the table is parsed from the local HTML copy
                        instead of the online page (for the case when the URL is unavailable).
    Returns:
        pd.DataFrame with 'ISO SHORT' and 'ISO LONG' columns, sorted by country name.
    """
    ### Choosing between the saved page and the online source:
    url_country_code = 'Data_Files/Source_Files/countrycode.html' if use_local_copy else 'https://countrycode.org/'
    ### First table of the page, country names as index:
    df_full_codes = pd.read_html(url_country_code, index_col = 'COUNTRY')[0]
    ### 'ISO CODES' holds both codes in one string separated by ' / ', so it is split to two columns:
    df_iso_split = df_full_codes['ISO CODES'].str.split(' / ', expand = True)
    df_full_codes[['ISO SHORT', 'ISO LONG']] = df_iso_split
    ### Sorting is performed before the index is upper-cased:
    df_result = df_full_codes[['ISO SHORT', 'ISO LONG']].sort_index()
    df_result.index = df_result.index.str.upper()
    ### Results output:
    return df_result

In [6]:
### DEFINING EXTRACTION UNIVERSE DATA FROM MS EXCEL SOURCE (TO BE IGNORED IN PRODUCT CODE)

def ison_membership_converting(str_path_universe, date_end, bool_daily = False, int_backfill_months = 0):
    """
    Build the ISON universe membership history from the 'Switchers' sheet of the MS Excel source.

    Parameters:
        str_path_universe: path to the MS Excel workbook containing the 'Switchers' sheet;
        date_end: last date of the resulting history (each country is forward-filled up to this date);
        bool_daily: if True, the business-month-end history is converted to business-daily frequency;
        int_backfill_months: number of business-month-ends to prepend before the first date of each region
                             for the countries that belong to the region on that first date (0 - no backfilling).
    Returns:
        pd.Series named 'Market' with ('Date', 'Country') MultiIndex and 'DM' / 'EM' / 'FM' / NaN values.
    """
    ### Business-month-end offset object is used instead of the 'BM' string alias,
    ### because the alias is deprecated in recent pandas releases while the object works in all of them:
    offset_bm = pd.offsets.BMonthEnd()
    ### Defining business-month-end reindexation on country level:
    def country_modify(ser_raw_country, date_end):
        ser_res_country = ser_raw_country.droplevel(0).resample('MS').last().resample(offset_bm).last()
        range_country = pd.date_range(ser_res_country.index[0], date_end, freq = offset_bm)
        return ser_res_country.reindex(range_country).ffill()
    ### Markets encoding table (np.nan instead of the np.NaN alias, which was removed in NumPy 2.0):
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM', 0: np.nan}
    ### Loading source file:
    df_raw_universe = pd.read_excel(engine = 'openpyxl', io = str_path_universe, sheet_name = 'Switchers', header = 0, parse_dates = True, index_col = [0, 1],
                                 na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    ### Converting source file (missing region codes are encoded as 0 and translated to NaN by the encoding table later):
    df_raw_universe.index.names = ['Country', 'Date']
    ser_raw_universe = df_raw_universe['Region'].fillna(0).rename('Market')
    ### By country reindexation and translation:
    ser_res_universe = ser_raw_universe.groupby('Country').apply(country_modify, date_end)
    ser_res_universe.index.names = ['Country', 'Date']
    ser_res_universe = ser_res_universe.replace(dict_markets).reorder_levels([1, 0]).sort_index()
    ### Expanding membership for primary regions members by backfilling:
    if int_backfill_months:
        ### List of regions:
        list_region = list(ser_res_universe.dropna().unique())
        ### Initialising of collection of series with backfilled data for each region:
        list_ison_backfill = []
        ### Regions looping:
        for iter_region in list_region:
            ### Defining start of region date:
            date_first_valid = ser_res_universe.loc[ser_res_universe == iter_region].first_valid_index()[0]
            ### Creating dates index to backfilling (the first valid date itself is excluded):
            idx_date_backfill = pd.date_range(end = date_first_valid, periods = int_backfill_months + 1, freq = offset_bm)[: -1]
            ### Creating primary countries index to backfilling:
            idx_region_backfill = ser_res_universe.loc[ser_res_universe == iter_region].loc[date_first_valid, All].index.get_level_values('Country')
            ### Creating full index:
            idx_ison_backfill = pd.MultiIndex.from_product([idx_date_backfill, idx_region_backfill])
            ### Series with backfilled data:
            list_ison_backfill.append(pd.Series(iter_region, index = idx_ison_backfill))
        ### Combination of backfilled series and original ISON data:
        ser_res_universe = ser_res_universe.combine_first(pd.concat(list_ison_backfill, axis = 0)).sort_index()
        ser_res_universe.index.names = ['Date', 'Country']
    ### The daily conversion below selects the 'Market' column by name, so the name is fixed beforehand:
    ser_res_universe.name = 'Market'
    ### Converting to daily frequency:
    if bool_daily:
        ser_res_universe = ser_res_universe.reset_index('Country').groupby('Country').resample('B').ffill()['Market'].swaplevel().sort_index()
    ### Results output:
    ser_res_universe.name = 'Market'
    return ser_res_universe

In [7]:
### RUN EVERY TIME: COMMON DATA EXTRACTION STEPS

### World Country Codes:
df_country_codes = get_country_codes()
### ISON membership history:
ser_ison_membership = ison_membership_converting(str_path_universe, pd.to_datetime(str_date_end))
### ISON LONG IDs list (long codes of the countries whose short codes are present in the membership history):
idx_ison_short = ser_ison_membership.index.get_level_values('Country').unique()
ser_ison_long = df_country_codes.loc[df_country_codes['ISO SHORT'].isin(idx_ison_short), 'ISO LONG']
list_ison_long = list(ser_ison_long.values)
### ISON current status (membership cross-section for the last date):
ser_ison_status = ser_ison_membership.loc[str_date_end].droplevel('Date')
### ISON stats (number of countries in the whole universe and in each region):
int_ison_number = len(list_ison_long)
list_regions = ['DM', 'EM', 'FM']
dict_ison_len = {'Full Universe': int_ison_number}
for iter_region in list_regions:
    ser_region_members = ser_ison_status[ser_ison_status == iter_region]
    dict_ison_len[iter_region] = len(ser_region_members)
ser_market_len = pd.Series(dict_ison_len)
ser_market_len.index.names = ['Market']

In [8]:
### IMF CPIS: GENERAL DATA PREPARATION

### Constants:
### MultiIndex level slice constant (repeats the definition from the MAIN CONSTANTS cell):
All = slice(None)
### Browser-like headers attached to every request of the session:
dict_request_headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
### Base URL of the IMF data service and the method-specific URL parts appended to it:
str_imf_base_url = 'http://dataservices.imf.org/REST/SDMX_JSON.svc/'
str_imf_dataflow_add = 'DataFlow'
str_imf_datastructure_add = 'DataStructure/'
str_imf_codelist_add = 'CodeList/'
str_imf_dataset_add = 'CompactData/'
### Pause (in seconds) made between consecutive requests:
int_seconds_to_sleep = 1
### NOTE(review): int_imf_country_limit is not referenced in the cells visible here - confirm it is still needed:
int_imf_country_limit = 30

In [9]:
### IMF CPIS: REQUESTS SESSION INITIALIZING

### Single session object shared by all the requests below:
request_session = requests.Session()
### For avoiding data request errors from IMF Data Service:
request_session.headers.update(dict_request_headers)

In [10]:
### IMF CPIS: DATAFLOW SEARCHING

### Requesting the list of dataflows and converting it to a table:
obj_imf_dataflow_list = request_session.get(str_imf_base_url + str_imf_dataflow_add).json()
df_imf_dataflow = pd.DataFrame(obj_imf_dataflow_list['Structure']['Dataflows']['Dataflow'])
### Unpacking the '#text' field of each 'Name' entry to a plain 'Description' column:
df_imf_dataflow = df_imf_dataflow.assign(Description = df_imf_dataflow['Name'].apply(pd.Series)['#text'].values)[['@id', 'Description']]
### Descriptions indexed by dataflow id:
ser_imf_dataflow = df_imf_dataflow.set_index('@id', drop = True).squeeze()
### Searching DataFlow code for further requests (first description containing 'CPIS', 'DS-' removed from its id):
str_imf_cpis_id = ser_imf_dataflow[ser_imf_dataflow.str.contains('CPIS')].index[0].replace('DS-', '')
print(str_imf_cpis_id)

CPIS


In [11]:
### IMF CPIS: DATASTRUCTURE SEARCHING

### Requesting the structure of the CPIS dataflow and keeping the concept name, code list name
### and frequency flag of each of its dimensions:
obj_imf_cpis_structure = request_session.get(str_imf_base_url + str_imf_datastructure_add + str_imf_cpis_id).json()
df_imf_cpis_params = pd.DataFrame(obj_imf_cpis_structure['Structure']['KeyFamilies']['KeyFamily']['Components']['Dimension'])\
                                [['@conceptRef', '@codelist', '@isFrequencyDimension']]
### Receiving DataFlow parameters and code lists for each of them:
print(df_imf_cpis_params)

          @conceptRef          @codelist @isFrequencyDimension
0  FREQ                CL_FREQ            true                
1  REF_AREA            CL_AREA_CPIS       NaN                 
2  INDICATOR           CL_INDICATOR_CPIS  NaN                 
3  REF_SECTOR          CL_SECTOR_CPIS     NaN                 
4  COUNTERPART_SECTOR  CL_SECTOR_CPIS     NaN                 
5  COUNTERPART_AREA    CL_AREA_CPIS       NaN                 


In [12]:
### IMF CPIS: CODES DESCRIPTIONS LOADING

for int_counter, str_param_code in enumerate(df_imf_cpis_params['@codelist']):
    ### Only the code lists at positions 2 (INDICATOR) and 3 (REF_SECTOR) are loaded:
    if int_counter not in (2, 3):
        continue
    time.sleep(int_seconds_to_sleep)
    obj_imf_cpis_param = request_session.get(str_imf_base_url + str_imf_codelist_add + str_param_code).json()
    df_imf_cpis_param = pd.DataFrame(obj_imf_cpis_param['Structure']['CodeLists']['CodeList']['Code'])
    ### Receiving values for each code list:
    df_imf_cpis_param = df_imf_cpis_param.assign(Text = df_imf_cpis_param['Description'].apply(pd.Series)['#text'].values)[['@value', 'Text']]
#    print(int_counter, ':', df_imf_cpis_params.iloc[int_counter, All]['@conceptRef'], ':', str_param_code, ':\n', df_imf_cpis_param.head(20))
    if (int_counter == 2):
        ### Indicators: only the first ten codes of the list are kept:
        df_indicator_head = df_imf_cpis_param[: 10]
        dict_indicator = dict(zip(df_indicator_head['@value'], df_indicator_head['Text']))
    else:
        ### Sectors: full dictionary plus the list of sectors used in data requests:
        dict_sector = dict(zip(df_imf_cpis_param['@value'], df_imf_cpis_param['Text']))
        list_sector_filtered = ['T', 'CB', 'GG', 'HH', 'NP']

### Sorted short codes of the universe countries (second level of the membership index):
list_ison_countries = sorted(str(iter_country) for iter_country in ser_ison_membership.index.get_level_values(1).unique())
str_cpis_freq = 'A' # 'B' # 
#str_cpis_asset_indicator = 'I_A_T_T_T_BP6_USD' 
#str_cpis_liability_indicator = 'I_L_T_T_T_BP6_USD'
#str_cpis_ref_sector = 'T'
#str_cpis_cp_sector = 'T'
# 0: FREQ == 'B' # Semi-annual frequency - they don't have Quarterly or Monthly frequency data
# 1: REF_AREA == '??' # Country
# 2: INDICATOR  == 'I_A_T_T_T_BP6_USD' # Assets, Total Investment, BPM6, US Dollars & I_L_T_T_T_BP6_USD    Liabilities, Total Investment, BPM6, US Dollars
# 3: REF_SECTOR == 'T' # Total Holdings (all sectors)
# 4: COUNTERPART_SECTOR  == 'T' # Total Holdings (all sectors)
# 5: COUNTERPART_AREA == '??' # Country

In [None]:
### IMF CPIS : REPORTED PORTFOLIO INVESTMENT ASSETS DATASET RETRIEVING

gc.collect()
### List of bilateral dataframes for future concatenation:
list_cpis_bilateral = []
### Beginning of request URL:
str_cpis_const_url = str_imf_base_url + str_imf_dataset_add + str_imf_cpis_id + '/'
### Sector filters do not depend on reporter or indicator, so they are built once before the loops:
str_reporter_sector = '+'.join(list_sector_filtered)
str_partner_sector = '+'.join(list_sector_filtered)
### Looping over reporter:
for iter_investor in list_ison_countries:
#for iter_investor in ['US', 'BD']:
    ### Looping over indicator:
    for iter_indicator in dict_indicator:
        ### Asset indicators only: the third character of the code is 'A' (as in 'I_A_T_T_T_BP6_USD'):
        if (iter_indicator[2] != 'A'):
            continue
        ### Request key: FREQ.REF_AREA.INDICATOR.REF_SECTOR.COUNTERPART_SECTOR (counterpart area is not restricted):
        str_cpis_full_url = str_cpis_const_url + '.'.join([str_cpis_freq, iter_investor, iter_indicator, str_reporter_sector, str_partner_sector])
        obj_cpis_set = request_session.get(str_cpis_full_url)
        ### Data reading as JSON ('@OBS_STATUS' keys are renamed to '@OBS_VALUE' before parsing -
        ### presumably to keep status-only observations readable through the value column; verify against the service output):
        dict_cpis_set = json.loads(obj_cpis_set.text.replace('@OBS_STATUS', '@OBS_VALUE'))
        ### Converting each bilateral dataset to dataframe and its munging:
        if ('Series' in dict_cpis_set['CompactData']['DataSet']):
            obj_cpis_series = dict_cpis_set['CompactData']['DataSet']['Series']
            ### A single series is delivered as a dict instead of a list of dicts:
            list_series = obj_cpis_series if isinstance(obj_cpis_series, list) else [obj_cpis_series]
            for dict_cpis_pair in list_series:
                ### A single observation is delivered as a dict instead of a list of dicts:
                obj_cpis_obs = dict_cpis_pair['Obs']
                list_bilateral = obj_cpis_obs if isinstance(obj_cpis_obs, list) else [obj_cpis_obs]
                df_cpis_bilateral = pd.DataFrame(list_bilateral)[['@TIME_PERIOD', '@OBS_VALUE']]
                df_cpis_bilateral.columns = ['Date', 'Value']
                ### Series attributes are attached to each observation row:
                df_cpis_bilateral = df_cpis_bilateral.assign(Indicator = dict_cpis_pair['@INDICATOR'],
                                                             Reporter_Sector = dict_cpis_pair['@REF_SECTOR'],
                                                             Partner_Sector = dict_cpis_pair['@COUNTERPART_SECTOR'],
                                                             Reporter_ID = dict_cpis_pair['@REF_AREA'],
                                                             Partner_ID = dict_cpis_pair['@COUNTERPART_AREA'])
                list_cpis_bilateral.append(df_cpis_bilateral)
        else:
            print('No data in response of the next request:\n', str_cpis_full_url)
        ### Pause between requests:
        time.sleep(int_seconds_to_sleep)
    print(iter_investor, ': loading completed')
### Bilateral datasets aggregating:
df_cpis_raw = pd.concat(list_cpis_bilateral, axis = 0, ignore_index = True)
### Period labels are moved to the last business day of the year:
df_cpis_raw['Date'] = pd.to_datetime(df_cpis_raw['Date']) + pd.offsets.BYearEnd()
### Non-numeric markers 'C' and '-' are turned to missing values
### (np.nan instead of the np.NaN alias, which was removed in NumPy 2.0):
df_cpis_raw.loc[df_cpis_raw['Value'].isin(['C', '-']), 'Value'] = np.nan
### Dropping self-investment rows and partners that are not in the country codes table:
df_cpis_raw = df_cpis_raw[df_cpis_raw['Reporter_ID'] != df_cpis_raw['Partner_ID']]
df_cpis_raw = df_cpis_raw[df_cpis_raw['Partner_ID'].isin(df_country_codes['ISO SHORT'].values)]
print('Unique partners number:', len(df_cpis_raw['Partner_ID'].unique()))
df_cpis_raw = df_cpis_raw.rename({'Reporter_ID': 'Reporter', 'Partner_ID': 'Partner'}, axis = 1)
df_cpis_raw = df_cpis_raw.astype({'Indicator': 'str', 'Reporter_Sector': 'str', 'Partner_Sector': 'str', 'Reporter': 'str', 'Partner': 'str',
                                  'Value': 'float32'})
### Data saving (mode 'w' recreates the file, so this cell has to be run before the liabilities one):
ser_cpis_asset = df_cpis_raw.set_index(['Date', 'Indicator', 'Reporter_Sector', 'Partner_Sector', 'Reporter', 'Partner'])['Value'].sort_index().astype('float32')
del df_cpis_raw
gc.collect()
ser_cpis_asset.to_hdf(path_or_buf = str_path_imf_cpis_detailed_raw, key = str_key_imf_cpis_assets, mode = 'w', format = 'fixed')

In [None]:
### IMF CPIS : REPORTED PORTFOLIO INVESTMENT LIABILITIES DATASET RETRIEVING

gc.collect()
### List of bilateral dataframes for future concatenation:
list_cpis_bilateral = []
### Beginning of request URL:
str_cpis_const_url = str_imf_base_url + str_imf_dataset_add + str_imf_cpis_id + '/'
### Sector filters do not depend on country or indicator, so they are built once before the loops:
str_reporter_sector = '+'.join(list_sector_filtered)
str_partner_sector = '+'.join(list_sector_filtered)
### Looping over investor (requested as the counterpart area of the liabilities reported by other countries):
for iter_investor in list_ison_countries:
#for iter_investor in ['US', 'BD']:
    ### Looping over indicator:
    for iter_indicator in dict_indicator:
        ### Liability indicators only: the third character of the code is 'L' (as in 'I_L_T_T_T_BP6_USD'):
        if (iter_indicator[2] != 'L'):
            continue
        ### Request key: FREQ.REF_AREA.INDICATOR.REF_SECTOR.COUNTERPART_SECTOR.COUNTERPART_AREA (reporting area is not restricted):
        str_cpis_full_url = str_cpis_const_url + '.'.join([str_cpis_freq, '', iter_indicator, str_reporter_sector, str_partner_sector, iter_investor])
        obj_cpis_set = request_session.get(str_cpis_full_url)
        ### Data reading as JSON ('@OBS_STATUS' keys are renamed to '@OBS_VALUE' before parsing -
        ### presumably to keep status-only observations readable through the value column; verify against the service output):
        dict_cpis_set = json.loads(obj_cpis_set.text.replace('@OBS_STATUS', '@OBS_VALUE'))
        ### Converting each bilateral dataset to dataframe and its munging:
        if ('Series' in dict_cpis_set['CompactData']['DataSet']):
            obj_cpis_series = dict_cpis_set['CompactData']['DataSet']['Series']
            ### A single series is delivered as a dict instead of a list of dicts:
            list_series = obj_cpis_series if isinstance(obj_cpis_series, list) else [obj_cpis_series]
            for dict_cpis_pair in list_series:
                ### A single observation is delivered as a dict instead of a list of dicts:
                obj_cpis_obs = dict_cpis_pair['Obs']
                list_bilateral = obj_cpis_obs if isinstance(obj_cpis_obs, list) else [obj_cpis_obs]
                df_cpis_bilateral = pd.DataFrame(list_bilateral)[['@TIME_PERIOD', '@OBS_VALUE']]
                df_cpis_bilateral.columns = ['Date', 'Value']
                ### Series attributes are attached to each observation row:
                df_cpis_bilateral = df_cpis_bilateral.assign(Indicator = dict_cpis_pair['@INDICATOR'],
                                                             Reporter_S = dict_cpis_pair['@REF_SECTOR'],
                                                             Partner_S = dict_cpis_pair['@COUNTERPART_SECTOR'],
                                                             Reporter_ID = dict_cpis_pair['@REF_AREA'],
                                                             Partner_ID = dict_cpis_pair['@COUNTERPART_AREA'])
                list_cpis_bilateral.append(df_cpis_bilateral)
        else:
            print('No data in response of the next request:\n', str_cpis_full_url)
        ### Pause between requests:
        time.sleep(int_seconds_to_sleep)
    print(iter_investor, ': loading completed')
### Bilateral datasets aggregating:
df_cpis_raw = pd.concat(list_cpis_bilateral, axis = 0, ignore_index = True)
### Period labels are moved to the last business day of the year:
df_cpis_raw['Date'] = pd.to_datetime(df_cpis_raw['Date']) + pd.offsets.BYearEnd()
### Non-numeric markers 'C' and '-' are turned to missing values
### (np.nan instead of the np.NaN alias, which was removed in NumPy 2.0):
df_cpis_raw.loc[df_cpis_raw['Value'].isin(['C', '-']), 'Value'] = np.nan
### Dropping self-investment rows and liability reporters that are not in the country codes table:
df_cpis_raw = df_cpis_raw[df_cpis_raw['Reporter_ID'] != df_cpis_raw['Partner_ID']]
df_cpis_raw = df_cpis_raw[df_cpis_raw['Reporter_ID'].isin(df_country_codes['ISO SHORT'].values)]
print('Unique reporters number:', len(df_cpis_raw['Reporter_ID'].unique()))
### Inverting the direction: the liability reporter becomes 'Partner' and its counterpart becomes 'Reporter',
### so the result has the same orientation as the asset dataset:
df_cpis_raw = df_cpis_raw.rename({'Reporter_ID': 'Partner', 'Partner_ID': 'Reporter', 'Reporter_S': 'Partner_Sector', 'Partner_S': 'Reporter_Sector'}, axis = 1)
df_cpis_raw = df_cpis_raw.astype({'Indicator': 'str', 'Reporter_Sector': 'str', 'Partner_Sector': 'str', 'Reporter': 'str', 'Partner': 'str',
                                  'Value': 'float32'})
### Data saving (mode 'a' adds the key to the file created by the assets cell):
ser_cpis_liability_inv = df_cpis_raw.set_index(['Date', 'Indicator', 'Reporter_Sector', 'Partner_Sector', 'Reporter', 'Partner'])['Value'].sort_index()\
                                    .astype('float32')
del df_cpis_raw
gc.collect()
ser_cpis_liability_inv.to_hdf(path_or_buf = str_path_imf_cpis_detailed_raw, key = str_key_imf_cpis_liabilities, mode = 'a', format = 'fixed')

In [96]:
### IMF CPIS: RAW ASSET DATA LOADING

gc.collect()
### Reading the asset series saved by the assets retrieving cell:
ser_cpis_asset = pd.read_hdf(path_or_buf = str_path_imf_cpis_detailed_raw, key = str_key_imf_cpis_assets)

55540

In [97]:
### IMF CPIS: ASSET FILTERING

### Index labels to keep on the corresponding levels of the raw asset series:
list_valid_indicators = ['I_A_T_T_T_BP6_USD', 'I_A_D_S_T_BP6_USD']
list_valid_reporter_sectors = ['T', 'CB', 'GG']
list_valid_partner_sectors = ['T']
list_valid_partners = df_country_codes['ISO SHORT'].values

### Selecting valid labels, dropping the single-valued 'Partner_Sector' level and moving 'Reporter_Sector' to the last position
### (resulting levels order: Date, Indicator, Reporter, Partner, Reporter_Sector):
ser_asset_filtered = ser_cpis_asset.loc[:, list_valid_indicators, list_valid_reporter_sectors, list_valid_partner_sectors, :, list_valid_partners]\
                     .droplevel('Partner_Sector').reorder_levels([0, 1, 3, 4, 2]).sort_index().astype('float32')
### Raw series is removed from memory, so it has to be reloaded before any later use:
del ser_cpis_asset
gc.collect()

23

In [104]:
### IMF CPIS: ASSET REPORTER SECTOR CLEARING

gc.collect()

def reindex_reporter_sectors(ser_group):
    """
    Subtract central bank ('CB') and general government ('GG') holdings from the total ('T') for a single
    Date / Indicator / Reporter / Partner group.

    Parameters:
        ser_group: pd.Series of one group with 'Date', 'Indicator', 'Reporter', 'Partner' and reporter sector index levels.
    Returns:
        Scalar: 'T' value minus the sum of 'CB' and 'GG' values.
    """
    ### Leaving reporter sector as the only index level:
    ser_by_sector = ser_group.droplevel(['Date', 'Indicator', 'Reporter', 'Partner'])
    ### Groups with less than three rows are expanded to all valid sectors, every missing value is set to zero there:
    if (len(ser_by_sector.index) < 3):
        ser_by_sector = ser_by_sector.reindex(list_valid_reporter_sectors).fillna(0.0)
    flt_official_holdings = ser_by_sector['CB'] + ser_by_sector['GG']
    return ser_by_sector['T'] - flt_official_holdings

def get_commerce(ser_raw):
    """
    Apply reindex_reporter_sectors to every Date / Indicator / Reporter / Partner group of the series.

    Parameters:
        ser_raw: pd.Series with 'Date', 'Indicator', 'Reporter', 'Partner' and reporter sector index levels.
    Returns:
        pd.Series of per-group results indexed by the grouping levels.
    """
    list_group_levels = ['Date', 'Indicator', 'Reporter', 'Partner']
    ser_grouped = ser_raw.groupby(list_group_levels)
    return ser_grouped.apply(reindex_reporter_sectors)

#list_test_date = ['1997-12-31', '2021-12-31'] # ['2021-12-31'] # 
#list_test_indicator = ['I_A_D_S_T_BP6_USD']
#list_test_reporter = ['PT', 'AT']
#list_test_partner = ['US', 'IT'] # ['IT'] # 

#ser_test_raw = ser_asset_filtered.loc[:, :, list_test_reporter, :, :]
#display(ser_test_raw.loc[list_test_date, list_test_indicator, list_test_reporter, list_test_partner, :])
#ser_test_res = ser_test_raw.groupby(['Date', 'Indicator', 'Reporter', 'Partner']).apply(reindex_reporter_sectors)
#ser_test_res = get_commerce(ser_test_raw)
#display(ser_test_res.loc[list_test_date, list_test_indicator, list_test_reporter, list_test_partner])

#%timeit get_commerce(ser_test_raw)

#%lprun -f reindex_reporter_sectors get_commerce(ser_test_raw)

### Sector clearing for the whole filtered dataset; indicator is moved to the last index position afterwards
### (resulting levels order: Date, Reporter, Partner, Indicator):
ser_asset_commerce = get_commerce(ser_asset_filtered).reorder_levels([0, 2, 3, 1]).sort_index()

In [107]:
### IMF CPIS: ASSET CLEARING TEST

### Sample selection for visual comparison of the data before and after the reporter sector clearing:
list_test_date = ['1997-12-31', '2021-12-31'] # ['2021-12-31'] # 
list_test_indicator = ['I_A_D_S_T_BP6_USD']
list_test_reporter = ['PT']
list_test_partner = ['US', 'IT'] # ['IT'] # 

### NOTE(review): selectors are passed in Date / Reporter / Partner / Indicator order, while the output recorded below
### shows Date / Indicator / Reporter / Partner levels order - confirm the order against the current index before re-running:
display(ser_asset_filtered.loc[list_test_date, list_test_reporter, list_test_partner, list_test_indicator])
display(ser_asset_commerce.loc[list_test_date, list_test_reporter, list_test_partner, list_test_indicator])

Date        Indicator          Reporter  Partner  Reporter_Sector
1997-12-31  I_A_D_S_T_BP6_USD  PT        IT       T                  0.000000   
                                         US       T                  91.923386  
2021-12-31  I_A_D_S_T_BP6_USD  PT        IT       CB                 204.796738 
                                                  GG                 0.000000   
                                                  T                  3494.207031
                                         US       CB                 0.000000   
                                                  GG                 0.000000   
                                                  T                  3.329844   
Name: Value, dtype: float32

Date        Indicator          Reporter  Partner
1997-12-31  I_A_D_S_T_BP6_USD  PT        IT         0.000000   
                                         US         91.923386  
2021-12-31  I_A_D_S_T_BP6_USD  PT        IT         3289.410400
                                         US         3.329844   
Name: Value, dtype: float64

In [117]:
### IMF CPIS: SHORT-TERM DEBTS EXCLUDING

gc.collect()

def reindex_indicators(ser_group):
    """
    Subtract short-term debt ('I_A_D_S_T_BP6_USD') from total investment ('I_A_T_T_T_BP6_USD') for a single
    Date / Reporter / Partner group.

    Parameters:
        ser_group: pd.Series of one group with 'Date', 'Reporter', 'Partner' and indicator index levels.
    Returns:
        Scalar: total investment value minus short-term debt value.
    """
    ### Leaving indicator as the only index level:
    ser_by_indicator = ser_group.droplevel(['Date', 'Reporter', 'Partner'])
    ### Groups with less than three rows are expanded to all valid indicators, every missing value is set to zero there:
    if (len(ser_by_indicator.index) < 3):
        ser_by_indicator = ser_by_indicator.reindex(list_valid_indicators).fillna(0.0)
    flt_total_investment = ser_by_indicator['I_A_T_T_T_BP6_USD']
    flt_short_term_debt = ser_by_indicator['I_A_D_S_T_BP6_USD']
    return flt_total_investment - flt_short_term_debt

#list_test_date = ['1997-12-31', '2021-12-31'] # ['2021-12-31'] # 
#list_test_reporter = ['PT']
#list_test_partner = ['US', 'IT'] # ['IT'] # 

#ser_test_raw = ser_asset_commerce.loc[:, list_test_reporter, :, :]
#display(ser_test_raw.loc[list_test_date, list_test_reporter, list_test_partner, :])
#ser_test_res = ser_test_raw.groupby(['Date', 'Reporter', 'Partner']).apply(reindex_indicators)
#display(ser_test_res.loc[list_test_date, list_test_reporter, list_test_partner])

### Short-term debt excluding for every Date / Reporter / Partner group of the sector-cleared dataset:
ser_asset_minus_short_term = ser_asset_commerce.groupby(['Date', 'Reporter', 'Partner']).apply(reindex_indicators)

In [119]:
### IMF CPIS: ASSET CLEARING TEST

### Sample selection for visual comparison of the data before and after the short-term debt excluding:
list_test_date = ['1997-12-31', '2021-12-31'] # ['2021-12-31'] # 
list_test_reporter = ['PT']
list_test_partner = ['US', 'IT'] # ['IT'] # 

display(ser_asset_commerce.loc[list_test_date, list_test_reporter, list_test_partner])
display(ser_asset_minus_short_term.loc[list_test_date, list_test_reporter, list_test_partner])

Date        Reporter  Partner  Indicator        
1997-12-31  PT        IT       I_A_D_S_T_BP6_USD    0.000000    
                               I_A_T_T_T_BP6_USD    507.593262  
                      US       I_A_D_S_T_BP6_USD    91.923386   
                               I_A_T_T_T_BP6_USD    3332.449951 
2021-12-31  PT        IT       I_A_D_S_T_BP6_USD    3289.410400 
                               I_A_T_T_T_BP6_USD    18311.003906
                      US       I_A_D_S_T_BP6_USD    3.329844    
                               I_A_T_T_T_BP6_USD    13202.219727
Name: Value, dtype: float64

Date        Reporter  Partner
1997-12-31  PT        IT         507.593262  
                      US         3240.526566 
2021-12-31  PT        IT         15021.593506
                      US         13198.889883
Name: Value, dtype: float64

In [122]:
### IMF CPIS: FILTERED ASSET DATASET SAVING

### Zero values are stored as missing ones (np.nan instead of the np.NaN alias, which was removed in NumPy 2.0):
ser_asset_minus_short_term.replace({0.0: np.nan}).to_hdf(path_or_buf = str_path_imf_cpis_filtered, key = str_key_imf_cpis_filtered, mode = 'w', format = 'fixed')

In [123]:
### TEMP

### Reading the filtered dataset back for visual check (no key is passed to the reader here):
pd.read_hdf(path_or_buf = str_path_imf_cpis_filtered)

Date        Reporter  Partner
1997-12-31  AR        AD        NaN        
                      AE        NaN        
                      AF        NaN        
                      AG        NaN        
                      AI        NaN        
                                 ..        
2021-12-31  ZA        XK        NaN        
                      YE        NaN        
                      YT        NaN        
                      ZM         24.268476 
                      ZW         345.479980
Name: Value, Length: 215508, dtype: float64

In [10]:
### IMF CPIS: RAW DATA AGGREGATION (TOTAL / TOTAL / TOTAL)

### NOTE(review): ser_cpis_asset is deleted in the ASSET FILTERING cell and ser_cpis_liability_inv is created only in the
### liabilities retrieving cell, so this cell relies on both objects being present in the kernel - confirm the execution order.
gc.collect()
### Total investment indicator with total reporter and partner sectors for both sources:
ser_cpis_asset_total = ser_cpis_asset.loc[:, 'I_A_T_T_T_BP6_USD', 'T', 'T', :, :]
ser_cpis_liability_inv_total = ser_cpis_liability_inv.loc[:, 'I_L_T_T_T_BP6_USD', 'T', 'T', :, :]
### Both sources side by side as 'Asset' and 'Liability' columns:
df_asset_aug_total = pd.concat([ser_cpis_asset_total, ser_cpis_liability_inv_total], axis = 1, names = 'Source Position', keys = ['Asset', 'Liability'])
### Current market of the reporter is added as an extra index level:
df_asset_aug_total = df_asset_aug_total.join(ser_ison_status, on = 'Reporter').set_index('Market', append = True)
### Asset value is taken where it is available, inverted liability value is used otherwise:
df_asset_aug_total['Asset_Augmented'] = df_asset_aug_total['Asset'].combine_first(df_asset_aug_total['Liability'])

In [11]:
### IMF CPIS: AGGREGATED DATA SAVING

### Zero values are stored as missing ones (np.nan instead of the np.NaN alias, which was removed in NumPy 2.0):
df_asset_aug_total.replace({0.0: np.nan}).to_hdf(path_or_buf = str_path_imf_cpis_combined, key = str_full_imf_cpis_combined, mode = 'w', format = 'fixed')
del df_asset_aug_total
gc.collect()

78

In [12]:
### TEMP

### Reading the combined dataset back and displaying the rows with negative asset values:
df_asset_aug_total = pd.read_hdf(path_or_buf = str_path_imf_cpis_combined, key = str_full_imf_cpis_combined)
df_asset_aug_total[df_asset_aug_total['Asset'] < 0.0]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Asset,Liability,Asset_Augmented
Date,Reporter,Partner,Market,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2001-12-31,CY,AR,FM,-1.918966,,-1.918966
2001-12-31,CY,BR,FM,-48.776989,,-48.776989
2001-12-31,CY,VE,FM,-26.326931,,-26.326931
2001-12-31,GB,KW,DM,-14.504000,,-14.504000
2002-12-31,CY,PE,FM,-4.910376,,-4.910376
...,...,...,...,...,...,...
2020-12-31,GB,FO,DM,-39.186401,,-39.186401
2021-12-31,DK,NE,DM,-0.152411,,-0.152411
2021-12-31,GB,LC,DM,-0.671700,,-0.671700
2021-12-31,GB,LV,DM,-7.926060,37.375801,-7.926060


In [8]:
### IMF CPIS : RAW DATA CONVERSION

### NOTE(review): str_key_imf_cpis_raw is not defined in any of the cells visible here, and the code below expects
### a dataframe with a 'Partner_ID' column, while the retrieving cells above save indexed series - confirm that
### this cell is still relevant before running it.
df_cpis_raw = pd.read_hdf(path_or_buf = str_path_imf_cpis_detailed_raw, key = str_key_imf_cpis_raw)
### Keeping partners from the country codes table plus the 'W00' code:
df_cpis_full = df_cpis_raw[df_cpis_raw['Partner_ID'].isin(df_country_codes['ISO SHORT'].to_list() + ['W00'])]
dict_options_bilateral = {}
dict_options_world = {}

In [15]:
### IMF CPIS: TEST : LOADING DATASET COMPARISON

### Volumes of the previously saved dataset by position:
df_cpis_ison = pd.read_hdf(path_or_buf = str_path_imf_cpis_dataset, key = str_full_imf_cpis_dataset)
print('Old (ISON only Total / Total) volumes:\n', df_cpis_ison.groupby(['Position'])['Value'].sum())
### Volumes of the detailed dataset restricted to total indicators, ISON partners and total sectors on both sides:
df_cpis_test = df_cpis_full[df_cpis_full['Indicator'].isin(['I_A_T_T_T_BP6_USD', 'I_L_T_T_T_BP6_USD']) & 
                            df_cpis_full['Partner_ID'].isin(ser_ison_status.index.to_list()) &
                            (df_cpis_full['Reporter_Sector'] == 'T') &
                            (df_cpis_full['Partner_Sector'] == 'T')]
print('New filtered (ISON only Total / Total) volumes:\n', df_cpis_test.groupby(['Indicator'])['Value'].sum())
### Same selection without the sector restrictions ('W00' partner excluded):
df_cpis_test = df_cpis_full[df_cpis_full['Indicator'].isin(['I_A_T_T_T_BP6_USD', 'I_L_T_T_T_BP6_USD']) & 
                            df_cpis_full['Partner_ID'].isin(ser_ison_status.index.to_list()) & 
                            (df_cpis_full['Partner_ID'] != 'W00')]
print('New Total / Total volumes:\n', df_cpis_test.groupby(['Indicator'])['Value'].sum())

Old (ISON only Total / Total) volumes:
 Position
Asset        574910272.0
Liability    70967144.0 
Name: Value, dtype: float32
New filtered (ISON only Total / Total) volumes:
 Indicator
I_A_T_T_T_BP6_USD    574910272.0
I_L_T_T_T_BP6_USD    70967144.0 
Name: Value, dtype: float32
New Total / Total volumes:
 Indicator
I_A_T_T_T_BP6_USD    637180544.0
I_L_T_T_T_BP6_USD    70967144.0 
Name: Value, dtype: float32


In [19]:
### IMF CPIS: TEST : SYMMETRY CHECK

### Total indicators with total sectors on both sides, partners limited to the country codes table:
df_cpis_test = df_cpis_full[df_cpis_full['Indicator'].isin(['I_A_T_T_T_BP6_USD', 'I_L_T_T_T_BP6_USD']) & 
                            df_cpis_full['Partner_ID'].isin(df_country_codes['ISO SHORT'].to_list()) &
                            (df_cpis_full['Reporter_Sector'] == 'T') &
                            (df_cpis_full['Partner_Sector'] == 'T')]
### Reported assets indexed by date, investor (reporter) and borrower (partner):
ser_cpis_asset = df_cpis_test[df_cpis_test['Indicator'] == 'I_A_T_T_T_BP6_USD'].set_index(['Date', 'Reporter_ID', 'Partner_ID'])['Value']
ser_cpis_asset.index.names = ['Date', 'Investor', 'Borrower']
ser_cpis_asset.name = 'Asset'
### Reported liabilities with reporter and partner swapped by renaming the remaining columns
### (assumes the columns left after dropping are ordered as Date, Value, Reporter_ID, Partner_ID - TODO confirm):
df_cpis_liability_inv = df_cpis_test[df_cpis_test['Indicator'] == 'I_L_T_T_T_BP6_USD'].drop(['Indicator', 'Reporter_Sector', 'Partner_Sector'], axis = 1)
df_cpis_liability_inv.columns = ['Date', 'Value', 'Partner_ID', 'Reporter_ID']
ser_cpis_liability_inv = df_cpis_liability_inv.set_index(['Date', 'Reporter_ID', 'Partner_ID'])['Value']
ser_cpis_liability_inv.name = 'Liability_Inverted'
ser_cpis_liability_inv.index.names = ['Date', 'Investor', 'Borrower']
### Both views of the same positions side by side:
df_cpis_compare = pd.concat([ser_cpis_asset, ser_cpis_liability_inv], axis = 1)
#df_cpis_compare = df_cpis_compare.replace({0.0 : np.NaN}).dropna()
#ser_pair_median = df_cpis_compare.groupby(['Investor', 'Borrower']).apply(lambda df_pair: (df_pair['Liability_Inverted'] / df_pair['Asset']).median())
#ser_investor_median = ser_pair_median.groupby('Investor').mean()
#print('Smallest Liability to Asset coefficient:\n', ser_investor_median.nsmallest(5))
##print(df_cpis_compare.loc[(All, 'NO', All), :].groupby(['Investor', 'Borrower']).apply(lambda df_pair: (df_pair['Liability_Inverted'] / df_pair['Asset']).median()))
#print('Largest Liability to Asset values:\n', ser_investor_median.nlargest(5))
##print(df_cpis_compare.loc[(All, 'AR', All), :].groupby(['Investor', 'Borrower']).apply(lambda df_pair: (df_pair['Liability_Inverted'] / df_pair['Asset']).median()))
#print('Investors in [0.5, 1.5] Liability to Asset interval:\n', ser_investor_median[(ser_investor_median > 0.5) & (ser_investor_median < 1.5)])

In [62]:
### TEMP

#display(df_cpis_compare.loc[(All, ['UA', 'US'], ['UA', 'US']), :].sort_index(level = ['Investor', 'Date']))
### Positions reported from both sides with non-zero values.
### Displayed explicitly, since only the last expression of a cell is echoed; the mask is built on the NaN-free table itself:
display(df_cpis_compare.dropna().loc[lambda df_pair: (df_pair['Asset'] != 0) & (df_pair['Liability_Inverted'] != 0)])
#.loc[(All, All, 'FR'), :].dropna()
#display(df_cpis_compare.loc[(All, ['ZA', 'JP'], ['ZA', 'JP']), :].sort_index(level = ['Investor', 'Date']))
#display(df_cpis_test[(df_cpis_test['Indicator'] == 'I_A_T_T_T_BP6_USD') & (df_cpis_test['Reporter_ID'] == 'RO') & (df_cpis_test['Partner_ID'] == 'JP')])
#display(df_cpis_test[(df_cpis_test['Indicator'] == 'I_L_T_T_T_BP6_USD') & (df_cpis_test['Reporter_ID'] == 'JP') & (df_cpis_test['Partner_ID'] == 'RO')])
#print(len(df_cpis_full))
#print(df_cpis_full.loc[df_cpis_full['Indicator'] == 'I_A_T_T_T_BP6_USD', 'Reporter_Sector'].value_counts())
#print(df_cpis_full.loc[df_cpis_full['Indicator'] == 'I_A_T_T_T_BP6_USD', 'Partner_Sector'].value_counts())
### Number of non-NaN values for each indicator and sectors combination (the two sector levels are swapped in the result index):
df_cpis_full.groupby(['Indicator', 'Reporter_Sector', 'Partner_Sector'])['Value'].count().swaplevel()

Indicator          Reporter_Sector  Partner_Sector
I_A_D_L_T_BP6_USD  CB               GG                2355  
                                    T                 87591 
                   GG               GG                2383  
                                    T                 103426
                   HH               GG                2588  
                                    T                 88597 
                   NP               GG                2423  
                                    T                 79612 
                   T                CB                28186 
                                    GG                32925 
                                    T                 179421
I_A_D_S_T_BP6_USD  CB               GG                2129  
                                    T                 85647 
                   GG               GG                2120  
                                    T                 99353 
                   HH             

In [101]:
### TEMP

### Single pair and date to look at both sides of the same position:
str_investor = 'GB'
str_lender = 'JP'
str_date = '2020-12-31'

### Total assets reported by str_investor against str_lender (total reporter sector only):
df_cpis_asset = df_cpis_full[
    df_cpis_full['Indicator'].eq('I_A_T_T_T_BP6_USD') &
    df_cpis_full['Date'].eq(str_date) &
    df_cpis_full['Reporter_Sector'].eq('T') &
    df_cpis_full['Reporter_ID'].eq(str_investor) &
    df_cpis_full['Partner_ID'].eq(str_lender)
]
display(df_cpis_asset.set_index(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector', 'Partner_Sector']))
### Total liabilities reported by str_lender against str_investor (no sector filter here):
df_cpis_liability = df_cpis_full[
    df_cpis_full['Indicator'].eq('I_L_T_T_T_BP6_USD') &
    df_cpis_full['Date'].eq(str_date) &
    df_cpis_full['Reporter_ID'].eq(str_lender) &
    df_cpis_full['Partner_ID'].eq(str_investor)
]
display(df_cpis_liability.set_index(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector', 'Partner_Sector']))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Value
Date,Indicator,Reporter_ID,Partner_ID,Reporter_Sector,Partner_Sector,Unnamed: 6_level_1
2020-12-31,I_A_T_T_T_BP6_USD,GB,JP,T,T,167008.953125


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Value
Date,Indicator,Reporter_ID,Partner_ID,Reporter_Sector,Partner_Sector,Unnamed: 6_level_1
2020-12-31,I_L_T_T_T_BP6_USD,JP,GB,T,T,407999.9375


In [100]:
### TEMP

### Strictly positive total liabilities, excluding partners 'W00' and 'US', ordered by value:
df_cpis_liability = df_cpis_full[
    df_cpis_full['Indicator'].eq('I_L_T_T_T_BP6_USD') &
    df_cpis_full['Value'].gt(0) &
    ~df_cpis_full['Partner_ID'].isin(['W00', 'US'])
]
df_cpis_liability.sort_values('Value')

Unnamed: 0,Date,Value,Indicator,Reporter_Sector,Partner_Sector,Reporter_ID,Partner_ID
2564736,2019-12-31,6.841784e-08,I_L_T_T_T_BP6_USD,T,T,MY,AR
2564737,2020-12-31,7.226514e-08,I_L_T_T_T_BP6_USD,T,T,MY,AR
2564735,2018-12-31,8.457170e-08,I_L_T_T_T_BP6_USD,T,T,MY,AR
2564738,2021-12-31,1.005747e-07,I_L_T_T_T_BP6_USD,T,T,MY,AR
2564734,2017-12-29,1.871000e-07,I_L_T_T_T_BP6_USD,T,T,MY,AR
...,...,...,...,...,...,...,...
2125062,2020-12-31,4.079999e+05,I_L_T_T_T_BP6_USD,T,T,JP,GB
2123173,2021-12-31,4.333805e+05,I_L_T_T_T_BP6_USD,T,T,JP,LU
2125047,2005-12-30,4.435934e+05,I_L_T_T_T_BP6_USD,T,T,JP,GB
2125048,2006-12-29,5.247969e+05,I_L_T_T_T_BP6_USD,T,T,JP,GB


In [16]:
### IMF CPIS : TESTS : NEGATIVE VALUES RESEARCH

### Dataset length:
print(len(df_cpis_full))
#### Negative values:
#display(df_cpis_full.loc[df_cpis_full['Value'] < -1.0, 'Indicator'].value_counts())
#df_cpis_full.loc[df_cpis_full['Value'] < -1.0].sort_values('Value')
### NaN values:
#df_cpis_full.loc[All, 'Value'] = df_cpis_full['Value'].fillna(0.0)
#df_cpis_full[df_cpis_full['Value'].isna()]
### Negative total assets for total reporter and partner sectors:
display(
    df_cpis_full[
        df_cpis_full['Value'].lt(0.0) &
        df_cpis_full['Indicator'].eq('I_A_T_T_T_BP6_USD') &
        df_cpis_full['Reporter_Sector'].eq('T') &
        df_cpis_full['Partner_Sector'].eq('T')
    ]
)

3458960


Unnamed: 0,Date,Value,Indicator,Reporter_Sector,Partner_Sector,Reporter_ID,Partner_ID
183490,2007-12-31,-20.276800,I_A_T_T_T_BP6_USD,T,T,AU,AE
307093,2003-12-31,-1.754231,I_A_T_T_T_BP6_USD,T,T,BE,EE
307212,2004-12-31,-0.019546,I_A_T_T_T_BP6_USD,T,T,BE,DZ
307213,2005-12-30,-0.019680,I_A_T_T_T_BP6_USD,T,T,BE,DZ
345710,2018-12-31,-0.162847,I_A_T_T_T_BP6_USD,T,T,BG,BS
...,...,...,...,...,...,...,...
2583934,2014-12-31,-138.407394,I_A_T_T_T_BP6_USD,T,T,NL,NG
2597320,2008-12-31,-1198.253662,I_A_T_T_T_BP6_USD,T,T,NL,MU
2692115,2005-12-30,-3.545052,I_A_T_T_T_BP6_USD,T,T,NO,BG
3083092,2003-12-31,-1.040867,I_A_T_T_T_BP6_USD,T,T,SE,LR


In [17]:
### IMF CPIS : TESTS : NEGATIVE VALUES RESEARCH

### All indicators for a single date, reporter, partner and sectors combination:
df_test = df_cpis_full.loc[
    df_cpis_full['Date'].eq('2009-12-31') &
    df_cpis_full['Reporter_ID'].eq('GB') &
    df_cpis_full['Partner_ID'].eq('US') &
    df_cpis_full['Reporter_Sector'].eq('HH') &
    df_cpis_full['Partner_Sector'].eq('T')
]
df_test

Unnamed: 0,Date,Value,Indicator,Reporter_Sector,Partner_Sector,Reporter_ID,Partner_ID
1550766,2009-12-31,-3462.099854,I_A_T_T_T_BP6_USD,HH,T,GB,US
1563237,2009-12-31,20556.035156,I_A_E_T_T_BP6_USD,HH,T,GB,US
1573216,2009-12-31,-24018.134766,I_A_D_T_T_BP6_USD,HH,T,GB,US
1596567,2009-12-31,-26377.814453,I_A_D_L_T_BP6_USD,HH,T,GB,US
1611233,2009-12-31,2359.68042,I_A_D_S_T_BP6_USD,HH,T,GB,US


In [None]:
### IMF CPIS : TOTAL INVESTMENT FILTERING 

### Total assets for total reporter and partner sectors:
### rows with partner 'W00' are collected in the world dictionary, rows with any other partner - in the bilateral one.
dict_ind_bilateral = {
    'Total Holdings': df_cpis_full.loc[
        df_cpis_full['Indicator'].eq('I_A_T_T_T_BP6_USD') &
        df_cpis_full['Reporter_Sector'].eq('T') &
        df_cpis_full['Partner_Sector'].eq('T') &
        df_cpis_full['Partner_ID'].ne('W00')
    ]
}
dict_ind_world = {
    'Total Holdings': df_cpis_full.loc[
        df_cpis_full['Indicator'].eq('I_A_T_T_T_BP6_USD') &
        df_cpis_full['Reporter_Sector'].eq('T') &
        df_cpis_full['Partner_Sector'].eq('T') &
        df_cpis_full['Partner_ID'].eq('W00')
    ]
}

In [78]:
### TEMP

### Releasing unreferenced memory before the experiments:
gc.collect()

### All (reporter sector, partner sector) pairs: every filtered sector combined with each of the first three filtered sectors:
list_tup_sectors = [
    (iter_reporter_sector, iter_partner_sector)
    for iter_reporter_sector in list_sector_filtered
    for iter_partner_sector in list_sector_filtered[: 3]
]
#def reindex_sectors(df_group):
#    df_group = df_group.drop(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID'], axis = 1)
#    ser_reindexed = df_group.set_index(['Reporter_Sector', 'Partner_Sector']).reindex(list_tup_sectors).fillna(0.0)
#    return ser_reindexed

#def reindex_sectors(ser_group):
#    ser_group = ser_group.droplevel(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID'])
#    ser_reindexed = ser_group.reindex(list_tup_sectors).fillna(0.0)
#    return ser_reindexed

def reindex_partner_sectors(ser_group):
    """
    Align the values of a single group to the first three entries of list_sector_filtered.

    Parameters:
        ser_group: values of one group, indexed by the levels 'Date', 'Indicator', 'Reporter_ID',
            'Partner_ID', 'Reporter_Sector' and 'Partner_Sector'.
    Returns:
        Series indexed by partner sector only; sectors absent from the group (and NaN values) are set to 0.0.
    """
    list_group_levels = ['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector']
    ### Only the partner sector level is left in the index:
    ser_by_partner_sector = ser_group.droplevel(list_group_levels)
    return ser_by_partner_sector.reindex(list_sector_filtered[: 3]).fillna(0.0)

def get_partner_total(ser_group):
    """
    Extract the value of the total ('T') partner sector from a single group.

    Parameters:
        ser_group: values of one group, indexed by the levels 'Date', 'Indicator', 'Reporter_ID',
            'Partner_ID', 'Reporter_Sector' and 'Partner_Sector'.
    Returns:
        Value stored under partner sector 'T', or NaN when the group has no 'T' entry,
        so that a groupby-apply over incomplete groups does not stop with KeyError.
    """
    ser_group = ser_group.droplevel(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector'])
    ### Group without total partner sector is treated as missing data (caller decides how to fill it):
    if 'T' not in ser_group.index:
        return np.nan
    ser_reindexed = ser_group['T']
    return ser_reindexed

#def get_partner_commerce(ser_group):
#    ser_group = ser_group.droplevel(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector'])
#    ser_reindexed = ser_group['T'] - ser_group.loc[['CB', 'GG']].sum()
#    return ser_reindexed

def get_partner_commerce(ser_group):
    """
    Compute the total partner sector value net of the 'CB' and 'GG' partner sectors for a single group.

    Parameters:
        ser_group: values of one group, indexed by the levels 'Date', 'Indicator', 'Reporter_ID',
            'Partner_ID', 'Reporter_Sector' and 'Partner_Sector'.
    Returns:
        Value of partner sector 'T' minus the sum of partner sectors 'CB' and 'GG';
        sectors absent from the group (and NaN values) are counted as 0.0.
    """
    list_group_levels = ['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector']
    ### NOTE(review): assumes the first three entries of list_sector_filtered are 'T', 'CB' and 'GG' in some order - to be confirmed
    ser_by_partner_sector = ser_group.droplevel(list_group_levels).reindex(list_sector_filtered[: 3]).fillna(0.0)
    flo_cb_gg_sum = ser_by_partner_sector.loc[['CB', 'GG']].sum()
    return ser_by_partner_sector['T'] - flo_cb_gg_sum


#df_test = df_cpis_full[(df_cpis_full['Indicator'] == 'I_A_T_T_T_BP6_USD') &
#                       df_cpis_full['Reporter_ID'].isin(['US']) & 
#                       df_cpis_full['Partner_ID'].isin(['MX', 'CA']) 
#                      ]
### Sample for experiments: all rows reported by 'US' against partners 'CA' and 'W00':
df_test = df_cpis_full.loc[
    df_cpis_full['Reporter_ID'].isin(['US']) &
    df_cpis_full['Partner_ID'].isin(['CA', 'W00'])
]
### All columns except the values one are moved to the sorted index, the remaining single column is squeezed:
ser_test = (
    df_test
    .set_index(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector', 'Partner_Sector'])
    .sort_index()
    .squeeze()
)
#ser_test.groupby(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector']).apply(get_partner_total).fillna(0.0)
#ser_test.groupby(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector']).apply(get_partner_commerce).fillna(0.0)
#ser_cpis_full = df_cpis_full.set_index(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector', 'Partner_Sector']).sort_index().squeeze()
#ser_cpis_reindexed = ser_cpis_full.groupby(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector']).apply(reindex_partner_sectors)

In [81]:
### TEMP

### Partner sector 'T' net of 'CB' and 'GG' for each group, shown for 'US' against 'W00' at 2021-12-31:
(
    ser_test
    .groupby(['Date', 'Indicator', 'Reporter_ID', 'Partner_ID', 'Reporter_Sector'])
    .apply(get_partner_commerce)
    .loc['2021-12-31', All, 'US', 'W00', All]
)

Date        Indicator          Reporter_ID  Partner_ID  Reporter_Sector
2021-12-31  I_A_D_L_T_BP6_USD  US           W00         CB                 0.0       
                                                        GG                 0.0       
                                                        HH                 0.0       
                                                        NP                 0.0       
                                                        T                  2890103.0 
            I_A_D_S_T_BP6_USD  US           W00         CB                 0.0       
                                                        GG                 0.0       
                                                        HH                 0.0       
                                                        NP                 0.0       
                                                        T                  468030.0  
            I_A_D_T_T_BP6_USD  US           W00         CB          

In [80]:
### TEMP

### Source values for 'US' against 'W00' at 2021-12-31.
### Displayed explicitly, since only the last expression of a cell is echoed:
display(ser_test.loc['2021-12-31', All, 'US', 'W00', All, All])
### All rows of the full dataset with partner sector 'CB':
df_cpis_full[df_cpis_full['Partner_Sector'] == 'CB']

Date        Indicator          Reporter_ID  Partner_ID  Reporter_Sector  Partner_Sector
2021-12-31  I_A_D_L_T_BP6_USD  US           W00         CB               T                 0.0       
                                                        GG               T                 0.0       
                                                        HH               T                 0.0       
                                                        NP               T                 0.0       
                                                        T                CB                0.0       
                                                                         GG                798052.0  
                                                                         T                 3688155.0 
            I_A_D_S_T_BP6_USD  US           W00         CB               T                 0.0       
                                                        GG               T                 0.0  