In [1]:
### GFD: 3-MONTH TREASURY BILL RATE FOR LOCAL INTEREST RATE

In [1]:
### INITIALIZATION

import requests
import pandas as pd
import datetime
import os
import getpass

In [3]:
### DEFINING GFD LOGIN FUNCTION

def gfd_auth(username = None, password = None):
    """
    Pulls a GFD API token and stores it as an environmental variable.

    Any credential that is not passed in is requested interactively through
    getpass, so neither value has to be typed into the notebook.

    Parameters
        username: GFD-approved email address.
        password: Password for GFD-approved email address.
    Returns
        None. On success the token is written to os.environ['GFD_API_TOKEN'].
    Raises
        ValueError: if the login endpoint answers with a non-200 status code,
            or if the response body does not carry a usable token.
    """
    if username is None:
        username = getpass.getpass('Please enter your GFD Finaeon username: ')

    if password is None:
        password = getpass.getpass('Please enter your GFD Finaeon password: ')

    url = 'https://api.globalfinancialdata.com/login/'
    parameters = {'username': username, 'password': password}
    # Bounded wait: without a timeout a stalled connection blocks the kernel indefinitely
    resp = requests.post(url, data = parameters, timeout = 60)
    #check for unsuccessful API returns
    if resp.status_code != 200:
        raise ValueError('GFD API request failed with HTTP status code %s' % resp.status_code)

    json_content = resp.json()
    # A 200 answer without a token would otherwise surface as a bare KeyError/AttributeError
    token = json_content.get('token') if isinstance(json_content, dict) else None
    if not isinstance(token, str) or not token.strip('"'):
        raise ValueError('GFD API response did not contain a token')

    # The token is stripped of surrounding double quotes before being stored
    os.environ['GFD_API_TOKEN'] = token.strip('"')
    print("GFD API token recieved at %s" % str(datetime.datetime.now()))

In [None]:
### RUNNING AUTHORIZATION FUNCTION

# Credentials must never be stored in the notebook: they are taken from the
# GFD_USERNAME / GFD_PASSWORD environment variables when those are set, and any
# missing value (None) makes gfd_auth fall back to its interactive getpass prompt.
# NOTE(review): the login previously committed in this cell should be treated as
# compromised and the password rotated.
gfd_auth(os.environ.get('GFD_USERNAME'), os.environ.get('GFD_PASSWORD'))

In [1]:
### UN COMTRADE

In [8]:
### INITIALIZATION

import pandas as pd
import requests
import time

In [9]:
### COUNTRY ISO CODES EXTRACTOR

def get_country_codes(use_local_copy = False):
    """
    Build a table of two-letter and three-letter ISO codes keyed by upper-cased country name.

    Parameters
        use_local_copy: when truthy the HTML table is read from the saved file
            'Data_Files/Source_Files/countrycode.html', otherwise from https://countrycode.org/.
    Returns
        DataFrame indexed by the upper-cased 'COUNTRY' column of the source table,
        with the columns 'ISO SHORT' and 'ISO LONG'.
    """
    import pandas as pd

    ### Source selection:
    source_location = 'Data_Files/Source_Files/countrycode.html' if use_local_copy else 'https://countrycode.org/'
    ### First HTML table of the page, indexed by country name:
    codes_table = pd.read_html(source_location, index_col = 'COUNTRY')[0]
    ### 'ISO CODES' holds both codes as a single 'XX / XXX' string:
    iso_parts = codes_table['ISO CODES'].str.split(' / ', expand = True)
    codes_table[['ISO SHORT', 'ISO LONG']] = iso_parts
    codes_only = codes_table[['ISO SHORT', 'ISO LONG']]
    codes_only.index = codes_only.index.str.upper()

    return codes_only

In [10]:
### DEFINING EXTRACTION UNIVERSE DATA FROM GENERAL MS EXCEL SOURCE

def get_market_membership_from_excel():
    """
    Load the market membership universe from the project MS Excel source.

    Reads the sheet 'universe_joined' of 'Data_Files/Source_Files/sample_universe.xlsx',
    keeps the 'dates', 'region' and 'ctry' columns, translates the numeric region
    codes 50 / 57 / 504 into 'DM' / 'EM' / 'FM' and drops every row whose market is
    not one of those three.

    Returns
        Series named 'Market', indexed by (Date, Country) and sorted by both levels.
        A Series is returned even when the sheet holds a single row.
    """
    ### Importing standard modules:
    import pandas as pd
    ### Declaring local constants & variables:
    path_msci = 'Data_Files/Source_Files/sample_universe.xlsx' ### Path for membership source
    tab_monthly = 'universe_joined'
    arr_markets_needed = ['DM', 'FM', 'EM']
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM'}
    ### Extracting universe data (default NA parsing is switched off and replaced by an explicit marker list):
    df_universe = pd.read_excel(io = path_msci, sheet_name = tab_monthly, skiprows = [0, 2], header = 0, parse_dates = True,
                                na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    df_universe = df_universe[['dates', 'region', 'ctry']].rename(columns = {'dates': 'Date', 'region': 'Market', 'ctry': 'Country'})
    ### Explicit column selection instead of squeeze(): squeeze() collapses a single-row table to a scalar,
    ### which made the following sort fail for a one-row universe:
    ser_universe = df_universe.set_index(['Date', 'Country'])['Market']
    ser_universe = ser_universe.sort_index(level = [0, 1]).replace(dict_markets)
    ser_market_membership = ser_universe[ser_universe.isin(arr_markets_needed)]
    ### Results output:
    return ser_market_membership

In [11]:
### UN COMTRADE COUNTRIES DATA EXTRACTION AND MODIFICATION

def get_un_comtrade_country_id(df_country_codes):
    """
    Map short ISO country codes to UN Comtrade area identifiers.

    The partner area list is requested from the UN Comtrade cache, area names are
    upper-cased and renamed through the module-level dict_map_to_replace, and the
    result is matched by name against the index of df_country_codes. Areas without
    a match are discarded.

    Parameters
        df_country_codes: DataFrame indexed by upper-cased country name ('COUNTRY')
            with the columns 'ISO SHORT' and 'ISO LONG'.
    Returns
        Series named 'Comtrade_ID' holding the UN Comtrade identifiers, indexed by
        'Country' (the 'ISO SHORT' values) and sorted by that index.
    """
    ### Getting UN Comtrade country info from post request:
    url_partner_areas = 'http://comtrade.un.org/data/cache/partnerAreas.json'
    response_areas = requests.post(url_partner_areas)
    ### Object to dataframe transformation:
    df_areas = pd.DataFrame(response_areas.json()['results'])
    df_areas.columns = ['UNC ID', 'COUNTRY']
    df_areas['COUNTRY'] = df_areas['COUNTRY'].str.upper()
    ### Names harmonisation and indexing by country name:
    df_areas = df_areas.replace(dict_map_to_replace).set_index('COUNTRY', append = False, drop = True)
    ### Matching with ISO codes by name; unmatched areas carry NaN codes and are dropped:
    df_matched = df_areas.join(df_country_codes, on = 'COUNTRY', how = 'left')
    df_matched = df_matched.dropna(how = 'any').reset_index(drop = True)
    df_matched = df_matched.drop('ISO LONG', axis = 1)
    df_matched.columns = ['Comtrade_ID', 'Country']
    ### Results output:
    return df_matched.set_index('Country').squeeze().sort_index()

In [12]:
### UN COMTRADE DATA REQUEST EXECUTION

def get_un_comtrade_data(str_rep_country_id, str_par_country_id, int_max_rec = 50000, str_type = 'C', str_freq = 'M', str_classification_system = 'HS', 
                         str_period = 'all', str_trade_flow = 'All', str_classification_code = 'TOTAL'):
    """
    Request one UN Comtrade dataset and return it as a tidy table.

    Parameters
        str_rep_country_id: reporter identifier(s), sent as the 'r' query parameter.
        str_par_country_id: partner identifier(s), sent as the 'p' query parameter.
        int_max_rec: sent as 'max'.
        str_type: sent as 'type'.
        str_freq: sent as 'freq'.
        str_classification_system: sent as 'px'.
        str_period: sent as 'ps'.
        str_trade_flow: one of 'All', 'Import', 'Export', 're-Export', 're-Import';
            translated to the API code sent as 'rg'.
        str_classification_code: sent as 'cc'.
    Returns
        DataFrame with the columns 'Date', 'Reporter_ID', 'Partner_ID', 'Value';
        'Date' is the business month end of the reported period. The frame has no
        rows when the response dataset is empty.
    Raises
        KeyError: if str_trade_flow is not one of the supported names.
    """
    list_result_columns = ['Date', 'Reporter_ID', 'Partner_ID', 'Value']
    ### Trade flows codification:
    dict_flow_codes = {'All': 'all', 'Import': '1', 'Export': '2', 're-Export': '3', 're-Import': '4'}
    ### Query parameters in the order they are written to the URL:
    list_query = [('max', str(int_max_rec)),
                  ('type', str_type),
                  ('freq', str_freq),
                  ('px', str_classification_system),
                  ('ps', str_period),
                  ('r', str_rep_country_id),
                  ('p', str_par_country_id),
                  ('rg', dict_flow_codes[str_trade_flow]),
                  ('cc', str_classification_code),
                  ('fmt', 'json')]
    ### Request URL preparation:
    str_query_url = 'http://comtrade.un.org/api/get?' + '&'.join(iter_key + '=' + iter_value for (iter_key, iter_value) in list_query)
    ### Getting UN Comtrade data from post request:
    response_dataset = requests.post(str_query_url)
    list_records = response_dataset.json()['dataset']
    ### Empty answer: same columns, no rows:
    if (len(list_records) == 0):
        return pd.DataFrame(columns = list_result_columns)
    ### Object to dataframe transformation:
    df_records = pd.DataFrame(list_records)[['period', 'rtCode', 'ptCode', 'TradeValue']]
    df_records.columns = ['Period', 'Reporter_ID', 'Partner_ID', 'Value']
    ### Period is parsed as year + month and moved to the business month end:
    df_records['Date'] = pd.to_datetime(df_records['Period'], format = '%Y%m') + pd.offsets.BMonthEnd()

    return df_records[list_result_columns]

In [13]:
### GENERAL DATA PREPARATION

### Constants:
### Pause in seconds made after every UN Comtrade request in the extraction loops:
int_seconds_to_sleep = 35
### Number of countries joined into one reporter / partner group of a single request:
int_unc_limit = 5
### Full slice, used as "take everything" placeholder in .loc selections:
All = slice(None)
### HDF5 file that stores the extracted UN Comtrade series:
str_path_unc_dataset = 'Data_Files/Source_Files/unc_dataset.h5'
### Keys of the export and import series inside that HDF5 file:
str_unc_exp_total_dataset = 'export_total_dataset'
str_unc_imp_total_dataset = 'import_total_dataset'
### UN Comtrade country names to rename:
### Keys are upper-cased area names as they arrive from UN Comtrade, values are the names they are
### replaced with inside get_un_comtrade_country_id before the join against the ISO country codes table.
### NOTE(review): the values presumably follow the spelling of the countrycode.org table - confirm when that source changes.
dict_map_to_replace = {'BOLIVIA (PLURINATIONAL STATE OF)': 'BOLIVIA',
                       'BOSNIA HERZEGOVINA': 'BOSNIA AND HERZEGOVINA',
                       'BR. INDIAN OCEAN TERR.': 'BRITISH INDIAN OCEAN TERRITORY',
                       'BR. VIRGIN ISDS': 'BRITISH VIRGIN ISLANDS',
                       'BRUNEI DARUSSALAM': 'BRUNEI',
                       'CABO VERDE': 'CAPE VERDE',
                       'CAYMAN ISDS': 'CAYMAN ISLANDS',
                       'CENTRAL AFRICAN REP.': 'CENTRAL AFRICAN REPUBLIC',
                       'CHRISTMAS ISDS': 'CHRISTMAS ISLAND',
                       'COCOS ISDS': 'COCOS ISLANDS',
                       'COOK ISDS': 'COOK ISLANDS',
                       'CURAÇAO': 'CURACAO',
                       'CZECHIA': 'CZECH REPUBLIC',
                       'DEM. REP. OF THE CONGO': 'DEMOCRATIC REPUBLIC OF THE CONGO',
                       'DOMINICAN REP.': 'DOMINICAN REPUBLIC',
                       'TIMOR-LESTE': 'EAST TIMOR',
                       'FALKLAND ISDS (MALVINAS)': 'FALKLAND ISLANDS',
                       'FAEROE ISDS': 'FAROE ISLANDS',
                       'CHINA, HONG KONG SAR': 'HONG KONG',
                       'CÔTE D\'IVOIRE': 'IVORY COAST',
                       'LAO PEOPLE\'S DEM. REP.': 'LAOS',
                       'CHINA, MACAO SAR': 'MACAU',
                       'TFYR OF MACEDONIA': 'MACEDONIA',
                       'MARSHALL ISDS': 'MARSHALL ISLANDS',
                       'FS MICRONESIA': 'MICRONESIA',
                       'REP. OF MOLDOVA': 'MOLDOVA',
                       'NETH. ANTILLES': 'NETHERLANDS ANTILLES',
                       'DEM. PEOPLE\'S REP. OF KOREA': 'NORTH KOREA',
                       'N. MARIANA ISDS': 'NORTHERN MARIANA ISLANDS',
                       'STATE OF PALESTINE': 'PALESTINE',
                       'CONGO': 'REPUBLIC OF THE CONGO',
                       'RÉUNION': 'REUNION',
                       'RUSSIAN FEDERATION': 'RUSSIA',
                       'SOLOMON ISDS': 'SOLOMON ISLANDS',
                       'REP. OF KOREA': 'SOUTH KOREA',
                       'UNITED REP. OF TANZANIA': 'TANZANIA',
                       # NOTE(review): a residual-sounding area name is mapped to a single country here - confirm this is intended
                       'OTHER ASIA, NES': 'TAIWAN',
                       'TURKS AND CAICOS ISDS': 'TURKS AND CAICOS ISLANDS',
                       'US VIRGIN ISDS': 'U.S. VIRGIN ISLANDS',
                       'USA': 'UNITED STATES',
                       'HOLY SEE (VATICAN CITY STATE)': 'VATICAN',
                       'VIET NAM': 'VIETNAM',
                       'WALLIS AND FUTUNA ISDS': 'WALLIS AND FUTUNA'
                      }
### ISO country codes loading (called with the default use_local_copy = False, so the table is downloaded from countrycode.org):
df_country_codes = get_country_codes()
### ISON membership loading (Series of market labels indexed by Date and Country, read from the local MS Excel source):
ser_market_membership = get_market_membership_from_excel()
### Getting UN Comtrade country IDs (performs a web request; relies on dict_map_to_replace being defined before this call):
ser_UNC_country_id = get_un_comtrade_country_id(df_country_codes)

In [None]:
### EXPORT DATA EXTRACTION SCRIPT

### Filtering ISON countries only:
### reindex() marks universe countries that have no UN Comtrade ID with NaN; they are dropped here,
### because NaN can neither be joined into a request string nor converted to an integer ID below.
ser_UNC_country_id = ser_UNC_country_id.reindex(ser_market_membership.index.get_level_values(1).unique().to_list()).dropna()
### Number of country groups (int_unc_limit countries per group, last group may be shorter):
int_unc_groups = (len(ser_UNC_country_id.index) - 1) // int_unc_limit + 1
### Concatenation aggregator initializing:
list_dataset = []
### Reporter country looping (country groups of int_unc_limit members):
for iter_reporter_group in range(int_unc_groups):
    ### Reporter group preparing (does not depend on the partner loop):
    str_reporter_group = ','.join(ser_UNC_country_id.iloc[iter_reporter_group * int_unc_limit : (iter_reporter_group + 1) * int_unc_limit].to_list())
    ### Partner country looping (country groups of int_unc_limit members):
    for iter_partner_group in range(int_unc_groups):
        ### Progress output: positions covered by the reporter group / by the partner group:
        print(iter_reporter_group * int_unc_limit, '-', (iter_reporter_group + 1) * int_unc_limit - 1, '/', 
              iter_partner_group * int_unc_limit, '-', (iter_partner_group + 1) * int_unc_limit - 1)
        ### Partner group preparing:
        str_partner_group = ','.join(ser_UNC_country_id.iloc[iter_partner_group * int_unc_limit : (iter_partner_group + 1) * int_unc_limit].to_list())
        ### Request performing:
        df_iter_dataset = get_un_comtrade_data(str_reporter_group, str_partner_group, str_trade_flow = 'Export')
        list_dataset.append(df_iter_dataset)
        ### Pause for API limitations:
        time.sleep(int_seconds_to_sleep)
### Results concatenating:
df_loop_dataset = pd.concat(list_dataset, axis = 0, sort = False, ignore_index = True)[['Date', 'Reporter_ID', 'Partner_ID', 'Value']]
df_loop_dataset = df_loop_dataset.astype({'Reporter_ID': 'int16', 'Partner_ID': 'int16', 'Value': 'int64'})
### UN Comtrade country codes replacing to ISON country codes:
### The columns are replaced as a whole instead of being written through .loc, because putting strings
### into int16 columns in place depends on silent upcasting, which pandas has deprecated.
dict_unc_id_to_ison = dict(zip(map(int, ser_UNC_country_id.values), ser_UNC_country_id.index.to_list()))
df_loop_dataset[['Reporter_ID', 'Partner_ID']] = df_loop_dataset[['Reporter_ID', 'Partner_ID']].replace(dict_unc_id_to_ison)
### Series indexing (explicit column selection keeps a Series even for a single-row result):
ser_unc_dataset = df_loop_dataset.set_index(['Date', 'Reporter_ID', 'Partner_ID'], drop = True)['Value']
### Data saving (mode 'w' recreates the file, so any dataset stored in it earlier is removed):
ser_unc_dataset.to_hdf(path_or_buf = str_path_unc_dataset, key = str_unc_exp_total_dataset, mode = 'w')

In [None]:
### IMPORT DATA EXTRACTION SCRIPT

### Filtering ISON countries only:
### reindex() marks universe countries that have no UN Comtrade ID with NaN; they are dropped here,
### because NaN can neither be joined into a request string nor converted to an integer ID below.
ser_UNC_country_id = ser_UNC_country_id.reindex(ser_market_membership.index.get_level_values(1).unique().to_list()).dropna()
### Number of country groups (int_unc_limit countries per group, last group may be shorter):
int_unc_groups = (len(ser_UNC_country_id.index) - 1) // int_unc_limit + 1
### Concatenation aggregator initializing:
list_dataset = []
### Reporter country looping (country groups of int_unc_limit members):
for iter_reporter_group in range(int_unc_groups):
    ### Reporter group preparing (does not depend on the partner loop):
    str_reporter_group = ','.join(ser_UNC_country_id.iloc[iter_reporter_group * int_unc_limit : (iter_reporter_group + 1) * int_unc_limit].to_list())
    ### Partner country looping (country groups of int_unc_limit members):
    for iter_partner_group in range(int_unc_groups):
        ### Progress output: positions covered by the reporter group / by the partner group:
        print(iter_reporter_group * int_unc_limit, '-', (iter_reporter_group + 1) * int_unc_limit - 1, '/', 
              iter_partner_group * int_unc_limit, '-', (iter_partner_group + 1) * int_unc_limit - 1)
        ### Partner group preparing:
        str_partner_group = ','.join(ser_UNC_country_id.iloc[iter_partner_group * int_unc_limit : (iter_partner_group + 1) * int_unc_limit].to_list())
        ### Request performing:
        df_iter_dataset = get_un_comtrade_data(str_reporter_group, str_partner_group, str_trade_flow = 'Import')
        list_dataset.append(df_iter_dataset)
        ### Pause for API limitations:
        time.sleep(int_seconds_to_sleep)
### Results concatenating:
df_loop_dataset = pd.concat(list_dataset, axis = 0, sort = False, ignore_index = True)[['Date', 'Reporter_ID', 'Partner_ID', 'Value']]
df_loop_dataset = df_loop_dataset.astype({'Reporter_ID': 'int16', 'Partner_ID': 'int16', 'Value': 'int64'})
### UN Comtrade country codes replacing to ISON country codes:
### The columns are replaced as a whole instead of being written through .loc, because putting strings
### into int16 columns in place depends on silent upcasting, which pandas has deprecated.
dict_unc_id_to_ison = dict(zip(map(int, ser_UNC_country_id.values), ser_UNC_country_id.index.to_list()))
df_loop_dataset[['Reporter_ID', 'Partner_ID']] = df_loop_dataset[['Reporter_ID', 'Partner_ID']].replace(dict_unc_id_to_ison)
### Series indexing (explicit column selection keeps a Series even for a single-row result):
ser_unc_dataset = df_loop_dataset.set_index(['Date', 'Reporter_ID', 'Partner_ID'], drop = True)['Value']
### Data saving:
### Mode 'a' adds the key to an existing file exactly like 'r+' did, but also creates the file
### when it does not exist yet, so this cell no longer requires the export cell to have run first.
ser_unc_dataset.to_hdf(path_or_buf = str_path_unc_dataset, key = str_unc_imp_total_dataset, mode = 'a')

In [None]:
### TEST: COMPARING WITH ONLINE DATA

### Reloading both stored series from the HDF5 file (export first, then import):
ser_unc_total_export, ser_unc_total_import = [pd.read_hdf(path_or_buf = str_path_unc_dataset, key = iter_key)
                                              for iter_key in (str_unc_exp_total_dataset, str_unc_imp_total_dataset)]
### Latest observations for reporter 'AT' and partner 'BE', to be compared by eye with the online data:
print(ser_unc_total_export.loc[All, 'AT', 'BE'].tail(),
      ser_unc_total_import.loc[All, 'AT', 'BE'].tail(), sep = '\n')

In [1]:
### BIS: BILATERAL BANK LENDING

In [1]:
### INITIALIZATION

import pandas as pd
import requests
import zipfile
import io

In [2]:
### DEFINING EXTRACTION UNIVERSE DATA FROM GENERAL MS EXCEL SOURCE

def get_market_membership_from_excel():
    """
    Load the market membership universe from the project MS Excel source.

    Reads the sheet 'universe_joined' of 'Data_Files/Source_Files/sample_universe.xlsx',
    keeps the 'dates', 'region' and 'ctry' columns, translates the numeric region
    codes 50 / 57 / 504 into 'DM' / 'EM' / 'FM' and drops every row whose market is
    not one of those three.

    Returns
        Series named 'Market', indexed by (Date, Country) and sorted by both levels.
        A Series is returned even when the sheet holds a single row.
    """
    ### Importing standard modules:
    import pandas as pd
    ### Declaring local constants & variables:
    path_msci = 'Data_Files/Source_Files/sample_universe.xlsx' ### Path for membership source
    tab_monthly = 'universe_joined'
    arr_markets_needed = ['DM', 'FM', 'EM']
    dict_markets = {50 : 'DM', 57 : 'EM', 504 : 'FM'}
    ### Extracting universe data (default NA parsing is switched off and replaced by an explicit marker list):
    df_universe = pd.read_excel(io = path_msci, sheet_name = tab_monthly, skiprows = [0, 2], header = 0, parse_dates = True,
                                na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND',
                                             '1.#QNAN', 'N/A', 'NULL', 'NaN', 'n/a', 'nan', 'null'], keep_default_na = False)
    df_universe = df_universe[['dates', 'region', 'ctry']].rename(columns = {'dates': 'Date', 'region': 'Market', 'ctry': 'Country'})
    ### Explicit column selection instead of squeeze(): squeeze() collapses a single-row table to a scalar,
    ### which made the following sort fail for a one-row universe:
    ser_universe = df_universe.set_index(['Date', 'Country'])['Market']
    ser_universe = ser_universe.sort_index(level = [0, 1]).replace(dict_markets)
    ser_market_membership = ser_universe[ser_universe.isin(arr_markets_needed)]
    ### Results output:
    return ser_market_membership

In [3]:
### GENERAL DATA PREPARATION

### Constants:
All = slice(None)
str_path_bis_csv = 'Data_Files/Source_Files/bis_bank_loans.csv'
str_url_bis_zip = 'https://www.bis.org/statistics/full_bis_lbs_diss_csv.zip'
str_csv_file_name = 'WEBSTATS_LBS_D_PUB_DATAFLOW_csv_col.csv'
str_path_bis_dataset = 'Data_Files/Source_Files/bis_dataset.h5'
str_claim_bis_dataset = 'claim_dataset'
### ISON membership loading:
ser_market_membership = get_market_membership_from_excel()
### BIS dataset filter:
list_ison_countries = list(map(str, ser_market_membership.index.get_level_values(1).unique()))
tup_bis_filter = (All, 'C', 'A', 'TO1', 'A', '5J', 'A', list_ison_countries, 'A', list_ison_countries, 'N')

In [4]:
### CSV LOADING

### File downloading:
obj_bis_zip = requests.get(str_url_bis_zip)
### An HTTP error status is raised here; otherwise an error page would only fail later as an unreadable archive:
obj_bis_zip.raise_for_status()
### Offline alternative:
#df_bis_full_data = pd.read_csv(str_path_bis_csv, index_col = [*range(2, 23, 2)])
### DataFrame creating:
### The archive is opened from memory; both the archive and the CSV member are closed as soon as parsing is finished.
### Every second column from position 2 to position 22 becomes an index level (11 levels in total).
with zipfile.ZipFile(io.BytesIO(obj_bis_zip.content)) as file_bis_zip:
    with file_bis_zip.open(str_csv_file_name) as file_bis_csv:
        df_bis_full_data = pd.read_csv(file_bis_csv, index_col = [*range(2, 23, 2)])

In [5]:
### BIS DATASET MUNGING

### Text columns replacing and date columns stacking:
### The first 14 remaining columns are dropped (presumably descriptive text columns - confirm against the CSV layout)
### and the columns left over are stacked into a new innermost index level.
ser_bis_full_data = df_bis_full_data.drop(df_bis_full_data.columns[ : 14], axis = 1).stack()
### Quarterly date managing:
### The stacked level holds the former column labels and is named 'Date_Q'; the series itself is named 'Value'.
ser_bis_full_data.index.names = ser_bis_full_data.index.names[ : -1] + ['Date_Q']
ser_bis_full_data.name = 'Value'
### Dataset filtering:
### tup_bis_filter selects per index level; afterwards the levels at positions 1-6, 8 and 10 are removed,
### which leaves level 0, the two country levels (positions 7 and 9) and 'Date_Q'.
ser_bis_filtered = ser_bis_full_data.loc[tup_bis_filter].reset_index(level = [1, 2, 3, 4, 5, 6, 8, 10], drop = True)
### Grouping by every level except the first one and summing collapses level 0.
### NOTE(review): level 0 is selected with a full slice in tup_bis_filter, so values of all its entries are added together - confirm this is intended.
ser_bis_filtered = ser_bis_filtered.groupby(ser_bis_filtered.index.names[1 : ], group_keys = False).sum()
### Date resampling to monthly:
### 'Date_Q' becomes a column; the two remaining index levels are named as reporter and partner.
df_bis_filtered = ser_bis_filtered.reset_index('Date_Q')
df_bis_filtered.index.names = ['Reporter_ID', 'Partner_ID']
### Hyphens are removed from the period labels before parsing (assumes labels such as 'YYYY-Qn' - TODO confirm),
### then every parsed date is moved forward to a business quarter end.
df_bis_filtered['Date_Q'] = df_bis_filtered['Date_Q'].str.replace('-', '')
df_bis_filtered['Date'] = pd.to_datetime(df_bis_filtered['Date_Q']) + pd.offsets.BQuarterEnd()
df_bis_filtered.drop('Date_Q', axis = 1, inplace = True)
### Series indexed by (Date, Reporter_ID, Partner_ID):
ser_bis_monthly = df_bis_filtered.set_index('Date', append = True).squeeze().reorder_levels([2, 0, 1])
### For every reporter / partner pair the dates are resampled with the 'BM' alias and back-filled,
### so each added month takes the value of the next available observation.
### NOTE(review): recent pandas versions deprecate the 'BM' alias in favour of 'BME' - confirm against the installed version.
ser_bis_monthly = ser_bis_monthly.groupby(['Reporter_ID', 'Partner_ID']).\
                                  apply(lambda iter_group: iter_group.droplevel(['Reporter_ID', 'Partner_ID']).resample('BM').bfill())
### Levels are reordered so that the third level comes first (presumably back to Date, Reporter_ID, Partner_ID - confirm),
### and the result is saved; mode 'w' recreates the HDF5 file.
ser_bis_monthly.reorder_levels([2, 0, 1]).to_hdf(path_or_buf = str_path_bis_dataset, key = str_claim_bis_dataset, mode = 'w')

In [None]:
### TEST: RESULTS LOADING

### Reads the stored series back from the HDF5 file; the last rows are shown as the cell output
### because the expression is the final statement of the cell.
pd.read_hdf(path_or_buf = str_path_bis_dataset, key = str_claim_bis_dataset).tail()

In [None]:
### TEST: COLUMNS LEARNING

### Country codes of the universe (second index level of the membership series):
list_ison = ser_market_membership.index.get_level_values(1).unique().to_list()
### The CSV loading cell moves every second column (positions 2 to 22) into the index, so the coded L_* fields
### are not reachable as columns of df_bis_full_data; they are turned back into columns for the lookups below.
### When the frame was loaded without index columns the fields are used as they are.
df_bis_flat = df_bis_full_data if ('L_REP_CTY' in df_bis_full_data.columns) else df_bis_full_data.reset_index()
### Number of distinct codes per country field that belong to the universe:
for iter_field in ['L_PARENT_CTY', 'L_REP_CTY', 'L_CP_COUNTRY']:
    print(len([iter_country for iter_country in df_bis_flat[iter_field].unique() if (iter_country in list_ison)]))
### Rows where the reporting country is not '5A', the parent country is not '5J' and the two differ,
### shown for the data columns at positions 10 to 19 of the loaded frame:
ser_bis_mask = (df_bis_flat['L_REP_CTY'] != '5A') & (df_bis_flat['L_PARENT_CTY'] != '5J') \
               & (df_bis_flat['L_REP_CTY'] != df_bis_flat['L_PARENT_CTY'])
print(df_bis_flat.loc[ser_bis_mask, df_bis_full_data.columns[10 : 20]])
### Distinct denomination codes:
print(df_bis_flat['L_DENOM'].unique())