**SYPA: Fundamental Analysis of Foreign Direct Investment** <br>
*1_API* <br>
Harvard SYPA <br>
User: Jake Schneider <br>
Date Created: February 7, 2020 <br>
Date Updated: February 8, 2020

----

**Load Packages**

In [1]:
#Import libraries
import sys
import pandas as pd
from datetime import date, datetime, time, timedelta
import pendulum
import json
import requests
import numpy as np
import math

import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
sns.set(style='ticks', context='talk')

from matplotlib.offsetbox import AnchoredText
from sklearn import linear_model
from sklearn.metrics import r2_score
import statsmodels.api as sm
#import fbprophet

import warnings
import itertools

from flask import jsonify, make_response

from varname import varname

from matplotlib.backends.backend_pdf import PdfPages
from fpdf import FPDF
from PIL import Image, ImageDraw, ImageFont

In [2]:
# Create function 'jprint'

def jprint(obj):
    # create a formatted string of the Python JSON object
    text = json.dumps(obj, sort_keys=True, indent=4)
    print(text)

----

**Pull Economic Data from APIs: World Bank, Systemic Peace Project (Polity IV), World Wide Governance Indicators** <br>
*Bloomberg API for Python?*

In [3]:
# Configure World Bank

import config_world_bank

#print(config_world_bank)

#api_key_wb = config_world_bank.api_key #API key no longer required
user_agent = 'Jake_Schneider'

In [4]:
##Test
#
#test = requests.get('http://api.worldbank.org/v2/country/all/indicator/SP.POP.TOTL?date=2000:2001&format=json')
#jprint(test.json())

In [5]:
# Create Function For Pulling Economic Data from World Bank

def parse_wb(country, series, start_date = '1970', end_date = '2020'):
    values = requests.get('http://api.worldbank.org/v2/country/{}/indicator/{}?date={}:{}&format=json'.format(country, series, start_date, end_date))
    length = len(values.json())
    #print(length)
    jprint(values.json()) #["observations"][length-1])
    return values.json() #["observations"][length-1]

In [6]:
## Run Function 'Parse_wb'
## 'SP.POP.TOTL' = Total Population
#
#wb_indicators = ['SP.POP.TOTL']
#
#wb_master_data = []
#for indicator in wb_indicators:
#    wb_data = parse_wb(country = 'all', series = indicator)
#    wb_master_data.append(wb_data)

---

**Import Data through Pandas Data Reader** <br>
Docs: https://pandas-datareader.readthedocs.io/en/latest/remote_data.html#world-bank

*Import PDR*

In [7]:
## Import pandas_datareader
#
#from pandas_datareader import data, wb
#import pandas_datareader as pdr

*Test PDR*

In [8]:
## TEST: Use PDR for FRED
#
#pdr.get_data_fred('GS10')

*Find Indicators: Macro and Governance*

In [9]:
## Find Indicators for FDI Project: GDP
#
#matches_gdp = wb.search('gdp.*.*')
#print(len(matches_gdp))
#matches_gdp.head()

In [10]:
## Store Indicators As a Variable: GDP
#
#list_gdp = list(matches_gdp['id'])
#print("The number of GDP variables are: ", len(list_gdp))
##print(list_gdp)

*Download Indicators*

In [11]:
#for num, indicator in zip(range(1, len(list_gdp)+1), list_gdp):
#    if num % 10 == 0:
#        print(num, "/", len(list_gdp), ":", indicator)

#data_wb = wb.download(indicator='5.51.01.10.gdp', country='all', start=1960, end=2018)

In [12]:
#data_wb

----

**Import Data through wbdata** <br>
Docs: https://wbdata.readthedocs.io/en/stable/ <br>
Example: https://mcmayer.net/getting-worldbank-data-with-python-easily/

*Import wbdata*

In [13]:
# Import wbdata

import wbdata

*Download and Explore*

In [14]:
# Download wbdata sources

sources = wbdata.get_source(display = False)
#sources

*Find Indicators*

In [15]:
## Search for Indicators: GDP
#
#wbdata.search_indicators("gdp per capita")

*Cull Datasets: WDI (2), Doing Business (1), WGI (3), International Debt Statistics (6), CPIA (31), Worldwide Bureaucracy Indicators (64), ESG (75)*

In [16]:
# Download World Development Indicators (2)

wdi = wbdata.get_indicator(source=2, display = False)
#wdi

In [17]:
#Create list of WDI IDs

wdi_ids = []
wdi_names = []

for num in range(0, len(wdi)):
    wdi_ids.append(wdi[num]["id"])
    wdi_names.append(wdi[num]["name"])
    
print(len(wdi_ids))
print(wdi_ids[0:5])
print(wdi_names[0:5])

1429
['AG.AGR.TRAC.NO', 'AG.CON.FERT.PT.ZS', 'AG.CON.FERT.ZS', 'AG.LND.AGRI.K2', 'AG.LND.AGRI.ZS']
['Agricultural machinery, tractors', 'Fertilizer consumption (% of fertilizer production)', 'Fertilizer consumption (kilograms per hectare of arable land)', 'Agricultural land (sq. km)', 'Agricultural land (% of land area)']


In [18]:
# Download Doing Business (1)

doing_business = wbdata.get_indicator(source=1, display = False)
#doing_business

In [19]:
#Create list of Doing Business IDs

doing_business_ids = []
doing_business_names = []

for num in range(0, len(doing_business)):
    doing_business_ids.append(doing_business[num]["id"])
    doing_business_names.append(doing_business[num]["name"])
    
print(len(doing_business_ids))
print(doing_business_ids[0:5])
print(doing_business_names[0:5])

205
['ENF.CONT.COEN.ATDR', 'ENF.CONT.COEN.ATFE.PR', 'ENF.CONT.COEN.COST.ZS', 'ENF.CONT.COEN.COST.ZS.DFRN', 'ENF.CONT.COEN.CSMG']
['Enforcing contracts: Alternative dispute resolution (0-3) (DB16-20 methodology)', 'Enforcing contracts: Attorney fees (% of claim)', 'Enforcing contracts: Cost (% of claim)', 'Enforcing contracts: Cost (% of claim) - Score', 'Enforcing contracts: Case management (0-6) (DB16-20 methodology)']


In [20]:
# Download WGI (3)

wgi = wbdata.get_indicator(source=3, display = False)
#wgi

In [21]:
#Create list of WGI IDs

wgi_ids = []
wgi_names = []

for num in range(0, len(wgi)):
    wgi_ids.append(wgi[num]["id"])
    wgi_names.append(wgi[num]["name"])
    
print(len(wgi_ids))
print(wgi_ids[0:5])
print(wgi_names[0:5])

36
['CC.EST', 'CC.NO.SRC', 'CC.PER.RNK', 'CC.PER.RNK.LOWER', 'CC.PER.RNK.UPPER']
['Control of Corruption: Estimate', 'Control of Corruption: Number of Sources', 'Control of Corruption: Percentile Rank', 'Control of Corruption: Percentile Rank, Lower Bound of 90% Confidence Interval', 'Control of Corruption: Percentile Rank, Upper Bound of 90% Confidence Interval']


In [22]:
# Download International Debt Stats (6)

debt = wbdata.get_indicator(source=6, display = False)
#debt

In [23]:
#Create list of International Debt Statistics IDs

debt_ids = []
debt_names = []

for num in range(0, len(debt)):
    debt_ids.append(debt[num]["id"])
    debt_names.append(debt[num]["name"])
    
print(len(debt_ids))
print(debt_ids[0:5])
print(debt_names[0:5])

497
['BM.GSR.TOTL.CD', 'BN.CAB.XOKA.CD', 'BX.GRT.EXTA.CD.DT', 'BX.GRT.TECH.CD.DT', 'BX.GSR.TOTL.CD']
['Imports of goods, services and primary income (BoP, current US$)', 'Current account balance (BoP, current US$)', 'Grants, excluding technical cooperation (current US$)', 'Technical cooperation grants (current US$)', 'Exports of goods, services and primary income (BoP, current US$)']


In [24]:
# Download CPIA (31)

cpia = wbdata.get_indicator(source=31, display = False)
#cpia

In [25]:
#Create list of International Debt Statistics IDs

cpia_ids = []
cpia_names = []

for num in range(0, len(cpia)):
    cpia_ids.append(cpia[num]["id"])
    cpia_names.append(cpia[num]["name"])
    
print(len(cpia_ids))
print(cpia_ids[0:5])
print(cpia_names[0:5])

21
['IQ.CPA.BREG.XQ', 'IQ.CPA.DEBT.XQ', 'IQ.CPA.ECON.XQ', 'IQ.CPA.ENVR.XQ', 'IQ.CPA.FINQ.XQ']
['CPIA business regulatory environment rating (1=low to 6=high)', 'CPIA debt policy rating (1=low to 6=high)', 'CPIA economic management cluster average (1=low to 6=high)', 'CPIA policy and institutions for environmental sustainability rating (1=low to 6=high)', 'CPIA quality of budgetary and financial management rating (1=low to 6=high)']


In [26]:
# Download World Bureaucracy (64)

bureaucracy = wbdata.get_indicator(source=64, display = False)
#bureaucracy

In [27]:
#Create list of Bureaucracy IDs

bureaucracy_ids = []
bureaucracy_names = []

for num in range(0, len(bureaucracy)):
    bureaucracy_ids.append(bureaucracy[num]["id"])
    bureaucracy_names.append(bureaucracy[num]["name"])
    
print(len(bureaucracy_ids))
print(bureaucracy_ids[0:5])
print(bureaucracy_names[0:5])

87
['BI.EMP.FRML.PB.ZS', 'BI.EMP.PWRK.PB.FE.ZS', 'BI.EMP.PWRK.PB.MA.ZS', 'BI.EMP.PWRK.PB.RU.ZS', 'BI.EMP.PWRK.PB.UR.ZS']
['Public sector employment as a share of formal employment', 'Public sector employment as a share of paid employment by gender (Female)', 'Public sector employment as a share of paid employment by gender (Male)', 'Public sector employment as a share of paid employment by location (Rural)', 'Public sector employment as a share of paid employment by location (Urban)']


In [28]:
# Download ESG (75)

esg = wbdata.get_indicator(source=64, display = False)
#esg

In [29]:
#Create list of ESG IDs

esg_ids = []
esg_names = []

for num in range(0, len(esg)):
    esg_ids.append(esg[num]["id"])
    esg_names.append(esg[num]["name"])
    
print(len(esg_ids))
print(esg_ids[0:5])
print(esg_names[0:5])

87
['BI.EMP.FRML.PB.ZS', 'BI.EMP.PWRK.PB.FE.ZS', 'BI.EMP.PWRK.PB.MA.ZS', 'BI.EMP.PWRK.PB.RU.ZS', 'BI.EMP.PWRK.PB.UR.ZS']
['Public sector employment as a share of formal employment', 'Public sector employment as a share of paid employment by gender (Female)', 'Public sector employment as a share of paid employment by gender (Male)', 'Public sector employment as a share of paid employment by location (Rural)', 'Public sector employment as a share of paid employment by location (Urban)']


*Create Country Lists*

In [30]:
# Download Countries

countries = wbdata.get_country(display = False)
countries[0:5]

[{'id': 'ABW',
  'iso2Code': 'AW',
  'name': 'Aruba',
  'region': {'id': 'LCN', 'value': 'Latin America & Caribbean '},
  'adminregion': {'id': '', 'value': ''},
  'incomeLevel': {'id': 'HIC', 'value': 'High income'},
  'lendingType': {'id': 'LNX', 'value': 'Not classified'},
  'capitalCity': 'Oranjestad',
  'longitude': '-70.0167',
  'latitude': '12.5167'},
 {'id': 'AFG',
  'iso2Code': 'AF',
  'name': 'Afghanistan',
  'region': {'id': 'SAS', 'value': 'South Asia'},
  'adminregion': {'id': 'SAS', 'value': 'South Asia'},
  'incomeLevel': {'id': 'LIC', 'value': 'Low income'},
  'lendingType': {'id': 'IDX', 'value': 'IDA'},
  'capitalCity': 'Kabul',
  'longitude': '69.1761',
  'latitude': '34.5228'},
 {'id': 'AFR',
  'iso2Code': 'A9',
  'name': 'Africa',
  'region': {'id': 'NA', 'value': 'Aggregates'},
  'adminregion': {'id': '', 'value': ''},
  'incomeLevel': {'id': 'NA', 'value': 'Aggregates'},
  'lendingType': {'id': '', 'value': 'Aggregates'},
  'capitalCity': '',
  'longitude': '',
 

In [31]:
#Create list of Individual and Aggregate Countries with IDs

countries_individual = []
countries_aggregates = []

countries_individual_ids = []
countries_aggregates_ids = []

for num in range(0, len(countries)):
    if countries[num]['region']['value'] != 'Aggregates':
        countries_individual.append(countries[num])
        countries_individual_ids.append(countries[num]["id"])
    else:
        countries_aggregates.append(countries[num])
        countries_aggregates_ids.append(countries[num]["id"])

In [32]:
# View Number of Countries Individual and Aggregates

print("Countries Individual: ", len(countries_individual))
print("Countries Individual IDs: ", len(countries_individual_ids))

print("Country Aggregates: ", len(countries_aggregates))
print("Country Aggregate IDs: ", len(countries_aggregates_ids))

print("Total Countries: ", len(countries))

Countries Individual:  218
Countries Individual IDs:  218
Country Aggregates:  86
Country Aggregate IDs:  86
Total Countries:  304


In [33]:
# View Countries and Country IDs

print(countries_individual_ids[0:5])
countries_individual[0:5]

['ABW', 'AFG', 'AGO', 'ALB', 'AND']


[{'id': 'ABW',
  'iso2Code': 'AW',
  'name': 'Aruba',
  'region': {'id': 'LCN', 'value': 'Latin America & Caribbean '},
  'adminregion': {'id': '', 'value': ''},
  'incomeLevel': {'id': 'HIC', 'value': 'High income'},
  'lendingType': {'id': 'LNX', 'value': 'Not classified'},
  'capitalCity': 'Oranjestad',
  'longitude': '-70.0167',
  'latitude': '12.5167'},
 {'id': 'AFG',
  'iso2Code': 'AF',
  'name': 'Afghanistan',
  'region': {'id': 'SAS', 'value': 'South Asia'},
  'adminregion': {'id': 'SAS', 'value': 'South Asia'},
  'incomeLevel': {'id': 'LIC', 'value': 'Low income'},
  'lendingType': {'id': 'IDX', 'value': 'IDA'},
  'capitalCity': 'Kabul',
  'longitude': '69.1761',
  'latitude': '34.5228'},
 {'id': 'AGO',
  'iso2Code': 'AO',
  'name': 'Angola',
  'region': {'id': 'SSF', 'value': 'Sub-Saharan Africa '},
  'adminregion': {'id': 'SSA',
   'value': 'Sub-Saharan Africa (excluding high income)'},
  'incomeLevel': {'id': 'LMC', 'value': 'Lower middle income'},
  'lendingType': {'id': '

In [34]:
# Create Countries Individual DF

country_df = pd.DataFrame.from_dict(countries_individual)
country_df.head()

Unnamed: 0,id,iso2Code,name,region,adminregion,incomeLevel,lendingType,capitalCity,longitude,latitude
0,ABW,AW,Aruba,"{'id': 'LCN', 'value': 'Latin America & Caribb...","{'id': '', 'value': ''}","{'id': 'HIC', 'value': 'High income'}","{'id': 'LNX', 'value': 'Not classified'}",Oranjestad,-70.0167,12.5167
1,AFG,AF,Afghanistan,"{'id': 'SAS', 'value': 'South Asia'}","{'id': 'SAS', 'value': 'South Asia'}","{'id': 'LIC', 'value': 'Low income'}","{'id': 'IDX', 'value': 'IDA'}",Kabul,69.1761,34.5228
2,AGO,AO,Angola,"{'id': 'SSF', 'value': 'Sub-Saharan Africa '}","{'id': 'SSA', 'value': 'Sub-Saharan Africa (ex...","{'id': 'LMC', 'value': 'Lower middle income'}","{'id': 'IBD', 'value': 'IBRD'}",Luanda,13.242,-8.81155
3,ALB,AL,Albania,"{'id': 'ECS', 'value': 'Europe & Central Asia'}","{'id': 'ECA', 'value': 'Europe & Central Asia ...","{'id': 'UMC', 'value': 'Upper middle income'}","{'id': 'IBD', 'value': 'IBRD'}",Tirane,19.8172,41.3317
4,AND,AD,Andorra,"{'id': 'ECS', 'value': 'Europe & Central Asia'}","{'id': '', 'value': ''}","{'id': 'HIC', 'value': 'High income'}","{'id': 'LNX', 'value': 'Not classified'}",Andorra la Vella,1.5218,42.5075


In [35]:
#TEST

country_df["region"][0].get("value")

'Latin America & Caribbean '

In [36]:
# Reset Internal Dictionaries

for num in country_df.index:
    
    #Get the values for the dictionary columns
    region = country_df["region"][num].get("value")
    adminregion = country_df["adminregion"][num].get("value")
    incomeLevel = country_df["incomeLevel"][num].get("value")
    lendingType = country_df["lendingType"][num].get("value")
    
    #Input the new values
    country_df["region"][num] = region
    country_df["adminregion"][num] = adminregion
    country_df["incomeLevel"][num] = incomeLevel
    country_df["lendingType"][num] = lendingType
    
    #Print (if necessary)
    #print(region)

In [37]:
# View Country DF

country_df.head()

Unnamed: 0,id,iso2Code,name,region,adminregion,incomeLevel,lendingType,capitalCity,longitude,latitude
0,ABW,AW,Aruba,Latin America & Caribbean,,High income,Not classified,Oranjestad,-70.0167,12.5167
1,AFG,AF,Afghanistan,South Asia,South Asia,Low income,IDA,Kabul,69.1761,34.5228
2,AGO,AO,Angola,Sub-Saharan Africa,Sub-Saharan Africa (excluding high income),Lower middle income,IBRD,Luanda,13.242,-8.81155
3,ALB,AL,Albania,Europe & Central Asia,Europe & Central Asia (excluding high income),Upper middle income,IBRD,Tirane,19.8172,41.3317
4,AND,AD,Andorra,Europe & Central Asia,,High income,Not classified,Andorra la Vella,1.5218,42.5075


In [38]:
# Rename name to country

country_df = country_df.rename(columns={"name": "country"})
country_df.head()

Unnamed: 0,id,iso2Code,country,region,adminregion,incomeLevel,lendingType,capitalCity,longitude,latitude
0,ABW,AW,Aruba,Latin America & Caribbean,,High income,Not classified,Oranjestad,-70.0167,12.5167
1,AFG,AF,Afghanistan,South Asia,South Asia,Low income,IDA,Kabul,69.1761,34.5228
2,AGO,AO,Angola,Sub-Saharan Africa,Sub-Saharan Africa (excluding high income),Lower middle income,IBRD,Luanda,13.242,-8.81155
3,ALB,AL,Albania,Europe & Central Asia,Europe & Central Asia (excluding high income),Upper middle income,IBRD,Tirane,19.8172,41.3317
4,AND,AD,Andorra,Europe & Central Asia,,High income,Not classified,Andorra la Vella,1.5218,42.5075


In [39]:
# Sort by country

country_df = country_df.sort_values(by='country')
country_df.head()

Unnamed: 0,id,iso2Code,country,region,adminregion,incomeLevel,lendingType,capitalCity,longitude,latitude
1,AFG,AF,Afghanistan,South Asia,South Asia,Low income,IDA,Kabul,69.1761,34.5228
3,ALB,AL,Albania,Europe & Central Asia,Europe & Central Asia (excluding high income),Upper middle income,IBRD,Tirane,19.8172,41.3317
55,DZA,DZ,Algeria,Middle East & North Africa,Middle East & North Africa (excluding high inc...,Upper middle income,IBRD,Algiers,3.05097,36.7397
8,ASM,AS,American Samoa,East Asia & Pacific,East Asia & Pacific (excluding high income),Upper middle income,Not classified,Pago Pago,-170.691,-14.2846
4,AND,AD,Andorra,Europe & Central Asia,,High income,Not classified,Andorra la Vella,1.5218,42.5075


In [40]:
# Save Data

country_df.to_csv('../../2_Inputs/Culled/country_df.csv')

*Cull Data Using Indicators and Countries List*

In [41]:
# TEST Culling Data

test_df = wbdata.get_dataframe({'FP.CPI.TOTL.ZG': 'value'}, country=countries_individual_ids)
test_df

Unnamed: 0_level_0,Unnamed: 1_level_0,value
country,date,Unnamed: 2_level_1
Aruba,2019,
Aruba,2018,3.580394
Aruba,2017,-0.481315
Aruba,2016,-0.886758
Aruba,2015,0.475067
...,...,...
Zimbabwe,2004,
Zimbabwe,2003,
Zimbabwe,2002,
Zimbabwe,2001,


In [42]:
# TEST 2: WB Data

test_df2 = wbdata.get_dataframe({wdi_ids[100]: wdi_names[100]}, country=countries_individual_ids)
test_df2

Unnamed: 0_level_0,Unnamed: 1_level_0,"Net bilateral aid flows from DAC donors, Italy (current US$)"
country,date,Unnamed: 2_level_1
Aruba,2019,
Aruba,2018,
Aruba,2017,
Aruba,2016,
Aruba,2015,
...,...,...
Zimbabwe,2004,1440000.0
Zimbabwe,2003,1120000.0
Zimbabwe,2002,2090000.0
Zimbabwe,2001,1740000.0


In [43]:
## Pull WDI Data
#
#wdi_df = pd.DataFrame()
#
#for num, indicator, name in zip(range(1, len(wdi) + 1), wdi_ids, wdi_names):
#    if num % 10 == 0:
#        print(num, " / ", len(wdi), " : ", name)
#    df = wbdata.get_dataframe({indicator: name}, country=countries_individual_ids)
#    wdi_df = pd.concat([wdi_df, df], axis = 1)

In [44]:
## View WDI Data
#
#wdi_df

In [45]:
# Pull Doing Business Data

doing_business_df = pd.DataFrame()

for num, indicator, name in zip(range(1, len(doing_business) + 1), doing_business_ids, doing_business_names):
    if num % 10 == 0:
        print(num, " / ", len(doing_business), " : ", name)
    try:
        df = wbdata.get_dataframe({indicator: name}, country=countries_individual_ids)
    except:
        print(indicator, "failed")
        pass
    doing_business_df = pd.concat([doing_business_df, df], axis = 1)

10  /  205  :  Enforcing contracts (DB04-15 methodology) - Score
20  /  205  :  Enforcing contracts: Quality of the judicial processes index (0-18) (DB17-20 methodology)
30  /  205  :  Ease of doing business index (1=most business-friendly regulations)
40  /  205  :  Dealing with construction permits: Quality of building regulations index (0-2) (DB16-20 methodology)
50  /  205  :  Getting credit (DB15-20 methodology) - Score
60  /  205  :  Getting credit: Credit bureau coverage (% of adults)
IC.ELC.ACES.RK DB19 failed
70  /  205  :  Getting electricity: Mechanisms for monitoring outages (0-1) (DB16-20 methodology)
80  /  205  :  Getting electricity: System average interruption duration index (SAIDI) (DB16-20 methodology)
90  /  205  :  Starting a business: Time - Women (days)- Score
100  /  205  :  Registering property (DB05-15 methodology) - Score
110  /  205  :  Registering property: Procedures (number) - Score
120  /  205  :  Paying taxes: Time to comply with corporate income tax co

In [46]:
# View Doing Business Data

doing_business_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Enforcing contracts: Alternative dispute resolution (0-3) (DB16-20 methodology),Enforcing contracts: Attorney fees (% of claim),Enforcing contracts: Cost (% of claim),Enforcing contracts: Cost (% of claim) - Score,Enforcing contracts: Case management (0-6) (DB16-20 methodology),Enforcing contracts: Court automation (0-4) (DB17-20 methodology),Enforcing contracts: Court fees (% of claim),Enforcing contracts: Court structure and proceedings (0-5) (DB16 methodology),Enforcing contracts: Court structure and proceedings (0-5) (DB17-20 methodology),Enforcing contracts (DB04-15 methodology) - Score,...,Trading across borders: Cost to import (US$ per container deflated)(DB06-15 methodology),Trading across borders: Cost to import (US$ per container)(DB06-15 methodology) - Score,Trading across borders: Cost to import: Documentary compliance (USD) (DB16-20 methodology),Trading across borders: Cost to import: Documentary compliance (USD) (DB16-20 methodology) - Score,Time to import: Documentary compliance (hours) (DB16-20 methodology),Time to import (days) (DB06-15 methodology),Trading across borders: Time to import: Border compliance (hours) (DB16-20 methodology) - Score,Trading across borders: Time to import: Documentary compliance (hours) (DB16-20 methodology) - Score,Trading across borders: Time to import (days) (DB06-15 methodology) - Score,Rank: Trading across borders (1=most business-friendly regulations)
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Afghanistan,1960,,,,,,,,,,,...,,,,,,,,,,
Afghanistan,1961,,,,,,,,,,,...,,,,,,,,,,
Afghanistan,1962,,,,,,,,,,,...,,,,,,,,,,
Afghanistan,1963,,,,,,,,,,,...,,,,,,,,,,
Afghanistan,1964,,,,,,,,,,,...,,,,,,,,,,


In [47]:
# Save Doing Business Data

doing_business_df.to_csv('../../2_Inputs/Culled/doing_business_df.csv')

In [48]:
# Pull, Print and Save WGI Data

wgi_df = pd.DataFrame()

for num, indicator, name in zip(range(1, len(wgi) + 1), wgi_ids, wgi_names):
    if num % 10 == 0:
        print(num, " / ", len(wgi), " : ", name)
    try:
        df = wbdata.get_dataframe({indicator: name}, country=countries_individual_ids)
    except:
        print(indicator, "failed")
        pass
    wgi_df = pd.concat([wgi_df, df], axis = 1)
    
print(wgi_df.head())

wgi_df.to_csv('../../2_Inputs/Culled/wgi_df.csv')

10  /  36  :  Government Effectiveness: Percentile Rank, Lower Bound of 90% Confidence Interval
20  /  36  :  Rule of Law: Number of Sources
30  /  36  :  Regulatory Quality: Standard Error
              Control of Corruption: Estimate  \
country date                                    
Aruba   2018                         1.252027   
        2017                         1.291643   
        2016                         1.285848   
        2015                         1.297111   
        2014                         1.018918   

              Control of Corruption: Number of Sources  \
country date                                             
Aruba   2018                                       2.0   
        2017                                       2.0   
        2016                                       2.0   
        2015                                       2.0   
        2014                                       2.0   

              Control of Corruption: Percentile Rank  \
cou

In [49]:
# Pull, Print and Save Debt Data

debt_df = pd.DataFrame()

for num, indicator, name in zip(range(1, len(debt) + 1), debt_ids, debt_names):
    if num % 10 == 0:
        print(num, " / ", len(debt), " : ", name)
    try:
        df = wbdata.get_dataframe({indicator: name}, country=countries_individual_ids)
    except:
        print(indicator, "failed")
        pass
    debt_df = pd.concat([debt_df, df], axis = 1)
    
print(debt_df.head())

debt_df.to_csv('../../2_Inputs/Culled/debt_df.csv')

10  /  497  :  PPG, bilateral (AMT, current US$)
20  /  497  :  Principal repayments on external debt, general government sector (PPG) (AMT, current US$)
30  /  497  :  PPG, multilateral (AMT, current US$)
40  /  497  :  PPG, official creditors (AMT, current US$)
50  /  497  :  PPG, commercial banks (AMT, current US$)
60  /  497  :  PRVG, other private creditors (AMT, current US$)
70  /  497  :  Principal arrears, private creditors (current US$)
80  /  497  :  Commitments, official creditors (COM, current US$)
90  /  497  :  Currency composition of PPG debt, Pound sterling (%)
100  /  497  :  OPS, bilateral concessional (DIS, current US$)
110  /  497  :  Disbursements on external debt, public and publicly guaranteed (PPG) (DIS, current US$)
120  /  497  :  GG, multilateral concessional (DIS, current US$)
130  /  497  :  GG, bonds (DIS, current US$)
140  /  497  :  PNG, commercial banks and other creditors (DIS, current US$)
150  /  497  :  PRVG, private creditors (DIS, current US$)
160

In [50]:
# Pull, Print and Save CPIA Data

cpia_df = pd.DataFrame()

for num, indicator, name in zip(range(1, len(cpia) + 1), cpia_ids, cpia_names):
    if num % 10 == 0:
        print(num, " / ", len(cpia), " : ", name)
    try:
        df = wbdata.get_dataframe({indicator: name}, country=countries_individual_ids)
    except:
        print(indicator, "failed")
        pass
    cpia_df = pd.concat([cpia_df, df], axis = 1)
    
print(cpia_df.head())

cpia_df.to_csv('../../2_Inputs/Culled/cpia_df.csv')

10  /  21  :  IDA resource allocation index (1=low to 6=high)
20  /  21  :  CPIA trade rating (1=low to 6=high)
              CPIA business regulatory environment rating (1=low to 6=high)  \
country date                                                                  
Aruba   2019                                                NaN               
        2018                                                NaN               
        2017                                                NaN               
        2016                                                NaN               
        2015                                                NaN               

              CPIA debt policy rating (1=low to 6=high)  \
country date                                              
Aruba   2019                                        NaN   
        2018                                        NaN   
        2017                                        NaN   
        2016                           

In [51]:
# Pull, Print and Save Bureacracy Data

bureaucracy_df = pd.DataFrame()

for num, indicator, name in zip(range(1, len(bureaucracy) + 1), bureaucracy_ids, bureaucracy_names):
    if num % 10 == 0:
        print(num, " / ", len(bureaucracy), " : ", name)
    try:
        df = wbdata.get_dataframe({indicator: name}, country=countries_individual_ids)
    except:
        print(indicator, "failed")
        pass
    bureaucracy_df = pd.concat([bureaucracy_df, df], axis = 1)
    
print(bureaucracy_df.head())

bureaucracy_df.to_csv('../../2_Inputs/Culled/bureaucracy_df.csv')

10  /  87  :  Public sector employment as a share of total employment by location (Rural)
20  /  87  :  Share of private paid employees with a contract
30  /  87  :  Females as a share of private paid employees by occupation (Professionals)
40  /  87  :  Share of public paid employees with a contract
50  /  87  :  Number of public paid employees
60  /  87  :  Number of paid employees
70  /  87  :  Public sector wage premium by occupation (Professionals) (compared to formal wage employees)
80  /  87  :  Relative wage of Technicians in private sector  (using clerk as reference) 
                  Public sector employment as a share of formal employment  \
country     date                                                             
Afghanistan 2016                                                NaN          
            2015                                                NaN          
            2014                                                NaN          
            2013          

In [52]:
# Pull, Print and Save ESG Data

esg_df = pd.DataFrame()

for num, indicator, name in zip(range(1, len(esg) + 1), esg_ids, esg_names):
    if num % 10 == 0:
        print(num, " / ", len(esg), " : ", name)
    try:
        df = wbdata.get_dataframe({indicator: name}, country=countries_individual_ids)
    except:
        print(indicator, "failed")
        pass
    esg_df = pd.concat([esg_df, df], axis = 1)
    
print(esg_df.head())

esg_df.to_csv('../../2_Inputs/Culled/esg_df.csv')

10  /  87  :  Public sector employment as a share of total employment by location (Rural)
20  /  87  :  Share of private paid employees with a contract
30  /  87  :  Females as a share of private paid employees by occupation (Professionals)
40  /  87  :  Share of public paid employees with a contract
50  /  87  :  Number of public paid employees
60  /  87  :  Number of paid employees
70  /  87  :  Public sector wage premium by occupation (Professionals) (compared to formal wage employees)
80  /  87  :  Relative wage of Technicians in private sector  (using clerk as reference) 
                  Public sector employment as a share of formal employment  \
country     date                                                             
Afghanistan 2016                                                NaN          
            2015                                                NaN          
            2014                                                NaN          
            2013          