In [1]:
import datetime
import warnings
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
import wbdata
warnings.filterwarnings("ignore")


print('Initialized')


Initialized


In [2]:
# Get a list of the data sources wbdata exposes. The `id` column is the number
# passed as `source=` when listing a source's indicators.
wbdata.get_source()

  id  name
----  --------------------------------------------------------------------
   1  Doing Business
   2  World Development Indicators
   3  Worldwide Governance Indicators
   5  Subnational Malnutrition Database
   6  International Debt Statistics
  11  Africa Development Indicators
  12  Education Statistics
  13  Enterprise Surveys
  14  Gender Statistics
  15  Global Economic Monitor
  16  Health Nutrition and Population Statistics
  18  IDA Results Measurement System
  19  Millennium Development Goals
  20  Quarterly Public Sector Debt
  22  Quarterly External Debt Statistics SDDS
  23  Quarterly External Debt Statistics GDDS
  24  Poverty and Equity
  25  Jobs
  27  Global Economic Prospects
  28  Global Financial Inclusion
  29  The Atlas of Social Protection: Indicators of Resilience and Equity
  30  Exporter Dynamics Database – Indicators at Country-Year Level
  31  Country Policy and Institutional Assessment
  32  Global Financial Development
  33  G20 Financial Inclus

In [94]:
# List the indicators published under source id 12.
# NOTE(review): the source listing shows id 12 as "Education Statistics" and
# id 1 as "Doing Business", so the earlier "Doing Business" wording for this
# cell did not match the call.

wbdata.get_indicator(source=12)

id                         name
-------------------------  ---------------------------------------------------------------------------------------------------------------------------------------------------
fin1.t.a.1                 Financial institution account,male(% age 15+)
fin1.t.a.2                 Financial institution account,female(% age 15+)
fin18.t.d.1                Saved any money in the past year, male  (% age 15+)
fin18.t.d.2                Saved any money in the past year, female  (% age 15+)
FP.CPI.TOTL.ZG             Inflation, consumer prices (annual %)
FX.OWN.TOTL.FE.ZS          Account ownership at a financial institution or with a mobile-money-service provider, female (% of population ages 15+)
FX.OWN.TOTL.MA.ZS          Account ownership at a financial institution or with a mobile-money-service provider, male (% of population ages 15+)
HD.HCI.AMRT                Survival Rate from Age 15-60
HD.HCI.AMRT.FE             Survival Rate from Age 15-60, Female
HD.HCI.A

In [4]:
# Use the search function to find countries matching "united"; the returned
# id (e.g. USA, GBR) is the value the `country=` argument takes in data queries.
wbdata.search_countries('united')

id    name
----  --------------------
ARE   United Arab Emirates
GBR   United Kingdom
USA   United States

In [5]:
# Use the built-in get_data request function to pull the data for
# "Ease of doing business index (1=most business-friendly regulations)".
# Indicators are addressed by code, so a code cheat sheet is needed when using this tool.
# The result is a list with one dict per year; years without data carry 'value': None.

wbdata.get_data("IC.BUS.EASE.XQ", country="USA")

[{'indicator': {'id': 'IC.BUS.EASE.XQ',
   'value': 'Ease of doing business index (1=most business-friendly regulations)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2020',
  'value': None,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'IC.BUS.EASE.XQ',
   'value': 'Ease of doing business index (1=most business-friendly regulations)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2019',
  'value': 6,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'IC.BUS.EASE.XQ',
   'value': 'Ease of doing business index (1=most business-friendly regulations)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2018',
  'value': None,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'IC.BUS.EASE.XQ',
   'value': 'Ease of doing business index (1=most business-friendly regulations)'},
  'cou

In [6]:
# Restrict a query to a date range: `data_date` is a (start, end) pair of
# datetime objects, here 2010 through 2011. `datetime` is imported with the
# other modules in the first cell.
data_date = (datetime.datetime(2010, 1, 1), datetime.datetime(2011, 1, 1))

# Same indicator as before, now for two countries and limited to the range above.
wbdata.get_data("IC.BUS.EASE.XQ", country=["USA", "GBR"], data_date=data_date)

[{'indicator': {'id': 'IC.BUS.EASE.XQ',
   'value': 'Ease of doing business index (1=most business-friendly regulations)'},
  'country': {'id': 'GB', 'value': 'United Kingdom'},
  'countryiso3code': 'GBR',
  'date': '2011',
  'value': None,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'IC.BUS.EASE.XQ',
   'value': 'Ease of doing business index (1=most business-friendly regulations)'},
  'country': {'id': 'GB', 'value': 'United Kingdom'},
  'countryiso3code': 'GBR',
  'date': '2010',
  'value': None,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'IC.BUS.EASE.XQ',
   'value': 'Ease of doing business index (1=most business-friendly regulations)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2011',
  'value': None,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'IC.BUS.EASE.XQ',
   'value': 'Ease of doing business index (1=most business-friendly regulations)'},
 

In [7]:
# Search indicator names for "gdp per capita" to find the code to request.
wbdata.search_indicators("gdp per capita")

id                          name
--------------------------  ----------------------------------------------------------------------------------------
6.0.GDPpc_constant          GDP per capita, PPP (constant 2011 international $)
FB.DPT.INSU.PC.ZS           Deposit insurance coverage (% of GDP per capita)
NV.AGR.PCAP.KD.ZG           Real agricultural GDP per capita growth rate (%)
NY.GDP.PCAP.CD              GDP per capita (current US$)
NY.GDP.PCAP.CN              GDP per capita (current LCU)
NY.GDP.PCAP.KD              GDP per capita (constant 2010 US$)
NY.GDP.PCAP.KD.ZG           GDP per capita growth (annual %)
NY.GDP.PCAP.KN              GDP per capita (constant LCU)
NY.GDP.PCAP.PP.CD           GDP per capita, PPP (current international $)
NY.GDP.PCAP.PP.KD           GDP per capita, PPP (constant 2017 international $)
NY.GDP.PCAP.PP.KD.87        GDP per capita, PPP (constant 1987 international $)
NY.GDP.PCAP.PP.KD.ZG        GDP per capita, PPP annual growth (%)
SE.XPD.PRIM.PC.ZS   

In [8]:
# List the income-level groupings and their ids (e.g. HIC = High income);
# an id can be passed as `incomelevel=` to filter countries.
wbdata.get_incomelevel() 

id    value
----  -------------------
HIC   High income
INX   Not classified
LIC   Low income
LMC   Lower middle income
LMY   Low & middle income
MIC   Middle income
UMC   Upper middle income

In [9]:
# Ids of every economy in the high-income ("HIC") group.
countries = [
    economy['id']
    for economy in wbdata.get_country(incomelevel='HIC')
]

# Indicator code -> column name to use in the resulting frame.
indicators = {
    "IC.BUS.EASE.XQ": "doing_business",
    "NY.GDP.PCAP.PP.KD": "gdppc",
}

# One row per (country, date) with a column per indicator; dates are parsed
# into datetime values rather than left as strings.
df = wbdata.get_dataframe(
    indicators,
    country=countries,
    convert_date=True,
)

df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,doing_business,gdppc
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1
Aruba,2020-01-01,,
Aruba,2019-01-01,,
Aruba,2018-01-01,,
Aruba,2017-01-01,,38442.413838
Aruba,2016-01-01,,37866.173853


In [10]:
# Summary statistics per indicator column; `count` shows how many non-null
# observations each indicator actually has.
df.describe()

Unnamed: 0,doing_business,gdppc
count,59.0,1872.0
mean,49.033898,39381.05927
std,37.228542,21392.159807
min,1.0,4595.019036
25%,19.5,24642.291429
50%,41.0,35922.469708
75%,70.0,48434.438839
max,145.0,161938.749262


In [11]:
# Keep only the rows that actually carry a doing-business value.
df.loc[df['doing_business'].notnull()]

Unnamed: 0_level_0,Unnamed: 1_level_0,doing_business,gdppc
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1
United Arab Emirates,2019-01-01,16.0,67119.132637
Antigua and Barbuda,2019-01-01,113.0,21910.184985
Australia,2019-01-01,14.0,49854.090319
Austria,2019-01-01,27.0,56187.584049
Belgium,2019-01-01,46.0,51934.125463
Bahrain,2019-01-01,43.0,45010.711495
"Bahamas, The",2019-01-01,119.0,37100.743985
Barbados,2019-01-01,128.0,15638.829897
Brunei Darussalam,2019-01-01,66.0,62099.587889
Canada,2019-01-01,23.0,49031.379925


In [12]:
# Pull the two indicators for the high-income economies again
# (uses `indicators` and `countries` from the earlier cell).
cor = wbdata.get_dataframe(
    indicators,
    country=countries,
    convert_date=True,
)

# Order the index, collapse each country to a single row with `.last()`,
# then correlate the two indicator columns across countries.
# NOTE(review): `.last()` presumably picks the latest available value per
# column, so the two indicators may come from different years — confirm.
(
    cor
    .sort_index()
    .groupby('country')
    .last()
    .corr()
)

Unnamed: 0,doing_business,gdppc
doing_business,1.0,-0.386135
gdppc,-0.386135,1.0


# Self-directed exploration

In [58]:
# List the indicators published under source id 6
# ("International Debt Statistics" in the source listing).
wbdata.get_indicator(source=6)  

id                   name
-------------------  ---------------------------------------------------------------------------------------------------
BM.GSR.TOTL.CD       Imports of goods, services and primary income (BoP, current US$)
BN.CAB.XOKA.CD       Current account balance (BoP, current US$)
BX.GRT.EXTA.CD.DT    Grants, excluding technical cooperation (current US$)
BX.GRT.TECH.CD.DT    Technical cooperation grants (current US$)
BX.GSR.TOTL.CD       Exports of goods, services and primary income (BoP, current US$)
BX.KLT.DINV.CD.DT    Foreign direct investment, net inflows in reporting economy (DRS, current US$)
BX.KLT.DREM.CD.DT    Primary income on FDI (current US$)
BX.PEF.TOTL.CD.DT    Portfolio investment, equity (DRS, current US$)
BX.TRF.PWKR.CD.DT    Personal remittances, received (current US$)
DT.AMT.BLAT.CD       PPG, bilateral (AMT, current US$)
DT.AMT.BLAT.GG.CD    GG, bilateral (AMT, current US$)
DT.AMT.BLAT.OPS.CD   OPS, bilateral (AMT, current US$)
DT.AMT.BLAT.PRVG.CD  P

In [None]:
# Pull "Imports of goods, services and primary income (BoP, current US$)"
# with no country filter.
# NOTE(review): this cell has no execution count or saved output, so it does
# not appear to have been run in the saved session.
wbdata.get_data("BM.GSR.TOTL.CD")

In [95]:
# Indicator code -> column name for a wider pull covering trade, income,
# governance/corruption, schooling and reserves.
# NOTE(review): a few labels are misspelled ("cpia_raing", "..._pecent_gdp").
# They are kept as-is because they become DataFrame column names that other
# cells may already reference.
# This cell rebinds `indicators` and `df` from the earlier high-income example.
indicators = {
    "BM.GSR.TOTL.CD": "import_dollars",
    "NY.GNP.MKTP.CD": "gni",
    "NY.GDP.PCAP.CD": "gdp_percapita",
    "IQ.CPA.GNDR.XQ": "gender_equality",
    "IQ.CPA.TRAN.XQ": "cpia_raing",
    "GV.TI.SCOR.IDX": "corruption_perceptions_index",
    "CC.EST": "control_corruption_est",
    "IC.FRM.COR.ZS": "firms_id_corruption",
    "IC.FRM.OBS.OBST4": "firms_percent_corruption_biggest",
    "BX.TRF.MGR.DT.GD.ZS": "remittance_pecent_gdp",
    "BAR.PRM.SCHL.1519": "avg_years_primary_school_15-19",
    "BAR.PRM.SCHL.1519.FE": "avg_years_primary_school_15-19F",
    "BAR.SEC.SCHL.2024": "avg_years_secondary_school_20-24",
    # Female counterpart of BAR.SEC.SCHL.2024 (ages 20-24) so the code matches
    # the "20-24F" label; the ages 15-19 code was previously paired with it.
    "BAR.SEC.SCHL.2024.FE": "avg_years_secondary_school_20-24F",
    "FI.RES.TOTL.CD": "total_reserves",
    "BG.GSR.NFSV.GD.ZS": "trade_services_pecent_gdp",
    "FB.BNK.CAPA.ZS": "bank_capital_asset_ratio",
    "FI.RES.XGLD.CD": "total_reserves_sans_gold",
}

# No `country=` filter here, so nothing restricts the economies returned.
# Keep only rows that have a GDP-per-capita value, then flatten the
# (country, date) index into ordinary columns.
df = (
    wbdata.get_dataframe(indicators, convert_date=True)
    .loc[lambda frame: frame["gdp_percapita"].notnull()]
    .reset_index()
)
df

Unnamed: 0,country,date,import_dollars,gni,gdp_percapita,gender_equality,cpia_raing,corruption_perceptions_index,control_corruption_est,firms_id_corruption,...,avg_years_primary_school_15-19,avg_years_primary_school_15-19F,avg_years_secondary_school_20-24,avg_years_secondary_school_20-24F,total_reserves,trade_services_pecent_gdp,bank_capital_asset_ratio,total_reserves_sans_gold,school_enroll_net_f,school_enroll_net_m
0,Afghanistan,1960-01-01,,5.488888e+08,59.773194,,,,,,...,,,,,5.069080e+07,,,1.000000e+07,,
1,Afghanistan,1961-01-01,,5.600000e+08,59.860874,,,,,,...,,,,,4.244450e+07,,,6.240000e+06,,
2,Afghanistan,1962-01-01,,5.577778e+08,58.458015,,,,,,...,,,,,4.059210e+07,,,4.470000e+06,,
3,Afghanistan,1963-01-01,,7.666667e+08,78.706388,,,,,,...,,,,,4.554780e+07,,,9.240000e+06,,
4,Afghanistan,1964-01-01,,8.155556e+08,82.095231,,,,,,...,,,,,4.461968e+07,,,8.130000e+06,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12393,Zimbabwe,2015-01-01,7.873438e+09,1.798077e+10,1445.071062,4.0,2.0,,-1.310461,,...,,,,,4.345861e+08,9.569889,,4.181497e+08,,
12394,Zimbabwe,2016-01-01,6.825338e+09,1.877295e+10,1464.583529,4.0,2.0,,-1.259387,,...,,,,,4.071932e+08,8.080966,,4.067314e+08,,
12395,Zimbabwe,2017-01-01,6.950997e+09,2.007960e+10,1548.170056,4.0,2.0,,-1.272736,,...,,,,,2.926212e+08,6.858633,,2.920988e+08,,
12396,Zimbabwe,2018-01-01,,2.270665e+10,1683.740577,4.0,2.5,,-1.234743,,...,,,,,8.695109e+07,,,8.643459e+07,,


In [96]:
# Inspect how much of the pulled data exists for the United States.
df.loc[df['country'] == 'United States']

Unnamed: 0,country,date,import_dollars,gni,gdp_percapita,gender_equality,cpia_raing,corruption_perceptions_index,control_corruption_est,firms_id_corruption,...,avg_years_primary_school_15-19,avg_years_primary_school_15-19F,avg_years_secondary_school_20-24,avg_years_secondary_school_20-24F,total_reserves,trade_services_pecent_gdp,bank_capital_asset_ratio,total_reserves_sans_gold,school_enroll_net_f,school_enroll_net_m
11806,United States,1960-01-01,,546400000000.0,3007.123445,,,,,,...,,,,,19664160000.0,,,1554800000.0,,
11807,United States,1961-01-01,,566800000000.0,3066.562869,,,,,,...,,,,,18825530000.0,,,1805900000.0,,
11808,United States,1962-01-01,,609200000000.0,3243.843078,,,,,,...,,,,,17252460000.0,,,1163400000.0,,
11809,United States,1963-01-01,,643100000000.0,3374.515171,,,,,,...,,,,,16878650000.0,,,1247000000.0,,
11810,United States,1964-01-01,,690700000000.0,3573.941185,,,,,,...,,,,,16725490000.0,,,1201400000.0,,
11811,United States,1965-01-01,,749000000000.0,3827.52711,,,,,,...,,,,,15498320000.0,,,1385000000.0,,
11812,United States,1966-01-01,,820100000000.0,4146.316646,,,,,,...,,,,,14953450000.0,,,1646700000.0,,
11813,United States,1967-01-01,,867100000000.0,4336.426587,,,,,,...,,,,,14898990000.0,,,2765200000.0,,
11814,United States,1968-01-01,,948600000000.0,4695.92339,,,,,,...,,,,,17857180000.0,,,4817900000.0,,
11815,United States,1969-01-01,,1026000000000.0,5032.144743,,,,,,...,,,,,17031420000.0,,,5104600000.0,,


In [54]:
# Search indicator names for "procure" to find procurement-related indicators.
wbdata.search_indicators("procure")

id                               name
-------------------------------  ------------------------------------------------------------------------------------------------------------------------
9.4_PROCUREMENT.COUNTRY.SYSTEMS  Use of procurement country systems (% of total international aid to education)
IQ.PPN.REGQ.S1                   Assessment of country’s adherence to the best regulatory practices at the procurement stage of PPP project (scale 1-100)
IQ.PPN.REGQ.S3                   Assessment of country’s adherence to the best regulatory practices, procurement of unsolicited proposals (scale 1-100)
PI-19                            Competition, value for money and controls in procurement
PI-19.2                          (ii) Use of competitive procurement methods
PI-19.3                          (iii) Public access to complete, reliable and timely procurement information
PI-19.4                          (iv) Existence of an independent administrative procurement complaints system
PI

In [57]:
# Search indicator names for the full word "procurement".
# NOTE(review): the saved output looks the same as for the "procure" search,
# so one of the two cells may be redundant.
wbdata.search_indicators("procurement")

id                               name
-------------------------------  ------------------------------------------------------------------------------------------------------------------------
9.4_PROCUREMENT.COUNTRY.SYSTEMS  Use of procurement country systems (% of total international aid to education)
IQ.PPN.REGQ.S1                   Assessment of country’s adherence to the best regulatory practices at the procurement stage of PPP project (scale 1-100)
IQ.PPN.REGQ.S3                   Assessment of country’s adherence to the best regulatory practices, procurement of unsolicited proposals (scale 1-100)
PI-19                            Competition, value for money and controls in procurement
PI-19.2                          (ii) Use of competitive procurement methods
PI-19.3                          (iii) Public access to complete, reliable and timely procurement information
PI-19.4                          (iv) Existence of an independent administrative procurement complaints system
PI

## Notes
* Average years of schooling is calculated only every 5 years
* The United States doesn't have much of this data so far
