### Retrieve indicators from World Bank Data

*There are a lot of exploratory chunks. Leave them until we know what to exactly retrieve from the API.*

In [1]:
import wbgapi as wb
import pandas as pd

import os

In [2]:
landing_zone = os.getcwd()
temporal = os.path.join(landing_zone, 'temporal')
persistent = os.path.join(landing_zone, 'persistent')

Metadata for the indicators id:

In [3]:
wb.source.info()

id,name,code,concepts,lastupdated
1.0,Doing Business,DBS,3.0,2021-08-18
2.0,World Development Indicators,WDI,3.0,2021-12-16
3.0,Worldwide Governance Indicators,WGI,3.0,2021-09-27
5.0,Subnational Malnutrition Database,SNM,3.0,2016-03-21
6.0,International Debt Statistics,IDS,4.0,2022-01-14
11.0,Africa Development Indicators,ADI,3.0,2013-02-22
12.0,Education Statistics,EDS,3.0,2020-12-20
13.0,Enterprise Surveys,ESY,3.0,2021-04-02
14.0,Gender Statistics,GDS,3.0,2021-12-18
15.0,Global Economic Monitor,GEM,3.0,2020-07-27


In [4]:
wb.topic.info()

id,value
1.0,Agriculture & Rural Development
2.0,Aid Effectiveness
3.0,Economy & Growth
4.0,Education
5.0,Energy & Mining
6.0,Environment
7.0,Financial Sector
8.0,Health
9.0,Infrastructure
10.0,Social Protection & Labor


In [5]:
wb.series.info(wb.topic.members(11)) # Poverty

id,value
SI.SPR.PC40.ZG,"Annualized average growth rate in per capita real survey mean consumption or income, bottom 40% of population (%)"
SI.SPR.PCAP.ZG,"Annualized average growth rate in per capita real survey mean consumption or income, total population (%)"
SI.POV.GINI,Gini index (World Bank estimate)
SI.DST.04TH.20,Income share held by fourth 20%
SI.DST.10TH.10,Income share held by highest 10%
SI.DST.05TH.20,Income share held by highest 20%
SI.DST.FRST.10,Income share held by lowest 10%
SI.DST.FRST.20,Income share held by lowest 20%
SI.DST.02ND.20,Income share held by second 20%
SI.DST.03RD.20,Income share held by third 20%


Choose one of the 21 topics provided by the world bank data for a year range of choice (1960-2020): 

In [6]:
topic_number = 11 # Poverty
years = range(2000, 2019) # Until pre-covid

print(wb.topic.get(topic_number)) # Shows content of topic

{'id': '11', 'value': 'Poverty', 'sourceNote': 'For countries with an active poverty monitoring program, the World Bank—in collaboration with national institutions, other development agencies, and civil society—regularly conducts analytical work to assess the extent and causes of poverty and inequality, examine the impact of growth and public policy, and review household survey data and measurement methods.  Data here includes poverty and inequality measures generated from analytical reports, from national poverty monitoring programs, and from the World Bank’s Development Research Group which has been producing internationally comparable and global poverty estimates and lines since 1990.'}


Generates a dataframe and saves it as .csv-file in the landing zone:

But first, subset the indicators:

In [7]:
wb.region.info(q='asia')

code,name
CEA,East Asia and the Pacific (IFC classification)
CEU,Europe and Central Asia (IFC classification)
CSA,South Asia (IFC classification)
EAP,East Asia & Pacific (excluding high income)
EAS,East Asia & Pacific
ECA,Europe & Central Asia (excluding high income)
ECS,Europe & Central Asia
SAS,South Asia
,8 elements


In [8]:
wb.economy.info(wb.region.members('CEA'))

id,value,region,incomeLevel
ASM,American Samoa,EAS,UMC
BRN,Brunei Darussalam,EAS,HIC
KHM,Cambodia,EAS,LMC
CHN,China,EAS,UMC
FJI,Fiji,EAS,UMC
PYF,French Polynesia,EAS,HIC
GUM,Guam,EAS,HIC
HKG,"Hong Kong SAR, China",EAS,HIC
IDN,Indonesia,EAS,LMC
KIR,Kiribati,EAS,LMC


In [9]:
wb.economy.info(wb.region.members('CSA'))

id,value,region,incomeLevel
BGD,Bangladesh,SAS,LMC
BTN,Bhutan,SAS,LMC
IND,India,SAS,LMC
MDV,Maldives,SAS,UMC
NPL,Nepal,SAS,LMC
LKA,Sri Lanka,SAS,LMC
,6 elements,,


In [27]:
indicators = wb.topic.members(11).union('SI_POV_GINI', 'NE_IMP_GNFS_KN', 'BX_GSR_GNFS_CD', 'NY_GDP_MKTP_CD', 'SP_ADO_TFRT', 'SL_TLF_0714_ZS', 'SP_DYN_LE00_FE_IN', 'SP_DYN_LE00_MA_IN')
countries = wb.region.members('CEA').union(wb.region.members('CSA')) # Union since they are sets

In [None]:
indicators

In [29]:
df = wb.data.DataFrame(indicators, countries, years, columns='series').reset_index() #   seconds

  self.obj[key] = infer_fill_value(value)


In [12]:
df.head()

Unnamed: 0,economy,time,EN.POP.SLUM.UR.ZS,SI.DST.02ND.20,SI.DST.03RD.20,SI.DST.04TH.20,SI.DST.05TH.20,SI.DST.10TH.10,SI.DST.50MD,SI.DST.FRST.10,...,SI.POV.MDIM.IT,SI.POV.MDIM.MA,SI.POV.MDIM.XQ,SI.POV.NAHC,SI.POV.UMIC,SI.POV.UMIC.GP,SI.SPR.PC40,SI.SPR.PC40.ZG,SI.SPR.PCAP,SI.SPR.PCAP.ZG
0,ASM,YR2000,,,,,,,,,...,,,,,,,,,,
1,ASM,YR2001,,,,,,,,,...,,,,,,,,,,
2,ASM,YR2002,,,,,,,,,...,,,,,,,,,,
3,ASM,YR2003,,,,,,,,,...,,,,,,,,,,
4,ASM,YR2004,,,,,,,,,...,,,,,,,,,,


In [26]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 741 entries, 0 to 740
Columns: 319 entries, economy to SP_DYN_LE00_MA_IN
dtypes: float64(317), int32(1), object(1)
memory usage: 1.8+ MB


In [22]:
df['time'] = df['time'].str[-4:].astype(int)

In [23]:
regions = ['CEA', 'CSA']
"wbd_{}_{}_{}.csv".format(topic, regions, years)

"wbd_Poverty_['CEA', 'CSA']_range(2000, 2019).csv"

In [24]:
df.columns = df.columns.str.replace(".", "_")

  df.columns = df.columns.str.replace(".", "_")


In [25]:
df.columns

Index(['economy', 'time', 'BG_GSR_NFSV_GD_ZS', 'BM_GSR_CMCP_ZS',
       'BM_GSR_FCTY_CD', 'BM_GSR_GNFS_CD', 'BM_GSR_INSF_ZS', 'BM_GSR_MRCH_CD',
       'BM_GSR_NFSV_CD', 'BM_GSR_ROYL_CD',
       ...
       'SL_TLF_ACTI_ZS', 'SL_TLF_CACT_FE_ZS', 'SL_TLF_CACT_MA_ZS',
       'SL_UEM_TOTL_FE_ZS', 'SL_UEM_TOTL_MA_ZS', 'SM_POP_REFG',
       'SM_POP_REFG_OR', 'SP_ADO_TFRT', 'SP_DYN_LE00_FE_IN',
       'SP_DYN_LE00_MA_IN'],
      dtype='object', length=319)

In [32]:
df.to_csv(os.path.join(temporal, "wbd.csv"), index=False)

WGI

In [5]:
import pandas as pd

df = pd.read_stata(os.path.join(persistent, "wgi_20220101.dta"))

Unnamed: 0,code,countryname,year,vae,vas,van,var,val,vau,pve,...,rln,rlr,rll,rlu,cce,ccs,ccn,ccr,ccl,ccu
0,ABW,Aruba,1996,,,,,,,,...,,,,,,,,,,
1,ABW,Aruba,1998,,,,,,,,...,,,,,,,,,,
2,ABW,Aruba,2000,,,,,,,,...,,,,,,,,,,
3,ABW,Aruba,2002,,,,,,,,...,,,,,,,,,,
4,ABW,Aruba,2003,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4703,ZWE,Zimbabwe,2016,-1.177471,0.119457,15.0,14.778325,10.344828,22.167488,-0.619938,...,15.0,8.173077,4.326923,11.057693,-1.259222,0.135454,15.0,9.615385,4.807693,12.980769
4704,ZWE,Zimbabwe,2017,-1.195013,0.119808,15.0,14.778325,10.837439,21.674877,-0.708769,...,15.0,7.692307,4.326923,10.096154,-1.271278,0.125116,15.0,9.615385,4.326923,12.500000
4705,ZWE,Zimbabwe,2018,-1.140979,0.123411,15.0,16.908213,12.077294,21.739130,-0.722774,...,15.0,8.173077,5.769231,14.423077,-1.230871,0.127296,15.0,10.096154,5.288462,14.423077
4706,ZWE,Zimbabwe,2019,-1.159892,0.117649,14.0,16.425121,12.077294,21.739130,-0.959749,...,15.0,8.173077,5.288462,14.423077,-1.236825,0.135088,15.0,10.096154,4.326923,15.384615
