# Get development indicators from World Bank Databank

10 October 2019

API documentation can be accessed here: 

- https://datahelpdesk.worldbank.org/knowledgebase/articles/889392-about-the-indicators-api-documentation
- https://datahelpdesk.worldbank.org/knowledgebase/articles/898590-country-api-queries
- https://datahelpdesk.worldbank.org/knowledgebase/articles/898599-indicator-api-queries


- To request all countries: `http://api.worldbank.org/v2/country?format=json`
- general format for country call: `http://api.worldbank.org/v2/country/<ISO_CODE>?format=json`
- To request all indicators: `http://api.worldbank.org/v2/indicator?format=json`
- general format for indicator call: `http://api.worldbank.org/v2/indicator/<INDICATOR_CODE>?format=json`


In [1]:
import requests
import pandas as pd

## Get all WB countries

https://datahelpdesk.worldbank.org/knowledgebase/articles/898590-country-api-queries

In [2]:
# Request the country list in a single page (per_page=400).
allcountries_url = "http://api.worldbank.org/v2/country?format=json&per_page=400"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
allcountries_raw = requests.get(allcountries_url, timeout=60)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
allcountries_raw.raise_for_status()

# The JSON payload is a list whose second element holds the country records;
# the first element is the paging metadata (per the World Bank API docs).
allcountries_payload = allcountries_raw.json()
allcountries_meta = allcountries_payload[0]
allcountries = allcountries_payload[1]

# Guard against silent truncation if the list ever outgrows one page.
if int(allcountries_meta.get('pages', 1)) > 1:
    raise RuntimeError(
        "Country list spans more than one page; increase per_page or page through the results."
    )

In [3]:
def _country_row(entry):
    """Flatten one country record from the API into a single-level dict.

    The nested 'region', 'adminregion', 'incomeLevel' and 'lendingType'
    objects are each expanded into three prefixed keys (id, iso2code and
    name/value). Key order determines the column order of the DataFrame.
    """
    region = entry['region']
    admin_region = entry['adminregion']
    income_level = entry['incomeLevel']
    lending_type = entry['lendingType']
    return {
        'id': entry['id'],
        'iso2code': entry['iso2Code'],
        'name': entry['name'],
        'region_id': region['id'],
        'region_iso2code': region['iso2code'],
        'region_name': region['value'],
        'adminregion_id': admin_region['id'],
        'adminregion_iso2code': admin_region['iso2code'],
        'adminregion_name': admin_region['value'],
        'incomelevel_id': income_level['id'],
        'incomelevel_iso2code': income_level['iso2code'],
        'incomelevel_value': income_level['value'],
        'lendingtype_id': lending_type['id'],
        'lendingtype_iso2code': lending_type['iso2code'],
        'lendingtype_value': lending_type['value'],
        'capitalcity': entry['capitalCity'],
        'longitude': entry['longitude'],
        'latitude': entry['latitude'],
    }


# One flat record per country, built once and handed to pandas in a single call.
allcountries_todf = [_country_row(entry) for entry in allcountries]

allcountries_df = pd.DataFrame(allcountries_todf)
allcountries_df.head()

Unnamed: 0,id,iso2code,name,region_id,region_iso2code,region_name,adminregion_id,adminregion_iso2code,adminregion_name,incomelevel_id,incomelevel_iso2code,incomelevel_value,lendingtype_id,lendingtype_iso2code,lendingtype_value,capitalcity,longitude,latitude
0,ABW,AW,Aruba,LCN,ZJ,Latin America & Caribbean,,,,HIC,XD,High income,LNX,XX,Not classified,Oranjestad,-70.0167,12.5167
1,AFG,AF,Afghanistan,SAS,8S,South Asia,SAS,8S,South Asia,LIC,XM,Low income,IDX,XI,IDA,Kabul,69.1761,34.5228
2,AFR,A9,Africa,,,Aggregates,,,,,,Aggregates,,,Aggregates,,,
3,AGO,AO,Angola,SSF,ZG,Sub-Saharan Africa,SSA,ZF,Sub-Saharan Africa (excluding high income),LMC,XN,Lower middle income,IBD,XF,IBRD,Luanda,13.242,-8.81155
4,ALB,AL,Albania,ECS,Z7,Europe & Central Asia,ECA,7E,Europe & Central Asia (excluding high income),UMC,XT,Upper middle income,IBD,XF,IBRD,Tirane,19.8172,41.3317


In [4]:
# Display the column labels of the flattened country table.
allcountries_df.columns

Index(['id', 'iso2code', 'name', 'region_id', 'region_iso2code', 'region_name',
       'adminregion_id', 'adminregion_iso2code', 'adminregion_name',
       'incomelevel_id', 'incomelevel_iso2code', 'incomelevel_value',
       'lendingtype_id', 'lendingtype_iso2code', 'lendingtype_value',
       'capitalcity', 'longitude', 'latitude'],
      dtype='object')

In [5]:
# Columns kept for the flat country listing (descriptive names only, no codes).
country_columns = [
    'id', 'iso2code', 'name', 'capitalcity', 'latitude', 'longitude',
    'region_name', 'adminregion_name', 'incomelevel_value', 'lendingtype_value',
]
countries_df = allcountries_df[country_columns]

# One lookup table per classification: the distinct (id, iso2code, label) triples.
region_columns = ['region_id', 'region_iso2code', 'region_name']
region_df = allcountries_df[region_columns].drop_duplicates()

adminregion_columns = ['adminregion_id', 'adminregion_iso2code', 'adminregion_name']
adminregion_df = allcountries_df[adminregion_columns].drop_duplicates()

incomelevel_columns = ['incomelevel_id', 'incomelevel_iso2code', 'incomelevel_value']
incomelevel_df = allcountries_df[incomelevel_columns].drop_duplicates()

lendingtype_columns = ['lendingtype_id', 'lendingtype_iso2code', 'lendingtype_value']
lendingtype_df = allcountries_df[lendingtype_columns].drop_duplicates()

In [6]:
# Write the country listing and each classification lookup table to CSV,
# omitting the positional row index from every file.
country_exports = [
    (countries_df, '../Data/WorldBank/Raw_API/countries_list.csv'),
    (region_df, '../Data/WorldBank/Raw_API/region_codes.csv'),
    (adminregion_df, '../Data/WorldBank/Raw_API/adminregion_codes.csv'),
    (incomelevel_df, '../Data/WorldBank/Raw_API/incomelevel_codes.csv'),
    (lendingtype_df, '../Data/WorldBank/Raw_API/lendinglevel_codes.csv'),
]
for export_frame, export_path in country_exports:
    export_frame.to_csv(export_path, index=False)

## Get all WB indicators

https://datahelpdesk.worldbank.org/knowledgebase/articles/898599-indicator-api-queries

In [7]:
# Request the indicator catalogue in a single page (per_page=20000).
allindicators_url = "http://api.worldbank.org/v2/indicator?format=json&per_page=20000"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
allindicators_raw = requests.get(allindicators_url, timeout=120)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
allindicators_raw.raise_for_status()

# The JSON payload is a list whose second element holds the indicator records;
# the first element is the paging metadata (per the World Bank API docs).
allindicators_payload = allindicators_raw.json()
allindicators_meta = allindicators_payload[0]
allindicators = allindicators_payload[1]

# Guard against silent truncation once the catalogue outgrows one page.
if int(allindicators_meta.get('pages', 1)) > 1:
    raise RuntimeError(
        "Indicator list spans more than one page; increase per_page or page through the results."
    )

In [8]:
# Number of indicator records returned by the request above.
len(allindicators)

17298

In [9]:
allindicators_todf = []   # one flat record per indicator
indicator_topics = []     # one record per (indicator, topic) pair
source_list = []          # raw source dicts, duplicates included
topic_list = []           # raw topic dicts, duplicates included

for entry in allindicators:
    entry_source = entry['source']
    source_list.append(entry_source)

    # Extending with an empty topic list is a no-op, so no length guard is needed.
    entry_topics = entry['topics']
    topic_list.extend(entry_topics)

    record = {
        'id': entry['id'],
        'name': entry['name'],
        'source': entry_source['value'],
        'source_note': entry['sourceNote'],
        'source_organization': entry['sourceOrganization'],
    }

    for topic in entry_topics:
        # A topic dict lacking 'id' or 'value' is kept as a blank link row.
        try:
            topic_id, topic_value = topic['id'], topic['value']
        except KeyError:
            topic_id, topic_value = '', ''

        indicator_topics.append(
            {'indicator_id': entry['id'], 'topic_id': topic_id, 'topic_value': topic_value}
        )

    allindicators_todf.append(record)

allindicators_df = pd.DataFrame(allindicators_todf).drop_duplicates()
allindicators_df.head()

Unnamed: 0,id,name,source,source_note,source_organization
0,1.0.HCount.1.90usd,Poverty Headcount ($1.90 a day),LAC Equity Lab,The poverty headcount index measures the propo...,LAC Equity Lab tabulations of SEDLAC (CEDLAS a...
1,1.0.HCount.2.5usd,Poverty Headcount ($2.50 a day),LAC Equity Lab,The poverty headcount index measures the propo...,LAC Equity Lab tabulations of SEDLAC (CEDLAS a...
2,1.0.HCount.Mid10to50,Middle Class ($10-50 a day) Headcount,LAC Equity Lab,The poverty headcount index measures the propo...,LAC Equity Lab tabulations of SEDLAC (CEDLAS a...
3,1.0.HCount.Ofcl,Official Moderate Poverty Rate-National,LAC Equity Lab,The poverty headcount index measures the propo...,LAC Equity Lab tabulations of data from Nation...
4,1.0.HCount.Poor4uds,Poverty Headcount ($4 a day),LAC Equity Lab,The poverty headcount index measures the propo...,LAC Equity Lab tabulations of SEDLAC (CEDLAS a...


In [10]:
# Distinct sources, with 'id' cast to int so the sort is numeric rather than
# lexical, then used as the index.
source_list_df = (
    pd.DataFrame(source_list)
    .drop_duplicates()
    .assign(id=lambda frame: frame['id'].astype(int))
    .sort_values(by=['id'])
    .set_index('id')
)

In [11]:
# Distinct topics; rows with missing values are dropped before 'id' is cast to
# int, sorted numerically and used as the index.
topic_list_df = (
    pd.DataFrame(topic_list)
    .drop_duplicates()
    .dropna()
    .assign(id=lambda frame: frame['id'].astype(int))
    .sort_values(by=['id'])
    .set_index('id')
)

In [12]:
# Link table with one row per (indicator, topic) pair collected in the loop above.
indicator_topics_df = pd.DataFrame(indicator_topics)

In [13]:
# Keep only the indicators whose source is the World Development Indicators.
wdi = allindicators_df.loc[allindicators_df['source'].eq('World Development Indicators')]

In [14]:
# Attach topic labels to each WDI indicator (one row per indicator/topic pair),
# then drop the join key and the numeric topic id, leaving only 'topic_value'.
wdi_wtopics = (
    wdi
    .merge(indicator_topics_df, left_on='id', right_on='indicator_id')
    .drop(columns=['indicator_id', 'topic_id'])
)

In [15]:
# Count the WDI indicator/topic rows under each topic label.
wdi_wtopics['topic_value'].value_counts()

Health                               252
Economy & Growth                     247
Private Sector                       165
Gender                               163
Social Protection & Labor            156
Education                            154
Trade                                146
Environment                          135
Public Sector                         97
Climate Change                        76
Aid Effectiveness                     70
Financial Sector                      64
External Debt                         61
Energy & Mining                       50
Infrastructure                        47
Agriculture & Rural Development       44
Social Development                    34
Poverty                               25
Urban Development                     21
Science & Technology                  13
Millenium development goals            5
Name: topic_value, dtype: int64

In [16]:
# Persist the indicator tables as CSV files.
allindicators_df.to_csv('../Data/WorldBank/Raw_API/indicators_list.csv', index=False)
# source_list_df and topic_list_df hold their numeric codes in the index
# (set via set_index('id')), so the index must be written out; with
# index=False these "codes" files would contain only the 'value' column.
source_list_df.to_csv('../Data/WorldBank/Raw_API/indicator-sources_codes.csv', index=True)
topic_list_df.to_csv('../Data/WorldBank/Raw_API/indicator-topics_codes.csv', index=True)
wdi_wtopics.to_csv('../Data/WorldBank/Raw_API/indicators-wdi-wtopics.csv', index=False)