# Part 1: Plotting World Bank data on a world map using Python

## Import the necessary libraries

If you haven't used the Python libraries listed below, you will need to install them first by typing the following in Anaconda's or Python's command line: 'pip install wbdata', 'pip install pandas', etc.

Documentation for wbdata, the Python interface to the World Bank data API: http://wbdata.readthedocs.org/en/latest/index.html#

To set up the absolutely awesome plotly library for interactive web plots, start here: https://plot.ly/python/


In [105]:
%matplotlib inline

import wbdata
import pandas as pd
import numpy as np
import datetime
import plotly.plotly as py


## Explore the database

In [106]:
# Explore the database: list the available World Bank data sources.
# Each output row is a numeric source id followed by the source name.
wbdata.get_source()

11	Africa Development Indicators
36	Statistical Capacity Indicators
31	Country Policy and Institutional Assessment (CPIA) 
41	Country Partnership Strategy for India 
26	Corporate Scorecard
1 	Doing Business
30	Exporter Dynamics Database: Country-Year
12	Education Statistics
13	Enterprise Surveys
28	Global Findex ( Global Financial Inclusion database)
33	G20 Basic Set of Financial Inclusion Indicators
14	Gender Statistics
15	Global Economic Monitor
27	GEP Economic Prospects
32	Global Financial Development
21	Global Economic Monitor (GEM) Commodities
34	Global Partnership for Education
29	Global Social Protection
16	Health Nutrition and Population Statistics
39	Health Nutrition and Population Statistics by Wealth Quintile
40	Health Nutrition and Population Statistics: Population estimates and projections
18	International Development Association - Results Measurement System
45	INDO-DAPOER
6 	International Debt Statistics
25	Jobs for Knowledge Platform
37	LAC Equity Lab
19	Millennium Devel

In [120]:
# List the indicators available in one selected data source.
# `source` takes a numeric source id, as printed by wbdata.get_source();
# each output row is an indicator code followed by its description.
wbdata.get_indicator(source=5)

SN.SH.STA.MALN.ZS	Sub-National Malnutrition prevalence, weight for age (% of children under 5)
SN.SH.STA.OWGH.ZS	Sub-National Prevalence of overweight (% of children under 5)
SN.SH.STA.STNT.ZS	Sub-National Malnutrition prevalence, height for age (% of children under 5)
SN.SH.STA.WAST.ZS	Sub-National Prevalence of wasting (% of children under 5)
SN.SH.SVR.WAST.ZS	Sub-National Prevalence of severe wasting, weight for height (% of children under 5)


In [108]:
# WHICH INDICATOR?
# Search for a given indicator by a phrase in its name. The code in the first
# output column (e.g. SP.DYN.LE00.IN) is what goes into `indicatorCode` below.
wbdata.search_indicators("life expectancy at birth")

SP.DYN.LE00.FE.IN	Life expectancy at birth, female (years)
SP.DYN.LE00.IN   	Life expectancy at birth, total (years)
SP.DYN.LE00.MA.IN	Life expectancy at birth, male (years)


In [109]:
# INPUT PARAMETERS
# These globals drive every later cell: the data request, the column names
# and the labels on the map.
indicatorCode  = 'SP.DYN.LE00.IN'   # World Bank indicator code (life expectancy at birth, total)
indicatorLabel = 'Life expectancy'  # column name given to the indicator; also used in the map title
unit           = 'Years'            # used as the colorbar title on the map
yearLow        = 2010               # first year of the requested date range; also shown in the map title
yearHigh       = 2010               # last year of the requested date range

In [110]:
# WHICH COUNTRY CODE?
# Download the country codes and regional metadata (display=False returns the
# records instead of printing them) and load them into a DataFrame.
countryInfo = wbdata.get_country(country_id=None, display=False)
dfInfo = pd.DataFrame(countryInfo)
# Preview the first five rows.
dfInfo.head(5)

Unnamed: 0,adminregion,capitalCity,id,incomeLevel,iso2Code,latitude,lendingType,longitude,name,region
0,"{u'id': u'', u'value': u''}",Oranjestad,ABW,"{u'id': u'NOC', u'value': u'High income: nonOE...",AW,12.5167,"{u'id': u'LNX', u'value': u'Not classified'}",-70.0167,Aruba,"{u'id': u'LCN', u'value': u'Latin America & Ca..."
1,"{u'id': u'SAS', u'value': u'South Asia'}",Kabul,AFG,"{u'id': u'LIC', u'value': u'Low income'}",AF,34.5228,"{u'id': u'IDX', u'value': u'IDA'}",69.1761,Afghanistan,"{u'id': u'SAS', u'value': u'South Asia'}"
2,"{u'id': u'', u'value': u''}",,AFR,"{u'id': u'NA', u'value': u'Aggregates'}",A9,,"{u'id': u'', u'value': u'Aggregates'}",,Africa,"{u'id': u'NA', u'value': u'Aggregates'}"
3,"{u'id': u'SSA', u'value': u'Sub-Saharan Africa...",Luanda,AGO,"{u'id': u'UMC', u'value': u'Upper middle income'}",AO,-8.81155,"{u'id': u'IBD', u'value': u'IBRD'}",13.242,Angola,"{u'id': u'SSF', u'value': u'Sub-Saharan Africa..."
4,"{u'id': u'ECA', u'value': u'Europe & Central A...",Tirane,ALB,"{u'id': u'UMC', u'value': u'Upper middle income'}",AL,41.3317,"{u'id': u'IBD', u'value': u'IBRD'}",19.8172,Albania,"{u'id': u'ECS', u'value': u'Europe & Central A..."


In [122]:
# Obtain a dataframe of country codes that are real countries and not aggregates.
# Each entry of dfInfo['region'] is a dict with 'id' and 'value' keys; aggregate
# rows (e.g. "Africa") carry the value 'Aggregates' and are skipped.
ccodes = []
cnames = []
for code, name, region in zip(dfInfo['id'], dfInfo['name'], dfInfo['region']):
    if str(region['value']) != 'Aggregates':
        ccodes.append(code)
        cnames.append(name)
# One row per real country: index = country name, single column 'id' = 3-letter code.
dfCC = pd.DataFrame(ccodes, index=cnames, columns=['id'])
dfCC.iloc[:5]

Unnamed: 0,id
Aruba,ABW
Afghanistan,AFG
Angola,AGO
Albania,ALB
Andorra,AND


## Obtain data from World Bank

In [112]:
# GIVEN THE LIST OF COUNTRY CODES, YEAR(S) AND INDICATOR(S), OBTAIN DATA FROM DATABASE:

# Map the indicator code to the column name it gets in the returned DataFrame.
indicators = {indicatorCode: indicatorLabel}
# Requested date range: 1 Jan of yearLow through 1 Dec of yearHigh.
data_date = (datetime.datetime(yearLow, 1, 1), datetime.datetime(yearHigh, 12, 1))

df = wbdata.get_dataframe(indicators, country=ccodes, data_date=data_date,
                          convert_date=False, keep_levels=False)

# Rebuild the result as a two-column frame that carries the country name both
# as a column and as the index (df.index holds the country names here).
d = {indicatorLabel: list(df[indicatorLabel]), 'COUNTRY': list(df.index)}
# Fix the column order explicitly: a later cell renames columns by position,
# so the order must not depend on how the dict happens to be ordered.
dff = pd.DataFrame(d, columns=['COUNTRY', indicatorLabel])
dff.index = dff['COUNTRY']
dff.iloc[:5]

Unnamed: 0_level_0,COUNTRY,Life expectancy
COUNTRY,Unnamed: 1_level_1,Unnamed: 2_level_1
Aruba,Aruba,74.952024
Andorra,Andorra,
Afghanistan,Afghanistan,59.600098
Angola,Angola,50.654171
Albania,Albania,76.978512


In [113]:
# ADD COUNTRY CODE TO DATAFRAME
# Join the indicator values (dff) with the country codes (dfCC) on their shared
# index, the country name. The dff columns are selected by name so that the
# positional rename below is correct whatever order dff stores them in.
result = pd.concat([dff[['COUNTRY', indicatorLabel]], dfCC['id']],
                   axis=1, ignore_index=True)
# ignore_index=True leaves the columns labelled 0, 1, 2; restore meaningful names.
result.columns = ['COUNTRY', indicatorLabel, 'CODE']
# Drop rows with no indicator value, or with no match in the other frame.
result = result.dropna()
result.iloc[:5]

Unnamed: 0,COUNTRY,Life expectancy,CODE
Afghanistan,Afghanistan,59.600098,AFG
Albania,Albania,76.978512,ALB
Algeria,Algeria,70.61661,DZA
Angola,Angola,50.654171,AGO
Antigua and Barbuda,Antigua and Barbuda,75.333902,ATG


## Plot data on a world map

In [115]:
# GENERATE CHOROPLETH MAP
# A single choropleth trace built from `result` (the frame produced by the
# "ADD COUNTRY CODE TO DATAFRAME" cell): countries are located by their
# 3-letter code in 'CODE', coloured by the indicator value, and labelled with
# the country name on hover.

data = [dict(
    type='choropleth',
    locations=result['CODE'],
    z=result[indicatorLabel],
    text=result['COUNTRY'],
    # Colour stops as [position in 0..1, colour]; reversescale flips the direction.
    colorscale=[
        [0, "rgb(5, 10, 172)"],
        [0.35, "rgb(40, 60, 190)"],
        [0.5, "rgb(70, 100, 245)"],
        [0.6, "rgb(90, 120, 245)"],
        [0.7, "rgb(106, 137, 247)"],
        [1, "rgb(220, 220, 220)"],
    ],
    autocolorscale=False,
    reversescale=True,
    # Thin grey borders between countries.
    marker=dict(
        line=dict(
            color='rgb(180,180,180)',
            width=0.5
        )
    ),
    colorbar=dict(
        autotick=False,
        tickprefix=' ',
        title=unit
    ),
)]

layout = dict(
    # Title: indicator label and year, plus an HTML link to the data source.
    title=(
        indicatorLabel + ' of babies born in ' + str(yearLow)
        + '<br>Data source: '
        + '<a href="http://www.worldbank.org/">'
        + 'World Bank</a>'
    ),
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection=dict(
            type='Mercator'
        )
    )
)

fig = dict(data=data, layout=layout)
# Send the figure to plotly under the name 'd3-world-map' and show it in the notebook.
py.iplot(fig, validate=False, filename='d3-world-map')

With the modified command below, the plot opens in a new browser window and comes with a working link that you can use in your own web page. So cool:

In [None]:
# Same figure and filename as the inline call above, but using py.plot
# instead of py.iplot.
py.plot( fig, validate=False, filename='d3-world-map' )