This notebook creates a tibble of all the indicators available in the World Bank database. Only three of these indicators are used, but in principle any of them can be. The indicators can be used with processIndicator.csv to create a long-format tidy table with the composite key (countryCode, year).

In [3]:
library(tidyverse)
library(magrittr)
library(glue)
library(rvest)
library(httr)

In [4]:
# Query the World Bank API for its indicator listing. The response is XML,
# which read_xml() parses into a document tree. Without a per_page argument
# the API returns only the first page of results.
url <- "http://api.worldbank.org/v2/indicator"
minitree <- url %>% read_xml()
minitree

{xml_document}
<indicators page="1" pages="351" per_page="50" total="17517" xmlns:wb="http://www.worldbank.org">
 [1] <wb:indicator id="1.0.HCount.1.90usd">\n  <wb:name>Poverty Headcount ($1 ...
 [2] <wb:indicator id="1.0.HCount.2.5usd">\n  <wb:name>Poverty Headcount ($2. ...
 [3] <wb:indicator id="1.0.HCount.Mid10to50">\n  <wb:name>Middle Class ($10-5 ...
 [4] <wb:indicator id="1.0.HCount.Ofcl">\n  <wb:name>Official Moderate Povert ...
 [5] <wb:indicator id="1.0.HCount.Poor4uds">\n  <wb:name>Poverty Headcount ($ ...
 [6] <wb:indicator id="1.0.HCount.Vul4to10">\n  <wb:name>Vulnerable ($4-10 a  ...
 [7] <wb:indicator id="1.0.PGap.1.90usd">\n  <wb:name>Poverty Gap ($1.90 a da ...
 [8] <wb:indicator id="1.0.PGap.2.5usd">\n  <wb:name>Poverty Gap ($2.50 a day ...
 [9] <wb:indicator id="1.0.PGap.Poor4uds">\n  <wb:name>Poverty Gap ($4 a day) ...
[10] <wb:indicator id="1.0.PSev.1.90usd">\n  <wb:name>Poverty Severity ($1.90 ...
[11] <wb:indicator id="1.0.PSev.2.5usd">\n  <wb:name>Poverty Severi

In [5]:
# Show every attribute on the root node of the first-page response, then pull
# out "total" (the number of indicators available). xml_attr() returns it as
# a character string.
xml_attrs(minitree)
total <- xml_attr(minitree, "total")
total

In [6]:
# Request the indicator listing again, this time with per_page set to the
# total number of indicators, so that every indicator is returned in a single
# XML document.
url <- glue("http://api.worldbank.org/v2/indicator?per_page={total}")
tree <- url %>% read_xml()

In [7]:
# Each child of the root node is one indicator. Read the "id" attribute of
# every child into the character vector all_ids and preview the first few.
children <- xml_children(tree)
all_ids <- children %>% xml_attr("id")
head(all_ids)

In [8]:
# Extract the display name of each indicator.
#
# Each <wb:indicator> node has a <wb:name> child and may hold further child
# elements as well. Calling xml_text() on the indicator node itself would
# concatenate the text of every descendant, so select the <wb:name> child
# first and take only its text.
#
# xml_find_first() returns exactly one result per input node (a missing node,
# whose text is NA, when no <wb:name> exists), so all_names stays the same
# length and order as the ids extracted from the same children.
children <- tree %>% xml_children()
name_nodes <- xml_find_first(children, "./wb:name", ns = xml_ns(tree))
all_names <- xml_text(name_nodes)
all_names %>% head()

In [9]:
# Combine the id and name vectors column-wise into a two-column character
# matrix. Both vectors were extracted from the same child nodes in the same
# order, so row i pairs the id of indicator i with its own name.
available_indicators <- cbind(all_ids = all_ids, all_names = all_names)

In [10]:
# Turn the character matrix into a tibble (the matrix column names become the
# tibble's column names), then write it to indicators.csv in the working
# directory.
available_indicators <- as_tibble(available_indicators)
write_csv(available_indicators, "indicators.csv")