# Fetching data from GHO


In [1]:
from ghoclient import GHOSession, index
import pandas as pd
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
GC = GHOSession()

## Listing indicator codes

In [3]:
codes = GC.get_data_codes(format='dataframe')
codes.head()

Unnamed: 0,@Label,@DisplaySequence,@URL,Attr,Display
0,MDG_0000000001,2,https://www.who.int/data/gho/indicator-metadat...,"[{'@Category': 'DISPLAY_FR', 'Value': {'Displa...",Infant mortality rate (probability of dying be...
1,MDG_0000000003,5,https://www.who.int/data/gho/indicator-metadat...,"[{'@Category': 'DEFINITION_XML', 'Value': {'Di...",Adolescent birth rate (per 1000 women aged 15-...
2,MDG_0000000005,10,https://www.who.int/data/gho/indicator-metadat...,"[{'@Category': 'CATEGORY', 'Value': {'Display'...",Contraceptive prevalence (%)
3,MDG_0000000006,5,https://www.who.int/data/gho/indicator-metadat...,"[{'@Category': 'CATEGORY', 'Value': {'Display'...",Unmet need for family planning (%)
4,MDG_0000000007,5,https://www.who.int/data/gho/indicator-metadat...,"[{'@Category': 'DISPLAY_FR', 'Value': {'Displa...",Under-five mortality rate (probability of dyin...


## Serching by keyword
Since the codes are not exactly mnemonic, we can search for all codes about tuberculosis, for example.

In [9]:
index.build_index(codes)
results = index.search('tuberculosis')
results = pd.DataFrame(results)
results

Unnamed: 0,code,description
0,TB_1,Tuberculosis treatment coverage
1,UHC_TB_DT,Tuberculosis detection and treatment
2,WHS3_522,Number of reported cases of tuberculosis
3,TB_e_prev_num,Number of prevalent tuberculosis cases
4,TB_e_inc_num,Number of incident tuberculosis cases
5,TB_tot_newrel,Tuberculosis - new and relapse cases
6,TB_newinc,Tuberculosis - new and relapse cases
7,TB_c_newinc,Tuberculosis - new and relapse cases
8,TB_effective_treatment_coverage,Tuberculosis effective treatment coverage (%)
9,MDG_0000000022,Tuberculosis detection rate under DOTS (%)


Let's look at the "Number of reported cases of tuberculosis (per 100 000 population per year)": `WHS3_522`

In [10]:
data = GC.fetch_data_from_codes(code='WHS3_522')
data = data[(data.REGION=='AFR')&(data.YEAR==data.YEAR.max())]
data

HTTPError: HTTP Error 403: Forbidden

Now let's find indicators related to water

In [13]:
water_codes = index.search('water')
water_codes

[{'code': 'WAS_0000000001',
  'description': 'Access to improved drinking water sources'},
 {'code': 'EQ_HANDWASHING',
  'description': 'Households with soap and water at a handwashing facility (%)'},
 {'code': 'WSH_10_WAT',
  'description': 'Number of diarrhoea deaths from inadequate water'},
 {'code': 'WSH_20_WAT',
  'description': 'Attributable fraction of diarrhoea to inadequate water'},
 {'code': 'WSH_30_WAT',
  'description': 'Number of diarrhoea DALYs from inadequate water'},
 {'code': 'RADON_Q405',
  'description': 'Radon in national drinking-water regulations'},
 {'code': 'WAS_0000000001',
  'description': 'Access to improved drinking water sources'},
 {'code': 'EQ_HANDWASHING',
  'description': 'Households with soap and water at a handwashing facility (%)'},
 {'code': 'WSH_10_WAT',
  'description': 'Number of diarrhoea deaths from inadequate water'},
 {'code': 'WSH_20_WAT',
  'description': 'Attributable fraction of diarrhoea to inadequate water'},
 {'code': 'WSH_30_WAT',
  '

Not all indicators are available for all countries in recent years, so we can easily check what's available.

In [16]:
for c in water_codes:
    print(f"Checking {c['code']}")
    data = GC.fetch_data_from_codes(code=c['code'])
    try:
        data = data[(data.REGION=='AFR')&(data.YEAR==data.YEAR.max())]
    except AttributeError:
        print("no data available")
    if len(data) >=54:
        print(f"Code available on all countries for {data.YEAR.max()}")

Checking WAS_0000000001
no data available
Checking EQ_HANDWASHING
no data available
Checking WSH_10_WAT
Code available on all countries for 2016
Checking WSH_20_WAT
Checking WSH_30_WAT
Code available on all countries for 2016
Checking RADON_Q405
no data available
Checking WAS_0000000001
no data available
Checking EQ_HANDWASHING
no data available
Checking WSH_10_WAT
Code available on all countries for 2016
Checking WSH_20_WAT
Checking WSH_30_WAT
Code available on all countries for 2016
Checking RADON_Q405
no data available
Checking WHS5_122
no data available
Checking WSH_5
Checking EQ_WATER
Checking EQ_WATERIMPROVED
Code available on all countries for 2013
Checking EQ_WATERPIPED
Checking WSH_09
no data available
Checking WHS5_122
no data available
Checking WSH_5
Checking EQ_WATER
Checking EQ_WATERIMPROVED
Code available on all countries for 2013
Checking EQ_WATERPIPED
Checking WSH_09
no data available
Checking WSH_WATER_SAFELY_MANAGED
Checking WSH_WATER_BASIC
Code available on all count

In [17]:
data = GC.fetch_data_from_codes(code='WSH_WATER_BASIC')
data = data[(data.REGION=='AFR')&(data.YEAR==data.YEAR.max())]
data

Unnamed: 0,GHO,PUBLISHSTATE,YEAR,REGION,COUNTRY,RESIDENCEAREATYPE,Display Value,Numeric,Low,High,Comments
159,WSH_WATER_BASIC,PUBLISHED,2017,AFR,DZA,RUR,89.0,88.69096,,,
160,WSH_WATER_BASIC,PUBLISHED,2017,AFR,DZA,TOTL,94.0,93.55589,,,
161,WSH_WATER_BASIC,PUBLISHED,2017,AFR,DZA,URB,95.0,95.44293,,,
267,WSH_WATER_BASIC,PUBLISHED,2017,AFR,AGO,RUR,27.0,27.44429,,,
268,WSH_WATER_BASIC,PUBLISHED,2017,AFR,AGO,TOTL,56.0,55.84290,,,
...,...,...,...,...,...,...,...,...,...,...,...
10420,WSH_WATER_BASIC,PUBLISHED,2017,AFR,ZMB,TOTL,60.0,59.96376,,,
10421,WSH_WATER_BASIC,PUBLISHED,2017,AFR,ZMB,URB,84.0,83.86312,,,
10473,WSH_WATER_BASIC,PUBLISHED,2017,AFR,ZWE,RUR,50.0,49.80476,,,
10474,WSH_WATER_BASIC,PUBLISHED,2017,AFR,ZWE,TOTL,64.0,64.05123,,,
