# Fetching data from GHO


In [1]:
from ghoclient import GHOSession, index
import pandas as pd
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Session object from ghoclient; the data requests in the cells below all go through GC.
GC = GHOSession()

## Listing indicator codes

In [4]:
# Ask the session for the indicator catalogue as a DataFrame.
codes = GC.get_data_codes(format='dataframe')

# Report the catalogue size, then preview the first rows as the cell output.
print("Number of available indicators:", len(codes))
codes.head()

Number of available indicators: 3466


Unnamed: 0,@Label,@DisplaySequence,@URL,Attr,Display
0,MDG_0000000001,2,https://www.who.int/data/gho/indicator-metadat...,"[{'@Category': 'DISPLAY_FR', 'Value': {'Displa...",Infant mortality rate (probability of dying be...
1,MDG_0000000003,5,https://www.who.int/data/gho/indicator-metadat...,"[{'@Category': 'DEFINITION_XML', 'Value': {'Di...",Adolescent birth rate (per 1000 women aged 15-...
2,MDG_0000000005,10,https://www.who.int/data/gho/indicator-metadat...,"[{'@Category': 'CATEGORY', 'Value': {'Display'...",Contraceptive prevalence (%)
3,MDG_0000000006,5,https://www.who.int/data/gho/indicator-metadat...,"[{'@Category': 'CATEGORY', 'Value': {'Display'...",Unmet need for family planning (%)
4,MDG_0000000007,5,https://www.who.int/data/gho/indicator-metadat...,"[{'@Category': 'DISPLAY_FR', 'Value': {'Displa...",Under-five mortality rate (probability of dyin...


## Searching by keyword
Since the codes are not exactly mnemonic, we can search for all codes about tuberculosis, for example.

In [15]:
# Build the search index from the indicator catalogue before querying it.
index.build_index(codes)

# Run the keyword search and wrap the hits in a DataFrame for tabular display.
results = pd.DataFrame(index.search('tuberculosis'))
results

Unnamed: 0,code,description
0,TB_1,Tuberculosis treatment coverage
1,UHC_TB_DT,Tuberculosis detection and treatment
2,WHS3_522,Number of reported cases of tuberculosis
3,TB_e_prev_num,Number of prevalent tuberculosis cases
4,TB_e_inc_num,Number of incident tuberculosis cases
5,TB_tot_newrel,Tuberculosis - new and relapse cases
6,TB_newinc,Tuberculosis - new and relapse cases
7,TB_c_newinc,Tuberculosis - new and relapse cases
8,TB_effective_treatment_coverage,Tuberculosis effective treatment coverage (%)
9,MDG_0000000022,Tuberculosis detection rate under DOTS (%)


Let's look at the "Incidence of tuberculosis (per 100 000 population per year)": `MDG_0000000020`

In [14]:
# Download every record for the indicator, then keep only the rows whose
# REGION column equals 'AFR'.
data = GC.fetch_data_from_codes(code='MDG_0000000020')
data = data[data.REGION.eq('AFR')]
data

Unnamed: 0,GHO,PUBLISHSTATE,YEAR,REGION,WORLDBANKINCOMEGROUP,COUNTRY,Display Value,Numeric,Low,High,Comments
15,MDG_0000000020,PUBLISHED,2006,AFR,,,356 [312-404],356.0,312.0,404.0,
16,MDG_0000000020,PUBLISHED,2013,AFR,,,291 [258-326],291.0,258.0,326.0,
17,MDG_0000000020,PUBLISHED,2015,AFR,,,270 [240-302],270.0,240.0,302.0,
40,MDG_0000000020,PUBLISHED,2004,AFR,,AGO,350 [227-500],350.0,227.0,500.0,
41,MDG_0000000020,PUBLISHED,2013,AFR,,AGO,376 [243-537],376.0,243.0,537.0,
...,...,...,...,...,...,...,...,...,...,...,...
4056,MDG_0000000020,PUBLISHED,2018,AFR,,UGA,200 [118-304],200.0,118.0,304.0,
4073,MDG_0000000020,PUBLISHED,2013,AFR,,ZAF,1110 [770-1500],1110.0,770.0,1500.0,
4074,MDG_0000000020,PUBLISHED,2016,AFR,,ZAF,805 [561-1090],805.0,561.0,1090.0,
4075,MDG_0000000020,PUBLISHED,2013,AFR,,ZMB,437 [283-625],437.0,283.0,625.0,


Now let's find indicators related to water

In [17]:
# Keep the raw search hits in `water_codes`: the availability loop further down
# iterates over them and reads each hit's 'code' and 'description' keys.
water_codes = index.search('water')
# The DataFrame is built only for display; it is not assigned to a name.
pd.DataFrame(water_codes)

Unnamed: 0,code,description
0,WAS_0000000001,Access to improved drinking water sources
1,EQ_HANDWASHING,Households with soap and water at a handwashin...
2,WSH_10_WAT,Number of diarrhoea deaths from inadequate water
3,WSH_20_WAT,Attributable fraction of diarrhoea to inadequa...
4,WSH_30_WAT,Number of diarrhoea DALYs from inadequate water
5,RADON_Q405,Radon in national drinking-water regulations
6,WHS5_122,Population using improved drinking-water sourc...
7,WSH_5,"Water, sanitation and hygiene attributable DAL..."
8,EQ_WATER,Population using improved drinking-water sourc...
9,EQ_WATERIMPROVED,Households using an improved drinking-water so...


Not all indicators are available for all countries in recent years, so let's check what is available for each of the codes found above.

In [24]:
# Row-count threshold above which an indicator is reported as available
# "on all countries".
# NOTE(review): this compares a number of rows, not a number of distinct
# countries, so indicators with several rows per country can pass it with
# fewer countries -- confirm 54 is the intended cut-off.
MIN_ROWS_ALL_COUNTRIES = 54

for c in water_codes:
    print(f"Checking {c['code']}: {c['description']}")
    data = GC.fetch_data_from_codes(code=c['code'])
    try:
        # Restrict to the AFR region *before* looking for the latest year, so
        # the year reported is the latest one that actually has rows for
        # Africa rather than the latest year of any region.
        africa = data[data.REGION == 'AFR']
        data = africa[africa.YEAR == africa.YEAR.max()]
    except AttributeError as e:
        # The fetched frame lacks the REGION/YEAR columns. Skip the remaining
        # checks: `data` is still the unfiltered frame here, so testing its
        # length would be meaningless and reading `data.YEAR` would raise again.
        print("\tno data available:\n\t  ", e)
        continue
    if data.empty:
        # The columns exist but no row belongs to the AFR region.
        print("\tno data available:\n\t   no rows for region AFR")
        continue
    print(f"\tLatest year available for {c['code']} in Africa: {data.YEAR.max()}")
    if len(data) >= MIN_ROWS_ALL_COUNTRIES:
        print(f"\tCode available on all countries for {data.YEAR.max()}")

Checking WAS_0000000001: Access to improved drinking water sources
	no data available:
	   'DataFrame' object has no attribute 'REGION'
Checking EQ_HANDWASHING: Households with soap and water at a handwashing facility (%)
	no data available:
	   'DataFrame' object has no attribute 'REGION'
Checking WSH_10_WAT: Number of diarrhoea deaths from inadequate water
	Latest year available for WSH_10_WAT in Africa: 2016
	Code available on all countries for 2016
Checking WSH_20_WAT: Attributable fraction of diarrhoea to inadequate water
	Latest year available for WSH_20_WAT in Africa: 2016
Checking WSH_30_WAT: Number of diarrhoea DALYs from inadequate water
	Latest year available for WSH_30_WAT in Africa: 2016
	Code available on all countries for 2016
Checking RADON_Q405: Radon in national drinking-water regulations
	Latest year available for RADON_Q405 in Africa: 2019
Checking WHS5_122: Population using improved drinking-water sources (%)
	no data available:
	   'DataFrame' object has no attrib

In [23]:
# Fetch the indicator, then keep the rows that are both in the AFR region and
# in the most recent year present in the fetched frame (the maximum is taken
# over all regions, before the region filter applies).
data = GC.fetch_data_from_codes(code='WSH_WATER_BASIC')
data = data[data.REGION.eq('AFR') & data.YEAR.eq(data.YEAR.max())]
data

Unnamed: 0,GHO,PUBLISHSTATE,YEAR,REGION,COUNTRY,RESIDENCEAREATYPE,Display Value,Numeric,Low,High,Comments
159,WSH_WATER_BASIC,PUBLISHED,2017,AFR,DZA,RUR,89.0,88.69096,,,
160,WSH_WATER_BASIC,PUBLISHED,2017,AFR,DZA,TOTL,94.0,93.55589,,,
161,WSH_WATER_BASIC,PUBLISHED,2017,AFR,DZA,URB,95.0,95.44293,,,
267,WSH_WATER_BASIC,PUBLISHED,2017,AFR,AGO,RUR,27.0,27.44429,,,
268,WSH_WATER_BASIC,PUBLISHED,2017,AFR,AGO,TOTL,56.0,55.84290,,,
...,...,...,...,...,...,...,...,...,...,...,...
10420,WSH_WATER_BASIC,PUBLISHED,2017,AFR,ZMB,TOTL,60.0,59.96376,,,
10421,WSH_WATER_BASIC,PUBLISHED,2017,AFR,ZMB,URB,84.0,83.86312,,,
10473,WSH_WATER_BASIC,PUBLISHED,2017,AFR,ZWE,RUR,50.0,49.80476,,,
10474,WSH_WATER_BASIC,PUBLISHED,2017,AFR,ZWE,TOTL,64.0,64.05123,,,
