In [14]:
from urllib.request import urlopen
import json
import numpy as np
import pandas as pd
import os

# Retrieving the list of available dimensions

In [3]:
response = urlopen("https://ghoapi.azureedge.net/api/Dimension")
json_data = response.read().decode('utf-8', 'replace')

d = json.loads(json_data)
dimGHO = pd.json_normalize(d['value'])
dimGHO.head(15)

Unnamed: 0,Code,Title
0,ADVERTISINGTYPE,SUBSTANCE_ABUSE_ADVERTISING_TYPES
1,AGEGROUP,Age Group
2,ALCOHOLTYPE,Beverage Types
3,AMRGLASSCATEGORY,AMR GLASS Category
4,ARCHIVE,Archive date
5,AWARENESSACTIVITYTYPE,SUBSTANCE_ABUSE_AWARENESS_ACTIVITY_TYPES
6,BACGROUP,SUBSTANCE_ABUSE_BAC_GROUPS
7,BEVERAGETYPE,SUBSTANCE_ABUSE_BEVERAGE_TYPES
8,CAREPATIENT,Patient type
9,CARESECTOR,Care sector


# Retrieving a list of available values for a specific dimension

## Retrieve the list of the `COUNTRY` dimension values

In [4]:
response = urlopen("https://ghoapi.azureedge.net/api/DIMENSION/COUNTRY/DimensionValues")
json_data = response.read().decode('utf-8', 'replace')

d = json.loads(json_data)
df_Code = pd.json_normalize(d['value'])
df_Code.head()

Unnamed: 0,Code,Title,ParentDimension,Dimension,ParentCode,ParentTitle
0,ABW,Aruba,REGION,COUNTRY,AMR,Americas
1,AFG,Afghanistan,REGION,COUNTRY,EMR,Eastern Mediterranean
2,AGO,Angola,REGION,COUNTRY,AFR,Africa
3,AIA,Anguilla,REGION,COUNTRY,AMR,Americas
4,ALB,Albania,REGION,COUNTRY,EUR,Europe


## Filter Africa Region

In [5]:
df_Code_afro = df_Code[df_Code.ParentCode=="AFR"]
df_Code_afro.head(10)

Unnamed: 0,Code,Title,ParentDimension,Dimension,ParentCode,ParentTitle
2,AGO,Angola,REGION,COUNTRY,AFR,Africa
16,BDI,Burundi,REGION,COUNTRY,AFR,Africa
18,BEN,Benin,REGION,COUNTRY,AFR,Africa
19,BFA,Burkina Faso,REGION,COUNTRY,AFR,Africa
33,BWA,Botswana,REGION,COUNTRY,AFR,Africa
34,CAF,Central African Republic,REGION,COUNTRY,AFR,Africa
40,CIV,Côte d'Ivoire,REGION,COUNTRY,AFR,Africa
41,CMR,Cameroon,REGION,COUNTRY,AFR,Africa
42,COD,Democratic Republic of the Congo,REGION,COUNTRY,AFR,Africa
43,COG,Congo,REGION,COUNTRY,AFR,Africa


In [6]:
df_Code_afro.shape

(50, 6)

In [7]:
df_Code_afro.Title.unique()

array(['Angola', 'Burundi', 'Benin', 'Burkina Faso', 'Botswana',
       'Central African Republic', "Côte d'Ivoire", 'Cameroon',
       'Democratic Republic of the Congo', 'Congo', 'Comoros',
       'Cabo Verde', 'Algeria', 'Eritrea', 'Ethiopia', 'Gabon', 'Ghana',
       'Guinea', 'Gambia', 'Guinea-Bissau', 'Equatorial Guinea', 'Kenya',
       'Liberia', 'Lesotho', 'Madagascar', 'Mali', 'Mozambique',
       'Mauritania', 'Mauritius', 'Malawi', 'Mayotte', 'Namibia', 'Niger',
       'Nigeria', 'Reunion', 'Rwanda', 'Senegal', 'Saint Helena',
       'Sierra Leone', 'South Sudan', 'Sao Tome and Principe', 'Eswatini',
       'Seychelles', 'Chad', 'Togo', 'United Republic of Tanzania',
       'Uganda', 'South Africa', 'Zambia', 'Zimbabwe'], dtype=object)

# Retrieving the list of all the indicators in GHO with their code

In [3]:
response = urlopen("https://ghoapi.azureedge.net/api/Indicator")
json_data = response.read().decode('utf-8', 'replace')

d = json.loads(json_data)
df_Indicators = pd.json_normalize(d['value'])
df_Indicators.head(15)

Unnamed: 0,IndicatorCode,IndicatorName,Language
0,AIR_10,Ambient air pollution attributable DALYs per ...,EN
1,AIR_11,Household air pollution attributable deaths,EN
2,AIR_12,Household air pollution attributable deaths in...,EN
3,AIR_13,Household air pollution attributable deaths pe...,EN
4,AIR_14,Household air pollution attributable deaths p...,EN
5,AIR_15,Household air pollution attributable DALYs,EN
6,AIR_16,Household air pollution attributable DALYs in ...,EN
7,AIR_17,Household air pollution attributable DALYs (pe...,EN
8,AIR_18,Household air pollution attributable DALYs pe...,EN
9,AIR_39,Household air pollution attributable DALYs (pe...,EN


In [4]:
df_Indicators.shape

(2182, 3)

# List of all the indicators in GHO containing the word 'Household'

In [10]:
response = urlopen("https://ghoapi.azureedge.net/api/Indicator?$filter=contains(IndicatorName,'Household')")
json_data = response.read().decode('utf-8', 'replace')

d = json.loads(json_data)
df_Indicators2 = pd.json_normalize(d['value'])
df_Indicators2

Unnamed: 0,IndicatorCode,IndicatorName,Language
0,AIR_11,Household air pollution attributable deaths,EN
1,AIR_12,Household air pollution attributable deaths in...,EN
2,AIR_13,Household air pollution attributable deaths pe...,EN
3,AIR_14,Household air pollution attributable deaths p...,EN
4,AIR_15,Household air pollution attributable DALYs,EN
5,AIR_16,Household air pollution attributable DALYs in ...,EN
6,AIR_17,Household air pollution attributable DALYs (pe...,EN
7,AIR_18,Household air pollution attributable DALYs pe...,EN
8,AIR_39,Household air pollution attributable DALYs (pe...,EN
9,AIR_51,Household air pollution attributable death rat...,EN


# Select only the indicators that have a specific title

In this example, the indicator `Ambient air pollution attributable deaths` was queried

In [13]:
response = urlopen("https://ghoapi.azureedge.net/api/Indicator?$filter=IndicatorName%20eq%20'Ambient%20air%20pollution%20attributable%20deaths'")
json_data = response.read().decode('utf-8', 'replace')

d = json.loads(json_data)
df_Indicators3 = pd.json_normalize(d['value'])
df_Indicators3

Unnamed: 0,IndicatorCode,IndicatorName,Language
0,AIR_41,Ambient air pollution attributable deaths,EN


# Retrieving indicator data

Specify an indicator to download by specifying the indicator code. This will return all associated data for that specific indicator.

## Case Study : Average life expectancy at birth (in years)

The indicator code for the ` "Average life expectancy at birth (in years)"` indicator is `WHOSIS_000001`

In [14]:
response = urlopen("https://ghoapi.azureedge.net/api/WHOSIS_000001")
json_data = response.read().decode('utf-8', 'replace')

d = json.loads(json_data)
df = pd.json_normalize(d['value'])
df.head()

Unnamed: 0,Id,IndicatorCode,SpatialDimType,SpatialDim,TimeDimType,TimeDim,Dim1Type,Dim1,Dim2Type,Dim2,...,DataSourceDim,Value,NumericValue,Low,High,Comments,Date,TimeDimensionValue,TimeDimensionBegin,TimeDimensionEnd
0,22847621,WHOSIS_000001,COUNTRY,CAN,YEAR,1920,SEX,BTSX,,,...,,82.8,82.80972,,,WHO life table method: Vital registration,2020-06-04T11:21:09.77+02:00,1920,1920-01-01T00:00:00+01:00,1920-12-31T00:00:00+01:00
1,24614313,WHOSIS_000001,COUNTRY,AFG,YEAR,2000,SEX,MLE,,,...,,54.6,54.57449,,,,2020-12-04T16:59:42.513+01:00,2000,2000-01-01T00:00:00+01:00,2000-12-31T00:00:00+01:00
2,24614317,WHOSIS_000001,COUNTRY,AFG,YEAR,2010,SEX,MLE,,,...,,59.6,59.60036,,,,2020-12-04T16:59:43.013+01:00,2010,2010-01-01T00:00:00+01:00,2010-12-31T00:00:00+01:00
3,24614321,WHOSIS_000001,COUNTRY,AFG,YEAR,2015,SEX,MLE,,,...,,61.0,61.03658,,,,2020-12-04T16:59:43.423+01:00,2015,2015-01-01T00:00:00+01:00,2015-12-31T00:00:00+01:00
4,24614325,WHOSIS_000001,COUNTRY,AFG,YEAR,2019,SEX,MLE,,,...,,63.3,63.28709,,,,2020-12-04T16:59:43.533+01:00,2019,2019-01-01T00:00:00+01:00,2019-12-31T00:00:00+01:00


In [15]:
df_Code.rename(columns={'Code': 'SpatialDim'}, inplace=True)
df_merged = pd.merge(df, df_Code, how='outer', on='SpatialDim')

In [16]:
df_merged.head()

Unnamed: 0,Id,IndicatorCode,SpatialDimType,SpatialDim,TimeDimType,TimeDim,Dim1Type,Dim1,Dim2Type,Dim2,...,Comments,Date,TimeDimensionValue,TimeDimensionBegin,TimeDimensionEnd,Title,ParentDimension,Dimension,ParentCode,ParentTitle
0,22847621.0,WHOSIS_000001,COUNTRY,CAN,YEAR,1920.0,SEX,BTSX,,,...,WHO life table method: Vital registration,2020-06-04T11:21:09.77+02:00,1920,1920-01-01T00:00:00+01:00,1920-12-31T00:00:00+01:00,Canada,REGION,COUNTRY,AMR,Americas
1,24615657.0,WHOSIS_000001,COUNTRY,CAN,YEAR,2000.0,SEX,MLE,,,...,,2020-12-04T17:00:21.56+01:00,2000,2000-01-01T00:00:00+01:00,2000-12-31T00:00:00+01:00,Canada,REGION,COUNTRY,AMR,Americas
2,24615661.0,WHOSIS_000001,COUNTRY,CAN,YEAR,2010.0,SEX,MLE,,,...,,2020-12-04T17:00:21.653+01:00,2010,2010-01-01T00:00:00+01:00,2010-12-31T00:00:00+01:00,Canada,REGION,COUNTRY,AMR,Americas
3,24615665.0,WHOSIS_000001,COUNTRY,CAN,YEAR,2015.0,SEX,MLE,,,...,,2020-12-04T17:00:21.747+01:00,2015,2015-01-01T00:00:00+01:00,2015-12-31T00:00:00+01:00,Canada,REGION,COUNTRY,AMR,Americas
4,24615669.0,WHOSIS_000001,COUNTRY,CAN,YEAR,2019.0,SEX,MLE,,,...,,2020-12-04T17:00:21.86+01:00,2019,2019-01-01T00:00:00+01:00,2019-12-31T00:00:00+01:00,Canada,REGION,COUNTRY,AMR,Americas


In [17]:
df_merged['Title'] = np.where(df_merged.SpatialDim=="AFR", "Africa", df_merged.Title)
df_merged['ParentTitle'] = np.where(df_merged.SpatialDim=="AFR", "Africa", df_merged.ParentTitle)
df_merged_afro = df_merged[df_merged.ParentTitle=="Africa"][["Title", 'IndicatorCode', 'Dim1', 'TimeDim', 'NumericValue']]
df_merged_afro.head()

Unnamed: 0,Title,IndicatorCode,Dim1,TimeDim,NumericValue
25,Angola,WHOSIS_000001,MLE,2000.0,46.77004
26,Angola,WHOSIS_000001,MLE,2010.0,55.77834
27,Angola,WHOSIS_000001,MLE,2015.0,59.13242
28,Angola,WHOSIS_000001,MLE,2019.0,60.70384
29,Angola,WHOSIS_000001,FMLE,2000.0,52.12018


In [18]:
df_merged_afro.rename(columns={'Title': 'Country', 'IndicatorCode': 'Indicator_Name', 'Dim1': 'Options',
                               'TimeDim': 'Start_Period', 'NumericValue': 'Value_received' }, inplace=True)
df_merged_afro = df_merged_afro.dropna()

In [19]:
df_merged_afro.Country.unique()

array(['Angola', 'Burundi', 'Benin', 'Burkina Faso', 'Botswana',
       'Central African Republic', "Côte d'Ivoire", 'Cameroon',
       'Democratic Republic of the Congo', 'Congo', 'Comoros',
       'Cabo Verde', 'Algeria', 'Eritrea', 'Ethiopia', 'Gabon', 'Ghana',
       'Guinea', 'Gambia', 'Guinea-Bissau', 'Equatorial Guinea', 'Kenya',
       'Liberia', 'Lesotho', 'Madagascar', 'Mali', 'Mozambique',
       'Mauritania', 'Mauritius', 'Malawi', 'Namibia', 'Niger', 'Nigeria',
       'Rwanda', 'Senegal', 'Sierra Leone', 'South Sudan',
       'Sao Tome and Principe', 'Eswatini', 'Seychelles', 'Chad', 'Togo',
       'United Republic of Tanzania', 'Uganda', 'South Africa', 'Zambia',
       'Zimbabwe', 'Africa'], dtype=object)

In [20]:
len(df_merged_afro.Country.unique())

48

In [21]:
df_merged_afro.loc[:,'Indicator_Name'] = "Average life expectancy at birth (in years)"
df_merged_afro.loc[:,'Measure_type'] = "Numeric"
df_merged_afro.loc[:,'Source'] = "Global Health Observatory"

In [22]:
df_merged_afro.head()

Unnamed: 0,Country,Indicator_Name,Options,Start_Period,Value_received,Measure_type,Source
25,Angola,Average life expectancy at birth (in years),MLE,2000.0,46.77004,Numeric,Global Health Observatory
26,Angola,Average life expectancy at birth (in years),MLE,2010.0,55.77834,Numeric,Global Health Observatory
27,Angola,Average life expectancy at birth (in years),MLE,2015.0,59.13242,Numeric,Global Health Observatory
28,Angola,Average life expectancy at birth (in years),MLE,2019.0,60.70384,Numeric,Global Health Observatory
29,Angola,Average life expectancy at birth (in years),FMLE,2000.0,52.12018,Numeric,Global Health Observatory


In [23]:
df_merged_afro.loc[:,'Options'] = np.where(df_merged_afro.Options=="MLE", "Male",
                                     np.where(df_merged_afro.Options=="FMLE", "Female", "Both sexes"))
df_merged_afro.loc[:,'End_Period'] = df_merged_afro.loc[:,'Start_Period']

In [24]:
cols = ['Start_Period', 'End_Period']
df_merged_afro[cols] = df_merged_afro[cols].applymap(np.int32)
df_merged_afro = df_merged_afro[["Country", 'Indicator_Name', 'Options', 'Start_Period', 'End_Period',
                                "Value_received", 'Measure_type', 'Source']]
df_merged_afro.head()

Unnamed: 0,Country,Indicator_Name,Options,Start_Period,End_Period,Value_received,Measure_type,Source
25,Angola,Average life expectancy at birth (in years),Male,2000,2000,46.77004,Numeric,Global Health Observatory
26,Angola,Average life expectancy at birth (in years),Male,2010,2010,55.77834,Numeric,Global Health Observatory
27,Angola,Average life expectancy at birth (in years),Male,2015,2015,59.13242,Numeric,Global Health Observatory
28,Angola,Average life expectancy at birth (in years),Male,2019,2019,60.70384,Numeric,Global Health Observatory
29,Angola,Average life expectancy at birth (in years),Female,2000,2000,52.12018,Numeric,Global Health Observatory


In [25]:
import os

path = r'C:\DownloadTest2'

# Check whether the specified path exists or not
isExist = os.path.exists(path)

if not isExist:
  
  # Create a new directory because it does not exist 
  os.makedirs(path)
  print("The new directory is created!")

The new directory is created!


In [26]:
fileName = 'Average_life_expectancy_at_birth.csv'

df_merged_afro.to_csv (path + '\\' + fileName, index = False, header=True)

# Function to automate the process

In [15]:
def IndicatorGHO(IndicatorCode, Indicator_Name, mesure_Type, fileName):
    response = urlopen("https://ghoapi.azureedge.net/api/DIMENSION/COUNTRY/DimensionValues")
    json_data = response.read().decode('utf-8', 'replace')
    d = json.loads(json_data)
    df_Code = pd.json_normalize(d['value'])
    df_Code.rename(columns={'Code': 'SpatialDim'}, inplace=True)
    API = "https://ghoapi.azureedge.net/api/"
    response = urlopen(API + IndicatorCode)
    json_data = response.read().decode('utf-8', 'replace')
    d = json.loads(json_data)
    df = pd.json_normalize(d['value'])
    df_merged = pd.merge(df, df_Code, how='outer', on='SpatialDim')
    
    df_merged['Title'] = np.where((df_merged.SpatialDim=="AFR") | (df_merged.SpatialDim=="WHO_LMI_AFR"), "Africa", df_merged.Title)
    df_merged['ParentTitle'] = np.where((df_merged.SpatialDim=="AFR") | (df_merged.SpatialDim=="WHO_LMI_AFR"), "Africa", df_merged.ParentTitle)
    #df_merged['Title'] = np.where(df_merged.SpatialDim=="AFR", "Africa", df_merged.Title)
    #df_merged['ParentTitle'] = np.where(df_merged.SpatialDim=="AFR", "Africa", df_merged.ParentTitle)
    
    df_merged_afro = df_merged[df_merged.ParentTitle=="Africa"]
    df_merged_afro = df_merged[df_merged.ParentTitle=="Africa"][["Title", 'IndicatorCode', 'Dim1', 'TimeDim', 'NumericValue']]
    df_merged_afro.rename(columns={'Title': 'Country', 'IndicatorCode': 'Indicator_Name', 'Dim1': 'Options',
                               'TimeDim': 'Start_Period', 'NumericValue': 'Value_received' }, inplace=True)
    
    df_merged_afro.drop(df_merged_afro[df_merged_afro.Country.isin(['Saint Helena', 'Mayotte', 'Reunion'])].index, inplace=True)
    #df_merged_afro = df_merged_afro.dropna()
    
    df_merged_afro.loc[:,'Indicator_Name'] = Indicator_Name
    df_merged_afro.loc[:,'Measure_type'] = mesure_Type
    df_merged_afro.loc[:,'Source'] = "Global Health Observatory"
    df_merged_afro.loc[:,'Options'] = np.where(df_merged_afro.Options=="MLE", "Male",
                                           np.where(df_merged_afro.Options=="FMLE", "Female",
                                                    np.where(df_merged_afro.Options=="BTSX", "Both sexes",df_merged_afro.Options)))
    df_merged_afro.loc[:,'Options'] = np.where(df_merged_afro.Options=="URB", "Urban",
                                           np.where(df_merged_afro.Options=="RUR", "Rural",
                                                    np.where(df_merged_afro.Options=="TOTL", "Total",df_merged_afro.Options)))
    
    df_merged_afro.loc[:,'End_Period'] = df_merged_afro.loc[:,'Start_Period']
    
    #cols = ['Start_Period', 'End_Period']
    #df_merged_afro[cols] = df_merged_afro[cols].applymap(np.int32)
    #df_merged_afro
    
    df_merged_afro = df_merged_afro[["Country", 'Indicator_Name', 'Options', 'Start_Period', 'End_Period',
                                "Value_received", 'Measure_type', 'Source']]
    path = r'C:\DownloadTest2'

    # Check whether the specified path exists or not
    isExist = os.path.exists(path)
    
    if not isExist:
        # Create a new directory because it does not exist 
        os.makedirs(path)
    df_merged_afro.to_csv (path + '\\' + fileName + '.csv', index = False, header=True)

In [12]:
IndicatorGHO("WHS_PBR", "Preterm birth rate (per 100 live births)", "Numeric", "Berence_f3")

# Loop through all the indicators

In [19]:
ListOfIndicators = pd.read_csv(r'C:\Moimeme\IndicatorsCodeGHO.csv', encoding='iso-8859-1')
ListOfIndicators.head()

Unnamed: 0,FileName,Indicator,MetaDataCodeGHO,Mesure_Type,IndicatorCodeGHO,IndicatorNameInGHO
0,AFR0001,Average life expectancy at birth (in years),65,Statistic,WHOSIS_000001,Life expectancy at birth (years)
1,AFR0002,Healthy life expectancy at birth (in years),66,Statistic,WHOSIS_000002,Healthy life expectancy (HALE) at birth (years)
2,AFR0003,Life expectancy at age 60 years,2977,Statistic,WHOSIS_000015,Life expectancy at age 60 (years)
3,AFR0006,Adolescent mortality rate (per 100 000 adolesc...,4751,Rate,MORTADO,Adolescent mortality rate (per 100 000 populat...
4,AFR0007,Adult mortality rate between 15 and 60 years o...,64,Rate,WHOSIS_000004,Adult mortality rate (probability of dying bet...


In [20]:
import urllib
for index, row in ListOfIndicators.iterrows():
    try:
        IndicatorGHO(row['IndicatorCodeGHO'], row['Indicator'], row['Mesure_Type'], row['FileName'])
        print(f"The indicator number {index+1} is processed, please go and check to C:\DownloadTest2")
    except ValueError:
        print(f"The indicator number {index+1}, which is : {row['IndicatorCodeGHO']} / {row['Indicator']}  is not processed because of ValueError")
    except urllib.request.HTTPError:
        print(f"The indicator number {index+1}, which is : {row['IndicatorCodeGHO']} / {row['Indicator']} is not processed because of HTTPError")

The indicator number 1 is processed, please go and check to C:\DownloadTest2
The indicator number 2 is processed, please go and check to C:\DownloadTest2
The indicator number 3 is processed, please go and check to C:\DownloadTest2
The indicator number 4 is processed, please go and check to C:\DownloadTest2
The indicator number 5 is processed, please go and check to C:\DownloadTest2
The indicator number 6 is processed, please go and check to C:\DownloadTest2
The indicator number 7 is processed, please go and check to C:\DownloadTest2
The indicator number 8 is processed, please go and check to C:\DownloadTest2
The indicator number 9 is processed, please go and check to C:\DownloadTest2
The indicator number 10 is processed, please go and check to C:\DownloadTest2
The indicator number 11 is processed, please go and check to C:\DownloadTest2
The indicator number 12 is processed, please go and check to C:\DownloadTest2
The indicator number 13 is processed, please go and check to C:\DownloadT

The indicator number 105 is processed, please go and check to C:\DownloadTest2
The indicator number 106, which is : SDGMALARIA / Malaria incidence rate (per 1000 population at risk) [SDG 3.3.3] is not processed because of HTTPError
The indicator number 107 is processed, please go and check to C:\DownloadTest2
The indicator number 108 is processed, please go and check to C:\DownloadTest2
The indicator number 109 is processed, please go and check to C:\DownloadTest2
The indicator number 110 is processed, please go and check to C:\DownloadTest2


TypeError: can only concatenate str (not "float") to str

In [23]:
# Set the path of the csv files
path = r'C:\\DownloadTest2'

# create an empty pandas data frame
df = pd.DataFrame()
 
# iterate over all files within "My_Folder"
for file in os.listdir(path):
    if file.endswith(".csv"):
        df = pd.concat([df , pd.read_csv(os.path.join(path, file))], axis=0 )
 
# reset the index 
df.reset_index(drop=True, inplace=True)

df.to_csv (path + '\\' + 'AllTheIndicatorsGHO' + '.csv', index = False, header=True)
