In [2]:
import json
import os
from urllib.parse import quote
from urllib.request import urlopen

import numpy as np
import pandas as pd

# Retrieving the list of available dimensions

In [4]:
# Download the catalogue of GHO dimensions. The `with` block closes the HTTP
# response as soon as the body has been read.
with urlopen("https://ghoapi.azureedge.net/api/Dimension") as response:
    json_data = response.read().decode('utf-8', 'replace')

# The records are stored under the "value" key of the JSON payload.
d = json.loads(json_data)
dimGHO = pd.json_normalize(d['value'])
dimGHO.head(15)

Unnamed: 0,Code,Title
0,ADVERTISINGTYPE,SUBSTANCE_ABUSE_ADVERTISING_TYPES
1,AGEGROUP,Age Group
2,ALCOHOLTYPE,Beverage Types
3,AMRGLASSCATEGORY,AMR GLASS Category
4,ARCHIVE,Archive date
5,AWARENESSACTIVITYTYPE,SUBSTANCE_ABUSE_AWARENESS_ACTIVITY_TYPES
6,BACGROUP,SUBSTANCE_ABUSE_BAC_GROUPS
7,BEVERAGETYPE,SUBSTANCE_ABUSE_BEVERAGE_TYPES
8,CAREPATIENT,Patient type
9,CARESECTOR,Care sector


# Retrieving a list of available values for a specific dimension

## Retrieve the list of the `COUNTRY` dimension values

In [3]:
# Download every value of the COUNTRY dimension. The `with` block closes the
# HTTP response as soon as the body has been read.
with urlopen("https://ghoapi.azureedge.net/api/DIMENSION/COUNTRY/DimensionValues") as response:
    json_data = response.read().decode('utf-8', 'replace')

# The records are stored under the "value" key of the JSON payload.
d = json.loads(json_data)
df_Code = pd.json_normalize(d['value'])
df_Code.head(15)

Unnamed: 0,Code,Title,ParentDimension,Dimension,ParentCode,ParentTitle
0,ABW,Aruba,REGION,COUNTRY,AMR,Americas
1,AFG,Afghanistan,REGION,COUNTRY,EMR,Eastern Mediterranean
2,AGO,Angola,REGION,COUNTRY,AFR,Africa
3,AIA,Anguilla,REGION,COUNTRY,AMR,Americas
4,ALB,Albania,REGION,COUNTRY,EUR,Europe
5,AND,Andorra,REGION,COUNTRY,EUR,Europe
6,ANT530,SPATIAL_SYNONYM,REGION,COUNTRY,AMR,Americas
7,ANT532,SPATIAL_SYNONYM,REGION,COUNTRY,AMR,Americas
8,ARE,United Arab Emirates,REGION,COUNTRY,EMR,Eastern Mediterranean
9,ARG,Argentina,REGION,COUNTRY,AMR,Americas


## Filter Africa Region

In [6]:
# Keep only the countries whose parent region code is "AFR".
df_Code_afro = df_Code.loc[df_Code["ParentCode"].eq("AFR")]
df_Code_afro.head(n=10)

Unnamed: 0,Code,Title,ParentDimension,Dimension,ParentCode,ParentTitle
0,AGO,Angola,REGION,COUNTRY,AFR,Africa
1,BDI,Burundi,REGION,COUNTRY,AFR,Africa
2,BEN,Benin,REGION,COUNTRY,AFR,Africa
3,BFA,Burkina Faso,REGION,COUNTRY,AFR,Africa
4,BWA,Botswana,REGION,COUNTRY,AFR,Africa
5,CAF,Central African Republic,REGION,COUNTRY,AFR,Africa
6,ZMB,Zambia,REGION,COUNTRY,AFR,Africa
7,ZWE,Zimbabwe,REGION,COUNTRY,AFR,Africa
8,SYC,Seychelles,REGION,COUNTRY,AFR,Africa
9,TCD,Chad,REGION,COUNTRY,AFR,Africa


In [7]:
# (rows, columns) of the Africa-region country table.
df_Code_afro.shape

(50, 6)

In [8]:
# Distinct country names found in the Africa region.
df_Code_afro["Title"].unique()

array(['Angola', 'Burundi', 'Benin', 'Burkina Faso', 'Botswana',
       'Central African Republic', 'Zambia', 'Zimbabwe', 'Seychelles',
       'Chad', 'Togo', 'United Republic of Tanzania', 'Uganda',
       'South Africa', 'Senegal', 'Saint Helena', 'Sierra Leone',
       'South Sudan', 'Sao Tome and Principe', 'Eswatini', 'Mayotte',
       'Namibia', 'Niger', 'Nigeria', 'Reunion', 'Rwanda', 'Madagascar',
       'Mali', 'Mozambique', 'Mauritania', 'Mauritius', 'Malawi',
       'Gambia', 'Guinea-Bissau', 'Equatorial Guinea', 'Kenya', 'Liberia',
       'Lesotho', 'Algeria', 'Eritrea', 'Ethiopia', 'Gabon', 'Ghana',
       'Guinea', "Côte d'Ivoire", 'Cameroon',
       'Democratic Republic of the Congo', 'Congo', 'Comoros',
       'Cabo Verde'], dtype=object)

# Retrieving the list of all the indicators in GHO with their code

In [11]:
# Download the full indicator catalogue. The `with` block closes the HTTP
# response as soon as the body has been read.
with urlopen("https://ghoapi.azureedge.net/api/Indicator") as response:
    json_data = response.read().decode('utf-8', 'replace')

# The records are stored under the "value" key of the JSON payload.
d = json.loads(json_data)
df_Indicators = pd.json_normalize(d['value'])
df_Indicators.head(15)

Unnamed: 0,IndicatorCode,IndicatorName,Language
0,AIR_10,Ambient air pollution attributable DALYs per ...,EN
1,AIR_11,Household air pollution attributable deaths,EN
2,AIR_12,Household air pollution attributable deaths in...,EN
3,AIR_13,Household air pollution attributable deaths pe...,EN
4,AIR_14,Household air pollution attributable deaths p...,EN
5,AIR_15,Household air pollution attributable DALYs,EN
6,AIR_16,Household air pollution attributable DALYs in ...,EN
7,AIR_17,Household air pollution attributable DALYs (pe...,EN
8,AIR_18,Household air pollution attributable DALYs pe...,EN
9,AIR_39,Household air pollution attributable DALYs (pe...,EN


In [15]:


# Look up the catalogue entry whose code is exactly 'cpmo'.
df_Indicators.loc[df_Indicators['IndicatorCode'].eq('cpmo')]

Unnamed: 0,IndicatorCode,IndicatorName,Language
80,cpmo,Contraceptive prevalence - modern methods (%),EN


In [16]:
 # Look up the catalogue entry whose code is exactly 'cpmt'.
 df_Indicators.loc[df_Indicators['IndicatorCode'].eq('cpmt')]
    

Unnamed: 0,IndicatorCode,IndicatorName,Language
81,cpmt,Contraceptive prevalence - modern and traditio...,EN


In [5]:
# (rows, columns) of the indicator catalogue.
df_Indicators.shape

(2247, 3)

# List of all the indicators in GHO containing the word 'Household'

In [11]:
# Ask the API for the indicators whose name contains 'Household' (OData
# `contains` filter). The `with` block closes the HTTP response as soon as the
# body has been read.
with urlopen("https://ghoapi.azureedge.net/api/Indicator?$filter=contains(IndicatorName,'Household')") as response:
    json_data = response.read().decode('utf-8', 'replace')

# The records are stored under the "value" key of the JSON payload.
d = json.loads(json_data)
df_Indicators2 = pd.json_normalize(d['value'])
df_Indicators2

Unnamed: 0,IndicatorCode,IndicatorName,Language
0,AIR_11,Household air pollution attributable deaths,EN
1,AIR_12,Household air pollution attributable deaths in...,EN
2,AIR_13,Household air pollution attributable deaths pe...,EN
3,AIR_14,Household air pollution attributable deaths p...,EN
4,AIR_15,Household air pollution attributable DALYs,EN
5,AIR_16,Household air pollution attributable DALYs in ...,EN
6,AIR_17,Household air pollution attributable DALYs (pe...,EN
7,AIR_18,Household air pollution attributable DALYs pe...,EN
8,AIR_39,Household air pollution attributable DALYs (pe...,EN
9,AIR_51,Household air pollution attributable death rat...,EN


# Select only the indicators that have a specific title

In this example, the indicator `Ambient air pollution attributable deaths` was queried

In [9]:
# The filter expression contains spaces and quotes, which urlopen() refuses in
# a raw URL, so the expression is percent-encoded before being appended.
indicator_filter = quote("IndicatorName eq 'Ambient air pollution attributable deaths'")
with urlopen("https://ghoapi.azureedge.net/api/Indicator?$filter=" + indicator_filter) as response:
    json_data = response.read().decode('utf-8', 'replace')

# The records are stored under the "value" key of the JSON payload.
d = json.loads(json_data)
df_Indicators3 = pd.json_normalize(d['value'])
df_Indicators3

# Retrieving indicator data

Download an indicator by specifying its indicator code. This returns all the data associated with that indicator.

## Case Study : Average life expectancy at birth (in years)

The indicator code for the `Average life expectancy at birth (in years)` indicator is `WHOSIS_000001`.

In [12]:
# Download every observation of indicator WHOSIS_000001. The `with` block
# closes the HTTP response as soon as the body has been read.
with urlopen("https://ghoapi.azureedge.net/api/WHOSIS_000001") as response:
    json_data = response.read().decode('utf-8', 'replace')

# The records are stored under the "value" key of the JSON payload.
d = json.loads(json_data)
df = pd.json_normalize(d['value'])
df.head()

Unnamed: 0,Id,IndicatorCode,SpatialDimType,SpatialDim,TimeDimType,TimeDim,Dim1Type,Dim1,Dim2Type,Dim2,...,DataSourceDim,Value,NumericValue,Low,High,Comments,Date,TimeDimensionValue,TimeDimensionBegin,TimeDimensionEnd
0,22847621,WHOSIS_000001,COUNTRY,CAN,YEAR,1920,SEX,BTSX,,,...,,82.8,82.80972,,,WHO life table method: Vital registration,2020-06-04T11:21:09.77+02:00,1920,1920-01-01T00:00:00+01:00,1920-12-31T00:00:00+01:00
1,24614313,WHOSIS_000001,COUNTRY,AFG,YEAR,2000,SEX,MLE,,,...,,54.6,54.57449,,,,2020-12-04T16:59:42.513+01:00,2000,2000-01-01T00:00:00+01:00,2000-12-31T00:00:00+01:00
2,24614317,WHOSIS_000001,COUNTRY,AFG,YEAR,2010,SEX,MLE,,,...,,59.6,59.60036,,,,2020-12-04T16:59:43.013+01:00,2010,2010-01-01T00:00:00+01:00,2010-12-31T00:00:00+01:00
3,24614321,WHOSIS_000001,COUNTRY,AFG,YEAR,2015,SEX,MLE,,,...,,61.0,61.03658,,,,2020-12-04T16:59:43.423+01:00,2015,2015-01-01T00:00:00+01:00,2015-12-31T00:00:00+01:00
4,24614325,WHOSIS_000001,COUNTRY,AFG,YEAR,2019,SEX,MLE,,,...,,63.3,63.28709,,,,2020-12-04T16:59:43.533+01:00,2019,2019-01-01T00:00:00+01:00,2019-12-31T00:00:00+01:00


In [13]:

# Download every observation of indicator HRH_30.
# The result is stored under its own name: the cells below build the
# WHOSIS_000001 case study from `df`, so overwriting `df` here would make a
# top-to-bottom run merge and label the wrong indicator.
with urlopen("https://ghoapi.azureedge.net/api/HRH_30") as response:
    json_data = response.read().decode('utf-8', 'replace')

# The records are stored under the "value" key of the JSON payload.
d = json.loads(json_data)
df_hrh30 = pd.json_normalize(d['value'])
df_hrh30.head()

Unnamed: 0,Id,IndicatorCode,SpatialDimType,SpatialDim,TimeDimType,TimeDim,Dim1Type,Dim1,Dim2Type,Dim2,...,DataSourceDim,Value,NumericValue,Low,High,Comments,Date,TimeDimensionValue,TimeDimensionBegin,TimeDimensionEnd
0,18016526,HRH_30,COUNTRY,AFG,YEAR,2008,,,,,...,,0.457,0.457,,,,2018-02-26T12:18:42+01:00,2008,2008-01-01T00:00:00+01:00,2008-12-31T00:00:00+01:00
1,18016527,HRH_30,COUNTRY,AFG,YEAR,2009,,,,,...,,0.454,0.454,,,,2018-02-26T12:18:42.023+01:00,2009,2009-01-01T00:00:00+01:00,2009-12-31T00:00:00+01:00
2,18016528,HRH_30,COUNTRY,AFG,YEAR,2010,,,,,...,,0.435,0.435,,,,2018-02-26T12:18:42.043+01:00,2010,2010-01-01T00:00:00+01:00,2010-12-31T00:00:00+01:00
3,18016529,HRH_30,COUNTRY,AFG,YEAR,2011,,,,,...,,0.425,0.425,,,,2018-02-26T12:18:42.06+01:00,2011,2011-01-01T00:00:00+01:00,2011-12-31T00:00:00+01:00
4,18016530,HRH_30,COUNTRY,AFG,YEAR,2012,,,,,...,,0.455,0.455,,,,2018-02-26T12:18:42.08+01:00,2012,2012-01-01T00:00:00+01:00,2012-12-31T00:00:00+01:00


In [13]:
# Give the country table the same key name as the indicator data, then attach
# country and region labels to every observation. The outer join keeps rows
# that are present on only one side.
df_Code = df_Code.rename(columns={'Code': 'SpatialDim'})
df_merged = df.merge(df_Code, how='outer', on='SpatialDim')

In [16]:
# Rows whose SpatialDim is "AFR" are labelled "Africa" by hand (presumably the
# regional aggregate, which the country table does not name — confirm).
is_afr_aggregate = df_merged.SpatialDim == "AFR"
df_merged['Title'] = np.where(is_afr_aggregate, "Africa", df_merged.Title)
df_merged['ParentTitle'] = np.where(is_afr_aggregate, "Africa", df_merged.ParentTitle)

# Keep the Africa rows and only the columns needed for the export. The explicit
# copy makes this an independent frame, so later cells can rename and add
# columns without writing into a slice of df_merged.
df_merged_afro = df_merged.loc[
    df_merged.ParentTitle == "Africa",
    ["Title", 'IndicatorCode', 'Dim1', 'TimeDim', 'NumericValue'],
].copy()
df_merged_afro.head()

Unnamed: 0,Title,IndicatorCode,Dim1,TimeDim,NumericValue
25,Angola,WHOSIS_000001,MLE,2000.0,46.77004
26,Angola,WHOSIS_000001,MLE,2010.0,55.77834
27,Angola,WHOSIS_000001,MLE,2015.0,59.13242
28,Angola,WHOSIS_000001,MLE,2019.0,60.70384
29,Angola,WHOSIS_000001,FMLE,2000.0,52.12018


In [18]:
# Switch to the export column names, then discard every row that still has a
# missing value in any column.
export_names = {
    'Title': 'Country',
    'IndicatorCode': 'Indicator_Name',
    'Dim1': 'Options',
    'TimeDim': 'Start_Period',
    'NumericValue': 'Value_received',
}
df_merged_afro = df_merged_afro.rename(columns=export_names).dropna()

In [19]:
# Distinct countries that remain after the merge and clean-up.
df_merged_afro["Country"].unique()

array(['Angola', 'Burundi', 'Benin', 'Burkina Faso', 'Botswana',
       'Central African Republic', "Côte d'Ivoire", 'Cameroon',
       'Democratic Republic of the Congo', 'Congo', 'Comoros',
       'Cabo Verde', 'Algeria', 'Eritrea', 'Ethiopia', 'Gabon', 'Ghana',
       'Guinea', 'Gambia', 'Guinea-Bissau', 'Equatorial Guinea', 'Kenya',
       'Liberia', 'Lesotho', 'Madagascar', 'Mali', 'Mozambique',
       'Mauritania', 'Mauritius', 'Malawi', 'Namibia', 'Niger', 'Nigeria',
       'Rwanda', 'Senegal', 'Sierra Leone', 'South Sudan',
       'Sao Tome and Principe', 'Eswatini', 'Seychelles', 'Chad', 'Togo',
       'United Republic of Tanzania', 'Uganda', 'South Africa', 'Zambia',
       'Zimbabwe', 'Africa'], dtype=object)

In [20]:
# How many distinct countries remain (missing values, if any, count as one).
df_merged_afro["Country"].nunique(dropna=False)

48

In [21]:
# Replace the indicator code with its readable name and add two constant
# descriptive columns.
df_merged_afro = df_merged_afro.assign(
    Indicator_Name="Average life expectancy at birth (in years)",
    Measure_type="Numeric",
    Source="Global Health Observatory",
)

In [22]:
# Preview the first rows after the descriptive columns were filled in.
df_merged_afro.head()

Unnamed: 0,Country,Indicator_Name,Options,Start_Period,Value_received,Measure_type,Source
25,Angola,Average life expectancy at birth (in years),MLE,2000.0,46.77004,Numeric,Global Health Observatory
26,Angola,Average life expectancy at birth (in years),MLE,2010.0,55.77834,Numeric,Global Health Observatory
27,Angola,Average life expectancy at birth (in years),MLE,2015.0,59.13242,Numeric,Global Health Observatory
28,Angola,Average life expectancy at birth (in years),MLE,2019.0,60.70384,Numeric,Global Health Observatory
29,Angola,Average life expectancy at birth (in years),FMLE,2000.0,52.12018,Numeric,Global Health Observatory


In [23]:
# Translate the sex codes into readable labels. Only the three known codes are
# translated; any other code is kept as-is instead of being silently reported
# as "Both sexes".
sex_labels = {"MLE": "Male", "FMLE": "Female", "BTSX": "Both sexes"}
df_merged_afro = df_merged_afro.assign(
    Options=df_merged_afro['Options'].map(lambda code: sex_labels.get(code, code)),
    # Each observation covers a single year, so the period ends when it starts.
    End_Period=df_merged_afro['Start_Period'],
)

In [25]:
# Store the periods as whole numbers. A vectorised astype replaces the
# element-wise DataFrame.applymap, which recent pandas versions deprecate.
cols = ['Start_Period', 'End_Period']
df_merged_afro = df_merged_afro.astype({col: 'int32' for col in cols})

# Put the columns in the order expected in the exported file.
df_merged_afro = df_merged_afro[["Country", 'Indicator_Name', 'Options', 'Start_Period', 'End_Period',
                                "Value_received", 'Measure_type', 'Source']]
df_merged_afro.head()

Unnamed: 0,Country,Indicator_Name,Options,Start_Period,End_Period,Value_received,Measure_type,Source
25,Angola,Average life expectancy at birth (in years),Male,2000,2000,46.77004,Numeric,Global Health Observatory
26,Angola,Average life expectancy at birth (in years),Male,2010,2010,55.77834,Numeric,Global Health Observatory
27,Angola,Average life expectancy at birth (in years),Male,2015,2015,59.13242,Numeric,Global Health Observatory
28,Angola,Average life expectancy at birth (in years),Male,2019,2019,60.70384,Numeric,Global Health Observatory
29,Angola,Average life expectancy at birth (in years),Female,2000,2000,52.12018,Numeric,Global Health Observatory


In [10]:
# Folder that receives the exported CSV file.
# NOTE(review): IndicatorGHO below writes to D:\DownloadTest2 while this cell
# uses C:\DownloadTest2 — confirm which drive is intended.
path = r'C:\DownloadTest2'

# Remember whether the folder was already there, so the message is printed only
# when this cell actually creates it.
isExist = os.path.exists(path)

# exist_ok=True keeps the cell safe to re-run and avoids failing if the folder
# appears between the check above and the creation.
os.makedirs(path, exist_ok=True)
if not isExist:
    print("The new directory is created!")

The new directory is created!


In [None]:
fileName = 'Average life expectancy at birth.csv'

# os.path.join builds the destination with the platform's separator instead of
# a hand-written backslash.
df_merged_afro.to_csv(os.path.join(path, fileName), index=False, header=True)

# Function to automate the process

In [45]:
def _fetch_gho_values(url):
    """Download one GHO API endpoint and return its ``value`` records as a DataFrame."""
    # The context manager closes the HTTP response once the body has been read.
    with urlopen(url) as response:
        payload = response.read().decode('utf-8', 'replace')
    return pd.json_normalize(json.loads(payload)['value'])


def IndicatorGHO(IndicatorCode, Indicator_Name, fileName, SourceName, output_dir=r'D:\DownloadTest2'):
    """Download one GHO indicator, keep the Africa rows and export them to CSV.

    Parameters
    ----------
    IndicatorCode : str
        GHO indicator code appended to the API root (e.g. "WHOSIS_000002").
    Indicator_Name : str
        Readable name written to the ``Indicator_Name`` column of every row.
    fileName : str
        Name of the output file, without the ``.csv`` extension.
    SourceName : str
        Text written to the ``Source`` column of every row.
    output_dir : str, optional
        Folder that receives the CSV file; created if it does not exist.
        Defaults to the folder that was previously hard-coded.

    Returns
    -------
    None
        The result is written to ``<output_dir>/<fileName>.csv``.

    Raises
    ------
    OSError
        If a download fails (``urllib.error.URLError`` is a subclass) or the
        file cannot be written.
    ValueError
        If a response body is not valid JSON.
    KeyError
        If a response has no ``value`` key or an expected column is missing.
    """
    api_root = "https://ghoapi.azureedge.net/api/"

    # The country table uses "Code" where the indicator data uses "SpatialDim".
    countries = _fetch_gho_values(api_root + "DIMENSION/COUNTRY/DimensionValues").rename(
        columns={'Code': 'SpatialDim'})
    observations = _fetch_gho_values(api_root + IndicatorCode)
    merged = pd.merge(observations, countries, how='outer', on='SpatialDim')

    # Rows coded "AFR" or "WHO_LMI_AFR" are labelled "Africa" by hand so that
    # the region filter below keeps them.
    is_afr_aggregate = merged['SpatialDim'].isin(["AFR", "WHO_LMI_AFR"])
    merged['Title'] = np.where(is_afr_aggregate, "Africa", merged['Title'])
    merged['ParentTitle'] = np.where(is_afr_aggregate, "Africa", merged['ParentTitle'])

    afro = merged.loc[
        merged['ParentTitle'] == "Africa",
        ["Title", 'IndicatorCode', 'Dim1', 'TimeDim', 'NumericValue'],
    ].rename(columns={'Title': 'Country', 'IndicatorCode': 'Indicator_Name', 'Dim1': 'Options',
                      'TimeDim': 'Start_Period', 'NumericValue': 'Value_received'})

    # These three entries are excluded from the export. The copy gives an
    # independent frame that can be modified without touching `merged`.
    afro = afro[~afro['Country'].isin(['Saint Helena', 'Mayotte', 'Reunion'])].copy()

    # Known dimension codes get a readable label; anything else (including
    # missing values) is passed through unchanged.
    option_labels = {
        "MLE": "Male", "FMLE": "Female", "BTSX": "Both sexes",
        "URB": "Urban", "RUR": "Rural", "TOTL": "Total",
    }
    afro['Indicator_Name'] = Indicator_Name
    afro['Options'] = afro['Options'].map(lambda code: option_labels.get(code, code))
    afro['Measure_type'] = "Numeric"
    afro['Source'] = SourceName
    afro['End_Period'] = afro['Start_Period']

    # NOTE(review): rows with missing values are deliberately not dropped here
    # and the periods are not cast to integers, matching the previous output.
    afro = afro[["Country", 'Indicator_Name', 'Options', 'Start_Period', 'End_Period',
                 "Value_received", 'Measure_type', 'Source']]

    # exist_ok=True makes repeated calls (e.g. from a loop) safe.
    os.makedirs(output_dir, exist_ok=True)
    afro.to_csv(os.path.join(output_dir, fileName + '.csv'), index=False, header=True)

In [31]:
# Export healthy life expectancy (HALE) at birth for the Africa region.
IndicatorGHO(
    IndicatorCode="WHOSIS_000002",
    Indicator_Name="Healthy life expectancy (HALE) at birth",
    fileName="Healthy life expectancy",
    SourceName="Global Health Observatory",
)

In [32]:
# Export life expectancy at age 60 for the Africa region.
IndicatorGHO(
    IndicatorCode="WHOSIS_000015",
    Indicator_Name="Life expectancy at age 60 (years)",
    fileName="Life expectancy at age 60",
    SourceName="Global Health Observatory",
)

# Loop through all the indicators

In [46]:
# Table that drives the batch export: one row per indicator to download.
# NOTE(review): absolute local path — the file must exist at this location.
indicator_list_csv = r'D:\PERSONNEL\OMS\TRAVAIL\Indicateur\IndicatorsCodeGHO.csv'
ListOfIndicators = pd.read_csv(indicator_list_csv, encoding='iso-8859-1')
ListOfIndicators.head()

Unnamed: 0,IndicatorCodeGHO,Indicator,FileName,Source
0,WHOSIS_000001,Average life expectancy at birth (in years),file1,Global Health Observatory
1,WHOSIS_000002,Healthy life expectancy (HALE) at birth,file2,Global Health Observatory
2,WHOSIS_000015,Life expectancy at age 60 (years),file3,Global Health Observatory
3,MORTADO,Adolescent mortality rate (per 100 000 adolesc...,file4,Global Health Observatory
4,WHOSIS_000004,Adult mortality rate (probably of dying betwee...,file5,Global Health Observatory


In [47]:
# Export every indicator listed in ListOfIndicators, one CSV file per row.
for index, row in ListOfIndicators.iterrows():
    try:
        IndicatorGHO(row['IndicatorCodeGHO'], row['Indicator'], row['FileName'], row['Source'])
        # The backslash is doubled so that "\D" is not read as an escape sequence.
        print(f"The indicator number {index+1} is processed, please go and check to D:\\DownloadTest2")
    except (ValueError, KeyError, OSError) as error:
        # Download failures are OSError subclasses and missing fields raise
        # KeyError; catching them keeps one bad indicator from aborting the
        # whole batch, and the cause is reported instead of being hidden.
        print(f"The indicator number {index+1} is not processed because of error: {error!r}")

The indicator number 1 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 2 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 3 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 4 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 5 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 6 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 7 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 8 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 9 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 10 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 11 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 12 is pro

The indicator number 94 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 95 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 96 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 97 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 98 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 99 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 100 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 101 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 102 is processed, please go and check to D:\DownloadTest2
Well done!
The indicator number 103 is processed, please go and check to D:\DownloadTest2
Well done!
