In [60]:
# import dependencies
import requests
import json
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats as st

# WHO Data Exploration

In [61]:
# Entry point for WHO's indicator list
who_url = 'https://ghoapi.azureedge.net/api/Indicator'

# Fetch the indicator list. The timeout stops a stalled connection from
# blocking the notebook indefinitely, and raise_for_status() turns an HTTP
# error into an exception here instead of a confusing JSON/KeyError below.
who_response = requests.get(who_url, timeout=30)
who_response.raise_for_status()
who_data = who_response.json()

# Print the name of every indicator together with its position in the list
for index, indicator in enumerate(who_data['value']):
    print(index, indicator['IndicatorName'])

0 Ambient air pollution  attributable DALYs per 100'000 children under 5 years
1 Household air pollution attributable deaths
2 Household air pollution attributable deaths in children under 5 years
3 Household air pollution attributable deaths per 100'000 capita
4 Household air pollution  attributable deaths per 100'000 children under 5 years
5 Household air pollution attributable DALYs
6 Household air pollution attributable DALYs in children under 5 years
7 Household air pollution attributable DALYs (per 100 000 population)
8 Household air pollution  attributable DALYs per 100'000 children under 5 years
9 Household air pollution attributable DALYs (per 100 000, age-standardized)
10 Ambient air pollution attributable deaths in children under 5 years
11 Ambient air pollution attributable deaths
12 Ambient air pollution attributable death rate (per 100 000 population, age-standardized)
13 Ambient air pollution attributable DALYs
14 DALYs attributable to ambient air pollution (age-standard

1140 Pharmacotherapy used for the management of alcohol withdrawal
1141 Special housing services for alcohol use disorders
1142 Pharmacotherapy used for the management of benzodiazepine withdrawal
1143 Special housing services for drug use disorders
1144 Psychoactive substance causing entry into treatment
1145 Pharmacotherapy used for the management of cannabis withdrawal
1146 Employment services for alcohol use disorders
1147 Essential list of medicines
1148 Employment services for drug use disorders
1149 Essential list of medicines, pharmacotherapy for substance use disorders
1150 Open access interventions for alcohol
1151 Government unit for substance use disorder prevention
1152 Open access interventions for drugs
1153 Budget line for substance use disorder prevention
1154 Health professionals providing treatment for alcohol and drug use disorders
1155 Funding method for substance use disorder prevention
1156 Standards of care for professionals providing treatment for alcohol and d

In [62]:
# Look the suicide-rate indicator up by its code instead of by list position:
# a positional index would silently return a different indicator if the order
# of the list ever changed. Raises StopIteration if the code is not present.
next(entry for entry in who_data['value'] if entry['IndicatorCode'] == 'MH_12')

{'IndicatorCode': 'MH_12',
 'IndicatorName': 'Age-standardized suicide rates (per 100 000 population)',
 'Language': 'EN'}

In [63]:
# Endpoint serving the records for indicator MH_12 (age-standardized suicide rates)
sui_url = "https://ghoapi.azureedge.net/api/MH_12"

In [64]:
# Download the suicide-rate records. The timeout stops a stalled connection
# from blocking the notebook indefinitely, and raise_for_status() surfaces an
# HTTP error here rather than as a JSON/KeyError in a later cell.
sui_response = requests.get(sui_url, timeout=30)
sui_response.raise_for_status()
sui_data = sui_response.json()
sui_data

{'@odata.context': 'https://ghoapi.azureedge.net/api/$metadata#MH_12',
 'value': [{'Id': 19629382,
   'IndicatorCode': 'MH_12',
   'SpatialDimType': 'REGION',
   'SpatialDim': 'GLOBAL',
   'TimeDimType': 'YEAR',
   'TimeDim': 2016,
   'Dim1Type': 'SEX',
   'Dim1': 'BTSX',
   'Dim2Type': None,
   'Dim2': None,
   'Dim3Type': None,
   'Dim3': None,
   'DataSourceDimType': None,
   'DataSourceDim': None,
   'Value': '10.53',
   'NumericValue': 10.5328,
   'Low': None,
   'High': None,
   'Comments': None,
   'Date': '2018-07-17T08:37:08.217+02:00',
   'TimeDimensionValue': '2016',
   'TimeDimensionBegin': '2016-01-01T00:00:00+01:00',
   'TimeDimensionEnd': '2016-12-31T00:00:00+01:00'},
  {'Id': 25257364,
   'IndicatorCode': 'MH_12',
   'SpatialDimType': 'COUNTRY',
   'SpatialDim': 'AFG',
   'TimeDimType': 'YEAR',
   'TimeDim': 2003,
   'Dim1Type': 'SEX',
   'Dim1': 'FMLE',
   'Dim2Type': None,
   'Dim2': None,
   'Dim3Type': None,
   'Dim3': None,
   'DataSourceDimType': None,
   'DataSou

In [65]:
# Number of records returned under the response's 'value' key
len(sui_data['value'])

11641

In [66]:
# Column-oriented container: one independent, empty list per output column
sui_dict = {column: [] for column in ('country', 'year', 'suicide rate', 'sex')}

In [67]:
# Collect the fields of interest from every API record, one list per column.
# The dictionary is rebuilt from scratch each time, so re-running this cell
# cannot append a second copy of the records to lists filled by an earlier run.
field_by_column = {
    'country': 'SpatialDim',
    'year': 'TimeDim',
    'suicide rate': 'NumericValue',
    'sex': 'Dim1',
}
sui_dict = {
    column: [entry[field] for entry in sui_data['value']]
    for column, field in field_by_column.items()
}

In [68]:
# Turn the column lists into a DataFrame and preview its first rows
sui_df = pd.DataFrame(data=sui_dict)
sui_df.head(n=5)

Unnamed: 0,country,year,suicide rate,sex
0,GLOBAL,2016,10.5328,BTSX
1,AFG,2003,7.6,FMLE
2,AFG,2007,7.11,FMLE
3,AFG,2006,7.31,FMLE
4,AFG,2005,7.44,FMLE


In [69]:
# Keep only the both-sexes rows ('BTSX'), then drop the 'sex' column, which is
# no longer needed. The guard makes the cell safe to re-run: once 'sex' has
# been dropped, the filter would otherwise raise KeyError: 'sex'.
if 'sex' in sui_df.columns:
    sui_df = sui_df.loc[sui_df['sex'] == 'BTSX'].drop(columns='sex')

In [70]:
# Rows (years of data) per country/region code. The recorded run shows 194
# distinct codes: 20 rows for each code displayed, except GLOBAL with 21.
sui_df['country'].value_counts()

GLOBAL    21
SLB       20
NLD       20
NOR       20
NPL       20
          ..
GRC       20
GRD       20
GTM       20
GUY       20
WPR       20
Name: country, Length: 194, dtype: int64

In [71]:
# Summarize the filtered frame: column dtypes and non-null counts
sui_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3881 entries, 0 to 11640
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   country       3881 non-null   object 
 1   year          3881 non-null   int64  
 2   suicide rate  3881 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 121.3+ KB


# IHDI DataFrame

In [72]:
# Relative path of the IHDI time-series CSV
ihdi_data_path = "../test/IHDI_time_series.csv"
# Load the CSV into a DataFrame and display it
ihdi_data = pd.read_csv(filepath_or_buffer=ihdi_data_path)
ihdi_data

Unnamed: 0,iso3,country,hdicode,region,hdi_2010,hdi_2011,hdi_2012,hdi_2013,hdi_2014,hdi_2015,...,ineq_inc_2010,ineq_inc_2011,ineq_inc_2012,ineq_inc_2013,ineq_inc_2014,ineq_inc_2015,ineq_inc_2016,ineq_inc_2017,ineq_inc_2018,ineq_inc_2019
0,AFG,Afghanistan,Low,SA,0.472,0.477,0.489,0.496,0.500,0.500,...,,,,10.800000,10.800000,10.800000,10.800000,10.800000,,
1,AGO,Angola,Medium,SSA,0.517,0.533,0.544,0.555,0.565,0.572,...,36.400000,50.000000,50.000000,50.000000,28.900000,28.900000,28.900000,28.900000,28.900000,28.900000
2,ALB,Albania,High,ECA,0.745,0.764,0.775,0.782,0.787,0.788,...,14.400000,18.300000,18.300000,18.300000,18.300000,18.300000,18.300000,12.200000,13.200000,13.178980
3,AND,Andorra,Very High,,0.837,0.836,0.858,0.856,0.863,0.862,...,,,,,,,,,,
4,ARE,United Arab Emirates,Very High,AS,0.820,0.826,0.832,0.838,0.847,0.859,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,,Europe and Central Asia,,,0.739,0.748,0.755,0.765,0.772,0.775,...,18.571553,17.728896,18.654602,16.922236,16.584963,16.735295,17.080308,16.725733,16.401101,17.190203
202,,Latin America and the Caribbean,,,0.736,0.741,0.745,0.752,0.756,0.759,...,36.317271,39.265425,38.434441,36.289661,35.247378,34.914246,34.561079,33.258924,34.053272,34.938681
203,,South Asia,,,0.580,0.588,0.597,0.603,0.612,0.620,...,14.526296,15.013335,15.849408,17.890255,17.828920,17.805116,21.306943,17.609085,18.377523,18.464559
204,,Sub-Saharan Africa,,,0.501,0.508,0.515,0.525,0.530,0.535,...,26.041262,28.421803,30.407189,28.194985,27.474611,27.405337,27.043387,27.728122,27.653806,27.620839


In [73]:
# Check the number of countries: count rows per `iso3` code
# (value_counts skips rows whose code is missing by default)
ihdi_data['iso3'].value_counts()

AFG    1
QAT    1
MYS    1
NAM    1
NER    1
      ..
GNQ    1
GRC    1
GRD    1
GTM    1
ZWE    1
Name: iso3, Length: 195, dtype: int64

In [95]:
# Inspect the end of the file; in the recorded output the rows from 195
# onward are aggregate rows with an empty `iso3` code
ihdi_data.tail(15)

Unnamed: 0,iso3,country,hdicode,region,hdi_2010,hdi_2011,hdi_2012,hdi_2013,hdi_2014,hdi_2015,...,ineq_inc_2010,ineq_inc_2011,ineq_inc_2012,ineq_inc_2013,ineq_inc_2014,ineq_inc_2015,ineq_inc_2016,ineq_inc_2017,ineq_inc_2018,ineq_inc_2019
191,YEM,Yemen,Low,AS,0.506,0.506,0.504,0.509,0.502,0.483,...,17.6,17.6,17.6,17.6,20.6,20.6,20.6,21.8,21.8,21.8
192,ZAF,South Africa,High,SSA,0.664,0.665,0.675,0.685,0.693,0.701,...,40.9,,,,57.3,56.4,56.4,56.4,57.7,56.996
193,ZMB,Zambia,Medium,SSA,0.527,0.534,0.549,0.557,0.561,0.569,...,20.8,20.8,42.6,42.6,42.6,48.6,48.6,48.6,48.6,44.84022
194,ZWE,Zimbabwe,Medium,SSA,0.482,0.499,0.525,0.537,0.547,0.553,...,34.5,34.5,35.8,35.8,35.8,35.8,27.0,27.0,27.0,28.76901
195,,Very high human development,,,0.87,0.874,0.877,0.882,0.885,0.889,...,19.833915,21.067764,19.301829,22.126533,21.765491,19.97138,21.012903,20.366883,19.445313,20.401155
196,,High human development,,,0.705,0.711,0.718,0.725,0.73,0.735,...,28.060905,28.506207,28.361115,28.376503,28.962528,29.588516,29.328523,25.814592,27.515909,28.024197
197,,Medium human development,,,0.571,0.579,0.587,0.593,0.601,0.609,...,16.178684,17.045227,17.996849,18.816762,18.530211,18.449152,21.35296,18.733478,19.661378,19.744047
198,,Low human development,,,0.468,0.475,0.481,0.491,0.497,0.5,...,24.343847,27.214562,29.049161,25.577953,23.381376,23.49151,24.13374,24.528348,24.938316,25.085119
199,,Arab States,,,0.676,0.68,0.687,0.686,0.687,0.691,...,17.769818,17.835611,17.602896,17.331168,17.711421,26.235268,28.512269,26.063087,25.443312,25.3612
200,,East Asia and the Pacific,,,0.688,0.697,0.704,0.711,0.718,0.724,...,27.064922,27.181363,27.179944,27.04714,27.421614,27.380111,26.967756,23.065155,25.570667,26.186272


In [98]:
# Remove the aggregate rows (development groups and regions), which have no
# `iso3` code. Filtering on the missing code instead of a hard-coded row
# position (195) keeps the cell correct if the file's row count or ordering
# ever changes.
cleaned_ihdi = ihdi_data.dropna(subset=['iso3'])
# Check that the rows without an `iso3` code are gone
cleaned_ihdi.tail(15)

Unnamed: 0,iso3,country,hdicode,region,hdi_2010,hdi_2011,hdi_2012,hdi_2013,hdi_2014,hdi_2015,...,ineq_inc_2010,ineq_inc_2011,ineq_inc_2012,ineq_inc_2013,ineq_inc_2014,ineq_inc_2015,ineq_inc_2016,ineq_inc_2017,ineq_inc_2018,ineq_inc_2019
180,TZA,Tanzania (United Republic of),Low,SSA,0.481,0.487,0.496,0.497,0.504,0.514,...,17.6,20.6,20.9,20.9,22.7,22.7,22.7,22.4,22.4,22.4
181,UGA,Uganda,Low,SSA,0.498,0.504,0.507,0.513,0.519,0.525,...,26.4,29.1,29.1,27.3,27.3,27.3,27.3,24.2,24.9,24.940388
182,UKR,Ukraine,High,ECA,0.755,0.76,0.764,0.767,0.771,0.765,...,10.4,10.9,10.9,10.9,9.2,9.2,9.2,8.5,8.5,8.5
183,URY,Uruguay,Very High,LAC,0.782,0.789,0.793,0.8,0.803,0.806,...,26.3,27.8,27.9,27.1,24.2,26.2,26.4,25.3,22.0,23.357915
184,USA,United States,Very High,,0.916,0.919,0.92,0.918,0.92,0.921,...,23.5,32.4,24.1,35.6,35.6,27.0,27.0,28.1,26.6,27.144
185,UZB,Uzbekistan,High,ECA,0.669,0.676,0.685,0.692,0.696,0.701,...,17.9,17.9,20.1,20.1,20.1,20.1,20.1,,,
186,VCT,Saint Vincent and the Grenadines,High,LAC,0.718,0.721,0.72,0.723,0.733,0.733,...,,,,,,,,,,
187,VEN,Venezuela (Bolivarian Republic of),High,LAC,0.757,0.769,0.772,0.777,0.775,0.769,...,32.0,44.9,44.9,28.4,28.4,28.4,28.4,25.2,25.2,25.2
188,VNM,Viet Nam,High,EAP,0.661,0.671,0.676,0.681,0.683,0.688,...,18.2,11.4,11.4,14.6,22.0,21.4,21.4,21.4,18.1,19.133
189,VUT,Vanuatu,Medium,EAP,0.59,0.591,0.591,0.593,0.594,0.598,...,,,,18.5,18.5,18.5,18.5,19.7,19.7,19.7


In [103]:
# Display the cleaned frame
cleaned_ihdi

Unnamed: 0,iso3,country,hdicode,region,hdi_2010,hdi_2011,hdi_2012,hdi_2013,hdi_2014,hdi_2015,...,ineq_inc_2010,ineq_inc_2011,ineq_inc_2012,ineq_inc_2013,ineq_inc_2014,ineq_inc_2015,ineq_inc_2016,ineq_inc_2017,ineq_inc_2018,ineq_inc_2019
0,AFG,Afghanistan,Low,SA,0.472,0.477,0.489,0.496,0.500,0.500,...,,,,10.8,10.8,10.8,10.8,10.8,,
1,AGO,Angola,Medium,SSA,0.517,0.533,0.544,0.555,0.565,0.572,...,36.4,50.0,50.0,50.0,28.9,28.9,28.9,28.9,28.9,28.90000
2,ALB,Albania,High,ECA,0.745,0.764,0.775,0.782,0.787,0.788,...,14.4,18.3,18.3,18.3,18.3,18.3,18.3,12.2,13.2,13.17898
3,AND,Andorra,Very High,,0.837,0.836,0.858,0.856,0.863,0.862,...,,,,,,,,,,
4,ARE,United Arab Emirates,Very High,AS,0.820,0.826,0.832,0.838,0.847,0.859,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,WSM,Samoa,High,EAP,0.698,0.701,0.698,0.700,0.703,0.707,...,,,,,,,,,,
191,YEM,Yemen,Low,AS,0.506,0.506,0.504,0.509,0.502,0.483,...,17.6,17.6,17.6,17.6,20.6,20.6,20.6,21.8,21.8,21.80000
192,ZAF,South Africa,High,SSA,0.664,0.665,0.675,0.685,0.693,0.701,...,40.9,,,,57.3,56.4,56.4,56.4,57.7,56.99600
193,ZMB,Zambia,Medium,SSA,0.527,0.534,0.549,0.557,0.561,0.569,...,20.8,20.8,42.6,42.6,42.6,48.6,48.6,48.6,48.6,44.84022
