In [None]:
# import dependencies
import requests
import json
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats as st

# WHO Data Exploration

In [None]:
# Entry point for WHO's indicators
who_url = 'https://ghoapi.azureedge.net/api/Indicator'

# Read the indicator catalogue from the API. The timeout stops the cell from
# hanging on a stalled connection, and raise_for_status() reports an HTTP error
# here rather than as a confusing JSON/KeyError further down.
who_response = requests.get(who_url, timeout=30)
who_response.raise_for_status()
who_data = who_response.json()

# Print the name of every indicator next to its position in the list, so that a
# single record can be looked up by index afterwards
for index, indicator in enumerate(who_data['value']):
    print( index, indicator['IndicatorName'])

In [None]:
# Inspect one indicator record by its position in the list printed above.
# NOTE(review): 665 is presumably the position of the suicide-rate indicator
# (MH_12) queried below — confirm, since the position depends on the order in
# which the API returns the indicators.
who_data['value'][665]

In [None]:
# Create entry point for suicide rate data: the same API base as the indicator
# list, with the indicator code MH_12 as the final path segment
sui_url = 'https://ghoapi.azureedge.net/api/MH_12'

In [None]:
# Read data from API; fail fast on a stalled connection or an HTTP error
# instead of parsing an error page as if it were data
sui_response = requests.get(sui_url, timeout=30)
sui_response.raise_for_status()
sui_data = sui_response.json()

# Preview only the first few records: showing the whole payload as cell output
# bloats the notebook without adding information
sui_data['value'][:3]

In [None]:
# Count the records in the response's 'value' list
len(sui_data['value'])

In [None]:
# Start with one empty list per output column; each key becomes a DataFrame
# column once the lists have been filled from the API records
sui_dict = {column: [] for column in ('iso3', 'year', 'suicide rate', 'sex')}

In [None]:
# Collect the needed fields of every JSON record into column lists.
# The dictionary is rebuilt from scratch on every run, so executing this cell
# more than once cannot append the same records twice.
# Maps each output column to the JSON field it is read from.
field_by_column = {
    'iso3': 'SpatialDim',
    'year': 'TimeDim',
    'suicide rate': 'NumericValue',
    'sex': 'Dim1',
}
sui_dict = {
    column: [entry[field] for entry in sui_data['value']]
    for column, field in field_by_column.items()
}

In [None]:
# Turn the column lists into a DataFrame and preview its first rows
sui_df = pd.DataFrame(data=sui_dict)
sui_df.head(n=5)

In [None]:
# Keep only the rows whose sex code is 'BTSX' (both sexes); after filtering the
# 'sex' column holds a single value, so it is dropped in the same step
both_sexes = sui_df['sex'] == 'BTSX'
sui_df = sui_df.loc[both_sexes].drop(columns='sex')

In [None]:
# Count rows per iso3 code to see how many countries/regions are present and how
# many yearly records each one has (author's observation: 194 countries/regions
# with 20 years of data)
sui_df['iso3'].value_counts()

In [None]:
# Summarise column dtypes and non-null counts of the filtered frame
sui_df.info()

In [None]:
# Move iso3 into the index; the year filter in the next cell works on this
# indexed frame and then turns the index back into a column
index_sui = sui_df.set_index('iso3')

In [None]:
# Keep only the years strictly after 2009 and strictly before 2020
in_study_window = (index_sui['year'] > 2009) & (index_sui['year'] < 2020)
index_sui = index_sui.loc[in_study_window]
cleaned_sui_df = index_sui.reset_index()

# Confirm that only the wanted years remain
cleaned_sui_df['year'].value_counts()

# IHDI DataFrame

In [None]:
# Designate IHDI data location (a path relative to the working directory)
ihdi_data_path = 'Resources/country_ihdi_UNDP.csv'
# Read IHDI data into a DataFrame and display it
ihdi_data = pd.read_csv(ihdi_data_path)
ihdi_data

In [None]:
# Check number of countries by counting the rows per iso3 code
ihdi_data['iso3'].value_counts()

In [None]:
# Preview the last 15 rows of the IHDI table
ihdi_data.tail(15)

In [None]:

# Keep only the first 195 rows of the IHDI table.
# NOTE(review): the rows from position 195 onward are presumably the non-country
# rows seen in the tail(15) preview — confirm against the CSV.
cleaned_ihdi = ihdi_data.iloc[:195].copy()

cleaned_ihdi.tail()

In [None]:
# create new dataframe with just hdi / ihdi entries
# The wanted columns are the iso3 code, the country name, and one hdi_<year>
# plus one ihdi_<year> column for each year from 2010 to 2019. The names are
# generated rather than written out 22 times, and whole columns are copied
# instead of looking up one cell at a time, so the result no longer depends on
# cleaned_ihdi having a 0..n-1 index.
study_years = range(2010, 2020)
ihdi_columns = (
    ['iso3', 'country']
    + [f'hdi_{year}' for year in study_years]
    + [f'ihdi_{year}' for year in study_years]
)

# create dictionary with iso3 code, country, and hdi/ihdi values
ihdi_dic = {column: list(cleaned_ihdi[column]) for column in ihdi_columns}
cleaned_ihdiv2 = pd.DataFrame(ihdi_dic)

In [None]:
# Display the reduced frame that holds only the iso3, country, hdi and ihdi columns
cleaned_ihdiv2

In [None]:
# Reshape the ten hdi_<year> columns from wide to long: one row per
# country and year column, with the column name in 'year' and the value in 'hdi'
hdi_year_columns = [f'hdi_{year}' for year in range(2010, 2020)]
hdi = cleaned_ihdiv2.melt(
    id_vars=['iso3', 'country'],
    value_vars=hdi_year_columns,
    var_name='year',
    value_name='hdi',
)

In [None]:
# Reduce labels such as 'hdi_2010' to the year part. Taking the LAST piece of
# the split (rather than the column labelled 1) keeps this cell safe to re-run:
# once the prefix is gone the split yields a single column, and asking for
# column 1 would raise a KeyError.
hdi['year'] = hdi['year'].str.split('_', expand=True).iloc[:, -1]
hdi.head()

In [None]:
# Reshape the ten ihdi_<year> columns from wide to long: one row per
# country and year column, with the column name in 'year' and the value in 'ihdi'
ihdi_year_columns = [f'ihdi_{year}' for year in range(2010, 2020)]
ihdi = cleaned_ihdiv2.melt(
    id_vars=['iso3', 'country'],
    value_vars=ihdi_year_columns,
    var_name='year',
    value_name='ihdi',
)
ihdi.head()

In [None]:
# Reduce labels such as 'ihdi_2010' to the year part. Taking the LAST piece of
# the split (rather than the column labelled 1) keeps this cell safe to re-run:
# once the prefix is gone the split yields a single column, and asking for
# column 1 would raise a KeyError.
ihdi['year'] = ihdi['year'].str.split('_', expand=True).iloc[:, -1]
ihdi.head()

In [None]:
# Join the long-format ihdi and hdi frames so that each country/year row carries
# both values; the inner join keeps only keys present in both frames
merge_keys = ['iso3', 'country', 'year']
final_ihdi_df = ihdi.merge(hdi, how='inner', on=merge_keys)

In [None]:
# Summarise column dtypes and non-null counts of the merged hdi/ihdi frame
final_ihdi_df.info()

In [None]:
# Convert 'year' to int64 so that it can be used as a merge key together with
# cleaned_sui_df
year_dtype = {'year': 'int64'}
final_ihdi_df = final_ihdi_df.astype(year_dtype)

# Merge DataFrames

In [None]:
# Combine suicide rates with hdi/ihdi values: rows are matched on year and iso3
# code, and only keys present in both frames are kept (inner join)
sui_vs_ihdi = cleaned_sui_df.merge(final_ihdi_df, how='inner', on=['year', 'iso3'])
sui_vs_ihdi

In [None]:
# Report the non-null count of every column to see how many entries are missing
sui_vs_ihdi.info()

In [None]:
# Drop every row that has a missing value in any column. Reassigning the result
# instead of mutating in place keeps the step explicit and chainable.
sui_vs_ihdi = sui_vs_ihdi.dropna()

In [None]:
# Re-check dtypes and non-null counts of the merged frame
sui_vs_ihdi.info()

In [None]:
# Count how many rows the merged frame holds for each year
sui_vs_ihdi['year'].value_counts()

# Visualize and Analyze HDI vs. Suicide Rates

In [None]:
# Scatter plot of hdi against suicide rate; both axes run from zero up to the
# largest observed value
hdi_values = sui_vs_ihdi['hdi']
suicide_rates = sui_vs_ihdi['suicide rate']
plt.scatter(hdi_values, suicide_rates, s=1)
plt.xlim(0, max(hdi_values))
plt.ylim(0, max(suicide_rates))

In [None]:
# Fit a least-squares line of suicide rate against hdi. linregress returns the
# slope, intercept, r-value, p-value and standard error, unpacked here as
# m, b, r, p and e.
m, b, r, p, e = st.linregress(sui_vs_ihdi['hdi'], sui_vs_ihdi['suicide rate'])

In [None]:
# Show the fitted slope and intercept
m, b

In [None]:
# Fit the regression inside this cell. Other cells in this notebook reassign the
# globals m, b, r and p, so relying on them could silently draw the wrong line
# when cells are re-run out of order.
hdi_slope, hdi_intercept, hdi_r, hdi_p, hdi_stderr = st.linregress(
    sui_vs_ihdi['hdi'], sui_vs_ihdi['suicide rate']
)
sui_hdi_regress = hdi_slope * sui_vs_ihdi['hdi'] + hdi_intercept

# Annotation text: p-value in scientific notation and r squared to 2 decimals
hdi_r_squared_str = "{}\u00b2".format('r')
hdi_p_r_val_str = f'pvalue={hdi_p:.2e}, {hdi_r_squared_str}={round(hdi_r*hdi_r,2)}'

plt.scatter(sui_vs_ihdi['hdi'], sui_vs_ihdi['suicide rate'], s=1)
# Position the annotation in axes-fraction coordinates so that it stays visible
# whatever the data range is (a data-coordinate anchor outside the axes limits
# is not drawn)
plt.annotate(hdi_p_r_val_str, (0.05, 0.92), xycoords='axes fraction', c='r')
plt.xlabel('HDI Rating')
# WHO publishes indicator MH_12 as a rate per 100 000 population, not a percentage
plt.ylabel('Suicide Rate (per 100,000 population)')
plt.title('HDI Rating of Country vs Suicide Rate')
plt.plot(sui_vs_ihdi['hdi'], sui_hdi_regress, c='red')
plt.savefig('Images/HDI_vs_Suicide_Rate.jpg', format='jpg')
plt.show()

In [None]:
# One panel per year (2 x 5 grid, shared axes): scatter of hdi against suicide
# rate with that year's least-squares line and its equation
figure, axeses = plt.subplots(nrows=2, ncols=5, figsize=(15,10), sharex=True, sharey=True)
# Sort by year so that the panels appear in chronological order
sui_vs_ihdi = sui_vs_ihdi.sort_values('year')
for axes, year in zip(axeses.flat, sui_vs_ihdi['year'].unique()):
    sui_vs_hdi_by_year = sui_vs_ihdi.loc[sui_vs_ihdi['year'] == year]
    # Keep the fit in its own variable so that the loop does not overwrite the
    # notebook-level m, b, r, p used by other cells
    year_fit = st.linregress(sui_vs_hdi_by_year['hdi'], sui_vs_hdi_by_year['suicide rate'])
    best_fit = year_fit.slope * sui_vs_hdi_by_year['hdi'] + year_fit.intercept
    # '+' in the format spec prints the intercept's own sign, avoiding 'x+-3'
    best_fit_eq = f'y={year_fit.slope:.1f}*x{year_fit.intercept:+.0f}'
    axes.plot(sui_vs_hdi_by_year['hdi'], best_fit, c='red')
    # The equation is anchored in axes-fraction coordinates: the former data
    # anchor x=10 lies far outside the hdi range, so the text was never drawn
    axes.annotate(best_fit_eq, (0.05, 0.92), xycoords='axes fraction', c='red')
    axes.set_title(year)
    axes.scatter(sui_vs_hdi_by_year['hdi'], sui_vs_hdi_by_year['suicide rate'],  s=1)

# The axes are shared, so label only the outer panels
for bottom_axes in axeses[-1, :]:
    bottom_axes.set_xlabel('HDI Rating')
for left_axes in axeses[:, 0]:
    left_axes.set_ylabel('Suicide Rate (per 100,000 population)')

# Visualize and Analyze IHDI vs. Suicide Rates

In [None]:
# Scatter plot of ihdi against suicide rate; both axes run from zero up to the
# largest observed value
ihdi_values = sui_vs_ihdi['ihdi']
suicide_rates = sui_vs_ihdi['suicide rate']
plt.scatter(ihdi_values, suicide_rates, s=1)
plt.xlim(0, max(ihdi_values))
plt.ylim(0, max(suicide_rates))

In [None]:
# Fit a least-squares line of suicide rate against ihdi. linregress returns the
# slope, intercept, r-value, p-value and standard error, unpacked here as
# m, b, r, p and e (replacing the values from the hdi fit).
m, b, r, p, e = st.linregress(sui_vs_ihdi['ihdi'], sui_vs_ihdi['suicide rate'])

In [None]:
# Show the fitted slope and intercept
m, b

In [None]:
# Fit the regression inside this cell. Other cells in this notebook reassign the
# globals m, b, r and p, so relying on them could silently draw the wrong line
# when cells are re-run out of order.
ihdi_slope, ihdi_intercept, ihdi_r, ihdi_p, ihdi_stderr = st.linregress(
    sui_vs_ihdi['ihdi'], sui_vs_ihdi['suicide rate']
)
sui_ihdi_regress = ihdi_slope * sui_vs_ihdi['ihdi'] + ihdi_intercept

# Annotation text: p-value in scientific notation and r squared to 2 decimals
ihdi_r_squared_str = "{}\u00b2".format('r')
ihdi_p_r_val_str = f'pvalue={ihdi_p:.2e}, {ihdi_r_squared_str}={round(ihdi_r*ihdi_r,2)}'

plt.scatter(sui_vs_ihdi['ihdi'], sui_vs_ihdi['suicide rate'], s=1)
plt.plot(sui_vs_ihdi['ihdi'], sui_ihdi_regress, c='red')
# Position the annotation in axes-fraction coordinates so that it stays visible
# whatever the data range is (a data-coordinate anchor outside the axes limits
# is not drawn)
plt.annotate(ihdi_p_r_val_str, (0.05, 0.92), xycoords='axes fraction', c='r')
plt.xlabel('IHDI Rating')
# WHO publishes indicator MH_12 as a rate per 100 000 population, not a percentage
plt.ylabel('Suicide Rate (per 100,000 population)')
plt.title('IHDI Rating of Country vs Suicide Rate')
plt.savefig('Images/IHDI_vs_Suicide_Rate.png', format='png')
plt.show()

In [None]:
# One panel per year (2 x 5 grid, shared axes): scatter of ihdi against suicide
# rate with that year's least-squares line and its equation
figure, axeses = plt.subplots(nrows=2, ncols=5, figsize=(15,10), sharex=True, sharey=True)
# Sort by year so that the panels appear in chronological order
sui_vs_ihdi = sui_vs_ihdi.sort_values('year')
for axes, year in zip(axeses.flat, sui_vs_ihdi['year'].unique()):
    sui_vs_ihdi_by_year = sui_vs_ihdi.loc[sui_vs_ihdi['year'] == year]
    # Keep the fit in its own variable so that the loop does not overwrite the
    # notebook-level m, b, r, p used by other cells
    year_fit = st.linregress(sui_vs_ihdi_by_year['ihdi'], sui_vs_ihdi_by_year['suicide rate'])
    best_fit = year_fit.slope * sui_vs_ihdi_by_year['ihdi'] + year_fit.intercept
    # '+' in the format spec prints the intercept's own sign, avoiding 'x+-3'
    best_fit_eq = f'y={year_fit.slope:.1f}*x{year_fit.intercept:+.0f}'
    axes.plot(sui_vs_ihdi_by_year['ihdi'], best_fit, c='red')
    # The equation is anchored in axes-fraction coordinates: the former data
    # anchor x=10 lies far outside the ihdi range, so the text was never drawn
    axes.annotate(best_fit_eq, (0.05, 0.92), xycoords='axes fraction', c='red')
    axes.set_title(year)
    axes.scatter(sui_vs_ihdi_by_year['ihdi'], sui_vs_ihdi_by_year['suicide rate'],  s=1)

# The axes are shared, so label only the outer panels
for bottom_axes in axeses[-1, :]:
    bottom_axes.set_xlabel('IHDI Rating')
for left_axes in axeses[:, 0]:
    left_axes.set_ylabel('Suicide Rate (per 100,000 population)')