In [1]:
import requests, json, os, warnings, re
import pandas as pd
from functools import reduce

In [2]:
# Scheme and host of the World Bank API; request paths are appended to this.
domain = 'https://api.worldbank.org'

In [3]:
def get_json_data(indicator, page, timeout=30):
    """
    Download the records of one World Bank indicator for all countries.

    Requests ``/v2/country/all/indicator/<indicator>`` in JSON format once per
    page number and concatenates the record lists in page order.

    Parameters
    ----------
    indicator : str
        Indicator code appended to the request path, e.g. "SP.POP.TOTL".
    page : int
        Exclusive upper bound on the page number: pages ``1 .. page - 1`` are
        requested, so pass the number of pages to read plus one.
    timeout : float, optional
        Seconds to wait for each response before giving up (default 30).
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    list
        The second element of every page's JSON payload, concatenated.

    Raises
    ------
    requests.HTTPError
        If a page is answered with a 4xx/5xx status code.
    requests.Timeout
        If a page does not answer within ``timeout`` seconds.
    """
    url = domain + "/v2/country/all/indicator/" + indicator
    data_json = []
    # One session for the whole download so the connection is reused across
    # pages instead of being re-established for every request.
    with requests.Session() as session:
        for i in range(1, page):
            response = session.get(url,
                                   params={"format": "json", "page": i},
                                   timeout=timeout)
            # Fail loudly on an error status rather than trying to parse an
            # error body as data.
            response.raise_for_status()
            # Element 0 of the payload is skipped and element 1 holds the
            # records. A null record slot contributes nothing -- presumably
            # what the API sends for a page without data; TODO confirm.
            data_json.extend(response.json()[1] or [])
    return data_json

In [4]:
def process_json_data(json_data, year, label):
    """
    Turn raw indicator records into a two-column table for a single year.

    Parameters
    ----------
    json_data : list of dict
        Records as returned by ``get_json_data``. Each record must provide
        ``'date'``, ``'value'`` and a ``'country'`` mapping that has a
        ``'value'`` entry.
    year : str
        Value to match against each record's ``'date'`` field. The comparison
        is a plain equality test, so the type must match the records
        (callers in this notebook pass strings such as ``'2018'``).
    label : str
        Name given to the value column of the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``['country', label]``, one row per record of the requested
        year, keeping the row labels of the matching records. Empty (with the
        same two columns) when ``json_data`` holds no records.
    """
    columns = ['country', label]
    records = pd.DataFrame(json_data)
    if records.empty:
        # No records at all: there is no 'date' column to filter on.
        return pd.DataFrame(columns=columns)

    # Take an explicit copy of the filtered rows so the assignment below
    # writes to an independent frame and not to a slice of `records`
    # (the pattern behind SettingWithCopyWarning).
    selected = records.loc[records['date'] == year, ['country', 'value']].copy()
    # Each 'country' cell is a mapping; keep only its 'value' entry.
    selected['country'] = selected['country'].map(lambda entry: entry['value'])
    selected.columns = columns

    return selected

In [11]:
# Raw records for indicator SP.POP.1564.TO.ZS. The page argument is an
# exclusive bound, so this reads pages 1-317. Despite the df_ prefix the
# result is still a plain list of records at this point.
df_1564 = get_json_data(indicator="SP.POP.1564.TO.ZS", page=318)

In [26]:
# Raw records for indicator SP.POP.65UP.TO.ZS (pages 1-317).
# NOTE(review): the indicator code says 65UP while the variable says 64up --
# presumably both mean the 65-and-over share; confirm.
df_64up = get_json_data(indicator="SP.POP.65UP.TO.ZS", page=318)

In [15]:
# Reduce the raw records to a (country, age_1564) table for 2018.
# This rebinds df_1564 from the raw list to a DataFrame, so the cell cannot
# be re-run without re-running the download cell first.
df_1564 = process_json_data(json_data=df_1564, year='2018', label='age_1564')

In [33]:
# Reduce the raw records to a (country, age_64up) table for 2018.
df_64up = process_json_data(json_data=df_64up, year='2018', label='age_64up')

In [39]:
# Inner-join the two 2018 tables on country, then derive age_0014 as what is
# left of 100 after subtracting the other two columns.
age_comp = df_1564.merge(df_64up, on='country').assign(
    age_0014=lambda shares: 100 - (shares['age_1564'] + shares['age_64up'])
)

In [43]:
# Write the age-composition table to disk without the row index.
age_comp.to_csv(
    '../data/clean_data/age_composition_2018.csv',
    index=False,
)

In [44]:
# Raw records for indicator SH.PRV.SMOK (pages 1-317); still a list here.
df_smok = get_json_data(indicator="SH.PRV.SMOK", page=318)

In [50]:
# Reduce the raw records to a (country, smok) table for 2016.
df_smok = process_json_data(json_data=df_smok, year='2016', label='smok')

In [51]:
# Write the 2016 smoking table to disk without the row index.
df_smok.to_csv(
    '../data/clean_data/smoking_prevalence_2016.csv',
    index=False,
)

In [52]:
# Raw records for indicator EN.ATM.PM25.MC.M3 (pages 1-317); still a list here.
df_air = get_json_data(indicator="EN.ATM.PM25.MC.M3", page=318)

In [55]:
# Reduce the raw records to a (country, air_polution) table for 2017.
df_air = process_json_data(json_data=df_air, year='2017', label='air_polution')

In [56]:
# Write the 2017 air-pollution table to disk without the row index.
df_air.to_csv(
    '../data/clean_data/air_polution_2017.csv',
    index=False,
)

In [57]:
# Raw records for indicator SH.MED.BEDS.ZS (pages 1-317); still a list here.
df_bed = get_json_data(indicator="SH.MED.BEDS.ZS", page=318)

In [82]:
# Reduce the raw records to a (country, hosp_bed) table for 2013.
df_bed = process_json_data(json_data=df_bed, year='2013', label='hosp_bed')

In [83]:
# Write the 2013 hospital-bed table to disk without the row index.
df_bed.to_csv(
    '../data/clean_data/hospital_bed_2013.csv',
    index=False,
)

In [68]:
# Raw records for indicator SH.MED.PHYS.ZS (pages 1-317); still a list here.
df_phy = get_json_data(indicator="SH.MED.PHYS.ZS", page=318)

In [84]:
# Reduce the raw records to a (country, doctor) table for 2015.
df_phy = process_json_data(json_data=df_phy, year='2015', label='doctor')

In [85]:
# Write the 2015 physicians table to disk without the row index.
df_phy.to_csv(
    '../data/clean_data/physicians_2015.csv',
    index=False,
)

In [86]:
# Raw records for indicator SH.MED.NUMW.P3 (pages 1-317); still a list here.
df_nurse = get_json_data(indicator="SH.MED.NUMW.P3", page=318)

In [95]:
# Reduce the raw records to a (country, nurse_midwivies) table for 2015 and
# discard every row that has a missing entry.
df_nurse = (
    process_json_data(json_data=df_nurse, year='2015', label='nurse_midwivies')
    .dropna()
)

In [96]:
# Write the 2015 nurses/midwives table to disk without the row index.
df_nurse.to_csv(
    '../data/clean_data/nurse_midwivies_2015.csv',
    index=False,
)

In [None]:
# Raw records for indicator SP.POP.TOTL (pages 1-317); still a list here.
df_pop = get_json_data(indicator="SP.POP.TOTL", page=318)

In [50]:
# Reduce the raw records to a (country, pop_total) table for 2018.
df_pop = process_json_data(json_data=df_pop, year='2018', label='pop_total')

In [51]:
# Write the 2018 total-population table to disk without the row index.
df_pop.to_csv(
    '../data/clean_data/total_population_2018.csv',
    index=False,
)

In [44]:
# Raw records for indicator EN.POP.DNST (pages 1-317); still a list here.
df_pop_dens = get_json_data(indicator="EN.POP.DNST", page=318)

In [50]:
# Reduce the raw records to a (country, pop_density) table for 2016.
# The value column needs its own name: labelling it 'smok' (as the smoking
# table already is) makes the final merge emit 'smok_x' / 'smok_y' and leaves
# no column identifiable as population density.
df_pop_dens = process_json_data(df_pop_dens, '2016', 'pop_density')

In [51]:
# Write the 2016 population-density table to disk without the row index.
df_pop_dens.to_csv(
    '../data/clean_data/population_density_2016.csv',
    index=False,
)

In [112]:
# Starting from the age-composition table, left-join each indicator table on
# country in turn, so every row of age_comp is kept.
# NOTE(review): df_bed is built and saved in an earlier cell but is not part
# of this list -- confirm the omission is intentional.
df_comb = reduce(
    lambda merged, addition: merged.merge(addition, how='left', on='country'),
    [df_smok, df_air, df_phy, df_nurse, df_pop, df_pop_dens],
    age_comp,
)

In [113]:
# Order the combined table by the country column (ascending).
df_comb = df_comb.sort_values('country')

In [114]:
# Write the combined per-country table to disk without the row index.
df_comb.to_csv(
    '../data/clean_data/country_data.csv',
    index=False,
)