In [3]:
# Import required libraries
import pickle
import copy
import pathlib
import dash
import math
import datetime as dt
import pandas as pd
from dash.dependencies import Input, Output, State, ClientsideFunction
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objects as go



In [4]:
from mapbox import Geocoder
mapbox_access_token = "pk.eyJ1IjoiamNyYW53ZWxsd2FyZCIsImEiOiJja2NkMW02aXcwYTl5MnFwbjdtdDB0M3oyIn0.zkIzPc4NSjLZvrY-DWrlZg"

geocoder = Geocoder(access_token=mapbox_access_token)

def geocode_address(address):
    """Geocode street address into lat/long."""
    response = geocoder.forward(address)
    coords = response.json()["features"][0]["center"]
    longitude = coords[0]
    latitude = coords[1]
    return dict(longitude=longitude, latitude=latitude)

In [5]:
sdmx_url = 'https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data/ECARO,TRANSMONEE,1.0/.{}....?format=csv'

In [15]:
codes = [
    "EDU_SDG_STU_L2_GLAST_MAT",
    "EDU_SDG_STU_L2_GLAST_REA",
    "EDU_SDG_STU_L1_GLAST_MAT",
    "EDU_SDG_STU_L1_G2OR3_MAT",
    "EDU_SDG_STU_L1_GLAST_REA",
    "EDU_SDG_STU_L1_G2OR3_REA",
    "EDU_SDG_GER_L01",
    "EDUNF_PRP_L02",
    "EDUNF_ROFST_L2",
    "EDU_SDG_QUTP_L02",
    "EDU_SDG_QUTP_L1",
    "EDU_SDG_QUTP_L2",
    "EDU_SDG_QUTP_L3",
    "EDU_SDG_TRTP_L02",
    "EDU_SDG_TRTP_L1",
    "EDU_SDG_TRTP_L2",
    "EDU_SDG_TRTP_L3",
    "EDUNF_ROFST_L1",
    "EDUNF_ROFST_L2",
    "EDUNF_ROFST_L3",
    "EDUNF_OFST_L1",
    "EDUNF_OFST_L2",
    "EDUNF_OFST_L3",
    "EDUNF_NIR_L1_ENTRYAGE",
    "EDUNF_CR_L3",
    "EDUNF_NER_L02",
    "EDUNF_NERA_L1_UNDER1",
    "EDUNF_NERA_L1",
    "EDUNF_NERA_L2",
    "EDUNF_GER_L1",
    "EDUNF_GER_L2",
    "EDUNF_GER_L3",
    "EDUNF_NIR_L1_ENTRYAGE",
    "EDUNF_STU_L1_TOT",
    "EDUNF_STU_L2_TOT",
    "EDUNF_STU_L3_TOT",
    "EDU_SDG_SCH_L1",
    "EDU_SDG_SCH_L2",
    "EDU_SDG_SCH_L3",
    "EDUNF_PRP_L02",
    "EDUNF_OFST_L1T3",
    "EDUNF_SAP_L1T3",
    "EDUNF_SAP_L02"
]
df = pd.DataFrame()
inds = set(codes)
for ind in inds:
    sdmx = pd.read_csv(sdmx_url.format(ind))
    sdmx["CODE"] = ind
    df = df.append(sdmx)

In [7]:
countries = df["Geographic area"].unique()

In [8]:
df = df.merge(
    right=pd.DataFrame(
        [dict(country=country, **geocode_address(country)) for country in countries]
    ),
    left_on='Geographic area',
    right_on='country'
)

In [41]:
df

Unnamed: 0,REF_AREA,Geographic area,INDICATOR,Indicator,SEX,Sex,AGE,Age,RESIDENCE,Residence,...,Unit of measure,OBS_FOOTNOTE,FREQ,Frequency,DATA_SOURCE,UNIT_MULTIPLIER,Unit multiplier,OBS_STATUS,Observation Status,CODE
0,ALB,Albania,EDU_SDG_TRTP_L1,Percentage of trained teachers in primary educ...,_T,Total,_T,Total,_T,Total,...,%,,1,One year,UIS: EDU_SDG_TRTP_L1,0,Units,A,Normal value,EDU_SDG_TRTP_L1
1,ARM,Armenia,EDU_SDG_TRTP_L1,Percentage of trained teachers in primary educ...,_T,Total,_T,Total,_T,Total,...,%,,1,One year,UIS: EDU_SDG_TRTP_L1,0,Units,A,Normal value,EDU_SDG_TRTP_L1
2,ARM,Armenia,EDU_SDG_TRTP_L1,Percentage of trained teachers in primary educ...,_T,Total,_T,Total,_T,Total,...,%,,1,One year,UIS: EDU_SDG_TRTP_L1,0,Units,A,Normal value,EDU_SDG_TRTP_L1
3,ARM,Armenia,EDU_SDG_TRTP_L1,Percentage of trained teachers in primary educ...,_T,Total,_T,Total,_T,Total,...,%,,1,One year,UIS: EDU_SDG_TRTP_L1,0,Units,A,Normal value,EDU_SDG_TRTP_L1
4,AZE,Azerbaijan,EDU_SDG_TRTP_L1,Percentage of trained teachers in primary educ...,_T,Total,_T,Total,_T,Total,...,%,,1,One year,UIS: EDU_SDG_TRTP_L1,0,Units,A,Normal value,EDU_SDG_TRTP_L1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,SVK,Slovakia,EDU_SDG_STU_L1_GLAST_REA,Proportion of children at the end of primary e...,M,Male,_T,Total,_T,Total,...,%,,1,One year,UIS: EDU_SDG_STU_L1_GLAST_REA,0,Units,A,Normal value,EDU_SDG_STU_L1_GLAST_REA
128,SVK,Slovakia,EDU_SDG_STU_L1_GLAST_REA,Proportion of children at the end of primary e...,M,Male,_T,Total,_T,Total,...,%,,1,One year,UIS: EDU_SDG_STU_L1_GLAST_REA,0,Units,A,Normal value,EDU_SDG_STU_L1_GLAST_REA
129,SVK,Slovakia,EDU_SDG_STU_L1_GLAST_REA,Proportion of children at the end of primary e...,M,Male,_T,Total,_T,Total,...,%,,1,One year,UIS: EDU_SDG_STU_L1_GLAST_REA,0,Units,A,Normal value,EDU_SDG_STU_L1_GLAST_REA
130,SVK,Slovakia,EDU_SDG_STU_L1_GLAST_REA,Proportion of children at the end of primary e...,M,Male,_T,Total,_T,Total,...,%,,1,One year,UIS: EDU_SDG_STU_L1_GLAST_REA,0,Units,A,Normal value,EDU_SDG_STU_L1_GLAST_REA


In [23]:
data.merge(
    right=countries,
    left_on='Geographic area',
    right_on='name'
)

Unnamed: 0,CODE,Indicator,Geographic area,TIME_PERIOD,OBS_VALUE,name,longitude,latitude
0,EDUNF_OFST_L1,Number of out of school children of primary sc...,Albania,1999,19659.000000,Albania,20.156691,40.641090
1,EDUNF_OFST_L1,Number of out of school children of primary sc...,Albania,2000,21103.000000,Albania,20.156691,40.641090
2,EDUNF_OFST_L1,Number of out of school children of primary sc...,Albania,2001,21787.500000,Albania,20.156691,40.641090
3,EDUNF_OFST_L1,Number of out of school children of primary sc...,Albania,2003,21092.000000,Albania,20.156691,40.641090
4,EDUNF_OFST_L1,Number of out of school children of primary sc...,Albania,2004,22114.500000,Albania,20.156691,40.641090
...,...,...,...,...,...,...,...,...
2419,EDUNF_ROFST_L3,Out of school children rate: Upper secondary e...,Uzbekistan,2013,14.460020,Uzbekistan,63.169372,41.750437
2420,EDUNF_ROFST_L3,Out of school children rate: Upper secondary e...,Uzbekistan,2014,17.612487,Uzbekistan,63.169372,41.750437
2421,EDUNF_ROFST_L3,Out of school children rate: Upper secondary e...,Uzbekistan,2015,16.168617,Uzbekistan,63.169372,41.750437
2422,EDUNF_ROFST_L3,Out of school children rate: Upper secondary e...,Uzbekistan,2016,16.463327,Uzbekistan,63.169372,41.750437


In [10]:
# Create controls
county_options = [
    {"label": str(country), "value": str(country)} for country in data['Geographic area'].unique()
]

In [11]:
county_options

[{'label': 'Albania', 'value': 'Albania'},
 {'label': 'Armenia', 'value': 'Armenia'},
 {'label': 'Azerbaijan', 'value': 'Azerbaijan'},
 {'label': 'Bosnia and Herzegovina', 'value': 'Bosnia and Herzegovina'},
 {'label': 'Bulgaria', 'value': 'Bulgaria'},
 {'label': 'Belarus', 'value': 'Belarus'},
 {'label': 'Czechia', 'value': 'Czechia'},
 {'label': 'Estonia', 'value': 'Estonia'},
 {'label': 'Georgia', 'value': 'Georgia'},
 {'label': 'Croatia', 'value': 'Croatia'},
 {'label': 'Hungary', 'value': 'Hungary'},
 {'label': 'Kyrgyzstan', 'value': 'Kyrgyzstan'},
 {'label': 'Kazakhstan', 'value': 'Kazakhstan'},
 {'label': 'Lithuania', 'value': 'Lithuania'},
 {'label': 'Latvia', 'value': 'Latvia'},
 {'label': 'Republic of Moldova', 'value': 'Republic of Moldova'},
 {'label': 'Montenegro', 'value': 'Montenegro'},
 {'label': 'North Macedonia', 'value': 'North Macedonia'},
 {'label': 'Poland', 'value': 'Poland'},
 {'label': 'Romania', 'value': 'Romania'},
 {'label': 'Serbia', 'value': 'Serbia'},
 {'

In [11]:
years =[i for i in range(2010, 2020)]

indicators = df['Indicator'].unique()

{index: str(year) for index,year in enumerate(years)}

{0: '2010',
 1: '2011',
 2: '2012',
 3: '2013',
 4: '2014',
 5: '2015',
 6: '2016',
 7: '2017',
 8: '2018',
 9: '2019'}

In [14]:
countries = df["Geographic area"].unique()


In [101]:
numerator = "EDUNF_OFST_L1,EDUNF_OFST_L2,EDUNF_OFST_L3"
denominator = "EDUNF_SAP_L1T3"
absolute=True

In [102]:
query = (
    "CODE in @indicator & TIME_PERIOD in @years & `Geographic area` in @countries"
)
numors = numerator.split(",")
indicator = numors
# select last value for each country
indicator_values = (
    df.query(query)
    .groupby(
        [
            "CODE",
            "Indicator",
            "Geographic area",
            "UNIT_MEASURE",
        ]
    )
    .agg({"TIME_PERIOD": "last", "OBS_VALUE": "last"})
    .reset_index()
    .set_index(["Geographic area", "TIME_PERIOD"])
)
# select the avalible denominators for countiries in selected years
indicator = [denominator]
denominator_values = (
    df.query(query)
    .groupby(
        [
            "CODE",
            "Indicator",
            "Geographic area",
            "UNIT_MEASURE",
        ]
    )
    .agg({"TIME_PERIOD": "last", "OBS_VALUE": "last"})
    .reset_index()
    .set_index(["Geographic area", "TIME_PERIOD"])
)
# select only those denominators that match avalible indicators
denominators = denominator_values[
    denominator_values.index.isin(indicator_values.index)
]["OBS_VALUE"]

denominator_sum = denominators.to_numpy().sum()

indicator_sum = (
    indicator_values["OBS_VALUE"] * denominator_sum
    if absolute
    else (denominators / denominator_sum)
).dropna()  # will drop missing countires

sources = indicator_sum.index.tolist()


In [103]:
indicator_values

Unnamed: 0_level_0,Unnamed: 1_level_0,CODE,Indicator,UNIT_MEASURE,OBS_VALUE
Geographic area,TIME_PERIOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Albania,2013,EDUNF_OFST_L1,Number of out-of-school children of primary sc...,PS,6487.0
Armenia,2018,EDUNF_OFST_L1,Number of out-of-school children of primary sc...,PS,6050.0
Azerbaijan,2018,EDUNF_OFST_L1,Number of out-of-school children of primary sc...,PS,12763.0
Belarus,2018,EDUNF_OFST_L1,Number of out-of-school children of primary sc...,PS,1198.0
Bulgaria,2017,EDUNF_OFST_L1,Number of out-of-school children of primary sc...,PS,17828.0
...,...,...,...,...,...
Slovenia,2017,EDUNF_OFST_L3,Number of out-of-school youth of upper seconda...,PS,1687.0
Tajikistan,2011,EDUNF_OFST_L3,Number of out-of-school youth of upper seconda...,PS,53100.0
Turkey,2017,EDUNF_OFST_L3,Number of out-of-school youth of upper seconda...,PS,356792.0
Ukraine,2014,EDUNF_OFST_L3,Number of out-of-school youth of upper seconda...,PS,32722.0


In [91]:
denominator_values

Unnamed: 0_level_0,Unnamed: 1_level_0,CODE,Indicator,UNIT_MEASURE,OBS_VALUE
Geographic area,TIME_PERIOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Albania,2019,EDUNF_SAP_L1T3,Population of the official age for primary and...,PS,228553.0
Armenia,2019,EDUNF_SAP_L1T3,Population of the official age for primary and...,PS,243667.0
Azerbaijan,2018,EDUNF_SAP_L1T3,Population of the official age for primary and...,PS,875972.0
Belarus,2019,EDUNF_SAP_L1T3,Population of the official age for primary and...,PS,557932.0
Bulgaria,2019,EDUNF_SAP_L1T3,Population of the official age for primary and...,PS,421698.0
Croatia,2019,EDUNF_SAP_L1T3,Population of the official age for primary and...,PS,254948.0
Czechia,2019,EDUNF_SAP_L1T3,Population of the official age for primary and...,PS,706562.0
Estonia,2019,EDUNF_SAP_L1T3,Population of the official age for primary and...,PS,85295.0
Georgia,2019,EDUNF_SAP_L1T3,Population of the official age for primary and...,PS,308217.0
Hungary,2019,EDUNF_SAP_L1T3,Population of the official age for primary and...,PS,595141.0


In [104]:
indicator_sum

Geographic area  TIME_PERIOD
Albania          2013           3.526262e+10
Armenia          2018           3.288714e+10
Azerbaijan       2018           6.937828e+10
Belarus          2018           6.512197e+09
Bulgaria         2017           9.691106e+10
                                    ...     
Slovenia         2017           9.170348e+09
Tajikistan       2011           2.886458e+11
Turkey           2017           1.939482e+12
Ukraine          2014           1.778732e+11
Uzbekistan       2017           7.016322e+11
Name: OBS_VALUE, Length: 79, dtype: float64

In [105]:
sources

[('Albania', 2013),
 ('Armenia', 2018),
 ('Azerbaijan', 2018),
 ('Belarus', 2018),
 ('Bulgaria', 2017),
 ('Croatia', 2011),
 ('Czechia', 2014),
 ('Estonia', 2017),
 ('Georgia', 2016),
 ('Hungary', 2017),
 ('Kazakhstan', 2019),
 ('Kyrgyzstan', 2012),
 ('Latvia', 2017),
 ('Lithuania', 2017),
 ('Montenegro', 2017),
 ('North Macedonia', 2017),
 ('Poland', 2017),
 ('Republic of Moldova', 2018),
 ('Romania', 2017),
 ('Russian Federation', 2016),
 ('Serbia', 2018),
 ('Slovakia', 2017),
 ('Slovenia', 2014),
 ('Tajikistan', 2016),
 ('Turkey', 2017),
 ('Ukraine', 2014),
 ('Uzbekistan', 2014),
 ('Albania', 2018),
 ('Armenia', 2018),
 ('Azerbaijan', 2018),
 ('Belarus', 2018),
 ('Bulgaria', 2017),
 ('Croatia', 2013),
 ('Czechia', 2017),
 ('Estonia', 2017),
 ('Georgia', 2014),
 ('Hungary', 2017),
 ('Kazakhstan', 2013),
 ('Kyrgyzstan', 2018),
 ('Latvia', 2017),
 ('Lithuania', 2012),
 ('Montenegro', 2018),
 ('Poland', 2017),
 ('Republic of Moldova', 2018),
 ('Romania', 2017),
 ('Russian Federation', 2

In [106]:
indicator_sum.to_numpy().sum()

10217045236723.0

In [107]:
indicator_values.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,CODE,Indicator,UNIT_MEASURE,OBS_VALUE
Geographic area,TIME_PERIOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Albania,2013,EDUNF_OFST_L1,Number of out-of-school children of primary sc...,PS,6487.0
Armenia,2018,EDUNF_OFST_L1,Number of out-of-school children of primary sc...,PS,6050.0
Azerbaijan,2018,EDUNF_OFST_L1,Number of out-of-school children of primary sc...,PS,12763.0
Belarus,2018,EDUNF_OFST_L1,Number of out-of-school children of primary sc...,PS,1198.0
Bulgaria,2017,EDUNF_OFST_L1,Number of out-of-school children of primary sc...,PS,17828.0


In [51]:
# select the avalible denominators for countiries in selected years
indicator = [denominator]
denominator_values = (
    df.query(query)
    .groupby(["CODE", "Indicator", "Geographic area", "UNIT_MEASURE", "Unit of measure"])
    .agg({"TIME_PERIOD": "last", "OBS_VALUE": "last"})
    .reset_index()
    .set_index(["Geographic area", "TIME_PERIOD"])
)
denominator_values

Unnamed: 0_level_0,Unnamed: 1_level_0,CODE,Indicator,UNIT_MEASURE,Unit of measure,OBS_VALUE
Geographic area,TIME_PERIOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Albania,2018,EDUNF_SAP_L02,Population of the official age for pre-primary...,PS,Persons,52740.0
Armenia,2018,EDUNF_SAP_L02,Population of the official age for pre-primary...,PS,Persons,68787.0
Azerbaijan,2018,EDUNF_SAP_L02,Population of the official age for pre-primary...,PS,Persons,273802.0
Belarus,2018,EDUNF_SAP_L02,Population of the official age for pre-primary...,PS,Persons,182111.0
Bosnia and Herzegovina,2019,EDUNF_SAP_L02,Population of the official age for pre-primary...,PS,Persons,47710.0
Bulgaria,2018,EDUNF_SAP_L02,Population of the official age for pre-primary...,PS,Persons,143007.0
Croatia,2018,EDUNF_SAP_L02,Population of the official age for pre-primary...,PS,Persons,85284.0
Czechia,2018,EDUNF_SAP_L02,Population of the official age for pre-primary...,PS,Persons,171650.0
Estonia,2018,EDUNF_SAP_L02,Population of the official age for pre-primary...,PS,Persons,30532.0
Georgia,2018,EDUNF_SAP_L02,Population of the official age for pre-primary...,PS,Persons,89465.0


In [58]:
# select only those denominators that match avalible indicators
denominators = denominator_values[
    denominator_values.index.isin(indicator_values.index)
]["OBS_VALUE"]
denominators.index.tolist()


[('Albania', 2018),
 ('Azerbaijan', 2018),
 ('Belarus', 2018),
 ('Georgia', 2018),
 ('Kyrgyzstan', 2018),
 ('Montenegro', 2018),
 ('Republic of Moldova', 2018),
 ('Serbia', 2018),
 ('Ukraine', 2014)]

In [53]:
(indicator_values["OBS_VALUE"] * (denominators / denominator_sum))

Geographic area      TIME_PERIOD
Albania              2018           0.016135
Armenia              2018           0.021044
Azerbaijan           2018           0.083764
Belarus              2018           0.055713
Kazakhstan           2019           0.248078
Kyrgyzstan           2018           0.094495
Montenegro           2018           0.003555
Republic of Moldova  2018           0.024046
Serbia               2018           0.041473
Uzbekistan           2018           0.411699
Name: OBS_VALUE, dtype: float64

In [37]:

indicator_sum = (indicator_values["OBS_VALUE"] * (denominators / denominators.to_numpy().sum())).dropna().to_numpy().sum()


In [38]:
indicator_sum

44.437160891981755