In [1]:
# Import required libraries
import pickle
import copy
import pathlib
import dash
import math
import datetime as dt
import pandas as pd
from dash.dependencies import Input, Output, State, ClientsideFunction
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objects as go



In [None]:
import os

from mapbox import Geocoder

# The Mapbox access token is read from the environment instead of being stored
# in the notebook, so it is not committed or shared along with the file.
# NOTE(review): the token that used to be hard-coded in this cell has been
# published with the notebook and should be rotated.
mapbox_access_token = os.environ.get("MAPBOX_ACCESS_TOKEN")
if not mapbox_access_token:
    raise RuntimeError(
        "Set the MAPBOX_ACCESS_TOKEN environment variable before running this notebook."
    )

# Shared geocoding client used by geocode_address().
geocoder = Geocoder(access_token=mapbox_access_token)

def geocode_address(address):
    """Geocode a street address (or place name) into longitude/latitude.

    Sends ``address`` to the module-level ``geocoder`` and reads the ``center``
    of the first feature in the JSON response.

    Parameters
    ----------
    address
        Text to look up; passed unchanged to ``geocoder.forward``.

    Returns
    -------
    dict
        ``{"longitude": center[0], "latitude": center[1]}`` of the first feature.

    Raises
    ------
    IndexError
        If the response contains no features for ``address``.
    Exception
        Whatever ``response.raise_for_status()`` raises for an unsuccessful
        HTTP response (e.g. a rejected access token).
    """
    response = geocoder.forward(address)
    # Fail on HTTP errors here; otherwise an error payload without a
    # "features" key shows up later as an unrelated KeyError.
    response.raise_for_status()

    features = response.json()["features"]
    if not features:
        # Same exception type as the bare [0] lookup, but it names the input.
        raise IndexError("No geocoding result for address: {!r}".format(address))

    center = features[0]["center"]
    return dict(longitude=center[0], latitude=center[1])

In [None]:
# URL template for the UNICEF SDMX REST endpoint (CSV output); the single `{}`
# placeholder is filled with one indicator code via str.format.
sdmx_url = (
    'https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data/'
    'ECARO,TRANSMONEE,1.0/.{}....?format=csv'
)

In [None]:
# Indicator codes to download. A few codes are listed more than once; they are
# collapsed by the set() below, so each one is fetched a single time.
codes = [
    "EDU_SDG_STU_L2_GLAST_MAT",
    "EDU_SDG_STU_L2_GLAST_REA",
    "EDU_SDG_STU_L1_GLAST_MAT",
    "EDU_SDG_STU_L1_G2OR3_MAT",
    "EDU_SDG_STU_L1_GLAST_REA",
    "EDU_SDG_STU_L1_G2OR3_REA",
    "EDU_SDG_GER_L01",
    "EDUNF_PRP_L02",
    "EDUNF_ROFST_L2",
    "EDU_SDG_QUTP_L02",
    "EDU_SDG_QUTP_L1",
    "EDU_SDG_QUTP_L2",
    "EDU_SDG_QUTP_L3",
    "EDU_SDG_TRTP_L02",
    "EDU_SDG_TRTP_L1",
    "EDU_SDG_TRTP_L2",
    "EDU_SDG_TRTP_L3",
    "EDUNF_ROFST_L1",
    "EDUNF_ROFST_L2",
    "EDUNF_ROFST_L3",
    "EDUNF_OFST_L1",
    "EDUNF_OFST_L2",
    "EDUNF_OFST_L3",
    "EDUNF_NIR_L1_ENTRYAGE",
    "EDUNF_CR_L3",
    "EDUNF_NER_L02",
    "EDUNF_NERA_L1_UNDER1",
    "EDUNF_NERA_L1",
    "EDUNF_NERA_L2",
    "EDUNF_GER_L1",
    "EDUNF_GER_L2",
    "EDUNF_GER_L3",
    "EDUNF_NIR_L1_ENTRYAGE",
    "EDUNF_STU_L1_TOT",
    "EDUNF_STU_L2_TOT",
    "EDUNF_STU_L3_TOT",
    "EDU_SDG_SCH_L1",
    "EDU_SDG_SCH_L2",
    "EDU_SDG_SCH_L3",
    "EDUNF_PRP_L02",
    "EDUNF_OFST_L1T3",
    "EDUNF_SAP_L1T3",
    "EDUNF_SAP_L02"
]
inds = set(codes)

# Download one CSV per distinct code and tag its rows with that code.
# The frames are collected in a list and concatenated once: growing a
# DataFrame inside the loop copies all previous rows on every iteration, and
# DataFrame.append no longer exists in pandas 2.x.
# Iterating in sorted order keeps the row order identical between runs
# (iteration order of a set of strings is not stable across interpreter runs).
frames = []
for ind in sorted(inds):
    sdmx = pd.read_csv(sdmx_url.format(ind))
    sdmx["CODE"] = ind
    frames.append(sdmx)

# ignore_index gives the combined frame a unique 0..n-1 row index instead of
# repeating each file's own row numbers.
df = pd.concat(frames, ignore_index=True)

In [None]:
# Distinct values of the "Geographic area" column, in order of first appearance.
countries = df.loc[:, "Geographic area"].unique()

In [None]:
# Geocode every distinct country once (one lookup per country, not per row).
country_coords = pd.DataFrame(
    [dict(country=country, **geocode_address(country)) for country in countries]
)

# Attach the coordinates to each observation. Columns left behind by an earlier
# run of this cell are dropped first, so re-running the cell does not create
# suffixed duplicates (country_x / longitude_y / ...).
df = df.drop(columns=["country", "longitude", "latitude"], errors="ignore").merge(
    right=country_coords,
    left_on="Geographic area",
    right_on="country",
)

In [None]:
# Display the column labels currently present in df.
df.columns

In [None]:
# NOTE(review): this cell looks like a superseded draft of the geocoding merge.
# `data` is not assigned in any cell above, `countries` is the array returned
# by `.unique()` rather than a DataFrame, no `name` column is created above,
# and the merge result is not assigned to anything. Confirm and remove it, or
# repair it.
data.merge(
    right=countries,
    left_on='Geographic area',
    right_on='name'
)

In [None]:
# Create controls
# One {"label", "value"} entry per distinct "Geographic area" value.
# Reads from `df`, the frame built by the cells above; the name `data` used
# here before is never assigned in this notebook, so the cell raised NameError
# on a fresh kernel.
county_options = [
    {"label": str(country), "value": str(country)}
    for country in df["Geographic area"].unique()
]

In [None]:
# Display the generated list of label/value option dicts.
county_options

In [None]:
# Years 2010 through 2019 inclusive.
years = list(range(2010, 2020))

# Distinct values of the "Indicator" column.
indicators = df['Indicator'].unique()

# Cell output: position -> year (as a string) for every entry in `years`.
dict(enumerate(map(str, years)))

In [None]:
# Recompute the distinct "Geographic area" values from the current df.
countries = df.loc[:, "Geographic area"].unique()


In [None]:
# Inputs for the aggregation cell below.
# Comma-separated indicator codes; split on "," before use.
numerator = "EDUNF_OFST_L1,EDUNF_OFST_L2,EDUNF_OFST_L3"
# Single indicator code used to select the denominator rows.
denominator = "EDUNF_SAP_L1T3"
# Chooses which branch of the indicator_sum expression is evaluated.
absolute = True

In [None]:
def latest_observations(frame, codes, years, countries):
    """Return the most recent non-missing observation per indicator and area.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns ``CODE``, ``Indicator``, ``Geographic area``,
        ``UNIT_MEASURE``, ``TIME_PERIOD`` and ``OBS_VALUE``.
    codes, years, countries : list-like
        Allowed values for ``CODE``, ``TIME_PERIOD`` and ``Geographic area``.

    Returns
    -------
    pandas.DataFrame
        One row per (CODE, Indicator, Geographic area, UNIT_MEASURE) group,
        indexed by (Geographic area, TIME_PERIOD), holding the group's
        ``OBS_VALUE`` at its latest period with a value.
    """
    selected = frame[
        frame["CODE"].isin(codes)
        & frame["TIME_PERIOD"].isin(years)
        & frame["Geographic area"].isin(countries)
    ]
    return (
        selected
        # "last" skips missing values column by column, so without this a
        # group's TIME_PERIOD and OBS_VALUE could come from different rows.
        .dropna(subset=["OBS_VALUE"])
        # "last" means last in row order; make that the latest period.
        .sort_values("TIME_PERIOD", kind="mergesort")
        .groupby(["CODE", "Indicator", "Geographic area", "UNIT_MEASURE"])
        .agg({"TIME_PERIOD": "last", "OBS_VALUE": "last"})
        .reset_index()
        .set_index(["Geographic area", "TIME_PERIOD"])
    )


# Kept because later cells still run df.query(query); it reads the global
# `indicator` through @indicator.
query = (
    "CODE in @indicator & TIME_PERIOD in @years & `Geographic area` in @countries"
)
numors = numerator.split(",")

# select the last value for each country and numerator code
indicator_values = latest_observations(df, numors, years, countries)

# select the available denominators for countries in the selected years
# (`indicator` is left bound to the denominator code, as before)
indicator = [denominator]
denominator_values = latest_observations(df, indicator, years, countries)

# select only those denominators that match available indicators
denominators = denominator_values[
    denominator_values.index.isin(indicator_values.index)
]["OBS_VALUE"]

denominator_sum = denominators.to_numpy().sum()

# NOTE(review): the conditional expression binds looser than `*`, so this is
# (OBS_VALUE * denominator_sum) when `absolute` is true and the bare
# denominator shares otherwise; the non-absolute branch never uses the
# indicator values. Later cells compute OBS_VALUE * (denominators /
# denominator_sum) instead. Confirm the intended formula before relying on
# indicator_sum; the expression is left unchanged here.
indicator_sum = (
    indicator_values["OBS_VALUE"] * denominator_sum
    if absolute
    else (denominators / denominator_sum)
).dropna()  # drops rows (countries) with a missing value

sources = indicator_sum.index.tolist()


In [None]:
# Display the numerator observations selected above.
indicator_values

In [None]:
# Display the denominator observations selected above.
denominator_values

In [None]:
# Display the per-row result of the indicator_sum expression.
indicator_sum

In [None]:
# Display the index entries of indicator_sum that remained after dropna().
sources

In [None]:
# Total of indicator_sum over all of its rows.
indicator_sum.to_numpy().sum()

In [None]:
# Preview the first rows of the numerator observations.
indicator_values.head()

In [None]:
# select the available denominators for countries in the selected years,
# this time also grouping by the "Unit of measure" column
indicator = [denominator]  # read by `query` through @indicator
denominator_values = (
    df.query(query)
    # "last" skips missing values column by column, so without this a group's
    # TIME_PERIOD and OBS_VALUE could come from different rows.
    .dropna(subset=["OBS_VALUE"])
    # "last" means last in row order; make that the latest period.
    .sort_values("TIME_PERIOD", kind="mergesort")
    .groupby(["CODE", "Indicator", "Geographic area", "UNIT_MEASURE", "Unit of measure"])
    .agg({"TIME_PERIOD": "last", "OBS_VALUE": "last"})
    .reset_index()
    .set_index(["Geographic area", "TIME_PERIOD"])
)
denominator_values

In [None]:
# Keep the OBS_VALUE of only those denominator rows whose
# (Geographic area, TIME_PERIOD) index entry also occurs in indicator_values.
denominators = denominator_values.loc[
    denominator_values.index.isin(indicator_values.index), "OBS_VALUE"
]
# Cell output: the index entries that were kept.
denominators.index.to_list()


In [None]:
# Numerator values multiplied by each denominator's share of denominator_sum
# (the two Series are aligned on their index by pandas).
(indicator_values["OBS_VALUE"] * (denominators / denominator_sum))

In [None]:

# Weight each numerator value by its denominator's share of the denominator
# total, drop rows without a match, and add up what is left.
# Note: this rebinds indicator_sum from a Series to a single number.
indicator_sum = (
    (indicator_values["OBS_VALUE"] * (denominators / denominators.to_numpy().sum()))
    .dropna()
    .to_numpy()
    .sum()
)


In [None]:
# Display the current value of indicator_sum.
indicator_sum