## Config

In [None]:
import sys
import configparser
# Load the project-wide settings. read_file() is kept (instead of read()) so a
# missing settings.ini raises immediately rather than being silently skipped;
# the `with` block guarantees the file handle is closed once parsing is done.
config = configparser.ConfigParser()
with open('../../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the directory from [PATHS] libs_path first on sys.path so the local
# helper modules imported in the next cell can be found.
sys.path.insert(0, config.get('PATHS','libs_path'))
# Value from [DATABASE] engine_path; passed to create_engine() in the next cell.
engine_path = config.get('DATABASE','engine_path')
# Passed as the second argument to every download_file() call in this notebook.
local_path = '../data/'


## Imports

In [None]:
import postgres #from local file postgres.py
import commons
from commons import inline_table_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py
from importlib import reload

import json
import pandas as pd
from sqlalchemy import create_engine

# Build the SQLAlchemy engine from the engine_path value read in the Config cell.
engine = create_engine(engine_path)
# Wrap the engine in the PostgresDriver helper from the local postgres.py module;
# its to_sql() method is what the ingest cells call.
db = postgres.PostgresDriver(engine)

# Institution Directory

## Load file

In [None]:
# Fetch directorio_oficial_2016.csv from the remote folder via the local
# commons.download_file helper, passing ';' as the delimiter.
# NOTE(review): presumably returns a pandas DataFrame (the next cell calls
# .rename(..., axis=1) on the result) — confirm against commons.py.
dim_institutions = download_file("http://pacha.datawheel.us/datachile/education/establishments/analysis/2_performance/",
                                 local_path,
                                 "directorio_oficial_2016.csv",
                                 delimiter=';')

In [None]:
# Lower-case every column label, then hand the frame to the PostgresDriver
# helper. The two strings are presumably the target schema and table name —
# confirm against postgres.py.
dim_institutions = dim_institutions.rename(columns=lambda name: name.lower())
db.to_sql(
    dim_institutions,
    'education',
    'dim_educational_institutions_new',
)

# Registered Students

## Load file

In [None]:

# Fetch registered.csv from the remote tidy-data folder via the local
# commons.download_file helper; no delimiter is passed here, so the helper's
# default applies.
# NOTE(review): presumably returns a pandas DataFrame — confirm against commons.py.
facts_registered = download_file("http://pacha.datawheel.us/datachile/education/psu/transparency_data/3_tidy_data/3_registered/",
                                 local_path,
                                 "registered.csv")

## Ingest 

In [None]:
# Hand the downloaded data to the PostgresDriver helper. The two strings are
# presumably the target schema and table name (the index and check cells refer
# to education.fact_registered) — confirm against postgres.py.
db.to_sql(facts_registered, 'education', 'fact_registered')

### Indexes

In [None]:
# Index the two columns that the check query further down joins on
# (comuna_datachile_id) and filters on (anio_proceso).
# IF NOT EXISTS (PostgreSQL 9.5+) keeps this cell re-runnable: without it a
# second execution fails because the index name is already taken.
# NOTE(review): Engine.execute() was removed in SQLAlchemy 2.0; on 2.x this
# needs a connection from engine.begin() and sqlalchemy.text().
engine.execute("""
CREATE INDEX IF NOT EXISTS fact_registered_comuna_datachile_id_idx
ON education.fact_registered (comuna_datachile_id)
""")

engine.execute("""
CREATE INDEX IF NOT EXISTS fact_registered_anio_proceso_idx
ON education.fact_registered (anio_proceso)
""")



## Check

In [None]:
import altair as alt

# Sanity check on the ingested table: AVG(promlm_actual) (aliased psu) and
# AVG(nem) per region name and rbd for anio_proceso = 2015, skipping rows
# where either measure is NULL.
df = pd.read_sql(
    sql="""SELECT c.region_name as rn, rbd, AVG(promlm_actual) AS psu, AVG(nem) AS nem 
               FROM education.fact_registered r
               INNER JOIN public.dim_comunas c ON c.id = r.comuna_datachile_id
               WHERE anio_proceso = 2015 AND promlm_actual IS NOT NULL AND nem IS NOT NULL 
               GROUP BY c.region_name, rbd""",
    con=engine,
)

# Name each encoding first so the chart definition itself stays on one line.
nem_axis = alt.X('nem', scale=alt.Scale(domain=[4,7]))
psu_axis = alt.Y('psu', scale=alt.Scale(domain=[300,700]))
region_colour = alt.Color('rn', type='nominal')

# Scatter plot with one point per result row, coloured by region name.
chart = alt.Chart(df).mark_point().encode(x=nem_axis, y=psu_axis, color=region_colour)
chart

In [None]:
# Display the chart's JSON specification as the cell output.
chart.to_json()

# Enrollment

## Load file

In [None]:
# Fetch enrollment.csv from the remote tidy-data folder via the local
# commons.download_file helper; no delimiter is passed here, so the helper's
# default applies.
# NOTE(review): presumably returns a pandas DataFrame — confirm against commons.py.
facts_enrollment = download_file("http://pacha.datawheel.us/datachile/education/mineduc/3_tidy_data/1_enrollment/",
                                 local_path,
                                 "enrollment.csv")

In [None]:
# Hand the downloaded enrollment data to the PostgresDriver helper. The two
# strings are presumably the target schema and table name — confirm against
# postgres.py.
db.to_sql(facts_enrollment, 'education', 'fact_university_enrollment')
