## Config

In [2]:
import sys
import configparser
# Load the shared project settings (paths and database connection info).
config = configparser.ConfigParser()
# Use a context manager so the file handle is closed as soon as parsing is done;
# passing a bare open(...) to read_file left the handle open for the life of the kernel.
# read_file (unlike config.read) still raises if settings.ini is missing.
with open('../../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the configured library directory first on the import path
# (presumably where the local postgres.py / commons.py helpers live -- confirm).
sys.path.insert(0, config.get('PATHS','libs_path'))
# Database connection string, later handed to sqlalchemy.create_engine.
engine_path = config.get('DATABASE','engine_path')

## Imports

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

## Load file

In [6]:
# Directory (relative to the notebook) passed to download_file as the local target.
local_path = '../data/'

# Fetch registered.csv via the project helper download_file (commons.py).
# The captured output shows it reuses an already-downloaded local copy.
# NOTE(review): the return type is not visible here; the result is passed
# straight to db.to_sql below -- confirm it is a DataFrame.
facts_registered = download_file(
    "http://pacha.datawheel.us/datachile/education/mineduc/3_tidy_data/3_registered/",
    local_path,
    "registered.csv",
)

Already downloaded. Using: ../data/registered.csv


## Ingest

In [4]:
# Build the SQLAlchemy engine from the connection string read from settings.ini.
engine = create_engine(engine_path)
# Wrap the engine in the project-local PostgresDriver (postgres.py); its to_sql
# method is what performs the ingestion in the next cell.
db = postgres.PostgresDriver(engine)

In [12]:
# Ingest facts_registered into education.fact_registered.
# Per the captured output, the driver issues DROP TABLE IF EXISTS, then
# CREATE TABLE, then COPY -- so re-running this cell appears to rebuild the table.
db.to_sql(facts_registered, 'education', 'fact_registered')

DROP TABLE IF EXISTS education.fact_registered;
CREATE TABLE "education"."fact_registered" (
"mrun" INTEGER,
  "anio_proceso" INTEGER,
  "sexo_id" INTEGER,
  "rbd" INTEGER,
  "codigo_ensenianza_id" INTEGER,
  "rama_educacional_id" TEXT,
  "grupo_dependencia_id" INTEGER,
  "anio_egreso" INTEGER,
  "promedio_notas" INTEGER,
  "puntajes_proceso_id" INTEGER,
  "puntaje_nem" REAL,
  "puntaje_ranking" REAL,
  "lyc_actual" REAL,
  "mate_actual" REAL,
  "hycs_actual" REAL,
  "ciencias_actual" REAL,
  "promlm_actual" REAL,
  "bea_id" REAL,
  "lyc_anterior" REAL,
  "mate_anterior" REAL,
  "hycs_anterior" REAL,
  "ciencias_anterior" REAL,
  "promlm_anterior" REAL,
  "tipo_identificacion_id" INTEGER,
  "comuna_datachile_id" INTEGER,
  "dia_nacimiento" INTEGER,
  "mes_nacimiento" INTEGER,
  "anio_nacimiento" INTEGER
)
COPY "education"."fact_registered" ("mrun","anio_proceso","sexo_id","rbd","codigo_ensenianza_id","rama_educacional_id","grupo_dependencia_id","anio_egreso","promedio_notas","puntajes_

## Check

In [19]:
import altair as alt

# Sanity check on the ingested table: per (rbd, grupo_dependencia_id) pair,
# average promlm_actual (aliased psu) and puntaje_nem (aliased nem) for the
# 2015 process, skipping rows where either value is NULL.
df = pd.read_sql(
    """SELECT rbd, grupo_dependencia_id, AVG(promlm_actual) AS psu, AVG(puntaje_nem) AS nem 
               FROM education.fact_registered 
               WHERE anio_proceso = 2015 AND promlm_actual IS NOT NULL AND puntaje_nem IS NOT NULL 
               GROUP BY rbd, grupo_dependencia_id""",
    engine,
)

# Scatter of nem (x, axis domain fixed to 300-800) against psu (y),
# coloured by grupo_dependencia_id.
nem_axis = alt.X('nem', scale=alt.Scale(domain=[300,800]))
chart = alt.Chart(df).mark_point().encode(
    x=nem_axis,
    y='psu',
    color='grupo_dependencia_id',
)

# Emit the chart specification as indented JSON text.
print(chart.to_json(indent=2))

{
  "$schema": "https://vega.github.io/schema/vega-lite/v1.2.1.json",
  "data": {
    "values": [
      {
        "grupo_dependencia_id": 3,
        "nem": 560.837837837838,
        "psu": 590.117117117117,
        "rbd": 9259
      },
      {
        "grupo_dependencia_id": 3,
        "nem": 516.77358490566,
        "psu": 528.594339622642,
        "rbd": 20167
      },
      {
        "grupo_dependencia_id": 4,
        "nem": 609.903225806452,
        "psu": 599.612903225806,
        "rbd": 15576
      },
      {
        "grupo_dependencia_id": 3,
        "nem": 545.300751879699,
        "psu": 534.15037593985,
        "rbd": 25012
      },
      {
        "grupo_dependencia_id": 5,
        "nem": 516.782051282051,
        "psu": 470.711538461538,
        "rbd": 9407
      },
      {
        "grupo_dependencia_id": 4,
        "nem": 599.40625,
        "psu": 660.984375,
        "rbd": 14498
      },
      {
        "grupo_dependencia_id": 3,
        "nem": 566.04347826087,
        "p