# Ingest education: Sistema Nacional de Evaluación de Desempeño (SNED)

### Config

In [1]:
import sys
import configparser
# Load the shared project settings. The file is opened in a `with` block so
# the handle is closed as soon as parsing finishes (passing a bare `open(...)`
# to read_file leaves it open until garbage collection).
config = configparser.ConfigParser()
with open('../../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the configured library directory first on the import path
# (presumably where the local postgres.py / commons.py modules live).
sys.path.insert(0, config.get('PATHS','libs_path'))
# Connection string later handed to SQLAlchemy's create_engine.
engine_path = config.get('DATABASE','engine_path')

### Main params

In [2]:
# Base URL the source files are downloaded from.
remote_path = "http://pacha.datawheel.us/"
# Directory (relative to this notebook) where downloads are stored.
local_path = "../data/"

### Imports

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

In [4]:
# Fetch the SNED archive from the remote server into the local data folder.
download_zip_file(remote_path, local_path, "sned.zip")

# NOTE(review): the archive is requested as "sned.zip" but extracted as
# "temp.zip"; presumably download_zip_file stores it under that name -
# confirm against commons.py.
extract_zip_file(local_path, "temp.zip")

# Load the SNED CSV and show its column names as a quick sanity check.
df = download_file(remote_path, local_path, 'sned_06_17.csv')
list(df)

Unzipping... ../data/temp.zip
Already downloaded. Using: ../data/sned_06_17.csv


['year',
 'comuna_datachile_id',
 'institution_id',
 'administration2_id',
 'rural_id',
 'efectiveness',
 'overcoming',
 'initiative',
 'integration',
 'improvement',
 'fairness',
 'sned_score',
 'subsidized_100pc_id',
 'subsidized_60pc_id',
 'selected_id',
 'cluster_id']

### Rename columns & cast integer columns

In [5]:
# Rename the administration key to its final name, then cast the year and
# every *_id column to integers in a single chained expression.
df = (
    df.rename(columns={"administration2_id": "administration_id"})
      .astype(dict.fromkeys(
          [
              'year',
              'comuna_datachile_id',
              'institution_id',
              'administration_id',
              'rural_id',
              'subsidized_100pc_id',
              'subsidized_60pc_id',
              'selected_id',
              'cluster_id',
          ],
          'int',
      ))
)


### Ingest

In [6]:
# Build a SQLAlchemy engine from the connection string read from settings.ini
# and wrap it in the project's Postgres driver.
engine = create_engine(engine_path)
db = postgres.PostgresDriver(engine)
# Write the frame to education.fact_sned. The SQL logged below this cell shows
# the table being dropped if it exists, recreated, and bulk-loaded via COPY,
# so any previous contents of the table are replaced.
db.to_sql(df, 'education', 'fact_sned')

DROP TABLE IF EXISTS education.fact_sned;
CREATE TABLE "education"."fact_sned" (
"year" INTEGER,
  "comuna_datachile_id" INTEGER,
  "institution_id" INTEGER,
  "administration_id" INTEGER,
  "rural_id" INTEGER,
  "efectiveness" REAL,
  "overcoming" REAL,
  "initiative" REAL,
  "integration" REAL,
  "improvement" REAL,
  "fairness" REAL,
  "sned_score" REAL,
  "subsidized_100pc_id" INTEGER,
  "subsidized_60pc_id" INTEGER,
  "selected_id" INTEGER,
  "cluster_id" INTEGER
)
COPY "education"."fact_sned" ("year","comuna_datachile_id","institution_id","administration_id","rural_id","efectiveness","overcoming","initiative","integration","improvement","fairness","sned_score","subsidized_100pc_id","subsidized_60pc_id","selected_id","cluster_id") FROM STDIN WITH CSV HEADER DELIMITER ',';


### Related dims

In [7]:
# Load the administration dimension and drop the "2" suffix from its columns
# so the key matches the renamed fact column (administration_id).
df1 = (
    download_file(remote_path, local_path, 'administration2_id.csv')
    .rename(columns={
        "administration2_id": "administration_id",
        "administration2_es": "administration_es",
        "administration2_en": "administration_en",
    })
)

# Print the dimension as an <InlineTable> XML snippet.
# NOTE(review): the printed output contains a row with id 0 whose descriptions
# are the literal text "nan" - check whether that row should be labelled.
print(
    inline_table_xml(df1, 'administration', 'administration_id', 'administration_es')
)

Already downloaded. Using: ../data/administration2_id.csv

<InlineTable alias="administration">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Municipal</Value>
      <Value column="es_description">Municipal</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Particular Subvencionado</Value>
      <Value column="es_description">Particular Subvencionado</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
      <Value column="description">Corporación de Administración Delegada</Value>
      <Value column="es_description">Corporación de Administración Delegada</Value>
    <

In [8]:
# Load the cluster dimension and print it as an <InlineTable> XML snippet,
# using the Spanish label column for the descriptions.
df0 = download_file(remote_path, local_path, 'cluster_id.csv')
print(
    inline_table_xml(df0, 'cluster', 'cluster_id', 'cluster_es')
)

Already downloaded. Using: ../data/cluster_id.csv

<InlineTable alias="cluster">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">11</Value>
      <Value column="description">básica urbana grupo 1</Value>
      <Value column="es_description">básica urbana grupo 1</Value>
    </Row>
  <Row>
      <Value column="id">12</Value>
      <Value column="description">básica urbana grupo 2</Value>
      <Value column="es_description">básica urbana grupo 2</Value>
    </Row>
  <Row>
      <Value column="id">13</Value>
      <Value column="description">básica urbana grupo 3</Value>
      <Value column="es_description">básica urbana grupo 3</Value>
    </Row>
  <Row>
      <Value column="id">21</Value>
      <Value column="description">media urbana (con o sin básica) grupo 1</Value>
      <Value column="es_description">media ur

In [9]:
# Load the 100%-subsidy dimension and print it as an <InlineTable> XML snippet.
# NOTE(review): the English label column is passed here, and the printed
# "es_description" values come out in English too; the id 0 row prints the
# literal text "nan" - confirm both are intended.
df3 = download_file(remote_path, local_path, 'subsidized_100pc_id.csv')
print(
    inline_table_xml(df3, 'subsidized_100pc', 'subsidized_100pc_id', 'subsidized_100pc_en')
)

Already downloaded. Using: ../data/subsidized_100pc_id.csv

<InlineTable alias="subsidized_100pc">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Obtains 100% subsidy</Value>
      <Value column="es_description">Obtains 100% subsidy</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Does not obtain 100% subsidy</Value>
      <Value column="es_description">Does not obtain 100% subsidy</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [10]:
# Load the 60%-subsidy dimension and print it as an <InlineTable> XML snippet.
# NOTE(review): the English label column is passed here, and the printed
# "es_description" values come out in English too; the id 0 row prints the
# literal text "nan" - confirm both are intended.
df4 = download_file(remote_path, local_path, 'subsidized_60pc_id.csv')
print(
    inline_table_xml(df4, 'subsidized_60pc', 'subsidized_60pc_id', 'subsidized_60pc_en')
)

Already downloaded. Using: ../data/subsidized_60pc_id.csv

<InlineTable alias="subsidized_60pc">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Obtains 60% subsidy</Value>
      <Value column="es_description">Obtains 60% subsidy</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Does not obtain 60% subsidy</Value>
      <Value column="es_description">Does not obtain 60% subsidy</Value>
    </Row>
  </Rows>
</InlineTable>
    
