# Import & Load Performance data

### Config

In [1]:
import sys
import configparser
# Load the shared project settings. The file is opened in a `with` block so the
# handle is closed as soon as it has been parsed instead of being left open for
# the lifetime of the kernel.
config = configparser.ConfigParser()
with open('../../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the project's library directory first on the import path
# (presumably where the local postgres.py / commons.py modules live — confirm).
sys.path.insert(0, config.get('PATHS','libs_path'))
# Database connection string, later handed to sqlalchemy.create_engine.
engine_path = config.get('DATABASE','engine_path')

### Main params

In [2]:
# Base URL that every download in this notebook is resolved against.
remote_path = "http://pacha.datawheel.us/educacion/establecimientos/analysis/2_rendimiento/"
# Local directory passed to the download/unzip helpers and used as the prefix
# when reading the extracted CSV.
local_path = "../data/"

### Imports

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Load file

In [4]:
# Download the zipped performance dataset and unpack it under local_path.
# The recorded output shows the request going to remote_path + 'datos_rendimiento.zip'.
download_zip_file(remote_path,local_path,'datos_rendimiento.zip')
# NOTE(review): the recorded output unzips '../data/temp.zip', so download_zip_file
# presumably saves the archive under that fixed name — confirm in commons.py.
# This call is the cell's last expression, so its return value (True in the
# recorded run) is what the cell displays.
extract_zip_file(local_path,'temp.zip')

Downloading... http://pacha.datawheel.us/educacion/establecimientos/analysis/2_rendimiento/datos_rendimiento.zip
Unzipping... ../data/temp.zip


True

In [5]:
# Read the extracted CSV, treat a missing rural_id as 0, and cast every
# identifier column (plus year) to integer in one chained expression.
df = (
    pd.read_csv(
        local_path + 'analysis/2_rendimiento/datos_rendimiento.csv',
        delimiter=",",
    )
    .fillna({'rural_id': 0})
    .astype({
        'year': 'int',
        'comuna_datachile_id': 'int',
        'sex_id': 'int',
        'institution_id': 'int',
        'administration_id': 'int',
        'teaching_id': 'int',
        'rural_id': 'int',
    })
)
# Show the resulting column names as the cell output.
list(df)

['year',
 'institution_id',
 'administration_id',
 'rural_id',
 'teaching_id',
 'sex_id',
 'scores_average',
 'comuna_datachile_id']

### Ingest

In [6]:
# Connect to the database configured in settings.ini; `engine` is reused by the
# index / date_id cell further down.
engine = create_engine(engine_path)
# Project-local driver wrapper (postgres.py) built on top of the engine.
db = postgres.PostgresDriver(engine)
# Load df into education.fact_performance. Per the recorded output this creates
# the schema if needed, DROPs and re-CREATEs the table, then bulk-loads the rows
# with COPY ... FROM STDIN — so any index or extra column added later is lost
# whenever this cell is re-run.
db.to_sql(df, 'education', 'fact_performance')

CREATE SCHEMA IF NOT EXISTS education;
DROP TABLE IF EXISTS education.fact_performance;
CREATE TABLE "education"."fact_performance" (
"year" INTEGER,
  "institution_id" INTEGER,
  "administration_id" INTEGER,
  "rural_id" INTEGER,
  "teaching_id" INTEGER,
  "sex_id" INTEGER,
  "scores_average" REAL,
  "comuna_datachile_id" INTEGER
)
COPY "education"."fact_performance" ("year","institution_id","administration_id","rural_id","teaching_id","sex_id","scores_average","comuna_datachile_id") FROM STDIN WITH CSV HEADER DELIMITER ',';


### Indexes & FK

In [7]:
# Post-load DDL/DML for education.fact_performance.
# Both DDL statements use IF NOT EXISTS so this cell can be re-run on its own
# without failing with "already exists" errors (requires PostgreSQL 9.6+).

# Index on the comuna key.
engine.execute("""
CREATE INDEX IF NOT EXISTS fact_performance_comuna_id
ON education.fact_performance (comuna_datachile_id)
""")

# Add the column that will reference public.dim_date.
engine.execute("""
ALTER TABLE education.fact_performance
  ADD COLUMN IF NOT EXISTS date_id INTEGER;
""")

# Point every fact row at the dim_date entry for January 1st of its year.
# Rows whose year has no matching dim_date entry are not touched by the UPDATE
# and therefore keep date_id = NULL.
# Kept as the last expression so the cell still displays the result object.
engine.execute("""
UPDATE education.fact_performance
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = education.fact_performance.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")


<sqlalchemy.engine.result.ResultProxy at 0x10bfd5eb8>

### Dims

Sex_id

```xml
<InlineTable alias="sex">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">No informa</Value>
      <Value column="es_description">No informa</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Women</Value>
      <Value column="es_description">Mujer</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Men</Value>
      <Value column="es_description">Hombre</Value>
    </Row>
  </Rows>
</InlineTable>
```

In [8]:
# Administration dimension: fetch the lookup CSV and print it as an <InlineTable>
# XML fragment (alias "administration", per the recorded output).
d2 = download_file(remote_path,local_path,'administration_id.csv')
# Presumably the 3rd/4th arguments name the id and description columns of d2 —
# confirm against inline_table_xml in commons.py.
# NOTE(review): the recorded output shows id 0 with the description "nan", i.e. a
# missing label is being rendered as text.
print (inline_table_xml(d2, 'administration', 'administration_id', 'administration'))

Downloading... http://pacha.datawheel.us/educacion/establecimientos/analysis/2_rendimiento/administration_id.csv

<InlineTable alias="administration">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Corporaciรณn Municipal</Value>
      <Value column="es_description">Corporaciรณn Municipal</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Municipal DAEM</Value>
      <Value column="es_description">Municipal DAEM</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">Particular Subvencionado</Value>
      <Value column="es_description">Parti

In [9]:
# Rural/urban dimension: fetch the lookup CSV and print it as an <InlineTable>
# XML fragment (alias "rural", per the recorded output).
d3 = download_file(remote_path,local_path,'rural_id.csv')
# Presumably the 3rd/4th arguments name the id and description columns of d3 —
# confirm against inline_table_xml in commons.py.
# NOTE(review): the recorded output shows id 0 with the description "nan"; id 0 is
# also the value the load cell substitutes for a missing rural_id.
print (inline_table_xml(d3, 'rural', 'rural_id', 'rural'))

Downloading... http://pacha.datawheel.us/educacion/establecimientos/analysis/2_rendimiento/rural_id.csv

<InlineTable alias="rural">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Urbano</Value>
      <Value column="es_description">Urbano</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Rural</Value>
      <Value column="es_description">Rural</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [10]:
# Institution (school) dimension: fetch the lookup CSV and print it as an
# <InlineTable> XML fragment (alias "institution", per the recorded output).
d4 = download_file(remote_path,local_path,'colegios_id.csv')
# Presumably the 3rd/4th arguments name the id and description columns of d4 —
# confirm against inline_table_xml in commons.py.
print (inline_table_xml(d4, 'institution', 'institution_id', 'institution'))

Downloading... http://pacha.datawheel.us/educacion/establecimientos/analysis/2_rendimiento/colegios_id.csv

<InlineTable alias="institution">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">1</Value>
      <Value column="description">LICEO POLITECNICO ARICA</Value>
      <Value column="es_description">LICEO POLITECNICO ARICA</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">ESC. PEDRO VICENTE GUTIERREZ TORRES</Value>
      <Value column="es_description">ESC. PEDRO VICENTE GUTIERREZ TORRES</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
      <Value column="description">LICEO OCTAVIO PALMA PEREZ</Value>
      <Value column="es_description">LICEO OCTAVIO PALMA PEREZ</Value>
    </Row>
  <Row>
      <Value column="id">5</Value>
      <Value column="descripti

In [11]:
# Teaching-level dimension: fetch the lookup CSV and print it as an <InlineTable>
# XML fragment (alias "teaching", per the recorded output).
d5 = download_file(remote_path,local_path,'teaching_id.csv')
# Presumably the 3rd/4th arguments name the id and description columns of d5 —
# confirm against inline_table_xml in commons.py.
# NOTE(review): the recorded output shows id 0 with the description "nan", i.e. a
# missing label is being rendered as text.
print (inline_table_xml(d5, 'teaching', 'teaching_id', 'teaching'))

Downloading... http://pacha.datawheel.us/educacion/establecimientos/analysis/2_rendimiento/teaching_id.csv

<InlineTable alias="teaching">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">10</Value>
      <Value column="description">Educación Parvularia</Value>
      <Value column="es_description">Educación Parvularia</Value>
    </Row>
  <Row>
      <Value column="id">110</Value>
      <Value column="description">Educación Básica</Value>
      <Value column="es_description">Educación Básica</Value>
    </Row>
  <Row>
      <Value column="id">160</Value>
      <Value column="description">Educación Básica Común Adultos (Decreto 584/2007)</Value>
      <Value column="es_