# Import & Load PSU

### Config

In [1]:
import sys
import configparser
# Load the shared project settings from settings.ini, two directories up.
config = configparser.ConfigParser()
# Read through a context manager so the file handle is closed as soon as
# parsing finishes instead of lingering until garbage collection.
with open('../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the directory named by PATHS.libs_path at the front of the import path;
# the local modules imported further down are presumably found through it.
sys.path.insert(0, config.get('PATHS','libs_path'))
# Database URL, later handed to SQLAlchemy's create_engine.
engine_path = config.get('DATABASE','engine_path')

### Main params

In [2]:
# Base URL the PSU CSV files are downloaded from.
remote_path = "http://pacha.datawheel.us/educacion/psu/csv/"
# Directory (relative to this notebook) passed to download_file alongside the URL.
local_path = "../data/"

### Imports

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Load file

In [4]:
# Columns kept from the raw panel; every one of them ends up as an integer.
psu_columns = [
    'year',
    'comuna_datachile_id',
    'sex_id',
    'language_test',
    'math_test',
    'history_test',
    'sciences_test',
    'grades_to_points',
    'ranking_points',
]

# download_file comes from the local commons module; its result is used as a DataFrame.
df = download_file(remote_path, local_path, 'panel_psu_2004_2016.csv')

# Keep only the fact columns, replace missing values with the placeholder -1
# (a NaN cannot be cast to a plain int), then cast every column to int.
# A later cell turns the -1 placeholders of the score columns back into NULL.
df = (
    df[psu_columns]
    .fillna(-1)
    .astype({column: 'int' for column in psu_columns})
)

# Show the resulting column names.
list(df)

['year',
 'comuna_datachile_id',
 'sex_id',
 'language_test',
 'math_test',
 'history_test',
 'sciences_test',
 'grades_to_points',
 'ranking_points']

In [5]:
# Row count of the prepared frame, shown as a quick sanity check before the ingest cell.
len(df)

6218946

### Ingest

In [6]:
# Build the SQLAlchemy engine from the URL read out of settings.ini.
engine = create_engine(engine_path)
# PostgresDriver is defined in the local postgres.py module.
db = postgres.PostgresDriver(engine)
# Write df to education.fact_psu. Judging by the SQL echoed in this cell's output,
# to_sql drops and recreates the table and bulk-loads it with COPY, so re-running
# this cell replaces the table (including columns and indexes added afterwards).
db.to_sql(df, 'education', 'fact_psu')

CREATE SCHEMA IF NOT EXISTS education;
DROP TABLE IF EXISTS education.fact_psu;
CREATE TABLE "education"."fact_psu" (
"year" INTEGER,
  "comuna_datachile_id" INTEGER,
  "sex_id" INTEGER,
  "language_test" INTEGER,
  "math_test" INTEGER,
  "history_test" INTEGER,
  "sciences_test" INTEGER,
  "grades_to_points" INTEGER,
  "ranking_points" INTEGER
)
COPY "education"."fact_psu" ("year","comuna_datachile_id","sex_id","language_test","math_test","history_test","sciences_test","grades_to_points","ranking_points") FROM STDIN WITH CSV HEADER DELIMITER ',';


### Indexes & FK

In [7]:
# Index the comuna key of the fact table.
# IF NOT EXISTS (PostgreSQL 9.5+) keeps this cell re-runnable: without it a second
# run fails because the index is already there.
engine.execute("""
CREATE INDEX IF NOT EXISTS fact_psu_comuna_id
ON education.fact_psu (comuna_datachile_id)
""")

# Add the column that will reference public.dim_date.
# ADD COLUMN IF NOT EXISTS (PostgreSQL 9.6+) makes this step idempotent as well.
engine.execute("""
ALTER TABLE education.fact_psu
  ADD COLUMN IF NOT EXISTS date_id INTEGER;
""")

# Point every fact row at the dim_date entry for January 1st of its year.
# Rows whose year has no such entry keep date_id = NULL.
engine.execute("""
UPDATE education.fact_psu
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = education.fact_psu.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")


<sqlalchemy.engine.result.ResultProxy at 0x126d37d68>

In [8]:
# The -1 values are the placeholders written by fillna(-1) before the integer cast;
# turn them back into NULL for the six score columns.
# One statement does this in a single pass over the table instead of six:
# NULLIF(col, -1) yields NULL when col = -1 and col itself otherwise, and the
# WHERE clause restricts the rewrite to rows holding at least one placeholder.
engine.execute("""
UPDATE education.fact_psu
SET language_test    = NULLIF(language_test, -1),
    math_test        = NULLIF(math_test, -1),
    history_test     = NULLIF(history_test, -1),
    sciences_test    = NULLIF(sciences_test, -1),
    grades_to_points = NULLIF(grades_to_points, -1),
    ranking_points   = NULLIF(ranking_points, -1)
WHERE -1 IN (language_test, math_test, history_test,
             sciences_test, grades_to_points, ranking_points);
""")

<sqlalchemy.engine.result.ResultProxy at 0x10b29e828>

### Dims

Sex_id

```xml
<InlineTable alias="sex">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">No informa</Value>
      <Value column="es_description">No informa</Value>
    </Row>
    <Row>
      <Value column="id">1</Value>
      <Value column="description">Women</Value>
      <Value column="es_description">Mujer</Value>
    </Row>
    <Row>
      <Value column="id">2</Value>
      <Value column="description">Men</Value>
      <Value column="es_description">Hombre</Value>
    </Row>
  </Rows>
</InlineTable>
```