# Import & Load NESI data

### Config

In [1]:
import sys
import configparser
# Parse the shared project settings. The context manager guarantees the INI
# file handle is closed as soon as it has been read (the bare open() call
# used previously left it open until garbage collection).
config = configparser.ConfigParser()
with open('../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the configured libs directory first on the import path; the local
# helper modules imported further down (postgres.py, commons.py) are
# presumably resolved from there.
sys.path.insert(0, config.get('PATHS','libs_path'))
# Database URL handed to sqlalchemy.create_engine in the ingest step.
engine_path = config.get('DATABASE','engine_path')

### Main params

In [2]:
# Directory handed to download_file as the local destination/cache location.
local_path = '../data/'
# Base URL that hosts the NESI "wages by categories" CSV extracts.
# NOTE(review): served over plain HTTP; confirm whether HTTPS is available.
remote_path = 'http://pacha.datawheel.us/economia/nesi/analysis/2_wages_by_categories/2_csv/'

### Imports

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Load & rename

In [4]:
# Source column -> fact-table column; dimension keys get an *_id suffix.
id_renames = {
    'occupation': 'occupation_id',
    'icse': 'icse_id',
    'isced': 'isced_id',
    'isco': 'isco_id',
    'journey': 'journey_id',
    'sex': 'sex_id',
}

# Columns kept for the fact table, in the order they are written to the DB.
fact_columns = [
    'year', 'comuna_datachile_id', 'age', 'occupation_id', 'sex_id',
    'icse_id', 'isced_id', 'isco_id', 'journey_id', 'age_range_id',
    'income', 'weight',
]

# Load the extract, rename, project to the fact columns and cast everything
# except 'weight' to int ('weight' keeps the dtype it was loaded with).
df = (
    download_file(remote_path, local_path, 'wages_by_categories.csv')
    .rename(columns=id_renames)[fact_columns]
    .astype({column: 'int' for column in fact_columns if column != 'weight'})
)

# Show the resulting column order as the cell output.
list(df)

['year',
 'comuna_datachile_id',
 'age',
 'occupation_id',
 'sex_id',
 'icse_id',
 'isced_id',
 'isco_id',
 'journey_id',
 'age_range_id',
 'income',
 'weight']

### Ingest

In [5]:
# Open a SQLAlchemy engine on the database URL read from settings.ini.
engine = create_engine(engine_path)
# Wrap the engine in the project's local Postgres helper (postgres.py).
db = postgres.PostgresDriver(engine)
# Write the fact frame to economy.fact_income_nesi. Per the recorded output
# below, this creates the schema if needed, DROPs and re-CREATEs the table,
# then bulk-loads the rows with COPY ... FROM STDIN, so any existing table
# contents (and columns/indexes added later in this notebook) are discarded
# on every run.
db.to_sql(df, 'economy', 'fact_income_nesi')

CREATE SCHEMA IF NOT EXISTS economy;
DROP TABLE IF EXISTS economy.fact_income_nesi;
CREATE TABLE "economy"."fact_income_nesi" (
"year" INTEGER,
  "comuna_datachile_id" INTEGER,
  "age" INTEGER,
  "occupation_id" INTEGER,
  "sex_id" INTEGER,
  "icse_id" INTEGER,
  "isced_id" INTEGER,
  "isco_id" INTEGER,
  "journey_id" INTEGER,
  "age_range_id" INTEGER,
  "income" INTEGER,
  "weight" REAL
)
COPY "economy"."fact_income_nesi" ("year","comuna_datachile_id","age","occupation_id","sex_id","icse_id","isced_id","isco_id","journey_id","age_range_id","income","weight") FROM STDIN WITH CSV HEADER DELIMITER ',';


In [6]:
### Foreign & indexes

In [7]:
# Post-load DDL/DML on economy.fact_income_nesi.
# The ADD COLUMN and CREATE INDEX statements use IF NOT EXISTS so that this
# cell can be re-run on its own without failing with "already exists"
# (requires PostgreSQL 9.6+ for ADD COLUMN IF NOT EXISTS and 9.5+ for
# CREATE INDEX IF NOT EXISTS).

# 1. Add the date key column that links the fact table to public.dim_date.
engine.execute("""
ALTER TABLE economy.fact_income_nesi
  ADD COLUMN IF NOT EXISTS date_id INTEGER;
""")

# 2. Populate date_id with the dim_date row for January 1st of each row's year.
engine.execute("""
UPDATE economy.fact_income_nesi
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = economy.fact_income_nesi.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")

# 3. Index the comuna key.
engine.execute("""
CREATE INDEX IF NOT EXISTS fact_income_nesi_comuna_datachile_id
ON economy.fact_income_nesi (comuna_datachile_id)
""")

<sqlalchemy.engine.result.ResultProxy at 0x10bceddd8>

### Related dim

In [8]:
# Load the age-range lookup table and print it as an <InlineTable> XML
# snippet (alias "age_range").
d1 = download_file(remote_path, local_path, 'age_range_id.csv')
print(inline_table_xml(d1, 'age_range', 'age_range_id', 'age_range'))


<InlineTable alias="age_range">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">1</Value>
      <Value column="description">15 a 19</Value>
      <Value column="es_description">15 a 19</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">20 a 24</Value>
      <Value column="es_description">20 a 24</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">25 a 29</Value>
      <Value column="es_description">25 a 29</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
      <Value column="description">30 a 65</Value>
      <Value column="es_description">30 a 65</Value>
    </Row>
  <Row>
      <Value column="id">5</Value>
      <Value column="description">65 a edad max</Value>
      <Value column="es_description">65 a edad max

In [9]:
# Load the ICSE lookup table and print it as an <InlineTable> XML snippet
# (alias "icse").
d2 = download_file(remote_path, local_path, 'icse_id.csv')
print(inline_table_xml(d2, 'icse', 'icse_id', 'icse'))


<InlineTable alias="icse">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">No corresponde</Value>
      <Value column="es_description">No corresponde</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Empleador</Value>
      <Value column="es_description">Empleador</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Cuenta Propia</Value>
      <Value column="es_description">Cuenta Propia</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">Asalariado Sector Privado</Value>
      <Value column="es_description">Asalariado Sector Privado</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
      <Value column="description">Asalariado 

In [10]:
# Load the ISCED lookup table and print it as an <InlineTable> XML snippet
# (alias "isced").
d3 = download_file(remote_path, local_path, 'isced_id.csv')
print(inline_table_xml(d3, 'isced', 'isced_id', 'isced'))


<InlineTable alias="isced">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">1</Value>
      <Value column="description">Nunca estudió</Value>
      <Value column="es_description">Nunca estudió</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Educación Preescolar</Value>
      <Value column="es_description">Educación Preescolar</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">Educación Primaria (nivel 1)</Value>
      <Value column="es_description">Educación Primaria (nivel 1)</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
      <Value column="description">Educación Primaria (nivel 2)</Value>
      <Value column="es_description">Educación Primaria (nivel 2)</Value>
    </Row>
  <Row>
      <Value column="id

In [11]:
# Load the ISCO lookup table and print it as an <InlineTable> XML snippet
# (alias "isco").
# NOTE(review): the recorded output has an id 0 row whose description is the
# literal text "nan"; presumably a missing label in the CSV. Confirm.
d4 = download_file(remote_path, local_path, 'isco_id.csv')
print(inline_table_xml(d4, 'isco', 'isco_id', 'isco'))


<InlineTable alias="isco">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Miembros del poder ejecutivo, legislativo y de la administración pública y de empresas públicas</Value>
      <Value column="es_description">Miembros del poder ejecutivo, legislativo y de la administración pública y de empresas públicas</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Profesionales científicos e intelectuales</Value>
      <Value column="es_description">Profesionales científicos e intelectuales</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description

In [12]:
# Load the ISIC lookup table and print it as an <InlineTable> XML snippet
# (alias "isic").
# NOTE(review): the fact-table column list in the "Load & rename" cell has no
# isic_id column; confirm this dimension is still needed here.
# NOTE(review): the recorded output has an id 0 row whose description is the
# literal text "nan"; presumably a missing label in the CSV. Confirm.
d5 = download_file(remote_path, local_path, 'isic_id.csv')
print(inline_table_xml(d5, 'isic', 'isic_id', 'isic'))


<InlineTable alias="isic">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Agricultura, ganadería, caza y silvicultura</Value>
      <Value column="es_description">Agricultura, ganadería, caza y silvicultura</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Pesca</Value>
      <Value column="es_description">Pesca</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">Explotación de minas y canteras</Value>
      <Value column="es_description">Explotación de minas y canteras</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
   

In [13]:
# Load the journey lookup table and print it as an <InlineTable> XML snippet
# (alias "journey").
# NOTE(review): the recorded output has an id 0 row whose description is the
# literal text "nan"; presumably a missing label in the CSV. Confirm.
d6 = download_file(remote_path, local_path, 'journey_id.csv')
print(inline_table_xml(d6, 'journey', 'journey_id', 'journey'))


<InlineTable alias="journey">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Completa</Value>
      <Value column="es_description">Completa</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Parcial</Value>
      <Value column="es_description">Parcial</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [14]:
# Load the occupation lookup table and print it as an <InlineTable> XML
# snippet (alias "occupation").
# NOTE(review): the recorded output contains two rows with id 0 (one labelled
# "nan", one "Ocupados con menos de 1 mes en el empleo actual"); verify the
# ids in occupation_id.csv before using this table as a dimension.
d7 = download_file(remote_path, local_path, 'occupation_id.csv')
print(inline_table_xml(d7, 'occupation', 'occupation_id', 'occupation'))


<InlineTable alias="occupation">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">0</Value>
      <Value column="description">Ocupados con menos de 1 mes en el empleo actual</Value>
      <Value column="es_description">Ocupados con menos de 1 mes en el empleo actual</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Ocupados con más de 1 mes en el empleo actual</Value>
      <Value column="es_description">Ocupados con más de 1 mes en el empleo actual</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [15]:
# Load the sex lookup table and print it as an <InlineTable> XML snippet
# (alias "sex").
d8 = download_file(remote_path, local_path, 'sex_id.csv')
print(inline_table_xml(d8, 'sex', 'sex_id', 'sex'))


<InlineTable alias="sex">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">1</Value>
      <Value column="description">Hombre</Value>
      <Value column="es_description">Hombre</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Mujer</Value>
      <Value column="es_description">Mujer</Value>
    </Row>
  </Rows>
</InlineTable>
    
