# Import & Load NENE data

### Config

In [1]:
import sys
import configparser
# Load project settings. The file is opened in a context manager so the
# handle is closed as soon as parsing finishes instead of being leaked.
config = configparser.ConfigParser()
with open('../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the configured library directory first on the import path; the local
# modules imported further down (postgres.py, commons.py) are presumably
# resolved from there.
sys.path.insert(0, config.get('PATHS','libs_path'))
# Database engine URL, consumed by create_engine() in the ingest cell.
engine_path = config.get('DATABASE','engine_path')

### Main params

In [2]:
# Base URL that hosts the occupational-status fact CSV and its dimension CSVs.
remote_path = "http://pacha.datawheel.us/economia/nene/analysis/2_occupational_status/4_csv/"
# Local directory handed to download_file() alongside the remote path.
local_path = "../data/"

### Imports

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Load file

In [4]:
# Columns cast to int after loading. Keeping them in a single list avoids
# the duplicated 'icse_id' key that the hand-written dict literal contained.
int_columns = [
    'year', 'month', 'region_id', 'age', 'age_range_id', 'sex_id',
    'icse_id', 'isco_id', 'isced_id', 'occupied_id',
    'general_economic_condition',
]

# Fetch the fact CSV, rename the Spanish survey year/month columns, and cast
# the code columns to int. 'weights' is intentionally left untouched.
df = (
    download_file(remote_path, local_path, 'occupational_status.csv')
    .rename(columns={'ano_encuesta': 'year', 'mes_encuesta': 'month'})
    .astype({column: 'int' for column in int_columns})
)

# Show the resulting column names as the cell output.
list(df)

['year',
 'month',
 'region_id',
 'age',
 'age_range_id',
 'sex_id',
 'icse_id',
 'isco_id',
 'isced_id',
 'occupied_id',
 'general_economic_condition',
 'weights']

### Ingest

In [5]:
# Build the SQLAlchemy engine from the URL read from settings.ini in the
# config cell; the same engine is reused by the date-key/index cell below.
engine = create_engine(engine_path)
# Wrap the engine in the project-local driver (postgres.py).
db = postgres.PostgresDriver(engine)
# Load df into economy.fact_occupational_status_nene. Per the SQL logged in
# this cell's output, the table is dropped and recreated before the COPY, so
# each run replaces the previous contents.
db.to_sql(df, 'economy', 'fact_occupational_status_nene')

CREATE SCHEMA IF NOT EXISTS economy;
DROP TABLE IF EXISTS economy.fact_occupational_status_nene;
CREATE TABLE "economy"."fact_occupational_status_nene" (
"year" INTEGER,
  "month" INTEGER,
  "region_id" INTEGER,
  "age" INTEGER,
  "age_range_id" INTEGER,
  "sex_id" INTEGER,
  "icse_id" INTEGER,
  "isco_id" INTEGER,
  "isced_id" INTEGER,
  "occupied_id" INTEGER,
  "general_economic_condition" INTEGER,
  "weights" REAL
)
COPY "economy"."fact_occupational_status_nene" ("year","month","region_id","age","age_range_id","sex_id","icse_id","isco_id","isced_id","occupied_id","general_economic_condition","weights") FROM STDIN WITH CSV HEADER DELIMITER ',';


### Indexes & FK

In [6]:
# Add the date key column. IF NOT EXISTS (PostgreSQL 9.6+) keeps this cell
# re-runnable on its own; without it a second run fails because the column
# already exists.
engine.execute("""
ALTER TABLE economy.fact_occupational_status_nene
  ADD COLUMN IF NOT EXISTS date_id INTEGER;
""")

# Populate date_id from public.dim_date by matching the survey year and month
# against the dimension row for the first day of that month.
engine.execute("""
UPDATE economy.fact_occupational_status_nene
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = economy.fact_occupational_status_nene.year
      AND dim_date.month_of_year = economy.fact_occupational_status_nene.month
      AND dim_date.day_of_month = 1
""")

# Index the region key. IF NOT EXISTS (PostgreSQL 9.5+) makes a re-run a
# no-op instead of a "relation already exists" error.
engine.execute("""
CREATE INDEX IF NOT EXISTS fact_occupational_status_nene_region_id
ON economy.fact_occupational_status_nene (region_id)
""")

<sqlalchemy.engine.result.ResultProxy at 0x10d0e0668>

### Related dim

In [7]:
# Fetch the age-range dimension CSV and print it as an <InlineTable> XML
# snippet (alias "age_range").
# NOTE(review): the captured output renders id 0 with description "nan" --
# presumably a missing label in the CSV; confirm before using the XML.
d1 = download_file(remote_path, local_path, 'age_range_id.csv')
print(
    inline_table_xml(d1, 'age_range', 'age_range_id', 'age_range')
)


<InlineTable alias="age_range">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Menos de 15</Value>
      <Value column="es_description">Menos de 15</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">15 a 19</Value>
      <Value column="es_description">15 a 19</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">20 a 24</Value>
      <Value column="es_description">20 a 24</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
      <Value column="description">25 a 29</Value>
      <Value column="es_description">25 a 29</Value>
   

In [8]:
# Fetch the ICSE dimension CSV and print it as an <InlineTable> XML snippet
# (alias "icse").
d2 = download_file(remote_path, local_path, 'icse_id.csv')
print(
    inline_table_xml(d2, 'icse', 'icse_id', 'icse')
)


<InlineTable alias="icse">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">No corresponde</Value>
      <Value column="es_description">No corresponde</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Empleador o patrón</Value>
      <Value column="es_description">Empleador o patrón</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Trabajador por cuenta propia</Value>
      <Value column="es_description">Trabajador por cuenta propia</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">Asalariado sector privado</Value>
      <Value column="es_description">Asalariado sector privado</Value>
    </Row>
  <Row>
      <Value column="id">4</Valu

In [9]:
# Fetch the ISCED dimension CSV and print it as an <InlineTable> XML snippet
# (alias "isced").
# NOTE(review): the captured output renders id 0 with description "nan" --
# presumably a missing label in the CSV; confirm before using the XML.
d3 = download_file(remote_path, local_path, 'isced_id.csv')
print(
    inline_table_xml(d3, 'isced', 'isced_id', 'isced')
)


<InlineTable alias="isced">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Nunca estudió</Value>
      <Value column="es_description">Nunca estudió</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Educación preescolar</Value>
      <Value column="es_description">Educación preescolar</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">Educación primaria (nivel 1)</Value>
      <Value column="es_description">Educación primaria (nivel 1)</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
      <Value column="description">Educ

In [10]:
# Fetch the ISCO dimension CSV and print it as an <InlineTable> XML snippet
# (alias "isco").
# NOTE(review): the captured output renders id 0 with description "nan" --
# presumably a missing label in the CSV; confirm before using the XML.
d4 = download_file(remote_path, local_path, 'isco_id.csv')
print(
    inline_table_xml(d4, 'isco', 'isco_id', 'isco')
)


<InlineTable alias="isco">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Miembros del poder ejecutivo, legislativo y de la administración pública y de empresas públicas</Value>
      <Value column="es_description">Miembros del poder ejecutivo, legislativo y de la administración pública y de empresas públicas</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Profesionales científicos e intelectuales</Value>
      <Value column="es_description">Profesionales científicos e intelectuales</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description

In [11]:
# Fetch the sex dimension CSV and print it as an <InlineTable> XML snippet
# (alias "sex").
# NOTE(review): the captured output renders id 0 with description "nan" --
# presumably a missing label in the CSV; confirm before using the XML.
d5 = download_file(remote_path, local_path, 'sex_id.csv')
print(
    inline_table_xml(d5, 'sex', 'sex_id', 'sex')
)


<InlineTable alias="sex">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Mujer</Value>
      <Value column="es_description">Mujer</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Hombre</Value>
      <Value column="es_description">Hombre</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [12]:
# Fetch the general-economic-condition dimension CSV and print it as an
# <InlineTable> XML snippet (alias "general_economic_condition").
d6 = download_file(remote_path, local_path, 'general_economic_condition_id.csv')
print(
    inline_table_xml(
        d6,
        'general_economic_condition',
        'general_economic_condition_id',
        'general_economic_condition',
    )
)


<InlineTable alias="general_economic_condition">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">Menor de quince años</Value>
      <Value column="es_description">Menor de quince años</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">Ocupado tradicional</Value>
      <Value column="es_description">Ocupado tradicional</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Ocupado no tradicional</Value>
      <Value column="es_description">Ocupado no tradicional</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">Ocupado ausente</Value>
      <Value column="es_description">Ocupado ausente</Value>
    </Row>
  <Row>
      <Value column="id">4</

In [13]:
# Fetch the occupied-status dimension CSV and print it as an <InlineTable>
# XML snippet (alias "occupied").
# NOTE(review): the captured output renders id 0 with description "nan" --
# presumably a missing label in the CSV; confirm before using the XML.
d7 = download_file(remote_path, local_path, 'occupied_id.csv')
print(
    inline_table_xml(d7, 'occupied', 'occupied_id', 'occupied')
)


<InlineTable alias="occupied">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">0</Value>
      <Value column="description">nan</Value>
      <Value column="es_description">nan</Value>
    </Row>
  <Row>
      <Value column="id">1</Value>
      <Value column="description">ocupado</Value>
      <Value column="es_description">ocupado</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">desocupado</Value>
      <Value column="es_description">desocupado</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">inactivo</Value>
      <Value column="es_description">inactivo</Value>
    </Row>
  </Rows>
</InlineTable>
    
