# Import & Load CASEN household data

### Config

In [1]:
import sys
import configparser
# Read the project-wide settings file. The handle is opened in a `with`
# block so it is closed as soon as parsing finishes instead of being left
# open for the lifetime of the kernel.
config = configparser.ConfigParser()
with open('../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Make the project's local helper modules importable from this notebook.
sys.path.insert(0, config.get('PATHS','libs_path'))
# Connection string later handed to sqlalchemy.create_engine.
engine_path = config.get('DATABASE','engine_path')

In [2]:
# Directory passed to download_file as the local target for fetched CSVs.
local_path = '../data/'
# Base URL of the household analysis outputs; id lookup files live under
# its 'ids/' sub-path.
remote_path = 'http://pacha.datawheel.us/economia/casen/analysis/3_household/'

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Load data

In [4]:
# Fetch the household fact table through the project's download helper.
# NOTE(review): download_file is defined in commons.py; the later cells use
# its return value as a pandas DataFrame.
df1 = download_file(remote_path, local_path, 'household_for_db.csv')
# Show the column names so the schema can be checked before loading.
df1.columns.tolist()

Downloading... http://pacha.datawheel.us/economia/casen/analysis/3_household/household_for_db.csv
Encoding: ascii


['comuna_id',
 'household_type',
 'walls_material',
 'floor_material',
 'ceiling_material',
 'households_in_land',
 'land_using',
 'household_sq_meters',
 'household_using',
 'family_member_owner',
 'family_member_owner_2',
 'subsidy_or_program',
 'credit',
 'paying_credit',
 'water_source',
 'water_distribution',
 'waste_disposal',
 'electricity',
 'rooms',
 'bathrooms',
 'families_in_household',
 'reason_to_share',
 'cooking_energy_source',
 'heating_energy_source',
 'hot_water_energy_source',
 'less_than_8_blocks_public_transport',
 'less_than_20_blocks_educational_center',
 'less_than_20_blocks_health_center',
 'less_than_20_blocks_market',
 'less_than_20_blocks_atm',
 'less_than_20_blocks_sports_center',
 'less_than_20_blocks_green_areas',
 'less_than_20_blocks_community_equipment',
 'less_than_20_blocks_pharmacy',
 'affected_by_acoustic_contamination',
 'affected_by_air_contamination',
 'affected_by_river_or_lake_contamination',
 'affected_by_public_water_source_contamination',
 

In [5]:
# Columns that may contain missing values. NaN cannot be cast to a plain
# integer dtype, so they are filled with the sentinel -1 here; the "Updates"
# section further down resets -1 to NULL in the database.
sentinel_columns = [
    'family_member_owner',
    'family_member_owner_2',
    'subsidy_or_program',
    'credit',
    'paying_credit',
    'reason_to_share',
]
df1[sentinel_columns] = df1[sentinel_columns].fillna(value=-1)

# Columns cast to integer as they are (no sentinel filling applied).
int_columns = [
    'year',
    'comuna_datachile_id',
    'household_type',
    'walls_material',
    'floor_material',
    'ceiling_material',
    'households_in_land',
    'land_using',
    'household_sq_meters',
    'household_using',
    'water_source',
    'water_distribution',
    'waste_disposal',
    'electricity',
    'rooms',
    'bathrooms',
    'families_in_household',
    'cooking_energy_source',
    'heating_energy_source',
    'hot_water_energy_source',
    'less_than_8_blocks_public_transport',
    'less_than_20_blocks_educational_center',
    'less_than_20_blocks_health_center',
    'less_than_20_blocks_market',
    'less_than_20_blocks_atm',
    'less_than_20_blocks_sports_center',
    'less_than_20_blocks_green_areas',
    'less_than_20_blocks_community_equipment',
    'less_than_20_blocks_pharmacy',
    'affected_by_acoustic_contamination',
    'affected_by_air_contamination',
    'affected_by_river_or_lake_contamination',
    'affected_by_public_water_source_contamination',
    'affected_by_graffitis_or_advertising',
    'affected_by_accumulation_of_trash_public_areas',
    'affected_by_animal_or_insect_plague',
    'expr',
    'expc',
]

# Cast every listed column (plain and sentinel-filled alike) to 'int'.
df1 = df1.astype(dict.fromkeys(int_columns + sentinel_columns, 'int'))

# Write the frame to environment.fact_household_casen using the project's
# Postgres driver (the captured output shows a DROP TABLE IF EXISTS followed
# by a CREATE TABLE for that table).
engine = create_engine(engine_path)
db = postgres.PostgresDriver(engine)
db.to_sql(df1, 'environment', 'fact_household_casen')

DROP TABLE IF EXISTS environment.fact_household_casen;
CREATE TABLE "environment"."fact_household_casen" (
"comuna_id" INTEGER,
  "household_type" INTEGER,
  "walls_material" INTEGER,
  "floor_material" INTEGER,
  "ceiling_material" INTEGER,
  "households_in_land" INTEGER,
  "land_using" INTEGER,
  "household_sq_meters" INTEGER,
  "household_using" INTEGER,
  "family_member_owner" INTEGER,
  "family_member_owner_2" INTEGER,
  "subsidy_or_program" INTEGER,
  "credit" INTEGER,
  "paying_credit" INTEGER,
  "water_source" INTEGER,
  "water_distribution" INTEGER,
  "waste_disposal" INTEGER,
  "electricity" INTEGER,
  "rooms" INTEGER,
  "bathrooms" INTEGER,
  "families_in_household" INTEGER,
  "reason_to_share" INTEGER,
  "cooking_energy_source" INTEGER,
  "heating_energy_source" INTEGER,
  "hot_water_energy_source" INTEGER,
  "less_than_8_blocks_public_transport" INTEGER,
  "less_than_20_blocks_educational_center" INTEGER,
  "less_than_20_blocks_health_center" INTEGER,
  "less_than_20_block

### Updates

In [6]:
# The six columns below were loaded with -1 wherever the value was missing;
# turn that sentinel back into a real NULL in the fact table.
restore_nulls_sql = """
UPDATE environment.fact_household_casen SET family_member_owner = NULL where family_member_owner = -1;
UPDATE environment.fact_household_casen SET family_member_owner_2 = NULL where family_member_owner_2 = -1;
UPDATE environment.fact_household_casen SET subsidy_or_program = NULL where subsidy_or_program = -1;
UPDATE environment.fact_household_casen SET credit = NULL where credit = -1;
UPDATE environment.fact_household_casen SET paying_credit = NULL where paying_credit = -1;
UPDATE environment.fact_household_casen SET reason_to_share = NULL where reason_to_share = -1;
"""
engine.execute(restore_nulls_sql)

<sqlalchemy.engine.result.ResultProxy at 0x10af06080>

In [7]:
# Add the date_id foreign-key column. IF NOT EXISTS keeps this cell
# re-runnable: without it a second execution aborts with
# "column already exists". Requires PostgreSQL 9.6 or newer.
engine.execute("""
ALTER TABLE environment.fact_household_casen
  ADD COLUMN IF NOT EXISTS date_id INTEGER; 
""")

# Point every row at the dim_date entry for January 1st of its survey year.
engine.execute("""
UPDATE environment.fact_household_casen
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = environment.fact_household_casen.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")


<sqlalchemy.engine.result.ResultProxy at 0x115496cf8>

### Dims

In [8]:
# Every column of the fact table is treated as a dimension except the
# geography keys, the year and the two 'exp*' columns removed below.
dims = list(df1)
for non_dimension in ('comuna_id', 'expr', 'expc', 'year', 'comuna_datachile_id'):
    dims.remove(non_dimension)

# Id lookup files are published under the 'ids/' sub-path of the remote base.
ids_remote_path = remote_path + 'ids/'

for dim_name in dims:
    print(dim_name)
    # Download '<dimension>_id.csv' and print the <InlineTable> XML that
    # inline_table_xml builds from it.
    dim_ids = download_file(ids_remote_path, local_path, dim_name + '_id.csv')
    print(inline_table_xml(dim_ids, 'value', 'id', 'value'))

    


household_type
Downloading... http://pacha.datawheel.us/economia/casen/analysis/3_household/ids/household_type_id.csv
Encoding: ISO-8859-2

<InlineTable alias="value">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">1</Value>
      <Value column="description">casa aislada (no pareada)</Value>
      <Value column="es_description">casa aislada (no pareada)</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">casa pareada por un lado</Value>
      <Value column="es_description">casa pareada por un lado</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">casa pareada por ambos lados</Value>
      <Value column="es_description">casa pareada por ambos lados</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
      <Value col