# Import & Load CASEN health data

### Config

In [1]:
import sys
import configparser
# Load the shared project settings from the INI file two directories up.
config = configparser.ConfigParser()
# Use a context manager so the file handle is closed as soon as parsing ends;
# passing a bare open() to read_file() leaves the handle open.
with open('../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the configured library directory first on the import path so the local
# helper modules (postgres.py, commons.py) can be imported below.
sys.path.insert(0, config.get('PATHS','libs_path'))
# Database connection string, handed to create_engine() in a later cell.
engine_path = config.get('DATABASE','engine_path')

In [2]:
# Directory argument passed to download_file() together with remote_path.
local_path = "../data/"
# Base URL that hosts the CASEN health CSV files used in this notebook.
remote_path = "http://pacha.datawheel.us/economia/casen/analysis/4_health/"

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, inline_dimension_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Load data

In [4]:
# Fetch the health-system fact CSV and cast the year and the two id columns
# to integers in a single chained expression.
df1 = download_file(
    remote_path, local_path, 'health_system_2000_2015.csv'
).astype({
    'year': 'int',
    'comuna_datachile_id': 'int',
    'health_system_id': 'int',
})

# Cell output: the column names of the loaded frame.
list(df1.columns)

Downloading... http://pacha.datawheel.us/economia/casen/analysis/4_health/health_system_2000_2015.csv
Encoding: ascii


['health_system_id', 'expc', 'expr', 'year', 'comuna_datachile_id']

In [5]:
# Build the SQLAlchemy engine from the connection string read from settings.ini.
engine = create_engine(engine_path)
# Wrap the engine in the project's Postgres helper (local file postgres.py).
db = postgres.PostgresDriver(engine)
# Load the fact frame into health.fact_health_system_casen. The recorded output
# of this cell shows DROP TABLE IF EXISTS, CREATE TABLE and a COPY ... FROM STDIN,
# i.e. the table is rebuilt from scratch on every run.
db.to_sql(df1, 'health', 'fact_health_system_casen')

DROP TABLE IF EXISTS health.fact_health_system_casen;
CREATE TABLE "health"."fact_health_system_casen" (
"health_system_id" INTEGER,
  "expc" INTEGER,
  "expr" INTEGER,
  "year" INTEGER,
  "comuna_datachile_id" INTEGER
)
COPY "health"."fact_health_system_casen" ("health_system_id","expc","expr","year","comuna_datachile_id") FROM STDIN WITH CSV HEADER DELIMITER ',';


### Updates: add `date_id` to the fact table

In [6]:
# Add the date_id column to the fact table.
# IF NOT EXISTS (PostgreSQL 9.6+) keeps this cell re-runnable: a plain
# ADD COLUMN raises "column already exists" when the cell is executed again
# without first rebuilding the table.
engine.execute("""
ALTER TABLE health.fact_health_system_casen
  ADD COLUMN IF NOT EXISTS date_id INTEGER;
""")

# Fill date_id with the id of the dim_date row for 1 January of each fact
# row's year (month_of_year = 1, day_of_month = 1).
# Left as the last expression so the cell still displays the result object.
engine.execute("""
UPDATE health.fact_health_system_casen
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = health.fact_health_system_casen.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")


<sqlalchemy.engine.result.ResultProxy at 0x101dd64e0>

### Dimensions

In [7]:
# Fetch the health-system lookup CSV (id plus descriptions).
d2 = download_file(remote_path, local_path, 'health_system_id.csv')

# Print the <InlineTable> XML that inline_table_xml() builds from the lookup.
print(
    inline_table_xml(d2, 'health_system', 'health_system_id', 'health_system')
)

Downloading... http://pacha.datawheel.us/economia/casen/analysis/4_health/health_system_id.csv
Encoding: windows-1252

<InlineTable alias="health_system">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">1</Value>
      <Value column="description">FONASA Grupo A</Value>
      <Value column="es_description">FONASA Grupo A</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">FONASA Grupo B</Value>
      <Value column="es_description">FONASA Grupo B</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">FONASA Grupo C</Value>
      <Value column="es_description">FONASA Grupo C</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
      <Value column="description">FONASA Grupo D</Value>
      <Value column="es_description">FONAS