# Import & Load health data - 3 tables

### Config

In [12]:
import sys
import configparser
config = configparser.ConfigParser()
config.read_file(open('../../settings.ini'))

sys.path.insert(0, config.get('PATHS','libs_path'))
engine_path = config.get('DATABASE','engine_path')

### Main params

In [13]:
remote_path = 'http://pacha.datawheel.us/datachile/health/basic_health_indicators/'
local_path = '../data/'

### Imports

In [14]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Load death causes

In [4]:
df = download_file(remote_path,local_path,'death_causes.csv')
df = df.fillna(0)
df = df.astype({'year':'int','region_id':'int','sex_id':'int','casualities_count':'int'})
list(df)

Already downloaded. Using: ../data/death_causes.csv
Encoding: ascii


['year',
 'region_id',
 'cie_10_id',
 'sex_id',
 'casualities_count',
 'casualities_rate_per_100_inhabitants']

In [5]:
engine = create_engine(engine_path)
db = postgres.PostgresDriver(engine)
db.to_sql(df, 'health', 'fact_death_causes')

DROP TABLE IF EXISTS health.fact_death_causes;
CREATE TABLE "health"."fact_death_causes" (
"year" INTEGER,
  "region_id" INTEGER,
  "cie_10_id" TEXT,
  "sex_id" INTEGER,
  "casualities_count" INTEGER,
  "casualities_rate_per_100_inhabitants" REAL
)
COPY "health"."fact_death_causes" ("year","region_id","cie_10_id","sex_id","casualities_count","casualities_rate_per_100_inhabitants") FROM STDIN WITH CSV HEADER DELIMITER ',';


In [6]:
engine.execute("""
CREATE INDEX fact_death_causes_region_id 
ON health.fact_death_causes (region_id)
""")

engine.execute("""
ALTER TABLE health.fact_death_causes
  ADD COLUMN date_id INTEGER; 
""")

engine.execute("""
UPDATE health.fact_death_causes
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = health.fact_death_causes.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")

<sqlalchemy.engine.result.ResultProxy at 0x10c075240>

### Load life expectancy

In [15]:
df = download_file(remote_path,local_path,'life_expectancy.csv')
df = df.astype({'year':'int','region_id':'int','sex_id':'int'})
list(df)

Already downloaded. Using: ../data/life_expectancy.csv


['year',
 'region_id',
 'sex_id',
 'life_expectancy',
 'mortality_rate_per_100_inhabitants']

In [16]:
engine = create_engine(engine_path)
db = postgres.PostgresDriver(engine)
db.to_sql(df, 'health', 'fact_life_expectancy')

DROP TABLE IF EXISTS health.fact_life_expectancy;
CREATE TABLE "health"."fact_life_expectancy" (
"year" INTEGER,
  "region_id" INTEGER,
  "sex_id" INTEGER,
  "life_expectancy" REAL,
  "mortality_rate_per_100_inhabitants" REAL
)
COPY "health"."fact_life_expectancy" ("year","region_id","sex_id","life_expectancy","mortality_rate_per_100_inhabitants") FROM STDIN WITH CSV HEADER DELIMITER ',';


In [17]:
engine.execute("""
CREATE INDEX fact_life_expectancy_region_id 
ON health.fact_life_expectancy (region_id)
""")

engine.execute("""
ALTER TABLE health.fact_life_expectancy
  ADD COLUMN date_id INTEGER; 
""")

engine.execute("""
UPDATE health.fact_life_expectancy
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = health.fact_life_expectancy.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")

<sqlalchemy.engine.result.ResultProxy at 0x10f6d7710>

### Load health access

In [5]:
dfha = download_file(remote_path,local_path,'health_access_indicators.csv')
dfha = dfha.astype({'year':'int','region_id':'int','primary_healthcare':'int','specialized_healthcare':'int','urgency_healthcare':'int'})
list(dfha)

Downloading... http://pacha.datawheel.us/datachile/health/basic_health_indicators/health_access_indicators.csv


['year',
 'region_id',
 'primary_healthcare',
 'specialized_healthcare',
 'urgency_healthcare',
 'dental_discharges_per_100_inhabitants']

In [6]:
engine = create_engine(engine_path)
db = postgres.PostgresDriver(engine)
db.to_sql(dfha, 'health', 'fact_health_access')

DROP TABLE IF EXISTS health.fact_health_access;
CREATE TABLE "health"."fact_health_access" (
"year" INTEGER,
  "region_id" INTEGER,
  "primary_healthcare" INTEGER,
  "specialized_healthcare" INTEGER,
  "urgency_healthcare" INTEGER,
  "dental_discharges_per_100_inhabitants" REAL
)
COPY "health"."fact_health_access" ("year","region_id","primary_healthcare","specialized_healthcare","urgency_healthcare","dental_discharges_per_100_inhabitants") FROM STDIN WITH CSV HEADER DELIMITER ',';


In [7]:
engine.execute("""
CREATE INDEX fact_health_access_region_id 
ON health.fact_health_access (region_id)
""")

engine.execute("""
ALTER TABLE health.fact_health_access
  ADD COLUMN date_id INTEGER; 
""")

engine.execute("""
UPDATE health.fact_health_access
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = health.fact_health_access.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")

<sqlalchemy.engine.result.ResultProxy at 0x10f6a8eb8>

### Related dim CIE codes

In [13]:
d1 = download_file(remote_path,local_path,'cie_10_id.csv')
list(d1)
print (inline_table_xml(d1, 'cie_10', 'cie_10_id', 'cie_10'))

Downloading... http://pacha.datawheel.us/health/basic_health_indicators/cie_10_id.csv
Encoding: TIS-620

<InlineTable alias="cie_10">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">C00-D48</Value>
      <Value column="description">Tumores (neoplasias)</Value>
      <Value column="es_description">Tumores (neoplasias)</Value>
    </Row>
  <Row>
      <Value column="id">E00-E90</Value>
      <Value column="description">Enfermedades endocrinas, nutricionales y metabรณlicas</Value>
      <Value column="es_description">Enfermedades endocrinas, nutricionales y metabรณlicas</Value>
    </Row>
  <Row>
      <Value column="id">I00-I99</Value>
      <Value column="description">Enfermedades del sistema circulatorio</Value>
      <Value column="es_description">Enfermedades del sistema circulatorio</Value>
    </Row>
  <Row>
 